fix: use fuse.js for both fuzzy search and full-text search

pull/1347/head
Tienson Qin 2021-02-21 21:54:46 +08:00
parent 88d1fa8b91
commit fdd1538820
6 changed files with 64 additions and 82 deletions

View File

@ -62,9 +62,8 @@
"diff": "5.0.0",
"diff-match-patch": "^1.0.5",
"electron": "^11.2.0",
"flexsearch": "git+https://github.com/logseq/flexsearch",
"fs": "^0.0.1-security",
"fuzzysort": "git+https://github.com/getstation/fuzzysort#a66f5813825d2415b606cc69129070c4eb612ae2",
"fuse.js": "^6.4.6",
"gulp-cached": "^1.1.1",
"ignore": "^5.1.8",
"jszip": "^3.5.0",

View File

@ -127,13 +127,17 @@
(rum/defc search-auto-complete
[{:keys [pages files blocks]} search-q]
(rum/with-context [[t] i18n/*tongue-context*]
(let [new-page [{:type :new-page}]
new-file (when-let [ext (util/get-file-ext search-q)]
(let [new-file (when-let [ext (util/get-file-ext search-q)]
(when (contains? config/mldoc-support-formats (keyword (string/lower-case ext)))
[{:type :new-file}]))
pages (map (fn [page] {:type :page :data page}) pages)
files (map (fn [file] {:type :file :data file}) files)
blocks (map (fn [block] {:type :block :data block}) blocks)
new-page (if (and (seq pages)
(= (string/lower-case search-q)
(string/lower-case (:data (first pages)))))
[]
[{:type :new-page}])
result (if config/publishing?
(concat pages files blocks)
(concat new-page pages new-file files blocks))]
@ -221,6 +225,8 @@
nil))})])))
(defonce search-timeout (atom nil))
(rum/defc search < rum/reactive
(mixins/event-mixin
(fn [state]
@ -253,12 +259,17 @@
:auto-complete (if (util/chrome?) "chrome-off" "off") ; off not working here
:default-value ""
:on-change (fn [e]
(when @search-timeout
(js/clearTimeout @search-timeout))
(let [value (util/evalue e)]
(if (string/blank? value)
(search-handler/clear-search!)
(do
(state/set-q! value)
(search-handler/search value)))))}]
(reset! search-timeout
(js/setTimeout
#(search-handler/search value)
500))))))}]
(when-not (string/blank? search-q)
(ui/css-transition
{:class-names "fade"

View File

@ -8,12 +8,10 @@
(defn search
[q]
;; TODO: separate rendering for blocks
(p/let [blocks-result (search/block-search q 10)]
(swap! state/state assoc :search/result
{:pages (search/page-search q)
:files (search/file-search q)
:blocks blocks-result})))
(swap! state/state assoc :search/result
{:pages (search/page-search q)
:files (search/file-search q)
:blocks (search/block-search q 10)}))
(defn clear-search!
[]

View File

@ -11,24 +11,14 @@
[frontend.text :as text]
[cljs-bean.core :as bean]
[goog.object :as gobj]
["fuzzysort" :as fuzzy]
["flexsearch" :as flexsearch]
["fuse.js" :as fuse]
[medley.core :as medley]
[promesa.core :as p]
["/frontend/utils" :as utils]))
(def fuzzy-go (gobj/get fuzzy "go"))
(defonce prepare (gobj/get fuzzy "prepare"))
(defonce highlight (gobj/get fuzzy "highlight"))
(defn go
[q indice-type indice opts]
(case indice-type
:page
(fuzzy-go q indice opts)
:block
(.search indice q opts)))
[q indice opts]
(.search indice q opts))
(defn block->index
[{:block/keys [uuid content format] :as block}]
@ -38,13 +28,6 @@
:uuid (str uuid)
:content result}))
(def default-block-indice (flexsearch.
(clj->js
{:encode "icase"
:tokenize utils/searchTokenize
:doc {:id "id"
:field ["content"]}
:async true})))
(defn make-blocks-indice!
[]
(when-let [repo (state/get-current-repo)]
@ -52,9 +35,10 @@
(map block->index)
(remove nil?)
(bean/->js))
indice default-block-indice]
(p/let [result (.add indice blocks)]
(swap! indices assoc-in [repo :blocks] indice))
indice (fuse. blocks
(clj->js {:keys ["uuid" "content"]
}))]
(swap! indices assoc-in [repo :blocks] indice)
indice)))
(defn make-pages-indice!
@ -63,9 +47,11 @@
(let [pages (->> (db/get-pages (state/get-current-repo))
(remove string/blank?)
(map (fn [p] {:name p}))
(bean/->js))]
(swap! indices assoc-in [repo :pages] pages)
pages)))
(bean/->js))
indice (fuse. pages
(clj->js {:keys ["name"]}))]
(swap! indices assoc-in [repo :pages] indice)
indice)))
;; TODO: persist indices to IndexedDB; it would be better if the future db
;; had direct fuzzy search support.
@ -82,7 +68,7 @@
(defn reset-indice!
[repo]
(swap! indices assoc repo {:pages #js []
:blocks default-block-indice}))
:blocks #js []}))
;; Copied from https://gist.github.com/vaughnd/5099299
(defn str-len-distance
@ -162,14 +148,15 @@
(when-not (string/blank? q)
(let [indice (or (get-in @indices [repo :blocks])
(make-blocks-indice!))]
(p/let [result (go q :block indice (clj->js {:limit limit}))
result (bean/->clj result)]
(let [result (go q indice (clj->js {:limit limit}))
result (bean/->clj result)]
(->>
(map
(fn [{:keys [content uuid] :as block}]
{:block/uuid uuid
:block/content content
:block/page (:block/page (db/entity [:block/uuid (medley/uuid (str uuid))]))})
(fn [{:keys [item] :as block}]
(let [{:keys [content uuid]} item]
{:block/uuid uuid
:block/content content
:block/page (:block/page (db/entity [:block/uuid (medley/uuid (str uuid))]))}))
result)
(remove nil?))))))))))
@ -183,15 +170,12 @@
(when-not (string/blank? q)
(let [indice (or (get-in @indices [repo :pages])
(make-pages-indice!))
result (->> (go q :page indice (clj->js {:limit limit
:key "name"
:allowTypo false
:threshold -10000}))
result (->> (go q indice {})
(bean/->clj))]
;; TODO: add indexes for highlights
(->> (map
(fn [{:keys [obj]}]
(:name obj))
(fn [{:keys [item]}]
(:name item))
result)
(remove nil?))))))))
@ -234,19 +218,20 @@
pages-to-add (->> (filter (fn [page]
(contains? pages-to-add-set (:db/id page))) pages-result)
(map (fn [p] {:name (or (:page/original-name p)
(:page/name p))}))
(set))
(:page/name p))})))
pages-to-remove-set (->> (remove :added pages)
(map :v)
(set))]
(map :v))]
(swap! search-db/indices update-in [repo :pages]
(fn [pages]
(let [pages (or pages (array))
pages (.filter pages (fn [page]
(when-let [page-name (gobj/get page "name")]
(not (contains? pages-to-remove-set
(string/lower-case page-name))))))]
(.concat pages (bean/->js pages-to-add)))))))
(fn [indice]
(when indice
(doseq [page-name pages-to-remove-set]
(.remove indice
(fn [page]
(= page-name (gobj/get page "name")))))
(when (seq pages-to-add)
(doseq [page pages-to-add]
(.add indice (bean/->js page)))))
indice))))
(when (seq blocks)
(let [blocks-result (db/pull-many '[:db/id :block/uuid :block/format :block/content] (set (map :e blocks)))
blocks-to-add-set (->> (filter :added blocks)
@ -255,8 +240,7 @@
blocks-to-add (->> (filter (fn [block]
(contains? blocks-to-add-set (:db/id block)))
blocks-result)
(map block->index)
(set))
(map block->index))
blocks-to-remove-set (->> (remove :added blocks)
(map :e)
(set))]
@ -264,7 +248,10 @@
(fn [indice]
(when indice
(doseq [block-id blocks-to-remove-set]
(.remove indice #js {:id block-id}))
(.remove indice
(fn [block]
(= block-id (gobj/get block "id")))))
(when (seq blocks-to-add)
(.add indice (bean/->js blocks-to-add))))
(doseq [block blocks-to-add]
(.add indice (bean/->js block)))))
indice))))))))

View File

@ -204,13 +204,3 @@ export const win32 = path => {
// UNC paths are always absolute
return Boolean(result[2] || isUnc);
};
/**
 * Split a string into search tokens.
 *
 * ASCII word runs (`[A-Za-z0-9_]+`) become one token each; every non-ASCII
 * character becomes its own single-character token (useful for scripts such
 * as CJK that do not separate words with whitespace). ASCII word tokens come
 * first, followed by the non-ASCII characters in their original order.
 *
 * Empty tokens are never returned, regardless of whether the input contains
 * non-ASCII characters.
 *
 * @param {string} str - Text to tokenize.
 * @returns {string[]} Non-empty tokens.
 */
export const searchTokenize = (str) => {
  // Without the `u` flag, `\W` also matches every non-ASCII code unit, so this
  // split yields only the ASCII word runs (plus empty strings at the edges).
  const asciiWords = str.split(/\W+/);

  // Spread iterates by code point, so astral characters (e.g. emoji) stay
  // intact instead of being cut into two lone surrogates by `split('')`.
  const nonAsciiChars = [...str.replace(/[\x00-\x7F]/g, '')];

  // Drop the empty strings produced by leading/trailing separators.
  return [...asciiWords, ...nonAsciiChars].filter(Boolean);
};

View File

@ -2324,10 +2324,6 @@ flatted@^3.1.0:
resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.1.0.tgz#a5d06b4a8b01e3a63771daa5cb7a1903e2e57067"
integrity sha512-tW+UkmtNg/jv9CSofAKvgVcO7c2URjhTdW1ZTkcAritblu8tajiYy7YisnIflEwtKssCtOxpnBRoCB7iap0/TA==
"flexsearch@git+https://github.com/logseq/flexsearch":
version "0.6.32"
resolved "git+https://github.com/logseq/flexsearch#0a04c518ef0a9b3c76e18da893642835f98d5616"
flush-write-stream@^1.0.2:
version "1.1.1"
resolved "https://registry.yarnpkg.com/flush-write-stream/-/flush-write-stream-1.1.1.tgz#8dd7d873a1babc207d94ead0c2e0e44276ebf2e8"
@ -2415,9 +2411,10 @@ function-bind@^1.1.1:
resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d"
integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==
"fuzzysort@git+https://github.com/getstation/fuzzysort#a66f5813825d2415b606cc69129070c4eb612ae2":
version "1.1.4"
resolved "git+https://github.com/getstation/fuzzysort#a66f5813825d2415b606cc69129070c4eb612ae2"
fuse.js@^6.4.6:
version "6.4.6"
resolved "https://registry.yarnpkg.com/fuse.js/-/fuse.js-6.4.6.tgz#62f216c110e5aa22486aff20be7896d19a059b79"
integrity sha512-/gYxR/0VpXmWSfZOIPS3rWwU8SHgsRTwWuXhyb2O6s7aRuVtHtxCkR33bNYu3wyLyNx/Wpv0vU7FZy8Vj53VNw==
gensync@^1.0.0-beta.1:
version "1.0.0-beta.2"