fix: use fuse.js for both fuzzy search and full-text search

pull/1347/head
Tienson Qin 2021-02-21 21:54:46 +08:00
parent 88d1fa8b91
commit fdd1538820
6 changed files with 64 additions and 82 deletions

View File

@ -62,9 +62,8 @@
"diff": "5.0.0", "diff": "5.0.0",
"diff-match-patch": "^1.0.5", "diff-match-patch": "^1.0.5",
"electron": "^11.2.0", "electron": "^11.2.0",
"flexsearch": "git+https://github.com/logseq/flexsearch",
"fs": "^0.0.1-security", "fs": "^0.0.1-security",
"fuzzysort": "git+https://github.com/getstation/fuzzysort#a66f5813825d2415b606cc69129070c4eb612ae2", "fuse.js": "^6.4.6",
"gulp-cached": "^1.1.1", "gulp-cached": "^1.1.1",
"ignore": "^5.1.8", "ignore": "^5.1.8",
"jszip": "^3.5.0", "jszip": "^3.5.0",

View File

@ -127,13 +127,17 @@
(rum/defc search-auto-complete (rum/defc search-auto-complete
[{:keys [pages files blocks]} search-q] [{:keys [pages files blocks]} search-q]
(rum/with-context [[t] i18n/*tongue-context*] (rum/with-context [[t] i18n/*tongue-context*]
(let [new-page [{:type :new-page}] (let [new-file (when-let [ext (util/get-file-ext search-q)]
new-file (when-let [ext (util/get-file-ext search-q)]
(when (contains? config/mldoc-support-formats (keyword (string/lower-case ext))) (when (contains? config/mldoc-support-formats (keyword (string/lower-case ext)))
[{:type :new-file}])) [{:type :new-file}]))
pages (map (fn [page] {:type :page :data page}) pages) pages (map (fn [page] {:type :page :data page}) pages)
files (map (fn [file] {:type :file :data file}) files) files (map (fn [file] {:type :file :data file}) files)
blocks (map (fn [block] {:type :block :data block}) blocks) blocks (map (fn [block] {:type :block :data block}) blocks)
new-page (if (and (seq pages)
(= (string/lower-case search-q)
(string/lower-case (:data (first pages)))))
[]
[{:type :new-page}])
result (if config/publishing? result (if config/publishing?
(concat pages files blocks) (concat pages files blocks)
(concat new-page pages new-file files blocks))] (concat new-page pages new-file files blocks))]
@ -221,6 +225,8 @@
nil))})]))) nil))})])))
(defonce search-timeout (atom nil))
(rum/defc search < rum/reactive (rum/defc search < rum/reactive
(mixins/event-mixin (mixins/event-mixin
(fn [state] (fn [state]
@ -253,12 +259,17 @@
:auto-complete (if (util/chrome?) "chrome-off" "off") ; off not working here :auto-complete (if (util/chrome?) "chrome-off" "off") ; off not working here
:default-value "" :default-value ""
:on-change (fn [e] :on-change (fn [e]
(when @search-timeout
(js/clearTimeout @search-timeout))
(let [value (util/evalue e)] (let [value (util/evalue e)]
(if (string/blank? value) (if (string/blank? value)
(search-handler/clear-search!) (search-handler/clear-search!)
(do (do
(state/set-q! value) (state/set-q! value)
(search-handler/search value)))))}] (reset! search-timeout
(js/setTimeout
#(search-handler/search value)
500))))))}]
(when-not (string/blank? search-q) (when-not (string/blank? search-q)
(ui/css-transition (ui/css-transition
{:class-names "fade" {:class-names "fade"

View File

@ -8,12 +8,10 @@
(defn search (defn search
[q] [q]
;; TODO: separate rendering for blocks (swap! state/state assoc :search/result
(p/let [blocks-result (search/block-search q 10)] {:pages (search/page-search q)
(swap! state/state assoc :search/result :files (search/file-search q)
{:pages (search/page-search q) :blocks (search/block-search q 10)}))
:files (search/file-search q)
:blocks blocks-result})))
(defn clear-search! (defn clear-search!
[] []

View File

@ -11,24 +11,14 @@
[frontend.text :as text] [frontend.text :as text]
[cljs-bean.core :as bean] [cljs-bean.core :as bean]
[goog.object :as gobj] [goog.object :as gobj]
["fuzzysort" :as fuzzy] ["fuse.js" :as fuse]
["flexsearch" :as flexsearch]
[medley.core :as medley] [medley.core :as medley]
[promesa.core :as p] [promesa.core :as p]
["/frontend/utils" :as utils])) ["/frontend/utils" :as utils]))
(def fuzzy-go (gobj/get fuzzy "go"))
(defonce prepare (gobj/get fuzzy "prepare"))
(defonce highlight (gobj/get fuzzy "highlight"))
(defn go (defn go
[q indice-type indice opts] [q indice opts]
(case indice-type (.search indice q opts))
:page
(fuzzy-go q indice opts)
:block
(.search indice q opts)))
(defn block->index (defn block->index
[{:block/keys [uuid content format] :as block}] [{:block/keys [uuid content format] :as block}]
@ -38,13 +28,6 @@
:uuid (str uuid) :uuid (str uuid)
:content result})) :content result}))
(def default-block-indice (flexsearch.
(clj->js
{:encode "icase"
:tokenize utils/searchTokenize
:doc {:id "id"
:field ["content"]}
:async true})))
(defn make-blocks-indice! (defn make-blocks-indice!
[] []
(when-let [repo (state/get-current-repo)] (when-let [repo (state/get-current-repo)]
@ -52,9 +35,10 @@
(map block->index) (map block->index)
(remove nil?) (remove nil?)
(bean/->js)) (bean/->js))
indice default-block-indice] indice (fuse. blocks
(p/let [result (.add indice blocks)] (clj->js {:keys ["uuid" "content"]
(swap! indices assoc-in [repo :blocks] indice)) }))]
(swap! indices assoc-in [repo :blocks] indice)
indice))) indice)))
(defn make-pages-indice! (defn make-pages-indice!
@ -63,9 +47,11 @@
(let [pages (->> (db/get-pages (state/get-current-repo)) (let [pages (->> (db/get-pages (state/get-current-repo))
(remove string/blank?) (remove string/blank?)
(map (fn [p] {:name p})) (map (fn [p] {:name p}))
(bean/->js))] (bean/->js))
(swap! indices assoc-in [repo :pages] pages) indice (fuse. pages
pages))) (clj->js {:keys ["name"]}))]
(swap! indices assoc-in [repo :pages] indice)
indice)))
;; TODO: persist indices to IndexedDB; it would be better if the future db ;; TODO: persist indices to IndexedDB; it would be better if the future db
;; had direct fuzzy search support. ;; had direct fuzzy search support.
@ -82,7 +68,7 @@
(defn reset-indice! (defn reset-indice!
[repo] [repo]
(swap! indices assoc repo {:pages #js [] (swap! indices assoc repo {:pages #js []
:blocks default-block-indice})) :blocks #js []}))
;; Copied from https://gist.github.com/vaughnd/5099299 ;; Copied from https://gist.github.com/vaughnd/5099299
(defn str-len-distance (defn str-len-distance
@ -162,14 +148,15 @@
(when-not (string/blank? q) (when-not (string/blank? q)
(let [indice (or (get-in @indices [repo :blocks]) (let [indice (or (get-in @indices [repo :blocks])
(make-blocks-indice!))] (make-blocks-indice!))]
(p/let [result (go q :block indice (clj->js {:limit limit})) (let [result (go q indice (clj->js {:limit limit}))
result (bean/->clj result)] result (bean/->clj result)]
(->> (->>
(map (map
(fn [{:keys [content uuid] :as block}] (fn [{:keys [item] :as block}]
{:block/uuid uuid (let [{:keys [content uuid]} item]
:block/content content {:block/uuid uuid
:block/page (:block/page (db/entity [:block/uuid (medley/uuid (str uuid))]))}) :block/content content
:block/page (:block/page (db/entity [:block/uuid (medley/uuid (str uuid))]))}))
result) result)
(remove nil?)))))))))) (remove nil?))))))))))
@ -183,15 +170,12 @@
(when-not (string/blank? q) (when-not (string/blank? q)
(let [indice (or (get-in @indices [repo :pages]) (let [indice (or (get-in @indices [repo :pages])
(make-pages-indice!)) (make-pages-indice!))
result (->> (go q :page indice (clj->js {:limit limit result (->> (go q indice {})
:key "name"
:allowTypo false
:threshold -10000}))
(bean/->clj))] (bean/->clj))]
;; TODO: add indexes for highlights ;; TODO: add indexes for highlights
(->> (map (->> (map
(fn [{:keys [obj]}] (fn [{:keys [item]}]
(:name obj)) (:name item))
result) result)
(remove nil?)))))))) (remove nil?))))))))
@ -234,19 +218,20 @@
pages-to-add (->> (filter (fn [page] pages-to-add (->> (filter (fn [page]
(contains? pages-to-add-set (:db/id page))) pages-result) (contains? pages-to-add-set (:db/id page))) pages-result)
(map (fn [p] {:name (or (:page/original-name p) (map (fn [p] {:name (or (:page/original-name p)
(:page/name p))})) (:page/name p))})))
(set))
pages-to-remove-set (->> (remove :added pages) pages-to-remove-set (->> (remove :added pages)
(map :v) (map :v))]
(set))]
(swap! search-db/indices update-in [repo :pages] (swap! search-db/indices update-in [repo :pages]
(fn [pages] (fn [indice]
(let [pages (or pages (array)) (when indice
pages (.filter pages (fn [page] (doseq [page-name pages-to-remove-set]
(when-let [page-name (gobj/get page "name")] (.remove indice
(not (contains? pages-to-remove-set (fn [page]
(string/lower-case page-name))))))] (= page-name (gobj/get page "name")))))
(.concat pages (bean/->js pages-to-add))))))) (when (seq pages-to-add)
(doseq [page pages-to-add]
(.add indice (bean/->js page)))))
indice))))
(when (seq blocks) (when (seq blocks)
(let [blocks-result (db/pull-many '[:db/id :block/uuid :block/format :block/content] (set (map :e blocks))) (let [blocks-result (db/pull-many '[:db/id :block/uuid :block/format :block/content] (set (map :e blocks)))
blocks-to-add-set (->> (filter :added blocks) blocks-to-add-set (->> (filter :added blocks)
@ -255,8 +240,7 @@
blocks-to-add (->> (filter (fn [block] blocks-to-add (->> (filter (fn [block]
(contains? blocks-to-add-set (:db/id block))) (contains? blocks-to-add-set (:db/id block)))
blocks-result) blocks-result)
(map block->index) (map block->index))
(set))
blocks-to-remove-set (->> (remove :added blocks) blocks-to-remove-set (->> (remove :added blocks)
(map :e) (map :e)
(set))] (set))]
@ -264,7 +248,10 @@
(fn [indice] (fn [indice]
(when indice (when indice
(doseq [block-id blocks-to-remove-set] (doseq [block-id blocks-to-remove-set]
(.remove indice #js {:id block-id})) (.remove indice
(fn [block]
(= block-id (gobj/get block "id")))))
(when (seq blocks-to-add) (when (seq blocks-to-add)
(.add indice (bean/->js blocks-to-add)))) (doseq [block blocks-to-add]
(.add indice (bean/->js block)))))
indice)))))))) indice))))))))

View File

@ -204,13 +204,3 @@ export const win32 = path => {
// UNC paths are always absolute // UNC paths are always absolute
return Boolean(result[2] || isUnc); return Boolean(result[2] || isUnc);
}; };
export const searchTokenize = str => {
let ascii_words = str.split(/\W+/);
let non_ascii_str = str.replace(/[\x00-\x7F]/g, '');
if (non_ascii_str == '') {
return ascii_words;
} else {
return ascii_words.concat(non_ascii_str.split('')).filter(e => !!e);
}
};

View File

@ -2324,10 +2324,6 @@ flatted@^3.1.0:
resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.1.0.tgz#a5d06b4a8b01e3a63771daa5cb7a1903e2e57067" resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.1.0.tgz#a5d06b4a8b01e3a63771daa5cb7a1903e2e57067"
integrity sha512-tW+UkmtNg/jv9CSofAKvgVcO7c2URjhTdW1ZTkcAritblu8tajiYy7YisnIflEwtKssCtOxpnBRoCB7iap0/TA== integrity sha512-tW+UkmtNg/jv9CSofAKvgVcO7c2URjhTdW1ZTkcAritblu8tajiYy7YisnIflEwtKssCtOxpnBRoCB7iap0/TA==
"flexsearch@git+https://github.com/logseq/flexsearch":
version "0.6.32"
resolved "git+https://github.com/logseq/flexsearch#0a04c518ef0a9b3c76e18da893642835f98d5616"
flush-write-stream@^1.0.2: flush-write-stream@^1.0.2:
version "1.1.1" version "1.1.1"
resolved "https://registry.yarnpkg.com/flush-write-stream/-/flush-write-stream-1.1.1.tgz#8dd7d873a1babc207d94ead0c2e0e44276ebf2e8" resolved "https://registry.yarnpkg.com/flush-write-stream/-/flush-write-stream-1.1.1.tgz#8dd7d873a1babc207d94ead0c2e0e44276ebf2e8"
@ -2415,9 +2411,10 @@ function-bind@^1.1.1:
resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d" resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d"
integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A== integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==
"fuzzysort@git+https://github.com/getstation/fuzzysort#a66f5813825d2415b606cc69129070c4eb612ae2": fuse.js@^6.4.6:
version "1.1.4" version "6.4.6"
resolved "git+https://github.com/getstation/fuzzysort#a66f5813825d2415b606cc69129070c4eb612ae2" resolved "https://registry.yarnpkg.com/fuse.js/-/fuse.js-6.4.6.tgz#62f216c110e5aa22486aff20be7896d19a059b79"
integrity sha512-/gYxR/0VpXmWSfZOIPS3rWwU8SHgsRTwWuXhyb2O6s7aRuVtHtxCkR33bNYu3wyLyNx/Wpv0vU7FZy8Vj53VNw==
gensync@^1.0.0-beta.1: gensync@^1.0.0-beta.1:
version "1.0.0-beta.2" version "1.0.0-beta.2"