enhance: apply normalization on page indexing & diacritics / accents searching compat.

pull/3954/head
Junyi Du 2022-01-18 21:03:49 +08:00 committed by Tienson Qin
parent ef2c337e2e
commit 226541ff5e
5 changed files with 20 additions and 11 deletions

View File

@ -95,6 +95,7 @@
"is-svg": "4.3.0",
"jszip": "3.5.0",
"mldoc": "1.2.7",
"remove-accents": "0.4.2",
"path": "0.12.7",
"pixi-graph-fork": "0.2.0",
"pixi.js": "6.2.0",

View File

@ -129,13 +129,13 @@
;; TODO: add indexes for highlights
(->> (map
(fn [{:keys [item]}]
(:name item))
(:original-name item))
result)
(remove nil?)
(map string/trim)
(distinct)
(filter (fn [name]
(exact-matched? q name))))))))))
(filter (fn [original-name]
(exact-matched? q original-name))))))))))
(defn file-search
([q]

View File

@ -60,7 +60,8 @@
(when-let [repo (state/get-current-repo)]
(let [pages (->> (db/get-pages (state/get-current-repo))
(remove string/blank?)
(map (fn [p] {:name p}))
(map (fn [p] {:name (util/search-normalize p)
:original-name p}))
(bean/->js))
indice (fuse. pages
(clj->js {:keys ["name"]

View File

@ -5,6 +5,7 @@
["/frontend/selection" :as selection]
["/frontend/utils" :as utils]
["grapheme-splitter" :as GraphemeSplitter]
["remove-accents" :as removeAccents]
[camel-snake-kebab.core :as csk]
[camel-snake-kebab.extras :as cske]
[cljs-bean.core :as bean]
@ -1189,16 +1190,17 @@
[s]
(.normalize s "NFC"))
(defn search-normalize
"Normalize string for searching (loose).
Lower-cases `s` and returns its Unicode NFKD-normalized form."
[s]
;; Lower-case first, then call the JS string method `normalize` with the "NFKD" form.
(.normalize (string/lower-case s) "NFKD")
)
;; Wrapped in a reader conditional: this form is only emitted for the :cljs platform.
#?(:cljs
(defn search-normalize
"Normalize string for searching (loose).
Lower-cases `s`, applies Unicode NFKC normalization, then passes the result
through `removeAccents` (required from the \"remove-accents\" package).
No string check is done here; `safe-search-normalize` guards with `string?`."
[s]
;; NOTE(review): presumably NFKC (composed form) is chosen so that `removeAccents`
;; receives precomposed accented characters -- confirm against the remove-accents docs.
(removeAccents (.normalize (string/lower-case s) "NFKC"))))
(defn safe-search-normalize
#?(:cljs
(defn safe-search-normalize
[s]
(if (string? s)
(.normalize (string/lower-case s) "NFKD") s))
(removeAccents (.normalize (string/lower-case s) "NFKC")) s)))
(defn page-name-sanity
"Sanitize the page-name for file name (strict), for file writing

View File

@ -6960,6 +6960,11 @@ remark@^13.0.0:
remark-stringify "^9.0.0"
unified "^9.1.0"
remove-accents@0.4.2:
version "0.4.2"
resolved "https://registry.yarnpkg.com/remove-accents/-/remove-accents-0.4.2.tgz#0a43d3aaae1e80db919e07ae254b285d9e1c7bb5"
integrity sha1-CkPTqq4egNuRngeuJUsoXZ4ce7U=
remove-bom-buffer@^3.0.0:
version "3.0.0"
resolved "https://registry.yarnpkg.com/remove-bom-buffer/-/remove-bom-buffer-3.0.0.tgz#c2bf1e377520d324f623892e33c10cac2c252b53"