wip: async parsing

perf/async-parsing
Tienson Qin 2022-09-07 20:29:07 +08:00
parent 6785d05e92
commit b1ca81806a
11 changed files with 253 additions and 42 deletions

View File

@ -6,7 +6,8 @@
[logseq.graph-parser.date-time-util :as date-time-util] [logseq.graph-parser.date-time-util :as date-time-util]
[logseq.graph-parser.config :as gp-config] [logseq.graph-parser.config :as gp-config]
[clojure.string :as string] [clojure.string :as string]
[clojure.set :as set])) [clojure.set :as set]
[promesa.core :as p]))
(defn- db-set-file-content! (defn- db-set-file-content!
"Modified copy of frontend.db.model/db-set-file-content!" "Modified copy of frontend.db.model/db-set-file-content!"
@ -21,7 +22,7 @@
:or {new? true :or {new? true
delete-blocks-fn (constantly [])} delete-blocks-fn (constantly [])}
:as options}] :as options}]
(db-set-file-content! conn file content) (frontend.util/profile "set db file" (db-set-file-content! conn file content))
(let [format (gp-util/get-format file) (let [format (gp-util/get-format file)
file-content [{:file/path file}] file-content [{:file/path file}]
{:keys [tx ast]} {:keys [tx ast]}
@ -32,7 +33,7 @@
extract-options extract-options
{:db @conn}) {:db @conn})
{:keys [pages blocks ast]} {:keys [pages blocks ast]}
(extract/extract file content extract-options') (frontend.util/profile "extract" (extract/extract file content extract-options'))
delete-blocks (delete-blocks-fn (first pages) file) delete-blocks (delete-blocks-fn (first pages) file)
block-ids (map (fn [block] {:block/uuid (:block/uuid block)}) blocks) block-ids (map (fn [block] {:block/uuid (:block/uuid block)}) blocks)
block-refs-ids (->> (mapcat :block/refs blocks) block-refs-ids (->> (mapcat :block/refs blocks)
@ -42,7 +43,8 @@
(seq)) (seq))
;; To prevent "unique constraint" on datascript ;; To prevent "unique constraint" on datascript
block-ids (set/union (set block-ids) (set block-refs-ids)) block-ids (set/union (set block-ids) (set block-refs-ids))
pages (extract/with-ref-pages pages blocks) pages (frontend.util/profile "with ref pages"
(extract/with-ref-pages pages blocks))
pages-index (map #(select-keys % [:block/name]) pages)] pages-index (map #(select-keys % [:block/name]) pages)]
;; does order matter? ;; does order matter?
{:tx (concat file-content pages-index delete-blocks pages block-ids blocks) {:tx (concat file-content pages-index delete-blocks pages block-ids blocks)
@ -53,10 +55,54 @@
;; TODO: use file system timestamp? ;; TODO: use file system timestamp?
(assoc :file/created-at (date-time-util/time-ms)))]) (assoc :file/created-at (date-time-util/time-ms)))])
tx' (gp-util/remove-nils tx) tx' (gp-util/remove-nils tx)
result (d/transact! conn tx' (select-keys options [:new-graph? :from-disk?]))] result (frontend.util/profile "transact db" (d/transact! conn tx' (select-keys options [:new-graph? :from-disk?])))]
{:tx result {:tx result
:ast ast})) :ast ast}))
(defn parse-file-async
  "Promise-based variant of file parsing: stores `content` for `file` in the db
  behind `conn`, extracts pages/blocks through `extract/extract-async` and
  transacts the resulting data.

  Options:
  - :async-parse-fn   parser handed to `extract/extract-async`; when it is
                      missing nothing is done and nil is returned
  - :new?             when truthy (default) the file entity gets :file/created-at
  - :delete-blocks-fn called with the first extracted page and `file`; its
                      result is included in the transaction (default: no blocks)
  - :extract-options  merged over the default extract options
  - :new-graph? / :from-disk? are forwarded as transaction metadata

  Returns a promise of {:tx <d/transact! result> :ast <ast>}."
  [conn file content {:keys [new? delete-blocks-fn extract-options async-parse-fn]
                      :or {new? true
                           delete-blocks-fn (constantly [])}
                      :as options}]
  (when async-parse-fn
    (db-set-file-content! conn file content)
    (p/let [format (gp-util/get-format file)
            file-content [{:file/path file}]
            {:keys [tx ast]}
            (if-not (contains? gp-config/mldoc-support-formats format)
              ;; Formats mldoc cannot parse only contribute the file entity.
              (p/resolved {:tx file-content})
              (p/let [opts (merge {:block-pattern (gp-config/get-block-pattern format)
                                   :date-formatter "MMM do, yyyy"
                                   :supported-formats (gp-config/supported-formats)
                                   :async-parse-fn async-parse-fn}
                                  extract-options
                                  {:db @conn})
                      {:keys [pages blocks ast]} (extract/extract-async file content opts)
                      stale-blocks (delete-blocks-fn (first pages) file)
                      own-ids (for [block blocks]
                                {:block/uuid (:block/uuid block)})
                      ref-ids (->> blocks
                                   (mapcat :block/refs)
                                   (filter #(and (vector? %)
                                                 (= :block/uuid (first %))))
                                   (map (fn [ref] {:block/uuid (second ref)}))
                                   (seq))
                      ;; To prevent "unique constraint" on datascript
                      all-ids (set/union (set own-ids) (set ref-ids))
                      ref-pages (extract/with-ref-pages pages blocks)
                      page-names (map #(select-keys % [:block/name]) ref-pages)]
                ;; does order matter?
                {:tx (concat file-content page-names stale-blocks ref-pages all-ids blocks)
                 :ast ast}))
            full-tx (concat tx [(cond-> {:file/path file}
                                  ;; TODO: use file system timestamp?
                                  new? (assoc :file/created-at (date-time-util/time-ms)))])
            clean-tx (gp-util/remove-nils full-tx)
            result (d/transact! conn clean-tx (select-keys options [:new-graph? :from-disk?]))]
      {:tx result
       :ast ast})))
(defn filter-files (defn filter-files
"Filters files in preparation for parsing. Only includes files that are "Filters files in preparation for parsing. Only includes files that are
supported by parser" supported by parser"

View File

@ -13,7 +13,8 @@
[logseq.graph-parser.property :as gp-property] [logseq.graph-parser.property :as gp-property]
[logseq.graph-parser.config :as gp-config] [logseq.graph-parser.config :as gp-config]
#?(:org.babashka/nbb [logseq.graph-parser.log :as log] #?(:org.babashka/nbb [logseq.graph-parser.log :as log]
:default [lambdaisland.glogi :as log]))) :default [lambdaisland.glogi :as log])
#?(:cljs [promesa.core :as p])))
(defn- get-page-name (defn- get-page-name
[file ast page-name-order] [file ast page-name-order]
@ -159,8 +160,8 @@
(let [format (gp-util/get-format file) (let [format (gp-util/get-format file)
_ (when verbose (println "Parsing start: " file)) _ (when verbose (println "Parsing start: " file))
ast (gp-mldoc/->edn content (gp-mldoc/default-config format ast (gp-mldoc/->edn content (gp-mldoc/default-config format
;; {:parse_outline_only? true} ;; {:parse_outline_only? true}
) )
user-config)] user-config)]
(when verbose (println "Parsing finished: " file)) (when verbose (println "Parsing finished: " file))
(let [first-block (ffirst ast) (let [first-block (ffirst ast)
@ -185,6 +186,42 @@
:blocks blocks :blocks blocks
:ast ast})))) :ast ast}))))
#?(:cljs
   (defn extract-async
     "Async version of page/block extraction. Parses `content` with the
     :async-parse-fn found in `options` and returns a promise of
     {:pages .. :blocks .. :ast ..}.

     Returns nil when `options` carries no :async-parse-fn, and a promise of []
     when `content` is blank. `verbose` (default true) prints start/finish
     messages around the parse."
     [file content {:keys [user-config verbose] :or {verbose true} :as options}]
     (when-let [parse-fn (:async-parse-fn options)]
       (if (string/blank? content)
         (p/resolved [])
         (p/let [format (gp-util/get-format file)
                 _ (when verbose (println "Parsing start: " file))
                 ast (parse-fn content
                               ;; {:parse_outline_only? true}
                               (gp-mldoc/default-config format)
                               user-config)]
           (when verbose (println "Parsing finished: " file))
           (let [head (ffirst ast)
                 ;; Non-string values and "file:" links under :file-path are kept
                 ;; verbatim; every other string value goes through the property parser.
                 parse-entry (fn [[k v]]
                               (let [keep-raw? (or (not (string? v))
                                                   (and (= (keyword k) :file-path)
                                                        (string/starts-with? v "file:")))]
                                 [k (if keep-raw?
                                      v
                                      (text/parse-property format k v user-config))]))
                 page-props (when (gp-property/properties-ast? head)
                              (->> (last head)
                                   (map parse-entry)
                                   (into {})
                                   (walk/keywordize-keys)))
                 properties (when (seq page-props)
                              (cond-> page-props
                                (:filters page-props)
                                (update :filters
                                        (fn [v]
                                          (string/replace (or v "") "\\" "")))))
                 [pages blocks] (extract-pages-and-blocks format ast properties file content options)]
             {:pages pages
              :blocks blocks
              :ast ast}))))))
(defn- with-block-uuid (defn- with-block-uuid
[pages] [pages]
(->> (gp-util/distinct-by :block/name pages) (->> (gp-util/distinct-by :block/name pages)

View File

@ -78,7 +78,7 @@
content (if remove-first-line? body (cons f body))] content (if remove-first-line? body (cons f body))]
(string/join "\n" content))) (string/join "\n" content)))
(defn- update-src-full-content (defn update-src-full-content
[ast content] [ast content]
(let [content (utf8/encode content)] (let [content (utf8/encode content)]
(map (fn [[block pos-meta]] (map (fn [[block pos-meta]]

View File

@ -50,19 +50,19 @@
"gulp:build": "cross-env NODE_ENV=production gulp build", "gulp:build": "cross-env NODE_ENV=production gulp build",
"css:build": "postcss tailwind.all.css -o static/css/style.css --verbose --env production", "css:build": "postcss tailwind.all.css -o static/css/style.css --verbose --env production",
"css:watch": "cross-env TAILWIND_MODE=watch postcss tailwind.all.css -o static/css/style.css --verbose --watch", "css:watch": "cross-env TAILWIND_MODE=watch postcss tailwind.all.css -o static/css/style.css --verbose --watch",
"cljs:watch": "clojure -M:cljs watch app electron", "cljs:watch": "clojure -M:cljs watch parser-worker app electron",
"cljs:app-watch": "clojure -M:cljs watch app", "cljs:app-watch": "clojure -M:cljs watch parser-worker app",
"cljs:electron-watch": "clojure -M:cljs watch app electron --config-merge \"{:asset-path \\\"./js\\\"}\"", "cljs:electron-watch": "clojure -M:cljs watch parser-worker app electron --config-merge \"{:asset-path \\\"./js\\\"}\"",
"cljs:release": "clojure -M:cljs release app publishing electron", "cljs:release": "clojure -M:cljs release parser-worker app publishing electron",
"cljs:release-electron": "clojure -M:cljs release app electron --debug && clojure -M:cljs release publishing", "cljs:release-electron": "clojure -M:cljs release parser-worker app electron --debug && clojure -M:cljs release publishing",
"cljs:release-app": "clojure -M:cljs release app --config-merge \"{:compiler-options {:output-feature-set :es6}}\"", "cljs:release-app": "clojure -M:cljs release parser-worker app --config-merge \"{:compiler-options {:output-feature-set :es6}}\"",
"cljs:release-android-app": "clojure -M:cljs release app --config-merge \"{:compiler-options {:output-feature-set :es6}}\"", "cljs:release-android-app": "clojure -M:cljs release parser-worker app --config-merge \"{:compiler-options {:output-feature-set :es6}}\"",
"cljs:test": "clojure -M:test compile test", "cljs:test": "clojure -M:test compile test",
"cljs:run-test": "node static/tests.js", "cljs:run-test": "node static/tests.js",
"cljs:dev-release-app": "clojure -M:cljs release app --config-merge \"{:closure-defines {frontend.config/DEV-RELEASE true}}\"", "cljs:dev-release-app": "clojure -M:cljs release parser-worker app --config-merge \"{:closure-defines {frontend.config/DEV-RELEASE true}}\"",
"cljs:debug": "clojure -M:cljs release app --debug", "cljs:debug": "clojure -M:cljs release parser-worker app --debug",
"cljs:report": "clojure -M:cljs run shadow.cljs.build-report app report.html", "cljs:report": "clojure -M:cljs run shadow.cljs.build-report app report.html",
"cljs:build-electron": "clojure -A:cljs compile app electron", "cljs:build-electron": "clojure -A:cljs compile parser-worker app electron",
"cljs:lint": "clojure -M:clj-kondo --parallel --lint src --cache false" "cljs:lint": "clojure -M:clj-kondo --parallel --lint src --cache false"
}, },
"dependencies": { "dependencies": {
@ -124,7 +124,7 @@
"reakit": "0.11.1", "reakit": "0.11.1",
"remove-accents": "0.4.2", "remove-accents": "0.4.2",
"send-intent": "3.0.11", "send-intent": "3.0.11",
"threads": "1.6.5", "threads": "^1.7.0",
"url": "^0.11.0", "url": "^0.11.0",
"yargs-parser": "20.2.4" "yargs-parser": "20.2.4"
}, },

View File

@ -60,6 +60,14 @@
:warnings {:fn-deprecated false :warnings {:fn-deprecated false
:redef false}}} :redef false}}}
;; Build for the parser web worker: a single `:parser-worker` module whose
;; entry namespace is frontend.worker.parser, written to ./static/js.
;; NOTE(review): frontend.util.pool loads ".../js/parser-worker.js" — confirm
;; the emitted file name matches the module key.
:parser-worker {:target :browser
:output-dir "./static/js"
:asset-path "./js"
:compiler-options {:source-map false}
:modules {:parser-worker {:entries [frontend.worker.parser]
:web-worker true}}
:release {:compiler-options {:infer-externs :auto}}}
:test {:target :node-test :test {:target :node-test
:output-to "static/tests.js" :output-to "static/tests.js"
:closure-defines {frontend.util/NODETEST true} :closure-defines {frontend.util/NODETEST true}

View File

@ -7,7 +7,10 @@
[lambdaisland.glogi :as log] [lambdaisland.glogi :as log]
["mldoc" :as mldoc :refer [Mldoc]] ["mldoc" :as mldoc :refer [Mldoc]]
[logseq.graph-parser.mldoc :as gp-mldoc] [logseq.graph-parser.mldoc :as gp-mldoc]
[logseq.graph-parser.util :as gp-util])) [logseq.graph-parser.util :as gp-util]
[frontend.util :as util]
[frontend.util.pool :as pool]
[promesa.core :as p]))
(defonce anchorLink (gobj/get Mldoc "anchorLink")) (defonce anchorLink (gobj/get Mldoc "anchorLink"))
(defonce parseOPML (gobj/get Mldoc "parseOPML")) (defonce parseOPML (gobj/get Mldoc "parseOPML"))
@ -55,6 +58,22 @@
[content config] [content config]
(gp-mldoc/->edn content config (state/get-config))) (gp-mldoc/->edn content config (state/get-config)))
(defn ->edn-async
  "Parses `content` into an mldoc AST off the main thread and returns a promise
  of the AST.

  Arguments:
  - content      text to parse; blank content resolves to []
  - config       mldoc config passed to the parser
  - user-config  user configuration used when collecting page properties;
                 defaults to `(state/get-config)`, like the synchronous `->edn`

  Under node tests, or when no parse job could be queued (e.g. the worker pool
  is not initialised yet), parsing falls back to the synchronous parser.
  Any failure, synchronous or asynchronous, is logged and resolves to []."
  ([content config]
   (->edn-async content config (state/get-config)))
  ([content config user-config]
   (if util/node-test?
     (p/resolved (gp-mldoc/->edn content config user-config))
     (let [on-error (fn [e]
                      (log/error :edn/convert-failed e)
                      [])]
       (try
         (if (string/blank? content)
           (p/resolved [])
           ;; A plain try/catch cannot see rejections raised after this call
           ;; returns, so the promise chain gets its own handler.
           (-> (p/let [v (pool/add-parse-job! content config)]
                 (if (nil? v)
                   ;; No job result: parse synchronously rather than silently
                   ;; producing an empty AST.
                   (gp-mldoc/->edn content config user-config)
                   (-> v
                       (gp-util/json->clj)
                       (gp-mldoc/update-src-full-content content)
                       (gp-mldoc/collect-page-properties gp-mldoc/parse-property user-config))))
               (p/catch on-error)))
         (catch js/Error e
           (p/resolved (on-error e))))))))
(defrecord MldocMode [] (defrecord MldocMode []
protocol/Format protocol/Format
(toEdn [_this content config] (toEdn [_this content config]

View File

@ -35,7 +35,8 @@
[goog.object :as gobj] [goog.object :as gobj]
[lambdaisland.glogi :as log] [lambdaisland.glogi :as log]
[promesa.core :as p] [promesa.core :as p]
[logseq.db.schema :as db-schema])) [logseq.db.schema :as db-schema]
[frontend.util.pool :as pool]))
(defn set-global-error-notification! (defn set-global-error-notification!
[] []
@ -222,6 +223,7 @@
(db/run-batch-txs!) (db/run-batch-txs!)
(file/<ratelimit-file-writes!) (file/<ratelimit-file-writes!)
(pool/init-parser-pool!)
(when config/dev? (when config/dev?
(enable-datalog-console)) (enable-datalog-console))

View File

@ -17,7 +17,8 @@
[promesa.core :as p] [promesa.core :as p]
[frontend.mobile.util :as mobile] [frontend.mobile.util :as mobile]
[logseq.graph-parser.config :as gp-config] [logseq.graph-parser.config :as gp-config]
[logseq.graph-parser :as graph-parser])) [logseq.graph-parser :as graph-parser]
[frontend.format.mldoc :as mldoc]))
;; TODO: extract all git ops using a channel ;; TODO: extract all git ops using a channel
@ -144,21 +145,22 @@
file) file)
file (gp-util/path-normalize file) file (gp-util/path-normalize file)
new? (nil? (db/entity [:file/path file]))] new? (nil? (db/entity [:file/path file]))]
(:tx (p/let [result (graph-parser/parse-file-async
(graph-parser/parse-file (db/get-db repo-url false)
(db/get-db repo-url false) file
file content
content (merge (dissoc options :verbose)
(merge (dissoc options :verbose) {:new? new?
{:new? new? :delete-blocks-fn (partial get-delete-blocks repo-url)
:delete-blocks-fn (partial get-delete-blocks repo-url) :extract-options (merge
:extract-options (merge {:user-config (state/get-config)
{:user-config (state/get-config) :date-formatter (state/get-date-formatter)
:date-formatter (state/get-date-formatter) :page-name-order (state/page-name-order)
:page-name-order (state/page-name-order) :block-pattern (config/get-block-pattern (gp-util/get-format file))
:block-pattern (config/get-block-pattern (gp-util/get-format file)) :supported-formats (gp-config/supported-formats)}
:supported-formats (gp-config/supported-formats)} (when (some? verbose) {:verbose verbose}))
(when (some? verbose) {:verbose verbose}))})))) :async-parse-fn mldoc/->edn-async}))]
(:tx result)))
(catch :default e (catch :default e
(prn "Reset file failed " {:file file}) (prn "Reset file failed " {:file file})
(log/error :exception e))))) (log/error :exception e)))))

View File

@ -0,0 +1,83 @@
(ns frontend.util.pool
(:require [electron.ipc :as ipc]
[frontend.config :as config]
[frontend.util :as util]
[promesa.core :as p]
[clojure.string :as string]
["threads" :refer [Pool Worker spawn]]
[frontend.mobile.util :as mobile-util]))
;; Holds the shared parser worker pool; nil until `init-parser-pool!` stores one.
(defonce parser-pool (atom nil))
(defn- absolute-path-for-worker
  "Works around a threads.js bug on Windows by rewriting the worker directory
  into a `//./`-prefixed path with forward slashes.
  `path` is expected to be a promise on Windows (it is chained with `p/then`);
  on every other platform it is returned untouched.
  See-also: https://github.com/andywer/threads.js/blob/8f94053f028b0d4e4fb1fdec535867f6d0e23946/src/master/implementation.browser.ts#L10"
  [path]
  (if-not util/win32?
    path
    (p/then path
            (fn [dir]
              (str "//./" (string/replace dir "\\" "/"))))))
(defn create-parser-pool!
  "Creates a threads.js `Pool` of parser workers and returns a promise of it.

  `num` is the pool size (default 8).

  The worker script is `<static-path>/js/parser-worker.js`, where static-path
  is the directory reported by the :getDirname ipc call when running in
  Electron from a `file:` location, and \"/static\" otherwise. Outside Electron
  and native mobile platforms the path is passed through `config/asset-uri`."
  ([]
   (create-parser-pool! 8))
  ([num]
   (p/let [static-path (if (and (util/electron?)
                                (= "file:" (.-protocol js/location)))
                         (absolute-path-for-worker (ipc/ipc :getDirname))
                         "/static")
           path (str static-path "/js/parser-worker.js")
           path (if (or (util/electron?)
                        (mobile-util/native-platform?))
                  path
                  (config/asset-uri path))]
     ;; The size belongs to `Pool`, not to `spawn`: it used to be handed to
     ;; `spawn` as its options argument, so the requested size was ignored.
     (Pool.
      (fn []
        (spawn (Worker. path)))
      num))))
;; (defn finish-pool!
;; [{:keys [pool tasks]} ok-handler]
;; (-> (p/all @tasks)
;; (p/then (fn [result]
;; (ok-handler result)
;; (.completed pool)
;; (.terminate pool)
;; (reset! tasks nil)))))
(defn terminate-pool!
  "Waits for `pool`'s `.completed` promise, then calls `.terminate` on it.
  Returns a promise."
  [^js pool]
  (p/let [_settled (.completed pool)]
    (.terminate pool)))
(defn terminate-parser-pool!
  "Terminates the pool currently stored in `parser-pool`, if there is one.
  Returns nil when no pool has been stored."
  []
  (let [current @parser-pool]
    (when current
      (terminate-pool! current))))
(defn add-parse-job!
  "Queues a task on the shared parser pool that calls the worker's `parse`
  with `content` and `config`. Returns whatever the pool's `.queue` returns,
  or nil when `parser-pool` holds no pool yet.
  An error thrown while invoking `parse` is printed to the console and the
  task yields nil."
  [content config]
  (let [pool @parser-pool]
    (when pool
      (.queue ^js pool
              (fn [^js worker]
                (try
                  (.parse worker content config)
                  (catch js/Error err
                    (js/console.error err)
                    nil)))))))
(defn init-parser-pool!
  "Creates a parser pool with the default size and stores it in `parser-pool`.
  Returns a promise of the stored pool."
  []
  (-> (create-parser-pool!)
      (p/then (fn [pool]
                (reset! parser-pool pool)))))
;; REPL examples: queue sample parse jobs. `add-parse-job!` returns nil unless
;; a pool has already been stored in `parser-pool`.
(comment
(add-parse-job! "- hello" (frontend.format.mldoc/default-config :markdown))
(add-parse-job! "*world*" (frontend.format.mldoc/default-config :markdown)))

View File

@ -0,0 +1,14 @@
(ns frontend.worker.parser
(:require ["mldoc" :refer [Mldoc]]
["threads/worker" :refer [expose]]))
;; The `parseJson` property of the Mldoc export, looked up once at load time.
(def parse-json (.-parseJson Mldoc))
;; Expose it to the spawning thread through threads.js under the name `parse`
;; (frontend.util.pool invokes it as `parse` on the spawned worker).
(expose (clj->js {:parse parse-json}))
(defn init
  "Prints a start-up message and installs a \"message\" listener on the worker
  global that posts each received message's data straight back.
  NOTE(review): nothing visible in this change calls `init` — confirm whether
  it is still needed alongside the threads.js `expose` call."
  []
  (println "Parser worker initialized!")
  (let [echo (fn [^js event]
               (js/postMessage (.-data event)))]
    (.addEventListener js/self "message" echo)))

View File

@ -7442,10 +7442,10 @@ thenby@^1.3.4:
resolved "https://registry.yarnpkg.com/thenby/-/thenby-1.3.4.tgz#81581f6e1bb324c6dedeae9bfc28e59b1a2201cc" resolved "https://registry.yarnpkg.com/thenby/-/thenby-1.3.4.tgz#81581f6e1bb324c6dedeae9bfc28e59b1a2201cc"
integrity sha512-89Gi5raiWA3QZ4b2ePcEwswC3me9JIg+ToSgtE0JWeCynLnLxNr/f9G+xfo9K+Oj4AFdom8YNJjibIARTJmapQ== integrity sha512-89Gi5raiWA3QZ4b2ePcEwswC3me9JIg+ToSgtE0JWeCynLnLxNr/f9G+xfo9K+Oj4AFdom8YNJjibIARTJmapQ==
threads@1.6.5: threads@^1.7.0:
version "1.6.5" version "1.7.0"
resolved "https://registry.yarnpkg.com/threads/-/threads-1.6.5.tgz#5cee7f139e3e147c5a64f0134844ee92469932a5" resolved "https://registry.yarnpkg.com/threads/-/threads-1.7.0.tgz#d9e9627bfc1ef22ada3b733c2e7558bbe78e589c"
integrity sha512-yL1NN4qZ25crW8wDoGn7TqbENJ69w3zCEjIGXpbqmQ4I+QHrG8+DLaZVKoX74OQUXWCI2lbbrUxDxAbr1xjDGQ== integrity sha512-Mx5NBSHX3sQYR6iI9VYbgHKBLisyB+xROCBGjjWm1O9wb9vfLxdaGtmT/KCjUqMsSNW6nERzCW3T6H43LqjDZQ==
dependencies: dependencies:
callsites "^3.1.0" callsites "^3.1.0"
debug "^4.2.0" debug "^4.2.0"