wip: async parsing

perf/async-parsing
Tienson Qin 2022-09-07 20:29:07 +08:00
parent 6785d05e92
commit b1ca81806a
11 changed files with 253 additions and 42 deletions

View File

@ -6,7 +6,8 @@
[logseq.graph-parser.date-time-util :as date-time-util]
[logseq.graph-parser.config :as gp-config]
[clojure.string :as string]
[clojure.set :as set]))
[clojure.set :as set]
[promesa.core :as p]))
(defn- db-set-file-content!
"Modified copy of frontend.db.model/db-set-file-content!"
@ -21,7 +22,7 @@
:or {new? true
delete-blocks-fn (constantly [])}
:as options}]
(db-set-file-content! conn file content)
(frontend.util/profile "set db file" (db-set-file-content! conn file content))
(let [format (gp-util/get-format file)
file-content [{:file/path file}]
{:keys [tx ast]}
@ -32,7 +33,7 @@
extract-options
{:db @conn})
{:keys [pages blocks ast]}
(extract/extract file content extract-options')
(frontend.util/profile "extract" (extract/extract file content extract-options'))
delete-blocks (delete-blocks-fn (first pages) file)
block-ids (map (fn [block] {:block/uuid (:block/uuid block)}) blocks)
block-refs-ids (->> (mapcat :block/refs blocks)
@ -42,7 +43,8 @@
(seq))
;; To prevent "unique constraint" on datascript
block-ids (set/union (set block-ids) (set block-refs-ids))
pages (extract/with-ref-pages pages blocks)
pages (frontend.util/profile "with ref pages"
(extract/with-ref-pages pages blocks))
pages-index (map #(select-keys % [:block/name]) pages)]
;; does order matter?
{:tx (concat file-content pages-index delete-blocks pages block-ids blocks)
@ -53,10 +55,54 @@
;; TODO: use file system timestamp?
(assoc :file/created-at (date-time-util/time-ms)))])
tx' (gp-util/remove-nils tx)
result (d/transact! conn tx' (select-keys options [:new-graph? :from-disk?]))]
result (frontend.util/profile "transact db" (d/transact! conn tx' (select-keys options [:new-graph? :from-disk?])))]
{:tx result
:ast ast}))
(defn parse-file-async
"Parse file asynchronously and save parsed data to the given db. Main parse fn used by logseq app"
[conn file content {:keys [new? delete-blocks-fn extract-options async-parse-fn]
:or {new? true
delete-blocks-fn (constantly [])}
:as options}]
(when async-parse-fn
(db-set-file-content! conn file content)
(p/let [format (gp-util/get-format file)
file-content [{:file/path file}]
{:keys [tx ast]}
(if (contains? gp-config/mldoc-support-formats format)
(p/let [extract-options' (merge {:block-pattern (gp-config/get-block-pattern format)
:date-formatter "MMM do, yyyy"
:supported-formats (gp-config/supported-formats)
:async-parse-fn async-parse-fn}
extract-options
{:db @conn})
{:keys [pages blocks ast]}
(extract/extract-async file content extract-options')
delete-blocks (delete-blocks-fn (first pages) file)
block-ids (map (fn [block] {:block/uuid (:block/uuid block)}) blocks)
block-refs-ids (->> (mapcat :block/refs blocks)
(filter (fn [ref] (and (vector? ref)
(= :block/uuid (first ref)))))
(map (fn [ref] {:block/uuid (second ref)}))
(seq))
;; To prevent "unique constraint" on datascript
block-ids (set/union (set block-ids) (set block-refs-ids))
pages (extract/with-ref-pages pages blocks)
pages-index (map #(select-keys % [:block/name]) pages)]
;; does order matter?
{:tx (concat file-content pages-index delete-blocks pages block-ids blocks)
:ast ast})
(p/resolved {:tx file-content}))
tx (concat tx [(cond-> {:file/path file}
new?
;; TODO: use file system timestamp?
(assoc :file/created-at (date-time-util/time-ms)))])
tx' (gp-util/remove-nils tx)
result (d/transact! conn tx' (select-keys options [:new-graph? :from-disk?]))]
{:tx result
:ast ast})))
(defn filter-files
"Filters files in preparation for parsing. Only includes files that are
supported by parser"

View File

@ -13,7 +13,8 @@
[logseq.graph-parser.property :as gp-property]
[logseq.graph-parser.config :as gp-config]
#?(:org.babashka/nbb [logseq.graph-parser.log :as log]
:default [lambdaisland.glogi :as log])))
:default [lambdaisland.glogi :as log])
#?(:cljs [promesa.core :as p])))
(defn- get-page-name
[file ast page-name-order]
@ -159,8 +160,8 @@
(let [format (gp-util/get-format file)
_ (when verbose (println "Parsing start: " file))
ast (gp-mldoc/->edn content (gp-mldoc/default-config format
;; {:parse_outline_only? true}
)
;; {:parse_outline_only? true}
)
user-config)]
(when verbose (println "Parsing finished: " file))
(let [first-block (ffirst ast)
@ -185,6 +186,42 @@
:blocks blocks
:ast ast}))))
#?(:cljs
(defn extract-async
"Extracts pages, blocks and ast from given file"
[file content {:keys [user-config verbose] :or {verbose true} :as options}]
(when-let [parse-fn (:async-parse-fn options)]
(if (string/blank? content)
(p/resolved [])
(p/let [format (gp-util/get-format file)
_ (when verbose (println "Parsing start: " file))
ast (parse-fn content (gp-mldoc/default-config format
;; {:parse_outline_only? true}
)
user-config)]
(when verbose (println "Parsing finished: " file))
(let [first-block (ffirst ast)
properties (let [properties (and (gp-property/properties-ast? first-block)
(->> (last first-block)
(map (fn [[x y]]
[x (if (and (string? y)
(not (and (= (keyword x) :file-path)
(string/starts-with? y "file:"))))
(text/parse-property format x y user-config)
y)]))
(into {})
(walk/keywordize-keys)))]
(when (and properties (seq properties))
(if (:filters properties)
(update properties :filters
(fn [v]
(string/replace (or v "") "\\" "")))
properties)))
[pages blocks] (extract-pages-and-blocks format ast properties file content options)]
{:pages pages
:blocks blocks
:ast ast}))))))
(defn- with-block-uuid
[pages]
(->> (gp-util/distinct-by :block/name pages)

View File

@ -78,7 +78,7 @@
content (if remove-first-line? body (cons f body))]
(string/join "\n" content)))
(defn- update-src-full-content
(defn update-src-full-content
[ast content]
(let [content (utf8/encode content)]
(map (fn [[block pos-meta]]

View File

@ -50,19 +50,19 @@
"gulp:build": "cross-env NODE_ENV=production gulp build",
"css:build": "postcss tailwind.all.css -o static/css/style.css --verbose --env production",
"css:watch": "cross-env TAILWIND_MODE=watch postcss tailwind.all.css -o static/css/style.css --verbose --watch",
"cljs:watch": "clojure -M:cljs watch app electron",
"cljs:app-watch": "clojure -M:cljs watch app",
"cljs:electron-watch": "clojure -M:cljs watch app electron --config-merge \"{:asset-path \\\"./js\\\"}\"",
"cljs:release": "clojure -M:cljs release app publishing electron",
"cljs:release-electron": "clojure -M:cljs release app electron --debug && clojure -M:cljs release publishing",
"cljs:release-app": "clojure -M:cljs release app --config-merge \"{:compiler-options {:output-feature-set :es6}}\"",
"cljs:release-android-app": "clojure -M:cljs release app --config-merge \"{:compiler-options {:output-feature-set :es6}}\"",
"cljs:watch": "clojure -M:cljs watch parser-worker app electron",
"cljs:app-watch": "clojure -M:cljs watch parser-worker app",
"cljs:electron-watch": "clojure -M:cljs watch parser-worker app electron --config-merge \"{:asset-path \\\"./js\\\"}\"",
"cljs:release": "clojure -M:cljs release parser-worker app publishing electron",
"cljs:release-electron": "clojure -M:cljs release parser-worker app electron --debug && clojure -M:cljs release publishing",
"cljs:release-app": "clojure -M:cljs release parser-worker app --config-merge \"{:compiler-options {:output-feature-set :es6}}\"",
"cljs:release-android-app": "clojure -M:cljs release parser-worker app --config-merge \"{:compiler-options {:output-feature-set :es6}}\"",
"cljs:test": "clojure -M:test compile test",
"cljs:run-test": "node static/tests.js",
"cljs:dev-release-app": "clojure -M:cljs release app --config-merge \"{:closure-defines {frontend.config/DEV-RELEASE true}}\"",
"cljs:debug": "clojure -M:cljs release app --debug",
"cljs:dev-release-app": "clojure -M:cljs release parser-worker app --config-merge \"{:closure-defines {frontend.config/DEV-RELEASE true}}\"",
"cljs:debug": "clojure -M:cljs release parser-worker app --debug",
"cljs:report": "clojure -M:cljs run shadow.cljs.build-report app report.html",
"cljs:build-electron": "clojure -A:cljs compile app electron",
"cljs:build-electron": "clojure -A:cljs compile parser-worker app electron",
"cljs:lint": "clojure -M:clj-kondo --parallel --lint src --cache false"
},
"dependencies": {
@ -124,7 +124,7 @@
"reakit": "0.11.1",
"remove-accents": "0.4.2",
"send-intent": "3.0.11",
"threads": "1.6.5",
"threads": "^1.7.0",
"url": "^0.11.0",
"yargs-parser": "20.2.4"
},

View File

@ -60,6 +60,14 @@
:warnings {:fn-deprecated false
:redef false}}}
:parser-worker {:target :browser
:output-dir "./static/js"
:asset-path "./js"
:compiler-options {:source-map false}
:modules {:parser-worker {:entries [frontend.worker.parser]
:web-worker true}}
:release {:compiler-options {:infer-externs :auto}}}
:test {:target :node-test
:output-to "static/tests.js"
:closure-defines {frontend.util/NODETEST true}
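
The new :parser-worker build emits static/js/parser-worker.js as a web-worker bundle. A quick sanity check of that bundle from a browser or Electron REPL, assuming the static directory is served at /static (the path mirrors the one frontend.util.pool constructs below):

(require '["threads" :refer [spawn Worker]]
         '[logseq.graph-parser.mldoc :as gp-mldoc]
         '[promesa.core :as p])

;; spawn resolves to a proxy whose `parse` method is the one exposed by
;; frontend.worker.parser; the result is Mldoc's raw JSON string.
(p/let [parser (spawn (Worker. "/static/js/parser-worker.js"))
        result (.parse parser "- hello" (gp-mldoc/default-config :markdown))]
  (js/console.log result))
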

View File

@ -7,7 +7,10 @@
[lambdaisland.glogi :as log]
["mldoc" :as mldoc :refer [Mldoc]]
[logseq.graph-parser.mldoc :as gp-mldoc]
[logseq.graph-parser.util :as gp-util]))
[logseq.graph-parser.util :as gp-util]
[frontend.util :as util]
[frontend.util.pool :as pool]
[promesa.core :as p]))
(defonce anchorLink (gobj/get Mldoc "anchorLink"))
(defonce parseOPML (gobj/get Mldoc "parseOPML"))
@ -55,6 +58,22 @@
[content config]
(gp-mldoc/->edn content config (state/get-config)))
(defn ->edn-async
[content config]
(if util/node-test?
(p/resolved (->edn content config))
(try
(if (string/blank? content)
(p/resolved [])
(p/let [v (pool/add-parse-job! content config)]
(-> v
(gp-util/json->clj)
(gp-mldoc/update-src-full-content content)
(gp-mldoc/collect-page-properties gp-mldoc/parse-property {}))))
(catch js/Error e
(log/error :edn/convert-failed e)
(p/resolved [])))))
(defrecord MldocMode []
protocol/Format
(toEdn [_this content config]
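
->edn-async hands the raw parse to the worker pool and then post-processes the JSON result back into the same EDN shape the synchronous ->edn returns. A usage sketch, assuming the parser pool has already been initialised (content is illustrative):

(require '[frontend.format.mldoc :as mldoc]
         '[logseq.graph-parser.mldoc :as gp-mldoc]
         '[promesa.core :as p])

(p/let [ast (mldoc/->edn-async "- hello *world*"
                               (gp-mldoc/default-config :markdown))]
  ;; same [[block pos-meta] ...] structure as the synchronous ->edn
  (prn (count ast)))
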

View File

@ -35,7 +35,8 @@
[goog.object :as gobj]
[lambdaisland.glogi :as log]
[promesa.core :as p]
[logseq.db.schema :as db-schema]))
[logseq.db.schema :as db-schema]
[frontend.util.pool :as pool]))
(defn set-global-error-notification!
[]
@ -222,6 +223,7 @@
(db/run-batch-txs!)
(file/<ratelimit-file-writes!)
(pool/init-parser-pool!)
(when config/dev?
(enable-datalog-console))

View File

@ -17,7 +17,8 @@
[promesa.core :as p]
[frontend.mobile.util :as mobile]
[logseq.graph-parser.config :as gp-config]
[logseq.graph-parser :as graph-parser]))
[logseq.graph-parser :as graph-parser]
[frontend.format.mldoc :as mldoc]))
;; TODO: extract all git ops using a channel
@ -144,21 +145,22 @@
file)
file (gp-util/path-normalize file)
new? (nil? (db/entity [:file/path file]))]
(:tx
(graph-parser/parse-file
(db/get-db repo-url false)
file
content
(merge (dissoc options :verbose)
{:new? new?
:delete-blocks-fn (partial get-delete-blocks repo-url)
:extract-options (merge
{:user-config (state/get-config)
:date-formatter (state/get-date-formatter)
:page-name-order (state/page-name-order)
:block-pattern (config/get-block-pattern (gp-util/get-format file))
:supported-formats (gp-config/supported-formats)}
(when (some? verbose) {:verbose verbose}))}))))
(p/let [result (graph-parser/parse-file-async
(db/get-db repo-url false)
file
content
(merge (dissoc options :verbose)
{:new? new?
:delete-blocks-fn (partial get-delete-blocks repo-url)
:extract-options (merge
{:user-config (state/get-config)
:date-formatter (state/get-date-formatter)
:page-name-order (state/page-name-order)
:block-pattern (config/get-block-pattern (gp-util/get-format file))
:supported-formats (gp-config/supported-formats)}
(when (some? verbose) {:verbose verbose}))
:async-parse-fn mldoc/->edn-async}))]
(:tx result)))
(catch :default e
(prn "Reset file failed " {:file file})
(log/error :exception e)))))
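
With the p/let above, the enclosing reset function now evaluates to a promise of the tx data rather than the data itself, so callers have to chain on it. A sketch; the function name and its arguments are assumptions based on this hunk:

;; Hypothetical caller: reset-file! stands for the function modified here;
;; repo-url/file/content are whatever the caller already holds.
(-> (reset-file! repo-url file content {})
    (p/then (fn [tx] (js/console.log "re-parsed" file "->" (count tx) "tx entities")))
    (p/catch (fn [e] (js/console.error "parse failed" file e))))
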

View File

@ -0,0 +1,83 @@
(ns frontend.util.pool
(:require [electron.ipc :as ipc]
[frontend.config :as config]
[frontend.util :as util]
[promesa.core :as p]
[clojure.string :as string]
["threads" :refer [Pool Worker spawn]]
[frontend.mobile.util :as mobile-util]))
(defonce parser-pool (atom nil))
(defn- absolute-path-for-worker
"Returns the absolute path to the worker file, on Windows.
NOTE: This is a bug in threads.js.
See-also: https://github.com/andywer/threads.js/blob/8f94053f028b0d4e4fb1fdec535867f6d0e23946/src/master/implementation.browser.ts#L10"
[path]
(if util/win32?
(-> path
(p/then #(str "//./" (string/replace % "\\" "/"))))
path))
(defn create-parser-pool!
([]
(create-parser-pool! 8))
([num]
(p/let [static-path (if (and (util/electron?)
(= "file:" (.-protocol js/location)))
(absolute-path-for-worker (ipc/ipc :getDirname))
"/static")
path (str static-path "/js/parser-worker.js")
path (if (or (util/electron?)
(mobile-util/native-platform?))
path
(config/asset-uri path))]
(Pool.
(fn []
(spawn (Worker. path)))
num))))
;; (defn finish-pool!
;; [{:keys [pool tasks]} ok-handler]
;; (-> (p/all @tasks)
;; (p/then (fn [result]
;; (ok-handler result)
;; (.completed pool)
;; (.terminate pool)
;; (reset! tasks nil)))))
(defn terminate-pool!
[^js pool]
(p/let [_ (.completed pool)]
(.terminate pool)))
(defn terminate-parser-pool!
[]
(when-let [pool @parser-pool]
(terminate-pool! pool)))
(defn add-parse-job!
[content config]
(when-let [pool @parser-pool]
(.queue ^js pool
(fn [parser]
(try
(.parse parser content config)
(catch js/Error e
(js/console.error e)
nil)))))
;; (let [task (.queue ^js pool
;; (fn [parser]
;; (parser.parse content config)))]
;; (swap! (:tasks m) conj task)
;; task)
)
(defn init-parser-pool!
[]
(p/let [pool (create-parser-pool!)]
(reset! parser-pool pool)))
(comment
(add-parse-job! "- hello" (frontend.format.mldoc/default-config :markdown))
(add-parse-job! "*world*" (frontend.format.mldoc/default-config :markdown)))

View File

@ -0,0 +1,14 @@
(ns frontend.worker.parser
(:require ["mldoc" :refer [Mldoc]]
["threads/worker" :refer [expose]]))
(def parse-json (.-parseJson Mldoc))
(expose (clj->js {:parse parse-json}))
(defn init
[]
(println "Parser worker initialized!")
(js/self.addEventListener "message"
(fn [^js e]
(js/postMessage (.. e -data)))))

View File

@ -7442,10 +7442,10 @@ thenby@^1.3.4:
resolved "https://registry.yarnpkg.com/thenby/-/thenby-1.3.4.tgz#81581f6e1bb324c6dedeae9bfc28e59b1a2201cc"
integrity sha512-89Gi5raiWA3QZ4b2ePcEwswC3me9JIg+ToSgtE0JWeCynLnLxNr/f9G+xfo9K+Oj4AFdom8YNJjibIARTJmapQ==
threads@1.6.5:
version "1.6.5"
resolved "https://registry.yarnpkg.com/threads/-/threads-1.6.5.tgz#5cee7f139e3e147c5a64f0134844ee92469932a5"
integrity sha512-yL1NN4qZ25crW8wDoGn7TqbENJ69w3zCEjIGXpbqmQ4I+QHrG8+DLaZVKoX74OQUXWCI2lbbrUxDxAbr1xjDGQ==
threads@^1.7.0:
version "1.7.0"
resolved "https://registry.yarnpkg.com/threads/-/threads-1.7.0.tgz#d9e9627bfc1ef22ada3b733c2e7558bbe78e589c"
integrity sha512-Mx5NBSHX3sQYR6iI9VYbgHKBLisyB+xROCBGjjWm1O9wb9vfLxdaGtmT/KCjUqMsSNW6nERzCW3T6H43LqjDZQ==
dependencies:
callsites "^3.1.0"
debug "^4.2.0"