From b1ca81806a60a8c881514e56943d45696763ad34 Mon Sep 17 00:00:00 2001 From: Tienson Qin Date: Wed, 7 Sep 2022 20:29:07 +0800 Subject: [PATCH] wip: async parsing --- .../graph-parser/src/logseq/graph_parser.cljs | 56 +++++++++++-- .../src/logseq/graph_parser/extract.cljc | 43 +++++++++- .../src/logseq/graph_parser/mldoc.cljc | 2 +- package.json | 22 ++--- shadow-cljs.edn | 8 ++ src/main/frontend/format/mldoc.cljs | 21 ++++- src/main/frontend/handler.cljs | 4 +- src/main/frontend/handler/file.cljs | 34 ++++---- src/main/frontend/util/pool.cljs | 83 +++++++++++++++++++ src/main/frontend/worker/parser.cljs | 14 ++++ yarn.lock | 8 +- 11 files changed, 253 insertions(+), 42 deletions(-) create mode 100644 src/main/frontend/util/pool.cljs create mode 100644 src/main/frontend/worker/parser.cljs diff --git a/deps/graph-parser/src/logseq/graph_parser.cljs b/deps/graph-parser/src/logseq/graph_parser.cljs index b733ee308..fc24b1887 100644 --- a/deps/graph-parser/src/logseq/graph_parser.cljs +++ b/deps/graph-parser/src/logseq/graph_parser.cljs @@ -6,7 +6,8 @@ [logseq.graph-parser.date-time-util :as date-time-util] [logseq.graph-parser.config :as gp-config] [clojure.string :as string] - [clojure.set :as set])) + [clojure.set :as set] + [promesa.core :as p])) (defn- db-set-file-content! "Modified copy of frontend.db.model/db-set-file-content!" @@ -21,7 +22,7 @@ :or {new? true delete-blocks-fn (constantly [])} :as options}] - (db-set-file-content! conn file content) + (frontend.util/profile "set db file" (db-set-file-content! conn file content)) (let [format (gp-util/get-format file) file-content [{:file/path file}] {:keys [tx ast]} @@ -32,7 +33,7 @@ extract-options {:db @conn}) {:keys [pages blocks ast]} - (extract/extract file content extract-options') + (frontend.util/profile "extract" (extract/extract file content extract-options')) delete-blocks (delete-blocks-fn (first pages) file) block-ids (map (fn [block] {:block/uuid (:block/uuid block)}) blocks) block-refs-ids (->> (mapcat :block/refs blocks) @@ -42,7 +43,8 @@ (seq)) ;; To prevent "unique constraint" on datascript block-ids (set/union (set block-ids) (set block-refs-ids)) - pages (extract/with-ref-pages pages blocks) + pages (frontend.util/profile "with ref pages" + (extract/with-ref-pages pages blocks)) pages-index (map #(select-keys % [:block/name]) pages)] ;; does order matter? {:tx (concat file-content pages-index delete-blocks pages block-ids blocks) @@ -53,10 +55,54 @@ ;; TODO: use file system timestamp? (assoc :file/created-at (date-time-util/time-ms)))]) tx' (gp-util/remove-nils tx) - result (d/transact! conn tx' (select-keys options [:new-graph? :from-disk?]))] + result (frontend.util/profile "transact db" (d/transact! conn tx' (select-keys options [:new-graph? :from-disk?])))] {:tx result :ast ast})) +(defn parse-file-async + "Parse file asynchronously and save parsed data to the given db. Main parse fn used by logseq app" + [conn file content {:keys [new? delete-blocks-fn extract-options async-parse-fn] + :or {new? true + delete-blocks-fn (constantly [])} + :as options}] + (when async-parse-fn + (db-set-file-content! conn file content) + (p/let [format (gp-util/get-format file) + file-content [{:file/path file}] + {:keys [tx ast]} + (if (contains? gp-config/mldoc-support-formats format) + (p/let [extract-options' (merge {:block-pattern (gp-config/get-block-pattern format) + :date-formatter "MMM do, yyyy" + :supported-formats (gp-config/supported-formats) + :async-parse-fn async-parse-fn} + extract-options + {:db @conn}) + {:keys [pages blocks ast]} + (extract/extract-async file content extract-options') + delete-blocks (delete-blocks-fn (first pages) file) + block-ids (map (fn [block] {:block/uuid (:block/uuid block)}) blocks) + block-refs-ids (->> (mapcat :block/refs blocks) + (filter (fn [ref] (and (vector? ref) + (= :block/uuid (first ref))))) + (map (fn [ref] {:block/uuid (second ref)})) + (seq)) + ;; To prevent "unique constraint" on datascript + block-ids (set/union (set block-ids) (set block-refs-ids)) + pages (extract/with-ref-pages pages blocks) + pages-index (map #(select-keys % [:block/name]) pages)] + ;; does order matter? + {:tx (concat file-content pages-index delete-blocks pages block-ids blocks) + :ast ast}) + (p/resolved {:tx file-content})) + tx (concat tx [(cond-> {:file/path file} + new? + ;; TODO: use file system timestamp? + (assoc :file/created-at (date-time-util/time-ms)))]) + tx' (gp-util/remove-nils tx) + result (d/transact! conn tx' (select-keys options [:new-graph? :from-disk?]))] + {:tx result + :ast ast}))) + (defn filter-files "Filters files in preparation for parsing. Only includes files that are supported by parser" diff --git a/deps/graph-parser/src/logseq/graph_parser/extract.cljc b/deps/graph-parser/src/logseq/graph_parser/extract.cljc index 92f52cb20..4dbd892fd 100644 --- a/deps/graph-parser/src/logseq/graph_parser/extract.cljc +++ b/deps/graph-parser/src/logseq/graph_parser/extract.cljc @@ -13,7 +13,8 @@ [logseq.graph-parser.property :as gp-property] [logseq.graph-parser.config :as gp-config] #?(:org.babashka/nbb [logseq.graph-parser.log :as log] - :default [lambdaisland.glogi :as log]))) + :default [lambdaisland.glogi :as log]) + #?(:cljs [promesa.core :as p]))) (defn- get-page-name [file ast page-name-order] @@ -159,8 +160,8 @@ (let [format (gp-util/get-format file) _ (when verbose (println "Parsing start: " file)) ast (gp-mldoc/->edn content (gp-mldoc/default-config format - ;; {:parse_outline_only? true} - ) + ;; {:parse_outline_only? true} + ) user-config)] (when verbose (println "Parsing finished: " file)) (let [first-block (ffirst ast) @@ -185,6 +186,42 @@ :blocks blocks :ast ast})))) +#?(:cljs + (defn extract-async + "Extracts pages, blocks and ast from given file" + [file content {:keys [user-config verbose] :or {verbose true} :as options}] + (when-let [parse-fn (:async-parse-fn options)] + (if (string/blank? content) + (p/resolved []) + (p/let [format (gp-util/get-format file) + _ (when verbose (println "Parsing start: " file)) + ast (parse-fn content (gp-mldoc/default-config format + ;; {:parse_outline_only? true} + ) + user-config)] + (when verbose (println "Parsing finished: " file)) + (let [first-block (ffirst ast) + properties (let [properties (and (gp-property/properties-ast? first-block) + (->> (last first-block) + (map (fn [[x y]] + [x (if (and (string? y) + (not (and (= (keyword x) :file-path) + (string/starts-with? y "file:")))) + (text/parse-property format x y user-config) + y)])) + (into {}) + (walk/keywordize-keys)))] + (when (and properties (seq properties)) + (if (:filters properties) + (update properties :filters + (fn [v] + (string/replace (or v "") "\\" ""))) + properties))) + [pages blocks] (extract-pages-and-blocks format ast properties file content options)] + {:pages pages + :blocks blocks + :ast ast})))))) + (defn- with-block-uuid [pages] (->> (gp-util/distinct-by :block/name pages) diff --git a/deps/graph-parser/src/logseq/graph_parser/mldoc.cljc b/deps/graph-parser/src/logseq/graph_parser/mldoc.cljc index f1fb6b624..72a0ecb4e 100644 --- a/deps/graph-parser/src/logseq/graph_parser/mldoc.cljc +++ b/deps/graph-parser/src/logseq/graph_parser/mldoc.cljc @@ -78,7 +78,7 @@ content (if remove-first-line? body (cons f body))] (string/join "\n" content))) -(defn- update-src-full-content +(defn update-src-full-content [ast content] (let [content (utf8/encode content)] (map (fn [[block pos-meta]] diff --git a/package.json b/package.json index df4a3b5fd..303330dcb 100644 --- a/package.json +++ b/package.json @@ -50,19 +50,19 @@ "gulp:build": "cross-env NODE_ENV=production gulp build", "css:build": "postcss tailwind.all.css -o static/css/style.css --verbose --env production", "css:watch": "cross-env TAILWIND_MODE=watch postcss tailwind.all.css -o static/css/style.css --verbose --watch", - "cljs:watch": "clojure -M:cljs watch app electron", - "cljs:app-watch": "clojure -M:cljs watch app", - "cljs:electron-watch": "clojure -M:cljs watch app electron --config-merge \"{:asset-path \\\"./js\\\"}\"", - "cljs:release": "clojure -M:cljs release app publishing electron", - "cljs:release-electron": "clojure -M:cljs release app electron --debug && clojure -M:cljs release publishing", - "cljs:release-app": "clojure -M:cljs release app --config-merge \"{:compiler-options {:output-feature-set :es6}}\"", - "cljs:release-android-app": "clojure -M:cljs release app --config-merge \"{:compiler-options {:output-feature-set :es6}}\"", + "cljs:watch": "clojure -M:cljs watch parser-worker app electron", + "cljs:app-watch": "clojure -M:cljs watch parser-worker app", + "cljs:electron-watch": "clojure -M:cljs watch parser-worker app electron --config-merge \"{:asset-path \\\"./js\\\"}\"", + "cljs:release": "clojure -M:cljs release parser-worker app publishing electron", + "cljs:release-electron": "clojure -M:cljs release parser-worker app electron --debug && clojure -M:cljs release publishing", + "cljs:release-app": "clojure -M:cljs release parser-worker app --config-merge \"{:compiler-options {:output-feature-set :es6}}\"", + "cljs:release-android-app": "clojure -M:cljs release parser-worker app --config-merge \"{:compiler-options {:output-feature-set :es6}}\"", "cljs:test": "clojure -M:test compile test", "cljs:run-test": "node static/tests.js", - "cljs:dev-release-app": "clojure -M:cljs release app --config-merge \"{:closure-defines {frontend.config/DEV-RELEASE true}}\"", - "cljs:debug": "clojure -M:cljs release app --debug", + "cljs:dev-release-app": "clojure -M:cljs release parser-worker app --config-merge \"{:closure-defines {frontend.config/DEV-RELEASE true}}\"", + "cljs:debug": "clojure -M:cljs release parser-worker app --debug", "cljs:report": "clojure -M:cljs run shadow.cljs.build-report app report.html", - "cljs:build-electron": "clojure -A:cljs compile app electron", + "cljs:build-electron": "clojure -A:cljs compile parser-worker app electron", "cljs:lint": "clojure -M:clj-kondo --parallel --lint src --cache false" }, "dependencies": { @@ -124,7 +124,7 @@ "reakit": "0.11.1", "remove-accents": "0.4.2", "send-intent": "3.0.11", - "threads": "1.6.5", + "threads": "^1.7.0", "url": "^0.11.0", "yargs-parser": "20.2.4" }, diff --git a/shadow-cljs.edn b/shadow-cljs.edn index f1462f54a..ad699659e 100644 --- a/shadow-cljs.edn +++ b/shadow-cljs.edn @@ -60,6 +60,14 @@ :warnings {:fn-deprecated false :redef false}}} + :parser-worker {:target :browser + :output-dir "./static/js" + :asset-path "./js" + :compiler-options {:source-map false} + :modules {:parser-worker {:entries [frontend.worker.parser] + :web-worker true}} + :release {:compiler-options {:infer-externs :auto}}} + :test {:target :node-test :output-to "static/tests.js" :closure-defines {frontend.util/NODETEST true} diff --git a/src/main/frontend/format/mldoc.cljs b/src/main/frontend/format/mldoc.cljs index 23e649783..22168693d 100644 --- a/src/main/frontend/format/mldoc.cljs +++ b/src/main/frontend/format/mldoc.cljs @@ -7,7 +7,10 @@ [lambdaisland.glogi :as log] ["mldoc" :as mldoc :refer [Mldoc]] [logseq.graph-parser.mldoc :as gp-mldoc] - [logseq.graph-parser.util :as gp-util])) + [logseq.graph-parser.util :as gp-util] + [frontend.util :as util] + [frontend.util.pool :as pool] + [promesa.core :as p])) (defonce anchorLink (gobj/get Mldoc "anchorLink")) (defonce parseOPML (gobj/get Mldoc "parseOPML")) @@ -55,6 +58,22 @@ [content config] (gp-mldoc/->edn content config (state/get-config))) +(defn ->edn-async + [content config] + (if util/node-test? + (p/resolved (->edn content config)) + (try + (if (string/blank? content) + (p/resolved []) + (p/let [v (pool/add-parse-job! content config)] + (-> v + (gp-util/json->clj) + (gp-mldoc/update-src-full-content content) + (gp-mldoc/collect-page-properties gp-mldoc/parse-property {})))) + (catch js/Error e + (log/error :edn/convert-failed e) + (p/resolved []))))) + (defrecord MldocMode [] protocol/Format (toEdn [_this content config] diff --git a/src/main/frontend/handler.cljs b/src/main/frontend/handler.cljs index 862f6da35..acad899a1 100644 --- a/src/main/frontend/handler.cljs +++ b/src/main/frontend/handler.cljs @@ -35,7 +35,8 @@ [goog.object :as gobj] [lambdaisland.glogi :as log] [promesa.core :as p] - [logseq.db.schema :as db-schema])) + [logseq.db.schema :as db-schema] + [frontend.util.pool :as pool])) (defn set-global-error-notification! [] @@ -222,6 +223,7 @@ (db/run-batch-txs!) (file/edn-async}))] + (:tx result))) (catch :default e (prn "Reset file failed " {:file file}) (log/error :exception e))))) diff --git a/src/main/frontend/util/pool.cljs b/src/main/frontend/util/pool.cljs new file mode 100644 index 000000000..84696b5c0 --- /dev/null +++ b/src/main/frontend/util/pool.cljs @@ -0,0 +1,83 @@ +(ns frontend.util.pool + (:require [electron.ipc :as ipc] + [frontend.config :as config] + [frontend.util :as util] + [promesa.core :as p] + [clojure.string :as string] + ["threads" :refer [Pool Worker spawn]] + [frontend.mobile.util :as mobile-util])) + +(defonce parser-pool (atom nil)) + +(defn- absolute-path-for-worker + "Returns the absolute path to the worker file, on Windows. + + NOTE: This is a bug in threads.js. + See-also: https://github.com/andywer/threads.js/blob/8f94053f028b0d4e4fb1fdec535867f6d0e23946/src/master/implementation.browser.ts#L10" + [path] + (if util/win32? + (-> path + (p/then #(str "//./" (string/replace % "\\" "/")))) + path)) + +(defn create-parser-pool! + ([] + (create-parser-pool! 8)) + ([num] + (p/let [static-path (if (and (util/electron?) + (= "file:" (.-protocol js/location))) + (absolute-path-for-worker (ipc/ipc :getDirname)) + "/static") + path (str static-path "/js/parser-worker.js") + path (if (or (util/electron?) + (mobile-util/native-platform?)) + path + (config/asset-uri path))] + (Pool. + (fn [] + (spawn (Worker. path) num)))))) + +;; (defn finish-pool! +;; [{:keys [pool tasks]} ok-handler] +;; (-> (p/all @tasks) +;; (p/then (fn [result] +;; (ok-handler result) +;; (.completed pool) +;; (.terminate pool) +;; (reset! tasks nil))))) + +(defn terminate-pool! + [^js pool] + (p/let [_ (.completed pool)] + (.terminate pool))) + +(defn terminate-parser-pool! + [] + (when-let [pool @parser-pool] + (terminate-pool! pool))) + +(defn add-parse-job! + [content config] + (when-let [pool @parser-pool] + (.queue ^js pool + (fn [parser] + (try + (parser.parse content config) + (catch js/Error e + (js/console.error e) + nil))))) + ;; (let [task (.queue ^js pool + ;; (fn [parser] + ;; (parser.parse content config)))] + ;; (swap! (:tasks m) conj task) + ;; task) + ) + +(defn init-parser-pool! + [] + (p/let [pool (create-parser-pool!)] + (reset! parser-pool pool))) + +(comment + (add-parse-job! "- hello" (frontend.format.mldoc/default-config :markdown)) + (add-parse-job! "*world*" (frontend.format.mldoc/default-config :markdown))) diff --git a/src/main/frontend/worker/parser.cljs b/src/main/frontend/worker/parser.cljs new file mode 100644 index 000000000..bd1aa456f --- /dev/null +++ b/src/main/frontend/worker/parser.cljs @@ -0,0 +1,14 @@ +(ns frontend.worker.parser + (:require ["mldoc" :refer [Mldoc]] + ["threads/worker" :refer [expose]])) + +(def parse-json (.-parseJson Mldoc)) + +(expose (clj->js {:parse parse-json})) + +(defn init + [] + (println "Parser worker initialized!") + (js/self.addEventListener "message" + (fn [^js e] + (js/postMessage (.. e -data))))) diff --git a/yarn.lock b/yarn.lock index fb266f61e..f0089cc03 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7442,10 +7442,10 @@ thenby@^1.3.4: resolved "https://registry.yarnpkg.com/thenby/-/thenby-1.3.4.tgz#81581f6e1bb324c6dedeae9bfc28e59b1a2201cc" integrity sha512-89Gi5raiWA3QZ4b2ePcEwswC3me9JIg+ToSgtE0JWeCynLnLxNr/f9G+xfo9K+Oj4AFdom8YNJjibIARTJmapQ== -threads@1.6.5: - version "1.6.5" - resolved "https://registry.yarnpkg.com/threads/-/threads-1.6.5.tgz#5cee7f139e3e147c5a64f0134844ee92469932a5" - integrity sha512-yL1NN4qZ25crW8wDoGn7TqbENJ69w3zCEjIGXpbqmQ4I+QHrG8+DLaZVKoX74OQUXWCI2lbbrUxDxAbr1xjDGQ== +threads@^1.7.0: + version "1.7.0" + resolved "https://registry.yarnpkg.com/threads/-/threads-1.7.0.tgz#d9e9627bfc1ef22ada3b733c2e7558bbe78e589c" + integrity sha512-Mx5NBSHX3sQYR6iI9VYbgHKBLisyB+xROCBGjjWm1O9wb9vfLxdaGtmT/KCjUqMsSNW6nERzCW3T6H43LqjDZQ== dependencies: callsites "^3.1.0" debug "^4.2.0"