First pass at main graph-parser ns

- Copied tests pass!
- Still a number of TODOs left
pull/5420/head
Gabriel Horner 2022-05-18 17:51:07 -04:00
parent 7d00b546a0
commit 3bc2479181
5 changed files with 254 additions and 56 deletions

View File

@ -2,8 +2,6 @@
(:refer-clojure :exclude [load-file])
(:require ["/frontend/utils" :as utils]
[borkdude.rewrite-edn :as rewrite]
[cljs-time.coerce :as tc]
[cljs-time.core :as t]
[cljs.core.async.interop :refer [<p!]]
[clojure.core.async :as async]
[frontend.config :as config]
@ -11,16 +9,14 @@
[frontend.fs :as fs]
[frontend.fs.nfs :as nfs]
[frontend.handler.common :as common-handler]
[logseq.graph-parser.extract :as extract]
[frontend.handler.ui :as ui-handler]
[frontend.state :as state]
[frontend.util :as util]
[logseq.graph-parser.util :as gp-util]
[logseq.graph-parser.config :as gp-config]
[lambdaisland.glogi :as log]
[promesa.core :as p]
[frontend.mobile.util :as mobile]
[clojure.set :as set]))
[logseq.graph-parser :as graph-parser]))
;; TODO: extract all git ops using a channel
@ -91,10 +87,27 @@
(when (not= file current-file)
current-file))))
(defn- get-delete-blocks
  "Returns distinct tx data retracting the file's existing blocks and, when a
  first page was extracted, that page's blocks. Also publishes an error
  notification when the page already lives in another file."
  [repo-url first-page file]
  (let [page-retractions (when first-page
                           (db/delete-page-blocks repo-url (:block/name first-page)))
        retractions (distinct
                     (concat (db/delete-file-blocks! repo-url file)
                             page-retractions))]
    ;; TODO: Remove
    (when (seq retractions) (prn :DELETE-BLOCKS (count retractions)))
    (when-let [current-file (page-exists-in-another-file repo-url first-page file)]
      (when (not= file current-file)
        (state/pub-event! [:notification/show
                           {:content (str "Page already exists with another file: " current-file ", current file: " file)
                            :status :error
                            :clear? false}])))
    retractions))
(defn reset-file!
([repo-url file content]
(reset-file! repo-url file content false))
([repo-url file content new-graph?]
(reset-file! repo-url file content {}))
([repo-url file content options]
(let [electron-local-repo? (and (util/electron?)
(config/local-db? repo-url))
file (cond
@ -118,52 +131,18 @@
file)
file (gp-util/path-normalize file)
new? (nil? (db/entity [:file/path file]))]
(db/set-file-content! repo-url file content)
(let [format (gp-util/get-format file)
file-content [{:file/path file}]
tx (if (contains? gp-config/mldoc-support-formats format)
(let [[pages blocks]
(extract/extract-blocks-pages
file
content
{:user-config (state/get-config)
:date-formatter (state/get-date-formatter)
:page-name-order (state/page-name-order)
:block-pattern (config/get-block-pattern format)
:supported-formats (config/supported-formats)
:db (db/get-db (state/get-current-repo))})
first-page (first pages)
delete-blocks (->
(concat
(db/delete-file-blocks! repo-url file)
(when first-page (db/delete-page-blocks repo-url (:block/name first-page))))
(distinct))
_ (when-let [current-file (page-exists-in-another-file repo-url first-page file)]
(when (not= file current-file)
(let [error (str "Page already exists with another file: " current-file ", current file: " file)]
(state/pub-event! [:notification/show
{:content error
:status :error
:clear? false}]))))
block-ids (map (fn [block] {:block/uuid (:block/uuid block)}) blocks)
block-refs-ids (->> (mapcat :block/refs blocks)
(filter (fn [ref] (and (vector? ref)
(= :block/uuid (first ref)))))
(map (fn [ref] {:block/uuid (second ref)}))
(seq))
;; To prevent "unique constraint" on datascript
block-ids (set/union (set block-ids) (set block-refs-ids))
pages (extract/with-ref-pages pages blocks)
pages-index (map #(select-keys % [:block/name]) pages)]
;; does order matter?
(concat file-content pages-index delete-blocks pages block-ids blocks))
file-content)
tx (concat tx [(let [t (tc/to-long (t/now))] ;; TODO: use file system timestamp?
(cond->
{:file/path file}
new?
(assoc :file/created-at t)))])]
(db/transact! repo-url tx (when new-graph? {:new-graph? true}))))))
(graph-parser/parse-file
(db/get-db repo-url false)
file
content
(merge options
{:new? new?
:delete-blocks-fn (partial get-delete-blocks repo-url)
:extract-options {:user-config (state/get-config)
:date-formatter (state/get-date-formatter)
:page-name-order (state/page-name-order)
:block-pattern (config/get-block-pattern (gp-util/get-format file))
:supported-formats (config/supported-formats)}})))))
;; TODO: Remove this function in favor of `alter-files`
(defn alter-file

View File

@ -0,0 +1,74 @@
(ns ^:nbb-compatible logseq.graph-parser
"Main ns for parsing graph from source files"
(:require [datascript.core :as d]
[logseq.graph-parser.extract :as extract]
[logseq.graph-parser.util :as gp-util]
[logseq.graph-parser.date-time-util :as date-time-util]
[logseq.graph-parser.config :as gp-config]
[frontend.db-schema :as db-schema]
[frontend.db.default :as default-db]
[clojure.set :as set]))
(defn- db-set-file-content!
  "Transacts the file's path and content into db, with :skip-refresh? metadata.
  Modified copy of frontend.db.model/db-set-file-content!"
  [db path content]
  (d/transact! db
               [{:file/path path
                 :file/content content}]
               {:skip-refresh? true}))
;; TODO: Reuse from frontend.config
(def supported-formats
  "File extensions, as keywords, that the graph parser will accept."
  #{:adoc :asciidoc :bmp :c :clj :css :dat :edn :erl :ex :excalidraw :gif
    :html :ico :java :jpeg :jpg :js :json :markdown :md :ml :org :php :png
    :rb :rst :svg :ts :txt :webp :yml})
(defn parse-file
  "Parse file and save parsed data to the given db.

  Options:
  * :new? - Whether the file is new to the db; when true the tx adds a
    :file/created-at timestamp. Defaults to true.
  * :new-graph? - Passed through as transact metadata {:new-graph? true}.
    Defaults to false.
  * :delete-blocks-fn - Fn of [first-page file] returning tx data that
    retracts the file's previous blocks before re-adding them. Defaults to
    producing no tx data.
  * :extract-options - Options map passed to extract/extract-blocks-pages;
    the current db value is merged in under :db."
  [db file content {:keys [new? delete-blocks-fn new-graph? extract-options]
                    :or {new? true
                         new-graph? false
                         delete-blocks-fn (constantly [])
                         ;; TODO: Reuse these options from state and config
                         extract-options {:block-pattern "-"
                                          :date-formatter "MMM do, yyyy"
                                          :supported-formats supported-formats}}}]
  ;; Save raw content first so the file entity exists before block tx
  (db-set-file-content! db file content)
  (let [format (gp-util/get-format file)
        file-content [{:file/path file}]
        tx (if (contains? gp-config/mldoc-support-formats format)
             ;; Parseable format: extract pages + blocks and build the full tx
             (let [[pages blocks]
                   (extract/extract-blocks-pages
                    file
                    content
                    (merge extract-options {:db @db}))
                   delete-blocks (delete-blocks-fn (first pages) file)
                   block-ids (map (fn [block] {:block/uuid (:block/uuid block)}) blocks)
                   ;; Uuids of blocks referenced via [:block/uuid ...] ref tuples
                   block-refs-ids (->> (mapcat :block/refs blocks)
                                       (filter (fn [ref] (and (vector? ref)
                                                              (= :block/uuid (first ref)))))
                                       (map (fn [ref] {:block/uuid (second ref)}))
                                       (seq))
                   ;; To prevent "unique constraint" on datascript
                   block-ids (set/union (set block-ids) (set block-refs-ids))
                   pages (extract/with-ref-pages pages blocks)
                   pages-index (map #(select-keys % [:block/name]) pages)]
               ;; does order matter?
               (concat file-content pages-index delete-blocks pages block-ids blocks))
             ;; Unparseable format: only record the file path
             file-content)
        tx (concat tx [(cond-> {:file/path file}
                         new?
                         ;; TODO: use file system timestamp?
                         (assoc :file/created-at (date-time-util/time-ms)))])]
    (d/transact! db (gp-util/remove-nils tx) (when new-graph? {:new-graph? true}))))
(defn init-db
  "Creates and returns a datascript conn seeded with the graph schema version
  and the built-in pages."
  []
  ;; TODO: Reuse code from frontend
  (doto (d/create-conn db-schema/schema)
    (d/transact! [{:schema/version db-schema/version}])
    (d/transact! default-db/built-in-pages)))
(defn parse
  "Parses the given files into db. Each file is a map with :file/path and
  :file/content keys. An optional options map is passed through to parse-file
  for every file; the previous behavior (empty options) is the default, so
  existing callers are unaffected."
  ([db files]
   (parse db files {}))
  ([db files options]
   (doseq [{:file/keys [path content]} files]
     (parse-file db path content options))))

View File

@ -132,4 +132,13 @@
(->> (d/q '[:find (pull ?n [*]) :where [?b :block/namespace ?n]] db)
(map (comp :block/original-name first))
set))
"Has correct namespaces"))))
"Has correct namespaces"))
(testing "Delete previous file data when re-parsing a file"
(repo-handler/parse-files-and-load-to-db! test-helper/test-db
(filter #(re-find #"pages/tutorial.md" (:file/path %))
files)
{:re-render? false})
(is (= 206 (count files)) "Correct file count")
(is (= 40888 (count (d/datoms db :eavt))) "Correct datoms count")
)))

View File

@ -7,7 +7,8 @@
[logseq.graph-parser.mldoc-test]
[logseq.graph-parser.block-test]
[logseq.graph-parser.property-test]
[logseq.graph-parser.extract-test]))
[logseq.graph-parser.extract-test]
[logseq.graph-parser-test]))
(defmethod cljs.test/report [:cljs.test/default :end-run-tests] [m]
(when-not (cljs.test/successful? m)
@ -23,4 +24,5 @@
'logseq.graph-parser.text-test
'logseq.graph-parser.property-test
'logseq.graph-parser.block-test
'logseq.graph-parser.extract-test))
'logseq.graph-parser.extract-test
'logseq.graph-parser-test))

View File

@ -0,0 +1,134 @@
(ns logseq.graph-parser-test
"TODO: Should I reuse repo-test or split it?"
(:require [cljs.test :refer [deftest is testing]]
[logseq.graph-parser :as graph-parser]
[frontend.test.docs-graph-helper :as docs-graph-helper]
[datascript.core :as d]))
(defn- get-top-block-properties
  "Returns a map of property keyword -> number of non-page blocks using it,
  limited to properties used by at least 5 blocks."
  [db]
  (let [blocks (map first
                    (d/q '[:find (pull ?b [*])
                           :where
                           [?b :block/properties]
                           [(missing? $ ?b :block/name)]]
                         db))
        property-counts (frequencies (mapcat (comp keys :block/properties) blocks))]
    (into {} (filter #(>= (val %) 5)) property-counts)))
(defn- get-all-page-properties
  "Returns a map of property keyword -> number of pages using it."
  [db]
  (->> (d/q '[:find (pull ?b [*])
              :where
              [?b :block/properties]
              [?b :block/name]]
            db)
       (mapcat (comp keys :block/properties first))
       frequencies))
;; Integration test that tests parsing a large graph like the docs graph
(deftest ^:integration parse-and-load-files-to-db
  ;; Clones the real logseq docs graph (if needed), parses every file into a
  ;; fresh db via graph-parser, then asserts a battery of counts and
  ;; query-based stats against the resulting datoms.
  ;; NOTE(review): the magic numbers below are pinned to a specific state of
  ;; the docs graph — presumably they need updating when the graph changes.
  (let [graph-dir "src/test/docs"
        _ (docs-graph-helper/clone-docs-repo-if-not-exists graph-dir)
        files (docs-graph-helper/build-graph-files graph-dir)
        conn (graph-parser/init-db)
        ; _ (repo-handler/parse-files-and-load-to-db! test-helper/test-db files {:re-render? false})
        _ (graph-parser/parse conn files)
        db @conn]
    ;; Counts assertions help check for no major regressions. These counts should
    ;; only increase over time as the docs graph rarely has deletions
    (testing "Counts"
      (is (= 206 (count files)) "Correct file count")
      (is (= 40888 (count (d/datoms db :eavt))) "Correct datoms count")
      ;; Blocks that reference at least one named page
      (is (= 3597
             (ffirst
              (d/q '[:find (count ?b)
                     :where [?b :block/path-refs ?bp] [?bp :block/name]] db)))
          "Correct referenced blocks count")
      ;; Blocks whose content contains an advanced query
      (is (= 21
             (ffirst
              (d/q '[:find (count ?b)
                     :where [?b :block/content ?content]
                     [(clojure.string/includes? ?content "+BEGIN_QUERY")]]
                   db)))
          "Advanced query count"))
    (testing "Query based stats"
      ;; Every page in the db should map back to a file on disk and vice versa
      (is (= (set (map :file/path files))
             (->> (d/q '[:find (pull ?b [* {:block/file [:file/path]}])
                         :where [?b :block/name] [?b :block/file]]
                       db)
                  (map (comp #(get-in % [:block/file :file/path]) first))
                  set))
          "Journal and pages files on disk should equal ones in db")
      (is (= (count (filter #(re-find #"journals/" (:file/path %))
                            files))
             (->> (d/q '[:find (count ?b)
                         :where
                         [?b :block/journal? true]
                         [?b :block/name]
                         [?b :block/file]]
                       db)
                  ffirst))
          "Journal page count on disk equals count in db")
      (is (= {"CANCELED" 2 "DONE" 6 "LATER" 4 "NOW" 5}
             (->> (d/q '[:find (pull ?b [*]) :where [?b :block/marker] ]
                       db)
                  (map first)
                  (group-by :block/marker)
                  (map (fn [[k v]] [k (count v)]))
                  (into {})))
          "Task marker counts")
      (is (= {:markdown 3140 :org 460}
             (->> (d/q '[:find (pull ?b [*]) :where [?b :block/format]] db)
                  (map first)
                  (group-by :block/format)
                  (map (fn [[k v]] [k (count v)]))
                  (into {})))
          "Block format counts")
      (is (= {:title 98 :id 98
              :updated-at 47 :created-at 47
              :collapsed 22
              :card-last-score 6 :card-repeats 6 :card-next-schedule 6
              :card-last-interval 6 :card-ease-factor 6 :card-last-reviewed 6
              :alias 6}
             (get-top-block-properties db))
          "Counts for top block properties")
      (is (= {:title 98
              :alias 6
              :tags 2 :permalink 2
              :name 1 :type 1 :related 1 :sample 1 :click 1 :id 1 :example 1}
             (get-all-page-properties db))
          "Counts for all page properties")
      ;; Per-attribute block counts built with a dynamically-constructed query
      (is (= {:block/scheduled 2
              :block/priority 4
              :block/deadline 1
              :block/collapsed? 22
              :block/heading-level 57
              :block/repeated? 1}
             (->> [:block/scheduled :block/priority :block/deadline :block/collapsed?
                   :block/heading-level :block/repeated?]
                  (map (fn [attr]
                         [attr
                          (ffirst (d/q [:find (list 'count '?b) :where ['?b attr]]
                                       db))]))
                  (into {})))
          "Counts for blocks with common block attributes")
      (is (= #{"term" "setting" "book" "Templates" "Query" "Query/table" "page"}
             (->> (d/q '[:find (pull ?n [*]) :where [?b :block/namespace ?n]] db)
                  (map (comp :block/original-name first))
                  set))
          "Has correct namespaces"))))