mirror of https://github.com/logseq/logseq
First pass at main graph-parser ns
- Copied tests pass!
- Still a number of TODOs left

pull/5420/head
parent
7d00b546a0
commit
3bc2479181
|
@ -2,8 +2,6 @@
|
|||
(:refer-clojure :exclude [load-file])
|
||||
(:require ["/frontend/utils" :as utils]
|
||||
[borkdude.rewrite-edn :as rewrite]
|
||||
[cljs-time.coerce :as tc]
|
||||
[cljs-time.core :as t]
|
||||
[cljs.core.async.interop :refer [<p!]]
|
||||
[clojure.core.async :as async]
|
||||
[frontend.config :as config]
|
||||
|
@ -11,16 +9,14 @@
|
|||
[frontend.fs :as fs]
|
||||
[frontend.fs.nfs :as nfs]
|
||||
[frontend.handler.common :as common-handler]
|
||||
[logseq.graph-parser.extract :as extract]
|
||||
[frontend.handler.ui :as ui-handler]
|
||||
[frontend.state :as state]
|
||||
[frontend.util :as util]
|
||||
[logseq.graph-parser.util :as gp-util]
|
||||
[logseq.graph-parser.config :as gp-config]
|
||||
[lambdaisland.glogi :as log]
|
||||
[promesa.core :as p]
|
||||
[frontend.mobile.util :as mobile]
|
||||
[clojure.set :as set]))
|
||||
[logseq.graph-parser :as graph-parser]))
|
||||
|
||||
;; TODO: extract all git ops using a channel
|
||||
|
||||
|
@ -91,10 +87,27 @@
|
|||
(when (not= file current-file)
|
||||
current-file))))
|
||||
|
||||
(defn- get-delete-blocks
  "Returns the distinct concatenation of `db/delete-file-blocks!` for `file`
  and, when `first-page` is non-nil, `db/delete-page-blocks` for that page's
  `:block/name`.

  As a side effect, publishes a sticky error notification when
  `page-exists-in-another-file` reports a file other than `file` for the page."
  [repo-url first-page file]
  (let [file-blocks (db/delete-file-blocks! repo-url file)
        page-blocks (when first-page
                      (db/delete-page-blocks repo-url (:block/name first-page)))
        delete-blocks (distinct (concat file-blocks page-blocks))
        current-file (page-exists-in-another-file repo-url first-page file)]
    ;; TODO: Remove
    (when (seq delete-blocks)
      (prn :DELETE-BLOCKS (count delete-blocks)))
    (when (and current-file (not= file current-file))
      (state/pub-event!
       [:notification/show
        {:content (str "Page already exists with another file: " current-file ", current file: " file)
         :status :error
         :clear? false}]))
    delete-blocks))
|
||||
|
||||
(defn reset-file!
|
||||
([repo-url file content]
|
||||
(reset-file! repo-url file content false))
|
||||
([repo-url file content new-graph?]
|
||||
(reset-file! repo-url file content {}))
|
||||
([repo-url file content options]
|
||||
(let [electron-local-repo? (and (util/electron?)
|
||||
(config/local-db? repo-url))
|
||||
file (cond
|
||||
|
@ -118,52 +131,18 @@
|
|||
file)
|
||||
file (gp-util/path-normalize file)
|
||||
new? (nil? (db/entity [:file/path file]))]
|
||||
(db/set-file-content! repo-url file content)
|
||||
(let [format (gp-util/get-format file)
|
||||
file-content [{:file/path file}]
|
||||
tx (if (contains? gp-config/mldoc-support-formats format)
|
||||
(let [[pages blocks]
|
||||
(extract/extract-blocks-pages
|
||||
file
|
||||
content
|
||||
{:user-config (state/get-config)
|
||||
:date-formatter (state/get-date-formatter)
|
||||
:page-name-order (state/page-name-order)
|
||||
:block-pattern (config/get-block-pattern format)
|
||||
:supported-formats (config/supported-formats)
|
||||
:db (db/get-db (state/get-current-repo))})
|
||||
first-page (first pages)
|
||||
delete-blocks (->
|
||||
(concat
|
||||
(db/delete-file-blocks! repo-url file)
|
||||
(when first-page (db/delete-page-blocks repo-url (:block/name first-page))))
|
||||
(distinct))
|
||||
_ (when-let [current-file (page-exists-in-another-file repo-url first-page file)]
|
||||
(when (not= file current-file)
|
||||
(let [error (str "Page already exists with another file: " current-file ", current file: " file)]
|
||||
(state/pub-event! [:notification/show
|
||||
{:content error
|
||||
:status :error
|
||||
:clear? false}]))))
|
||||
block-ids (map (fn [block] {:block/uuid (:block/uuid block)}) blocks)
|
||||
block-refs-ids (->> (mapcat :block/refs blocks)
|
||||
(filter (fn [ref] (and (vector? ref)
|
||||
(= :block/uuid (first ref)))))
|
||||
(map (fn [ref] {:block/uuid (second ref)}))
|
||||
(seq))
|
||||
;; To prevent "unique constraint" on datascript
|
||||
block-ids (set/union (set block-ids) (set block-refs-ids))
|
||||
pages (extract/with-ref-pages pages blocks)
|
||||
pages-index (map #(select-keys % [:block/name]) pages)]
|
||||
;; does order matter?
|
||||
(concat file-content pages-index delete-blocks pages block-ids blocks))
|
||||
file-content)
|
||||
tx (concat tx [(let [t (tc/to-long (t/now))] ;; TODO: use file system timestamp?
|
||||
(cond->
|
||||
{:file/path file}
|
||||
new?
|
||||
(assoc :file/created-at t)))])]
|
||||
(db/transact! repo-url tx (when new-graph? {:new-graph? true}))))))
|
||||
(graph-parser/parse-file
|
||||
(db/get-db repo-url false)
|
||||
file
|
||||
content
|
||||
(merge options
|
||||
{:new? new?
|
||||
:delete-blocks-fn (partial get-delete-blocks repo-url)
|
||||
:extract-options {:user-config (state/get-config)
|
||||
:date-formatter (state/get-date-formatter)
|
||||
:page-name-order (state/page-name-order)
|
||||
:block-pattern (config/get-block-pattern (gp-util/get-format file))
|
||||
:supported-formats (config/supported-formats)}})))))
|
||||
|
||||
;; TODO: Remove this function in favor of `alter-files`
|
||||
(defn alter-file
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
(ns ^:nbb-compatible logseq.graph-parser
|
||||
"Main ns for parsing graph from source files"
|
||||
(:require [datascript.core :as d]
|
||||
[logseq.graph-parser.extract :as extract]
|
||||
[logseq.graph-parser.util :as gp-util]
|
||||
[logseq.graph-parser.date-time-util :as date-time-util]
|
||||
[logseq.graph-parser.config :as gp-config]
|
||||
[frontend.db-schema :as db-schema]
|
||||
[frontend.db.default :as default-db]
|
||||
[clojure.set :as set]))
|
||||
|
||||
(defn- db-set-file-content!
  "Modified copy of frontend.db.model/db-set-file-content!

  Transacts one entity map carrying `:file/path` and `:file/content` into
  `db`, tagging the transaction with `{:skip-refresh? true}` metadata."
  [db path content]
  (d/transact! db
               [{:file/path path
                 :file/content content}]
               {:skip-refresh? true}))
|
||||
|
||||
;; TODO: Reuse from frontend.config
|
||||
;; Fallback value for the `:supported-formats` extract option of `parse-file`.
;; Entries are kept in alphabetical order to make additions easy to review.
(def supported-formats
  #{:adoc :asciidoc
    :bmp
    :c :clj :css
    :dat
    :edn :erl :ex :excalidraw
    :gif
    :html
    :ico
    :java :jpeg :jpg :js :json
    :markdown :md :ml
    :org
    :php :png
    :rb :rst
    :svg
    :ts :txt
    :webp
    :yml})
|
||||
|
||||
(defn parse-file
  "Parse file and save parsed data to the given db.

  `db` is both dereferenced (for extraction) and handed to `d/transact!`.
  Options map:
  - `:new?` (default true): when truthy, `:file/created-at` is set on the
    file entity
  - `:new-graph?` (default false): when truthy, `{:new-graph? true}` is sent
    as transaction metadata
  - `:delete-blocks-fn` (default returns []): called with the first extracted
    page and `file`; its result is spliced into the transaction
  - `:extract-options`: passed to `extract/extract-blocks-pages` with `:db`
    merged in"
  [db file content {:keys [new? delete-blocks-fn new-graph? extract-options]
                    :or {new? true
                         new-graph? false
                         delete-blocks-fn (constantly [])
                         ;; TODO: Reuse these options from state and config
                         extract-options {:block-pattern "-"
                                          :date-formatter "MMM do, yyyy"
                                          :supported-formats supported-formats}}}]
  ;; File content is stored first, so the db snapshot used for extraction
  ;; below already contains it.
  (db-set-file-content! db file content)
  (let [file-tx [{:file/path file}]
        parseable? (contains? gp-config/mldoc-support-formats
                              (gp-util/get-format file))
        content-tx (if-not parseable?
                     file-tx
                     (let [[extracted-pages blocks] (extract/extract-blocks-pages
                                                     file
                                                     content
                                                     (merge extract-options {:db @db}))
                           delete-blocks (delete-blocks-fn (first extracted-pages) file)
                           own-ids (for [block blocks]
                                     {:block/uuid (:block/uuid block)})
                           ref-ids (for [ref (mapcat :block/refs blocks)
                                         :when (and (vector? ref)
                                                    (= :block/uuid (first ref)))]
                                     {:block/uuid (second ref)})
                           ;; To prevent "unique constraint" on datascript
                           block-ids (set/union (set own-ids) (set ref-ids))
                           pages (extract/with-ref-pages extracted-pages blocks)
                           pages-index (map #(select-keys % [:block/name]) pages)]
                       ;; does order matter?
                       (concat file-tx pages-index delete-blocks pages block-ids blocks)))
        created-tx (cond-> {:file/path file}
                     ;; TODO: use file system timestamp?
                     new? (assoc :file/created-at (date-time-util/time-ms)))
        tx-meta (when new-graph? {:new-graph? true})]
    (d/transact! db
                 (gp-util/remove-nils (concat content-tx [created-tx]))
                 tx-meta)))
|
||||
|
||||
(defn init-db
  "Creates a DataScript connection with `db-schema/schema`, transacts the
  schema version and then `default-db/built-in-pages`, and returns the
  connection."
  []
  ;; TODO: Reuse code from frontend
  (doto (d/create-conn db-schema/schema)
    (d/transact! [{:schema/version db-schema/version}])
    (d/transact! default-db/built-in-pages)))
|
||||
|
||||
(defn parse
  "Calls `parse-file` with default options on every entry of `files`, using
  each entry's `:file/path` and `:file/content`. Returns nil."
  [db files]
  (run! (fn [{:file/keys [path content]}]
          (parse-file db path content {}))
        files))
|
|
@ -132,4 +132,13 @@
|
|||
(->> (d/q '[:find (pull ?n [*]) :where [?b :block/namespace ?n]] db)
|
||||
(map (comp :block/original-name first))
|
||||
set))
|
||||
"Has correct namespaces"))))
|
||||
"Has correct namespaces"))
|
||||
|
||||
(testing "Delete previous file data when re-parsing a file"
|
||||
(repo-handler/parse-files-and-load-to-db! test-helper/test-db
|
||||
(filter #(re-find #"pages/tutorial.md" (:file/path %))
|
||||
files)
|
||||
{:re-render? false})
|
||||
(is (= 206 (count files)) "Correct file count")
|
||||
(is (= 40888 (count (d/datoms db :eavt))) "Correct datoms count")
|
||||
)))
|
||||
|
|
|
@ -7,7 +7,8 @@
|
|||
[logseq.graph-parser.mldoc-test]
|
||||
[logseq.graph-parser.block-test]
|
||||
[logseq.graph-parser.property-test]
|
||||
[logseq.graph-parser.extract-test]))
|
||||
[logseq.graph-parser.extract-test]
|
||||
[logseq.graph-parser-test]))
|
||||
|
||||
(defmethod cljs.test/report [:cljs.test/default :end-run-tests] [m]
|
||||
(when-not (cljs.test/successful? m)
|
||||
|
@ -23,4 +24,5 @@
|
|||
'logseq.graph-parser.text-test
|
||||
'logseq.graph-parser.property-test
|
||||
'logseq.graph-parser.block-test
|
||||
'logseq.graph-parser.extract-test))
|
||||
'logseq.graph-parser.extract-test
|
||||
'logseq.graph-parser-test))
|
||||
|
|
|
@ -0,0 +1,134 @@
|
|||
(ns logseq.graph-parser-test
|
||||
"TODO: Should I reuse repo-test or split it?"
|
||||
(:require [cljs.test :refer [deftest is testing]]
|
||||
[logseq.graph-parser :as graph-parser]
|
||||
[frontend.test.docs-graph-helper :as docs-graph-helper]
|
||||
[datascript.core :as d]))
|
||||
|
||||
(defn- get-top-block-properties
  "Returns a map of property key -> number of entities carrying it, counted
  over entities that have `:block/properties` but no `:block/name`. Only keys
  seen on at least 5 entities are kept."
  [db]
  (let [entities (map first
                      (d/q '[:find (pull ?b [*])
                             :where
                             [?b :block/properties]
                             [(missing? $ ?b :block/name)]]
                           db))
        key-counts (frequencies (mapcat (comp keys :block/properties) entities))]
    (into {}
          (filter (fn [[_ n]] (>= n 5)))
          key-counts)))
|
||||
|
||||
(defn- get-all-page-properties
  "Returns a map of property key -> number of entities carrying it, counted
  over entities that have both `:block/properties` and `:block/name`."
  [db]
  (let [entities (map first
                      (d/q '[:find (pull ?b [*])
                             :where
                             [?b :block/properties]
                             [?b :block/name]]
                           db))]
    (into {}
          (frequencies (mapcat (comp keys :block/properties) entities)))))
|
||||
|
||||
;; Integration test that test parsing a large graph like docs
|
||||
(deftest ^:integration parse-and-load-files-to-db
  (let [graph-dir "src/test/docs"
        _ (docs-graph-helper/clone-docs-repo-if-not-exists graph-dir)
        files (docs-graph-helper/build-graph-files graph-dir)
        conn (graph-parser/init-db)
        _ (graph-parser/parse conn files)
        db @conn
        ;; Runs an aggregate query and returns its single scalar result
        scalar (fn [query] (ffirst (d/q query db)))
        ;; Runs a one-column pull query and returns the pulled entities
        entities (fn [query] (map first (d/q query db)))]

    ;; Counts assertions help check for no major regressions. These counts should
    ;; only increase over time as the docs graph rarely has deletions
    (testing "Counts"
      (is (= 206 (count files)) "Correct file count")
      (is (= 40888 (count (d/datoms db :eavt))) "Correct datoms count")

      (is (= 3597
             (scalar '[:find (count ?b)
                       :where [?b :block/path-refs ?bp] [?bp :block/name]]))
          "Correct referenced blocks count")
      (is (= 21
             (scalar '[:find (count ?b)
                       :where [?b :block/content ?content]
                       [(clojure.string/includes? ?content "+BEGIN_QUERY")]]))
          "Advanced query count"))

    (testing "Query based stats"
      (is (= (set (map :file/path files))
             (set (map #(get-in % [:block/file :file/path])
                       (entities '[:find (pull ?b [* {:block/file [:file/path]}])
                                   :where [?b :block/name] [?b :block/file]]))))
          "Journal and pages files on disk should equal ones in db")

      (is (= (count (filter #(re-find #"journals/" (:file/path %))
                            files))
             (scalar '[:find (count ?b)
                       :where
                       [?b :block/journal? true]
                       [?b :block/name]
                       [?b :block/file]]))
          "Journal page count on disk equals count in db")

      (is (= {"CANCELED" 2 "DONE" 6 "LATER" 4 "NOW" 5}
             (frequencies
              (map :block/marker
                   (entities '[:find (pull ?b [*]) :where [?b :block/marker]]))))
          "Task marker counts")

      (is (= {:markdown 3140 :org 460}
             (frequencies
              (map :block/format
                   (entities '[:find (pull ?b [*]) :where [?b :block/format]]))))
          "Block format counts")

      (is (= {:title 98 :id 98
              :updated-at 47 :created-at 47
              :collapsed 22
              :card-last-score 6 :card-repeats 6 :card-next-schedule 6
              :card-last-interval 6 :card-ease-factor 6 :card-last-reviewed 6
              :alias 6}
             (get-top-block-properties db))
          "Counts for top block properties")

      (is (= {:title 98
              :alias 6
              :tags 2 :permalink 2
              :name 1 :type 1 :related 1 :sample 1 :click 1 :id 1 :example 1}
             (get-all-page-properties db))
          "Counts for all page properties")

      (is (= {:block/scheduled 2
              :block/priority 4
              :block/deadline 1
              :block/collapsed? 22
              :block/heading-level 57
              :block/repeated? 1}
             (into {}
                   (for [attr [:block/scheduled :block/priority :block/deadline
                               :block/collapsed? :block/heading-level :block/repeated?]]
                     [attr (scalar [:find (list 'count '?b) :where ['?b attr]])])))
          "Counts for blocks with common block attributes")

      (is (= #{"term" "setting" "book" "Templates" "Query" "Query/table" "page"}
             (set (map :block/original-name
                       (entities '[:find (pull ?n [*]) :where [?b :block/namespace ?n]]))))
          "Has correct namespaces"))))
|
Loading…
Reference in New Issue