From e83b0593a05312c7db994607c91d9ff32c142466 Mon Sep 17 00:00:00 2001 From: ikappaki <34983288+ikappaki@users.noreply.github.com> Date: Fri, 22 Nov 2024 16:56:00 +0000 Subject: [PATCH] custom data readers loader should not recurse further than first subdirectories of sys.path (#1139) Hi, could you please review patch to only have the custom data reader loader only considers up to the first level subdirectories in the `sys.path` entries. It addresses #1135. I've used transducers to improve performance, which required moving the custom data loaders code further down the namespace. The only function that changes is `data-readers-files`, which now only processes the top directory and immediate subdirectories. I've adjusted the a test not to expect any readers to be found further down the subdirectories, andfixed a test which should have referred to `test/test`5`. I've also updated the documentation to indicate the change in behavior. Thanks cc @mitch-kyle --------- Co-authored-by: ikappaki --- CHANGELOG.md | 3 + docs/reader.rst | 2 +- src/basilisp/core.lpy | 194 +++++++++++++++------------ tests/basilisp/test_data_readers.lpy | 7 +- 4 files changed, 112 insertions(+), 94 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 41298ad3..4e2803a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added * Added support for a subset of qualified method syntax introduced in Clojure 1.12 (#1109) +### Changed + * The Custom Data Readers Loader will only now examine the top directory and up to its immediate subdirectories of each `sys.path` entry, instead of recursive descending into every subdirectory, improving start up performance (#1135) + ### Fixed * Fix a bug where tags in data readers were resolved as Vars within syntax quotes, rather than using standard data readers rules (#1129) * Fix a bug where `keyword` and `symbol` functions did not treat string arguments as potentially namespaced (#1131) diff --git a/docs/reader.rst b/docs/reader.rst index 96fc32bb..bcdf3107 100644 --- a/docs/reader.rst +++ b/docs/reader.rst @@ -475,7 +475,7 @@ Custom Data Readers When Basilisp starts it can load data readers from multiple sources. -It will search in :external:py:data:`sys.path` for files named ``data_readers.lpy`` or else ``data_readers.cljc``; each which must contain a mapping of qualified symbol tags to qualified symbols of function vars. +It will search in the top level directory and up to its immediate subdirectories (which typically representing installed modules) of the :external:py:data:`sys.path` entries for files named ``data_readers.lpy`` or else ``data_readers.cljc``; each which must contain a mapping of qualified symbol tags to qualified symbols of function vars. .. code-block:: clojure diff --git a/src/basilisp/core.lpy b/src/basilisp/core.lpy index 484c9fd8..026e9bbb 100644 --- a/src/basilisp/core.lpy +++ b/src/basilisp/core.lpy @@ -7214,95 +7214,6 @@ (.write writer content) nil)) -;;;;;;;;;;;;;;;;;;;;;;;;; -;; Custom Data Readers ;; -;;;;;;;;;;;;;;;;;;;;;;;;; - -(defmulti ^:private make-custom-data-readers - (fn [obj ^:no-warn-when-unused metadata] - (type obj))) - -(defmethod make-custom-data-readers :default - [obj metadata] - (throw (ex-info "Not a valid data-reader map" (assoc metadata :object obj)))) - -(defmethod make-custom-data-readers basilisp.lang.interfaces/IPersistentMap - [mappings metadata] - (reduce (fn [m [k v]] - (let [v' (if (qualified-symbol? v) - (intern (create-ns (symbol (namespace v))) - (symbol (name v))) - v)] - (cond - (not (qualified-symbol? k)) - (throw - (ex-info "Invalid tag in data-readers. Expected qualified symbol." - (merge metadata {:form k}))) - - (not (ifn? v')) - (throw (ex-info "Invalid reader function in data-readers" - (merge metadata {:form v}))) - - :else - (assoc m (with-meta k metadata) v')))) - mappings - mappings)) - -(defmethod make-custom-data-readers importlib.metadata/EntryPoint - [entry-point metadata] - (make-custom-data-readers (.load entry-point) - (assoc metadata - :basilisp.entry-point/name (.-name entry-point) - :basilisp.entry-point/group (.-group entry-point)))) - -(defmethod make-custom-data-readers pathlib/Path - [file metadata] - (make-custom-data-readers - (with-open [rdr (basilisp.io/reader file)] - (read (if (.endswith (name file) "cljc") - {:eof nil :read-cond :allow} - {:eof nil}) - rdr)) - (assoc metadata :file (str file)))) - -(defn- data-readers-entry-points [] - (when (#{"true" "t" "1" "yes" "y"} (.lower - (os/getenv - "BASILISP_USE_DATA_READERS_ENTRY_POINT" - "true"))) - (#?@(:lpy39- [get (.entry_points importlib/metadata)] - :lpy310+ [.entry_points importlib/metadata ** :group]) - "basilisp_data_readers"))) - -(defn- data-readers-files [] - (->> sys/path - (mapcat file-seq) - (filter (comp #{"data_readers.lpy" "data_readers.cljc"} name)) - (group-by #(.-parent %)) - vals - ;; Only load one data readers file per directory and prefer - ;; `data_readers.lpy` to `data_readers.cljc` - (map #(first (sort-by name > %))))) - -(defn- load-data-readers [] - (alter-var-root - #'*data-readers* - (fn [mappings additional-mappings] - (reduce (fn [m [k v]] - (if (not= (get m k v) v) - (throw (ex-info "Conflicting data-reader mapping" - (merge (meta k) {:conflict k, :mappings m}))) - (assoc m k v))) - mappings - additional-mappings)) - ;; Can't use `read` when altering `*data-readers*` so do reads ahead of time - (->> (concat (data-readers-files) - (data-readers-entry-points)) - (mapcat #(make-custom-data-readers % nil)) - doall))) - -(load-data-readers) - ;;;;;;;;;;;;;;;;; ;; Transducers ;; ;;;;;;;;;;;;;;;;; @@ -7496,6 +7407,111 @@ ([result input] (reduce rf result input)))) +;;;;;;;;;;;;;;;;;;;;;;;;; +;; Custom Data Readers ;; +;;;;;;;;;;;;;;;;;;;;;;;;; + +(defmulti ^:private make-custom-data-readers + (fn [obj ^:no-warn-when-unused metadata] + (type obj))) + +(defmethod make-custom-data-readers :default + [obj metadata] + (throw (ex-info "Not a valid data-reader map" (assoc metadata :object obj)))) + +(defmethod make-custom-data-readers basilisp.lang.interfaces/IPersistentMap + [mappings metadata] + (reduce (fn [m [k v]] + (let [v' (if (qualified-symbol? v) + (intern (create-ns (symbol (namespace v))) + (symbol (name v))) + v)] + (cond + (not (qualified-symbol? k)) + (throw + (ex-info "Invalid tag in data-readers. Expected qualified symbol." + (merge metadata {:form k}))) + + (not (ifn? v')) + (throw (ex-info "Invalid reader function in data-readers" + (merge metadata {:form v}))) + + :else + (assoc m (with-meta k metadata) v')))) + mappings + mappings)) + +(defmethod make-custom-data-readers importlib.metadata/EntryPoint + [entry-point metadata] + (make-custom-data-readers (.load entry-point) + (assoc metadata + :basilisp.entry-point/name (.-name entry-point) + :basilisp.entry-point/group (.-group entry-point)))) + +(defmethod make-custom-data-readers pathlib/Path + [file metadata] + (make-custom-data-readers + (with-open [rdr (basilisp.io/reader file)] + (read (if (.endswith (name file) "cljc") + {:eof nil :read-cond :allow} + {:eof nil}) + rdr)) + (assoc metadata :file (str file)))) + +(defn- data-readers-entry-points [] + (when (#{"true" "t" "1" "yes" "y"} (.lower + (os/getenv + "BASILISP_USE_DATA_READERS_ENTRY_POINT" + "true"))) + (#?@(:lpy39- [get (.entry_points importlib/metadata)] + :lpy310+ [.entry_points importlib/metadata ** :group]) + "basilisp_data_readers"))) + +(defn- data-readers-files + "Return a list of :external:py:class:`pathlib/Path`s pointing to + `data_readers.lpy` and `data_readers.cljc` found in each top + directory and up to its immediate subdirectories of + the :external:py:data:`sys.path` entries. The list is ordered such + that entries with the filename `data_readers.lpy` appear first." + [] + (->> sys/path + (mapcat (fn [dir] (when (os.path/isdir dir) + (-> (comp + (mapcat (fn [^os/DirEntry entry] + (if (.is-dir entry) + ;; immediate subdirectory + (os/scandir (.-path entry)) + ;; top level file + [entry]))) + (filter #(.is-file %)) + (map #(pathlib/Path (.-path %))) + (filter (comp #{"data_readers.lpy" "data_readers.cljc"} name))) + (eduction (os/scandir dir)))))) + (group-by #(.-parent %)) + vals + ;; Only load one data readers file per directory and prefer + ;; `data_readers.lpy` to `data_readers.cljc` + (map #(first (sort-by name > %))))) + +(defn- load-data-readers [] + (alter-var-root + #'*data-readers* + (fn [mappings additional-mappings] + (reduce (fn [m [k v]] + (if (not= (get m k v) v) + (throw (ex-info "Conflicting data-reader mapping" + (merge (meta k) {:conflict k, :mappings m}))) + (assoc m k v))) + mappings + additional-mappings)) + ;; Can't use `read` when altering `*data-readers*` so do reads ahead of time + (->> (concat (data-readers-files) + (data-readers-entry-points)) + (mapcat #(make-custom-data-readers % nil)) + doall))) + +(load-data-readers) + ;;;;;;;;;;;;;;;;;;;;;;;; ;; Stateful Iteration ;; ;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/tests/basilisp/test_data_readers.lpy b/tests/basilisp/test_data_readers.lpy index f22ffb2e..cadfc369 100644 --- a/tests/basilisp/test_data_readers.lpy +++ b/tests/basilisp/test_data_readers.lpy @@ -77,12 +77,11 @@ (is (= #'custom-data-reader (get *data-readers* 'test/test2))) (is (= '(x) (read-string "#test/test2 x")))) - (testing "from submodule" + (testing "from submodule are not considered" (make-path-file ["my_module" "submodule" "data_readers.lpy"] (pr-str {'test/test3 `custom-data-reader})) (#'basilisp.core/load-data-readers) - (is (= #'custom-data-reader (get *data-readers* 'test/test3))) - (is (= '(x) (read-string "#test/test3 x")))) + (is (= nil (get *data-readers* 'test/test3)))) (testing "from cljc file" (make-path-file ["from_cljc" "data_readers.cljc"] @@ -97,7 +96,7 @@ (make-path-file ["prefer_lpy_to_cljc" "data_readers.cljc"] (pr-str {'test/test5 (constantly :fail)})) (#'basilisp.core/load-data-readers) - (is (= #'custom-data-reader (get *data-readers* 'test/test3))) + (is (= #'custom-data-reader (get *data-readers* 'test/test5))) (is (= '(x) (read-string "#test/test5 x")))) (testing "does not load clj file"