From bd611503f7b646bea271634ea934ab3a52c7faae Mon Sep 17 00:00:00 2001
From: Chris Nuernberger
Date: Sat, 9 Mar 2024 11:07:45 -0700
Subject: [PATCH] Revert "Possible fix for https://github.com/techascent/tech.parquet/issues/2"

This reverts commit c89f2b68c046b57a446d8101ca85037244389e56.
---
Reviewer notes (free text between the "---" marker and the diff is commentary
and is not part of the commit message):

* This patch contains a single hunk in src/tech/v3/libs/parquet.clj, inside
  the private function row-group->ds.
* The removed lines wrap the function body in (with-open [page page] ...);
  the added lines contain the same let form without that wrapper, so after
  this patch row-group->ds no longer closes `page` itself.
* Apart from the dropped with-open form and its closing paren, the added
  lines repeat the removed ones token for token, one nesting level shallower.
* NOTE(review): whether `page` is closed anywhere else is not visible in this
  patch -- confirm against the callers of row-group->ds.
* NOTE(review): leading whitespace inside the hunk follows conventional
  Clojure alignment and has not been checked against the target file --
  verify before applying, or apply with whitespace tolerance.

 src/tech/v3/libs/parquet.clj | 55 ++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 28 deletions(-)

diff --git a/src/tech/v3/libs/parquet.clj b/src/tech/v3/libs/parquet.clj
index 5f58cdee..8ce4c744 100644
--- a/src/tech/v3/libs/parquet.clj
+++ b/src/tech/v3/libs/parquet.clj
@@ -641,34 +641,33 @@ org.xerial.snappy/snappy-java {:mvn/version \"1.1.8.4\"}
 
 (defn- row-group->ds
   [^PageReadStore page ^ParquetFileReader reader options block-metadata]
-  (with-open [page page]
-    (let [file-metadata (.getFileMetaData reader)
-          schema (.getSchema file-metadata)
-          col-read-store (ColumnReadStoreImpl. page (group-converter) schema
-                                               (.getCreatedBy file-metadata))
-          n-rows (.getRowCount page)
-          parse-context (io-context/options->parser-fn options nil)
-          key-fn (or (:key-fn options) identity)
-          column-allowlist (when (seq (or (:column-allowlist options) (:column-whitelist options)))
-                             (set (or (:column-allowlist options) (:column-whitelist options))))
-          column-blocklist (when (seq (or (:column-blocklist options) (:column-blacklist options)))
-                             (set (or (:column-blocklist options) (:column-blacklist options))))
-          col-parser (partial parse-parquet-column column-allowlist column-blocklist
-                              col-read-store n-rows parse-context key-fn)
-          initial-columns (->> (map col-parser
-                                    (.getColumns schema)
-                                    (:columns block-metadata))
-                               (remove nil?)
-                               (vec))
-          rep-counts (->> (map (comp :row-rep-counts meta) initial-columns)
-                          (remove nil?)
-                          (vec))
-          columns (if (seq rep-counts)
-                    (scatter-rows initial-columns rep-counts)
-                    ;;handle repetitions
-                    initial-columns)
-          retval (ds-impl/new-dataset options columns)]
-      (vary-meta retval assoc :parquet-metadata (dissoc block-metadata :columns)))))
+  (let [file-metadata (.getFileMetaData reader)
+        schema (.getSchema file-metadata)
+        col-read-store (ColumnReadStoreImpl. page (group-converter) schema
+                                             (.getCreatedBy file-metadata))
+        n-rows (.getRowCount page)
+        parse-context (io-context/options->parser-fn options nil)
+        key-fn (or (:key-fn options) identity)
+        column-allowlist (when (seq (or (:column-allowlist options) (:column-whitelist options)))
+                           (set (or (:column-allowlist options) (:column-whitelist options))))
+        column-blocklist (when (seq (or (:column-blocklist options) (:column-blacklist options)))
+                           (set (or (:column-blocklist options) (:column-blacklist options))))
+        col-parser (partial parse-parquet-column column-allowlist column-blocklist
+                            col-read-store n-rows parse-context key-fn)
+        initial-columns (->> (map col-parser
+                                  (.getColumns schema)
+                                  (:columns block-metadata))
+                             (remove nil?)
+                             (vec))
+        rep-counts (->> (map (comp :row-rep-counts meta) initial-columns)
+                        (remove nil?)
+                        (vec))
+        columns (if (seq rep-counts)
+                  (scatter-rows initial-columns rep-counts)
+                  ;;handle repetitions
+                  initial-columns)
+        retval (ds-impl/new-dataset options columns)]
+    (vary-meta retval assoc :parquet-metadata (dissoc block-metadata :columns))))
 
 
 (defn- read-next-dataset