diff --git a/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java b/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java index 372f919532e..009f5e12815 100644 --- a/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java +++ b/java/src/main/java/ai/rapids/cudf/ORCWriterOptions.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,17 +23,34 @@ * that will be used by the ORC writer to write the file. */ public class ORCWriterOptions extends CompressionMetadataWriterOptions { + private int stripeSizeRows; private ORCWriterOptions(Builder builder) { super(builder); + this.stripeSizeRows = builder.stripeSizeRows; } public static Builder builder() { return new Builder(); } + public int getStripeSizeRows() { + return stripeSizeRows; + } + public static class Builder extends CompressionMetadataWriterOptions.Builder { + // Default ORC stripe size: 1M rows, as defined in cudf/cpp/include/cudf/io/orc.hpp + private int stripeSizeRows = 1000000; + + public Builder withStripeSizeRows(int stripeSizeRows) { + // maximum stripe size cannot be smaller than 512 + if (stripeSizeRows < 512) { + throw new IllegalArgumentException("Maximum stripe size cannot be smaller than 512"); + } + this.stripeSizeRows = stripeSizeRows; + return this; + } public ORCWriterOptions build() { return new ORCWriterOptions(this); diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 298f2cff6f3..422989143c7 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -475,6 +475,7 @@ private static native long writeORCFileBegin(String[] columnNames, int compression, int[] precisions, boolean[] isMapValues, + int stripeSizeRows, String filename) throws CudfException; /** @@ -501,6 +502,7 @@
private static native long writeORCBufferBegin(String[] columnNames, int compression, int[] precisions, boolean[] isMapValues, + int stripeSizeRows, HostBufferConsumer consumer, HostMemoryAllocator hostMemoryAllocator ) throws CudfException; @@ -1823,6 +1825,7 @@ private ORCTableWriter(ORCWriterOptions options, File outputFile) { options.getCompressionType().nativeId, options.getFlatPrecision(), options.getFlatIsMap(), + options.getStripeSizeRows(), outputFile.getAbsolutePath())); this.consumer = null; } @@ -1838,6 +1841,7 @@ private ORCTableWriter(ORCWriterOptions options, HostBufferConsumer consumer, options.getCompressionType().nativeId, options.getFlatPrecision(), options.getFlatIsMap(), + options.getStripeSizeRows(), consumer, hostMemoryAllocator)); this.consumer = consumer; } diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 50c6ae842f4..e1b487b1f7c 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -2480,6 +2480,7 @@ Java_ai_rapids_cudf_Table_writeORCBufferBegin(JNIEnv* env, jint j_compression, jintArray j_precisions, jbooleanArray j_is_map, + jint j_stripe_size_rows, jobject consumer, jobject host_memory_allocator) { @@ -2535,6 +2536,7 @@ Java_ai_rapids_cudf_Table_writeORCBufferBegin(JNIEnv* env, .enable_statistics(ORC_STATISTICS_ROW_GROUP) .key_value_metadata(kv_metadata) .compression_statistics(stats) + .stripe_size_rows(j_stripe_size_rows) .build(); auto writer_ptr = std::make_unique(opts); cudf::jni::native_orc_writer_handle* ret = new cudf::jni::native_orc_writer_handle( @@ -2555,6 +2557,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin(JNIEnv* env, jint j_compression, jintArray j_precisions, jbooleanArray j_is_map, + jint j_stripe_size_rows, jstring j_output_path) { JNI_NULL_CHECK(env, j_col_names, "null columns", 0); @@ -2606,6 +2609,7 @@ JNIEXPORT long JNICALL Java_ai_rapids_cudf_Table_writeORCFileBegin(JNIEnv* env, 
.enable_statistics(ORC_STATISTICS_ROW_GROUP) .key_value_metadata(kv_metadata) .compression_statistics(stats) + .stripe_size_rows(j_stripe_size_rows) .build(); auto writer_ptr = std::make_unique(opts); cudf::jni::native_orc_writer_handle* ret =