diff --git a/dev/archery/archery/integration/runner.py b/dev/archery/archery/integration/runner.py
index 95394cdd37d02..819f978a862c1 100644
--- a/dev/archery/archery/integration/runner.py
+++ b/dev/archery/archery/integration/runner.py
@@ -135,9 +135,10 @@ def _gold_tests(self, gold_dir):
skip.add("Rust")
if prefix == '2.0.0-compression':
skip.add("Go")
- skip.add("Java")
skip.add("JS")
skip.add("Rust")
+ if name == 'zstd':
+ skip.add("Java")
yield datagen.File(name, None, None, skip=skip, path=out_path)
def _run_test_cases(self, producer, consumer, case_runner,
diff --git a/java/compression/pom.xml b/java/compression/pom.xml
new file mode 100644
index 0000000000000..9a6ab3508ed40
--- /dev/null
+++ b/java/compression/pom.xml
@@ -0,0 +1,51 @@
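+<?xml version="1.0"?>
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
+  license agreements. See the NOTICE file distributed with this work for additional
+  information regarding copyright ownership. The ASF licenses this file to
+  You under the Apache License, Version 2.0 (the "License"); you may not use
+  this file except in compliance with the License. You may obtain a copy of
+  the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
+  by applicable law or agreed to in writing, software distributed under the
+  License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+  OF ANY KIND, either express or implied. See the License for the specific
+  language governing permissions and limitations under the License. -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.arrow</groupId>
+    <artifactId>arrow-java-root</artifactId>
+    <version>4.0.0-SNAPSHOT</version>
+  </parent>
+  <artifactId>arrow-compression</artifactId>
+  <name>Arrow Compression</name>
+  <description>(Experimental/Contrib) A library for working with the compression/decompression of Arrow data.</description>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.arrow</groupId>
+      <artifactId>arrow-vector</artifactId>
+      <version>${project.version}</version>
+      <classifier>${arrow.vector.classifier}</classifier>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.arrow</groupId>
+      <artifactId>arrow-memory-core</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.arrow</groupId>
+      <artifactId>arrow-memory-unsafe</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-compress</artifactId>
+      <version>1.20</version>
+    </dependency>
+    <dependency>
+      <groupId>io.netty</groupId>
+      <artifactId>netty-common</artifactId>
+    </dependency>
+  </dependencies>
+</project>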
diff --git a/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java b/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java
new file mode 100644
index 0000000000000..4becbbe78c964
--- /dev/null
+++ b/java/compression/src/main/java/org/apache/arrow/compression/CommonsCompressionFactory.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.compression;
+
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+
+/**
+ * {@link CompressionCodec.Factory} that creates codecs backed by the Apache Commons library.
+ */
+public class CommonsCompressionFactory implements CompressionCodec.Factory {
+
+  /** Shared instance of this factory. */
+  public static final CommonsCompressionFactory INSTANCE = new CommonsCompressionFactory();
+
+  /**
+   * Creates the codec for the given compression type.
+   *
+   * @param codecType the requested compression type; only LZ4_FRAME is handled here
+   * @return a new {@link Lz4CompressionCodec} for LZ4_FRAME
+   * @throws IllegalArgumentException for every other compression type
+   */
+  @Override
+  public CompressionCodec createCodec(CompressionUtil.CodecType codecType) {
+    final CompressionCodec codec;
+    switch (codecType) {
+      case LZ4_FRAME:
+        codec = new Lz4CompressionCodec();
+        break;
+      default:
+        throw new IllegalArgumentException("Compression type not supported: " + codecType);
+    }
+    return codec;
+  }
+}
diff --git a/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java b/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java
new file mode 100644
index 0000000000000..af34a8fdd706f
--- /dev/null
+++ b/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.compression;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.util.MemoryUtil;
+import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.commons.compress.compressors.lz4.FramedLZ4CompressorInputStream;
+import org.apache.commons.compress.compressors.lz4.FramedLZ4CompressorOutputStream;
+import org.apache.commons.compress.utils.IOUtils;
+
+import io.netty.util.internal.PlatformDependent;
+
+/**
+ * Compression codec for the LZ4 algorithm.
+ *
+ * <p>A compressed buffer starts with an 8-byte prefix that holds the length of the uncompressed
+ * data, followed by the LZ4 frame. The length is byte-swapped on big-endian platforms before it
+ * is written and after it is read. A prefix of 0 stands for empty data, and a prefix equal to
+ * {@link CompressionUtil#NO_COMPRESSION_LENGTH} marks a body that is stored uncompressed.
+ */
+public class Lz4CompressionCodec implements CompressionCodec {
+
+  /**
+   * Compresses the content of a buffer, up to its writer index, into an LZ4 frame.
+   *
+   * <p>An empty input produces a buffer that holds only a zero length prefix. When the LZ4 frame
+   * is larger than the input, the result comes from {@link CompressionUtil#packageRawBuffer}
+   * instead. The input buffer is closed before this method returns normally.
+   *
+   * @param allocator the allocator for the returned buffer
+   * @param uncompressedBuffer the data to compress; at most {@link Integer#MAX_VALUE} bytes
+   * @return a new buffer with the length prefix followed by the body
+   * @throws RuntimeException if the LZ4 stream fails with an {@link IOException}
+   */
+  @Override
+  public ArrowBuf compress(BufferAllocator allocator, ArrowBuf uncompressedBuffer) {
+    Preconditions.checkArgument(uncompressedBuffer.writerIndex() <= Integer.MAX_VALUE,
+        "The uncompressed buffer size exceeds the integer limit");
+
+    if (uncompressedBuffer.writerIndex() == 0L) {
+      // shortcut for empty buffer
+      ArrowBuf compressedBuffer = allocator.buffer(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH);
+      compressedBuffer.setLong(0, 0);
+      compressedBuffer.writerIndex(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH);
+      uncompressedBuffer.close();
+      return compressedBuffer;
+    }
+
+    try {
+      ArrowBuf compressedBuffer = doCompress(allocator, uncompressedBuffer);
+      long compressedLength = compressedBuffer.writerIndex() - CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH;
+      if (compressedLength > uncompressedBuffer.writerIndex()) {
+        // compressed buffer is larger, send the raw buffer
+        compressedBuffer.close();
+        compressedBuffer = CompressionUtil.packageRawBuffer(allocator, uncompressedBuffer);
+      }
+
+      uncompressedBuffer.close();
+      return compressedBuffer;
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Copies the input onto the heap, runs it through the LZ4 frame compressor and stores the length
+   * prefix plus the compressed bytes in a newly allocated buffer. The input buffer is not closed.
+   */
+  private ArrowBuf doCompress(BufferAllocator allocator, ArrowBuf uncompressedBuffer) throws IOException {
+    byte[] inBytes = new byte[(int) uncompressedBuffer.writerIndex()];
+    PlatformDependent.copyMemory(uncompressedBuffer.memoryAddress(), inBytes, 0, uncompressedBuffer.writerIndex());
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    try (InputStream in = new ByteArrayInputStream(inBytes);
+         OutputStream out = new FramedLZ4CompressorOutputStream(baos)) {
+      IOUtils.copy(in, out);
+    }
+
+    byte[] outBytes = baos.toByteArray();
+
+    ArrowBuf compressedBuffer = allocator.buffer(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH + outBytes.length);
+
+    long uncompressedLength = uncompressedBuffer.writerIndex();
+    if (!MemoryUtil.LITTLE_ENDIAN) {
+      uncompressedLength = Long.reverseBytes(uncompressedLength);
+    }
+    // first 8 bytes reserved for uncompressed length, according to the specification
+    compressedBuffer.setLong(0, uncompressedLength);
+
+    PlatformDependent.copyMemory(
+        outBytes, 0, compressedBuffer.memoryAddress() + CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH, outBytes.length);
+    compressedBuffer.writerIndex(CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH + outBytes.length);
+    return compressedBuffer;
+  }
+
+  /**
+   * Decompresses a buffer that consists of a length prefix followed by an LZ4 frame.
+   *
+   * <p>A zero prefix yields {@link BufferAllocator#getEmpty()}, and a prefix equal to
+   * {@link CompressionUtil#NO_COMPRESSION_LENGTH} is handed to
+   * {@link CompressionUtil#extractUncompressedBuffer} without closing the input here. On the
+   * other successful paths the input buffer is closed before returning.
+   *
+   * @param allocator the allocator for the returned buffer
+   * @param compressedBuffer the length-prefixed data; at most {@link Integer#MAX_VALUE} bytes
+   * @return a buffer holding the decompressed data
+   * @throws IllegalArgumentException if the buffer is shorter than the prefix, or if the prefix
+   *     is neither a supported marker nor a length between 0 and {@link Integer#MAX_VALUE}
+   * @throws RuntimeException if the LZ4 stream fails with an {@link IOException}, or if it does
+   *     not decode to exactly the number of bytes announced by the prefix
+   */
+  @Override
+  public ArrowBuf decompress(BufferAllocator allocator, ArrowBuf compressedBuffer) {
+    Preconditions.checkArgument(compressedBuffer.writerIndex() <= Integer.MAX_VALUE,
+        "The compressed buffer size exceeds the integer limit");
+
+    Preconditions.checkArgument(compressedBuffer.writerIndex() >= CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH,
+        "Not enough data to decompress.");
+
+    long decompressedLength = readUncompressedLength(compressedBuffer);
+
+    if (decompressedLength == 0L) {
+      // shortcut for empty buffer
+      compressedBuffer.close();
+      return allocator.getEmpty();
+    }
+
+    if (decompressedLength == CompressionUtil.NO_COMPRESSION_LENGTH) {
+      // no compression
+      return CompressionUtil.extractUncompressedBuffer(compressedBuffer);
+    }
+
+    // The prefix is used as an int capacity below, so reject corrupt values up front.
+    Preconditions.checkArgument(decompressedLength > 0L && decompressedLength <= Integer.MAX_VALUE,
+        "Invalid uncompressed length: " + decompressedLength);
+
+    try {
+      ArrowBuf decompressedBuffer = doDecompress(allocator, compressedBuffer);
+      compressedBuffer.close();
+      return decompressedBuffer;
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Decodes the LZ4 frame that follows the length prefix into a newly allocated buffer. The
+   * input buffer is not closed.
+   *
+   * @throws IOException if decoding fails or yields a length different from the prefix
+   */
+  private ArrowBuf doDecompress(BufferAllocator allocator, ArrowBuf compressedBuffer) throws IOException {
+    long decompressedLength = readUncompressedLength(compressedBuffer);
+
+    byte[] inBytes = new byte[(int) (compressedBuffer.writerIndex() - CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH)];
+    PlatformDependent.copyMemory(
+        compressedBuffer.memoryAddress() + CompressionUtil.SIZE_OF_UNCOMPRESSED_LENGTH, inBytes, 0, inBytes.length);
+    ByteArrayOutputStream out = new ByteArrayOutputStream((int) decompressedLength);
+    try (InputStream in = new FramedLZ4CompressorInputStream(new ByteArrayInputStream(inBytes))) {
+      IOUtils.copy(in, out);
+    }
+
+    byte[] outBytes = out.toByteArray();
+    if (outBytes.length != decompressedLength) {
+      // checked before allocating, so there is nothing to release on failure
+      throw new IOException(
+          "Expected " + decompressedLength + " decompressed bytes, but got " + outBytes.length);
+    }
+
+    ArrowBuf decompressedBuffer = allocator.buffer(outBytes.length);
+    PlatformDependent.copyMemory(outBytes, 0, decompressedBuffer.memoryAddress(), outBytes.length);
+    // the writer index follows the verified number of bytes that were actually copied
+    decompressedBuffer.writerIndex(outBytes.length);
+    return decompressedBuffer;
+  }
+
+  /**
+   * Reads the uncompressed-length prefix from the first 8 bytes of a compressed buffer.
+   */
+  private static long readUncompressedLength(ArrowBuf compressedBuffer) {
+    long length = compressedBuffer.getLong(0);
+    // undo the byte swap that is applied to the prefix on big-endian platforms
+    return MemoryUtil.LITTLE_ENDIAN ? length : Long.reverseBytes(length);
+  }
+
+  @Override
+  public CompressionUtil.CodecType getCodecType() {
+    return CompressionUtil.CodecType.LZ4_FRAME;
+  }
+}
diff --git a/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java b/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java
new file mode 100644
index 0000000000000..52f24e20533ea
--- /dev/null
+++ b/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.compression;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.util.AutoCloseables;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VarBinaryVector;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.compression.CompressionCodec;
+import org.apache.arrow.vector.compression.CompressionUtil;
+import org.apache.arrow.vector.compression.NoCompressionCodec;
+import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * Test cases for {@link CompressionCodec}s.
+ */
+@RunWith(Parameterized.class)
+public class TestCompressionCodec {
+
+ private final CompressionCodec codec;
+
+ private BufferAllocator allocator;
+
+ private final int vectorLength;
+
+ @Before
+ public void init() {
+ allocator = new RootAllocator(Integer.MAX_VALUE);
+ }
+
+ @After
+ public void terminate() {
+ allocator.close();
+ }
+
+ public TestCompressionCodec(CompressionUtil.CodecType type, int vectorLength, CompressionCodec codec) {
+ this.codec = codec;
+ this.vectorLength = vectorLength;
+ }
+
+ @Parameterized.Parameters(name = "codec = {0}, length = {1}")
+ public static Collection