diff --git a/src/main/java/nc/opt/util/J7zip.java b/src/main/java/nc/opt/util/J7zip.java index c369041..bd11a69 100644 --- a/src/main/java/nc/opt/util/J7zip.java +++ b/src/main/java/nc/opt/util/J7zip.java @@ -54,22 +54,18 @@ public Integer call() throws Exception { J7zip.decompress(names[1], names[2], password, command == Command.x); } else if (command == Command.a) { - if (password != null) { - System.err.println("password protection is only applicable on decompression"); - return 1; - } if (names.length < 2) { System.err.println("Destination archive file name or source dir(s)/file(s) missing"); return 1; } - J7zip.compress(names[1], Stream.of(names).skip(2).map(File::new).toArray(File[]::new)); + J7zip.compress(names[1], password, Stream.of(names).skip(2).map(File::new).toArray(File[]::new)); } return 0; } - public static void compress(String name, File... files) throws IOException { - try (SevenZOutputFile out = new SevenZOutputFile(new File(name))) { + public static void compress(String name, String password, File... files) throws IOException { + try (SevenZOutputFile out = new SevenZOutputFile(new File(name), password != null ? password.toCharArray() : null)) { for (File file : files) { addToArchiveCompression(out, file, file.getParent()); } diff --git a/src/main/java/org/apache/commons/compress/MemoryLimitException.java b/src/main/java/org/apache/commons/compress/MemoryLimitException.java new file mode 100644 index 0000000..49e17f0 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/MemoryLimitException.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress; + +import java.io.IOException; + +/** + * If a stream checks for estimated memory allocation, and the estimate + * goes above the memory limit, this is thrown. This can also be thrown + * if a stream tries to allocate a byte array that is larger than + * the allowable limit. + * + * @since 1.14 + */ +public class MemoryLimitException extends IOException { + + private static final long serialVersionUID = 1L; + + //long instead of int to account for overflow for corrupt files + private final long memoryNeededInKb; + private final int memoryLimitInKb; + + public MemoryLimitException(final long memoryNeededInKb, final int memoryLimitInKb) { + super(buildMessage(memoryNeededInKb, memoryLimitInKb)); + this.memoryNeededInKb = memoryNeededInKb; + this.memoryLimitInKb = memoryLimitInKb; + } + + public MemoryLimitException(final long memoryNeededInKb, final int memoryLimitInKb, final Exception e) { + super(buildMessage(memoryNeededInKb, memoryLimitInKb), e); + this.memoryNeededInKb = memoryNeededInKb; + this.memoryLimitInKb = memoryLimitInKb; + } + + public long getMemoryNeededInKb() { + return memoryNeededInKb; + } + + public int getMemoryLimitInKb() { + return memoryLimitInKb; + } + + private static String buildMessage(final long memoryNeededInKb, final int memoryLimitInKb) { + return memoryNeededInKb + " kb of memory would be needed; limit was " + + memoryLimitInKb + " kb. 
" + + "If the file is not corrupt, consider increasing the memory limit."; + } +} diff --git a/src/main/java/org/apache/commons/compress/PasswordRequiredException.java b/src/main/java/org/apache/commons/compress/PasswordRequiredException.java new file mode 100644 index 0000000..d876b96 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/PasswordRequiredException.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress; + +import java.io.IOException; + +/** + * Exception thrown when trying to read an encrypted entry or file without + * configuring a password. + * @since 1.10 + */ +public class PasswordRequiredException extends IOException { + + private static final long serialVersionUID = 1391070005491684483L; + + /** + * Create a new exception. + * + * @param name name of the archive containing encrypted streams or + * the encrypted file. 
+ */ + public PasswordRequiredException(final String name) { + super("Cannot read encrypted content from " + name + " without a password."); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/ArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/ArchiveEntry.java new file mode 100644 index 0000000..d5fa746 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/ArchiveEntry.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.archivers; + +import java.util.Date; + +/** + * Represents an entry of an archive. + */ +public interface ArchiveEntry { + + /** + * Gets the name of the entry in this archive. May refer to a file or directory or other item. + * + *

This method returns the raw name as it is stored inside of the archive.

+ * + * @return The name of this entry in the archive. + */ + String getName(); + + /** + * Gets the uncompressed size of this entry. May be -1 (SIZE_UNKNOWN) if the size is unknown + * + * @return the uncompressed size of this entry. + */ + long getSize(); + + /** Special value indicating that the size is unknown */ + long SIZE_UNKNOWN = -1; + + /** + * Returns true if this entry refers to a directory. + * + * @return true if this entry refers to a directory. + */ + boolean isDirectory(); + + /** + * Gets the last modified date of this entry. + * + * @return the last modified date of this entry. + * @since 1.1 + */ + Date getLastModifiedDate(); +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/AES256Options.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/AES256Options.java new file mode 100644 index 0000000..2aee902 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/AES256Options.java @@ -0,0 +1,17 @@ +package org.apache.commons.compress.archivers.sevenz; + +import java.security.SecureRandom; + +public class AES256Options { + byte[] password; + byte[] salt = new byte[0]; // empty salt: key stretching relies on numCyclesPower + byte[] iv = new byte[16]; + int numCyclesPower = 19; + + public AES256Options(byte[] password) { + this.password = password; + // SECURITY FIX: the IV must be unpredictable; seeding java.util.Random with the + // password hash produced the same IV for every archive with the same password. + new SecureRandom().nextBytes(iv); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/AES256SHA256Coder.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/AES256SHA256Coder.java new file mode 100644 index 0000000..02bfde6 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/AES256SHA256Coder.java @@ -0,0 +1,238 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.security.GeneralSecurityException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Arrays; +import javax.crypto.Cipher; +import javax.crypto.CipherInputStream; +import javax.crypto.CipherOutputStream; +import javax.crypto.SecretKey; +import javax.crypto.spec.IvParameterSpec; +import javax.crypto.spec.SecretKeySpec; +import org.apache.commons.compress.PasswordRequiredException; + +class AES256SHA256Coder extends CoderBase { + + AES256SHA256Coder() { + super(AES256Options.class); + } + + @Override + InputStream decode( + final String archiveName, + final InputStream in, + final long uncompressedLength, + final Coder coder, + final byte[] passwordBytes, + final int maxMemoryLimitInKb + ) { + return new InputStream() { + private boolean isInitialized; + private CipherInputStream cipherInputStream; + + private CipherInputStream init() throws IOException { + if (isInitialized) { + return cipherInputStream; + } + if (coder.properties == null) { + throw new IOException("Missing AES256 properties in " + archiveName); + } + if (coder.properties.length < 2) { + throw new IOException("AES256 properties too short in " + archiveName); + } + final int byte0 = 0xff & coder.properties[0]; 
+ final int numCyclesPower = byte0 & 0x3f; + final int byte1 = 0xff & coder.properties[1]; + final int ivSize = ((byte0 >> 6) & 1) + (byte1 & 0x0f); + final int saltSize = ((byte0 >> 7) & 1) + (byte1 >> 4); + if (2 + saltSize + ivSize > coder.properties.length) { + throw new IOException("Salt size + IV size too long in " + archiveName); + } + final byte[] salt = new byte[saltSize]; + System.arraycopy(coder.properties, 2, salt, 0, saltSize); + final byte[] iv = new byte[16]; + System.arraycopy(coder.properties, 2 + saltSize, iv, 0, ivSize); + + if (passwordBytes == null) { + throw new PasswordRequiredException(archiveName); + } + final byte[] aesKeyBytes; + if (numCyclesPower == 0x3f) { + aesKeyBytes = new byte[32]; + System.arraycopy(salt, 0, aesKeyBytes, 0, saltSize); + System.arraycopy( + passwordBytes, + 0, + aesKeyBytes, + saltSize, + Math.min(passwordBytes.length, aesKeyBytes.length - saltSize) + ); + } else { + final MessageDigest digest; + try { + digest = MessageDigest.getInstance("SHA-256"); + } catch (final NoSuchAlgorithmException noSuchAlgorithmException) { + throw new IOException("SHA-256 is unsupported by your Java implementation", noSuchAlgorithmException); + } + final byte[] extra = new byte[8]; + for (long j = 0; j < (1L << numCyclesPower); j++) { + digest.update(salt); + digest.update(passwordBytes); + digest.update(extra); + for (int k = 0; k < extra.length; k++) { + ++extra[k]; + if (extra[k] != 0) { + break; + } + } + } + aesKeyBytes = digest.digest(); + } + + // SECURITY FIX: removed System.out.println(aesKeyBytes) - never print key material + + final SecretKey aesKey = new SecretKeySpec(aesKeyBytes, "AES"); + try { + final Cipher cipher = Cipher.getInstance("AES/CBC/NoPadding"); + cipher.init(Cipher.DECRYPT_MODE, aesKey, new IvParameterSpec(iv)); + cipherInputStream = new CipherInputStream(in, cipher); + isInitialized = true; + return cipherInputStream; + } catch (final GeneralSecurityException generalSecurityException) { + throw new IOException( + "Decryption error " + "(do you have the JCE
Unlimited Strength Jurisdiction Policy Files installed?)", + generalSecurityException + ); + } + } + + @Override + public int read() throws IOException { + return init().read(); + } + + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + return init().read(b, off, len); + } + + @Override + public void close() throws IOException { + if (cipherInputStream != null) { + cipherInputStream.close(); + } + } + }; + } + + @Override + OutputStream encode(OutputStream out, Object options) throws IOException { + return new OutputStream() { + private boolean isInitialized; + private CipherOutputStream cipherOutputStream; + + private CipherOutputStream init() throws IOException { + if (isInitialized) { + return cipherOutputStream; + } + + AES256Options opts = (AES256Options) options; + + final MessageDigest digest; + try { + digest = MessageDigest.getInstance("SHA-256"); + } catch (final NoSuchAlgorithmException noSuchAlgorithmException) { + throw new IOException("SHA-256 is unsupported by your Java implementation", noSuchAlgorithmException); + } + final byte[] extra = new byte[8]; + for (long j = 0; j < (1L << opts.numCyclesPower); j++) { + digest.update(opts.salt); + digest.update(opts.password); + digest.update(extra); + for (int k = 0; k < extra.length; k++) { + ++extra[k]; + if (extra[k] != 0) { + break; + } + } + } + final byte[] aesKeyBytes = digest.digest(); + // SECURITY FIX: removed System.out.println(aesKeyBytes) - never print key material + + final SecretKey aesKey = new SecretKeySpec(aesKeyBytes, "AES"); + try { + final Cipher cipher = Cipher.getInstance("AES/CBC/NoPadding"); + cipher.init(Cipher.ENCRYPT_MODE, aesKey, new IvParameterSpec(opts.iv)); + cipherOutputStream = new CipherOutputStream(out, cipher); + isInitialized = true; + return cipherOutputStream; + } catch (final GeneralSecurityException generalSecurityException) { + throw new IOException( + "Encryption error " + "(do you have the JCE Unlimited Strength Jurisdiction Policy Files installed?)", +
generalSecurityException + ); + } + } + + @Override + public void write(int b) throws IOException { + init().write(b); + } + + @Override + public void write(byte[] b) throws IOException { + init().write(b); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + init().write(b, off, len); + } + + @Override + public void close() throws IOException { + if (cipherOutputStream != null) { + cipherOutputStream.close(); + } + } + }; + } + + @Override + byte[] getOptionsAsProperties(Object options) throws IOException { + AES256Options opts = (AES256Options) options; + byte[] props = new byte[2 + opts.salt.length + opts.iv.length]; + + props[0] = (byte) (opts.numCyclesPower | (opts.salt.length == 0 ? 0 : (1 << 7)) | (opts.iv.length == 0 ? 0 : (1 << 6))); + + if (opts.salt.length != 0 || opts.iv.length != 0) { + props[1] = (byte) (((opts.salt.length == 0 ? 0 : opts.salt.length - 1) << 4) | (opts.iv.length == 0 ? 0 : opts.iv.length - 1)); + + System.arraycopy(opts.salt, 0, props, 2, opts.salt.length); + System.arraycopy(opts.iv, 0, props, 2 + opts.salt.length, opts.iv.length); + } + + return props; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/Archive.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/Archive.java new file mode 100644 index 0000000..7d3d86a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/Archive.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.util.BitSet; + +class Archive { + /// Offset from beginning of file + SIGNATURE_HEADER_SIZE to packed streams. + long packPos; + /// Size of each packed stream. + long[] packSizes = new long[0]; + /// Whether each particular packed streams has a CRC. + BitSet packCrcsDefined; + /// CRCs for each packed stream, valid only if that packed stream has one. + long[] packCrcs; + /// Properties of solid compression blocks. + Folder[] folders = Folder.EMPTY_FOLDER_ARRAY; + /// Temporary properties for non-empty files (subsumed into the files array later). + SubStreamsInfo subStreamsInfo; + /// The files and directories in the archive. + SevenZArchiveEntry[] files = SevenZArchiveEntry.EMPTY_SEVEN_Z_ARCHIVE_ENTRY_ARRAY; + /// Mapping between folders, files and streams. + StreamMap streamMap; + + @Override + public String toString() { + return "Archive with packed streams starting at offset " + packPos + + ", " + lengthOf(packSizes) + " pack sizes, " + lengthOf(packCrcs) + + " CRCs, " + lengthOf(folders) + " folders, " + lengthOf(files) + + " files and " + streamMap; + } + + private static String lengthOf(final long[] a) { + return a == null ? "(null)" : String.valueOf(a.length); + } + + private static String lengthOf(final Object[] a) { + return a == null ? 
"(null)" : String.valueOf(a.length); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/BindPair.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/BindPair.java new file mode 100644 index 0000000..2710b72 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/BindPair.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +class BindPair { + long inIndex; + long outIndex; + + @Override + public String toString() { + return "BindPair binding input " + inIndex + " to output " + outIndex; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/BoundedSeekableByteChannelInputStream.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/BoundedSeekableByteChannelInputStream.java new file mode 100644 index 0000000..ca8b754 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/BoundedSeekableByteChannelInputStream.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.channels.SeekableByteChannel; + +class BoundedSeekableByteChannelInputStream extends InputStream { + private static final int MAX_BUF_LEN = 8192; + private final ByteBuffer buffer; + private final SeekableByteChannel channel; + private long bytesRemaining; + + public BoundedSeekableByteChannelInputStream(final SeekableByteChannel channel, + final long size) { + this.channel = channel; + this.bytesRemaining = size; + if (size < MAX_BUF_LEN && size > 0) { + buffer = ByteBuffer.allocate((int) size); + } else { + buffer = ByteBuffer.allocate(MAX_BUF_LEN); + } + } + + @Override + public int read() throws IOException { + if (bytesRemaining > 0) { + --bytesRemaining; + final int read = read(1); + if (read < 0) { + return read; + } + return buffer.get() & 0xff; + } + return -1; + } + + /** + * Reads up to len bytes of data from the input stream into an array of bytes. + * + *

An attempt is made to read as many as len bytes, but a + * smaller number may be read. The number of bytes actually read + * is returned as an integer.

+ * + *

This implementation may return 0 if the underlying {@link + * SeekableByteChannel} is non-blocking and currently hasn't got + * any bytes available.

+ */ + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + if (len == 0) { + return 0; + } + if (bytesRemaining <= 0) { + return -1; + } + int bytesToRead = len; + if (bytesToRead > bytesRemaining) { + bytesToRead = (int) bytesRemaining; + } + final int bytesRead; + final ByteBuffer buf; + if (bytesToRead <= buffer.capacity()) { + buf = buffer; + bytesRead = read(bytesToRead); + } else { + buf = ByteBuffer.allocate(bytesToRead); + bytesRead = channel.read(buf); + buf.flip(); + } + if (bytesRead >= 0) { + buf.get(b, off, bytesRead); + bytesRemaining -= bytesRead; + } + return bytesRead; + } + + private int read(final int len) throws IOException { + buffer.rewind().limit(len); + final int read = channel.read(buffer); + buffer.flip(); + return read; + } + + @Override + public void close() { + // the nested channel is controlled externally + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/CLI.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/CLI.java new file mode 100644 index 0000000..dfa1c58 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/CLI.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.File; +import java.io.IOException; + +public class CLI { + + + private enum Mode { + LIST("Analysing") { + @Override + public void takeAction(final SevenZFile archive, final SevenZArchiveEntry entry) { + System.out.print(entry.getName()); + if (entry.isDirectory()) { + System.out.print(" dir"); + } else { + System.out.print(" " + entry.getCompressedSize() + + "/" + entry.getSize()); + } + if (entry.getHasLastModifiedDate()) { + System.out.print(" " + entry.getLastModifiedDate()); + } else { + System.out.print(" no last modified date"); + } + if (!entry.isDirectory()) { + System.out.println(" " + getContentMethods(entry)); + } else { + System.out.println(); + } + } + + private String getContentMethods(final SevenZArchiveEntry entry) { + final StringBuilder sb = new StringBuilder(); + boolean first = true; + for (final SevenZMethodConfiguration m : entry.getContentMethods()) { + if (!first) { + sb.append(", "); + } + first = false; + sb.append(m.getMethod()); + if (m.getOptions() != null) { + sb.append("(").append(m.getOptions()).append(")"); + } + } + return sb.toString(); + } + }; + + private final String message; + Mode(final String message) { + this.message = message; + } + public String getMessage() { + return message; + } + public abstract void takeAction(SevenZFile archive, SevenZArchiveEntry entry) + throws IOException; + } + + public static void main(final String[] args) throws Exception { + if (args.length == 0) { + usage(); + return; + } + final Mode mode = grabMode(args); + System.out.println(mode.getMessage() + " " + args[0]); + final File f = new File(args[0]); + if (!f.isFile()) { + System.err.println(f + " doesn't exist or is a directory"); return; // FIX: don't try to open a missing file + } + try (final SevenZFile archive = new SevenZFile(f)) { + SevenZArchiveEntry ae; + while((ae=archive.getNextEntry()) !=
null) { + mode.takeAction(archive, ae); + } + } + } + + private static void usage() { + System.out.println("Parameters: archive-name [list]"); + } + + private static Mode grabMode(final String[] args) { + if (args.length < 2) { + return Mode.LIST; + } + return Enum.valueOf(Mode.class, args[1].toUpperCase()); + } + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/Coder.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/Coder.java new file mode 100644 index 0000000..e8cbe25 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/Coder.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.sevenz; + +class Coder { + byte[] decompressionMethodId; + long numInStreams; + long numOutStreams; + byte[] properties; +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/CoderBase.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/CoderBase.java new file mode 100644 index 0000000..fa3c5f4 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/CoderBase.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.commons.compress.utils.ByteUtils; + +/** + * Base Codec class. + */ +abstract class CoderBase { + private final Class[] acceptableOptions; + /** + * @param acceptableOptions types that can be used as options for this codec. + */ + protected CoderBase(final Class... acceptableOptions) { + this.acceptableOptions = acceptableOptions; + } + + /** + * @return whether this method can extract options from the given object. 
+ */ + boolean canAcceptOptions(final Object opts) { + for (final Class c : acceptableOptions) { + if (c.isInstance(opts)) { + return true; + } + } + return false; + } + + /** + * @return property-bytes to write in a Folder block + */ + byte[] getOptionsAsProperties(final Object options) throws IOException { + return ByteUtils.EMPTY_BYTE_ARRAY; + } + + /** + * @return configuration options that have been used to create the given InputStream from the given Coder + */ + Object getOptionsFromCoder(final Coder coder, final InputStream in) throws IOException { + return null; + } + + /** + * @return a stream that reads from in using the configured coder and password. + */ + abstract InputStream decode(final String archiveName, + final InputStream in, long uncompressedLength, + final Coder coder, byte[] password, int maxMemoryLimitInKb) throws IOException; + + /** + * @return a stream that writes to out using the given configuration. + */ + OutputStream encode(final OutputStream out, final Object options) throws IOException { + throw new UnsupportedOperationException("Method doesn't support writing"); + } + + /** + * If the option represents a number, return its integer + * value, otherwise return the given default value. + */ + protected static int numberOptionOrDefault(final Object options, final int defaultValue) { + return options instanceof Number ? ((Number) options).intValue() : defaultValue; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/Coders.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/Coders.java new file mode 100644 index 0000000..91910bf --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/Coders.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.SequenceInputStream; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.zip.Deflater; +import java.util.zip.DeflaterOutputStream; +import java.util.zip.Inflater; +import java.util.zip.InflaterInputStream; + +import org.apache.commons.compress.utils.FlushShieldFilterOutputStream; +import org.tukaani.xz.ARMOptions; +import org.tukaani.xz.ARMThumbOptions; +import org.tukaani.xz.FilterOptions; +import org.tukaani.xz.FinishableWrapperOutputStream; +import org.tukaani.xz.IA64Options; +import org.tukaani.xz.PowerPCOptions; +import org.tukaani.xz.SPARCOptions; +import org.tukaani.xz.X86Options; + +class Coders { + private static final Map CODER_MAP = new HashMap() { + + private static final long serialVersionUID = 1664829131806520867L; + + { + put(SevenZMethod.COPY, new CopyDecoder()); + put(SevenZMethod.LZMA, new LZMADecoder()); + put(SevenZMethod.LZMA2, new LZMA2Decoder()); + put(SevenZMethod.DEFLATE, new DeflateDecoder()); + put(SevenZMethod.AES256SHA256, new AES256SHA256Coder()); + put(SevenZMethod.BCJ_X86_FILTER, new BCJDecoder(new X86Options())); + put(SevenZMethod.BCJ_PPC_FILTER, new BCJDecoder(new PowerPCOptions())); + 
put(SevenZMethod.BCJ_IA64_FILTER, new BCJDecoder(new IA64Options())); + put(SevenZMethod.BCJ_ARM_FILTER, new BCJDecoder(new ARMOptions())); + put(SevenZMethod.BCJ_ARM_THUMB_FILTER, new BCJDecoder(new ARMThumbOptions())); + put(SevenZMethod.BCJ_SPARC_FILTER, new BCJDecoder(new SPARCOptions())); + put(SevenZMethod.DELTA_FILTER, new DeltaDecoder()); + } + }; + + static CoderBase findByMethod(final SevenZMethod method) { + return CODER_MAP.get(method); + } + + static InputStream addDecoder(final String archiveName, final InputStream is, final long uncompressedLength, + final Coder coder, final byte[] password, final int maxMemoryLimitInKb) throws IOException { + final CoderBase cb = findByMethod(SevenZMethod.byId(coder.decompressionMethodId)); + if (cb == null) { + throw new IOException("Unsupported compression method " + + Arrays.toString(coder.decompressionMethodId) + + " used in " + archiveName); + } + return cb.decode(archiveName, is, uncompressedLength, coder, password, maxMemoryLimitInKb); + } + + static OutputStream addEncoder(final OutputStream out, final SevenZMethod method, + final Object options) throws IOException { + final CoderBase cb = findByMethod(method); + if (cb == null) { + throw new IOException("Unsupported compression method " + method); + } + return cb.encode(out, options); + } + + static class CopyDecoder extends CoderBase { + @Override + InputStream decode(final String archiveName, final InputStream in, final long uncompressedLength, + final Coder coder, final byte[] password, final int maxMemoryLimitInKb) throws IOException { + return in; + } + @Override + OutputStream encode(final OutputStream out, final Object options) { + return out; + } + } + + static class BCJDecoder extends CoderBase { + private final FilterOptions opts; + BCJDecoder(final FilterOptions opts) { + this.opts = opts; + } + + @Override + InputStream decode(final String archiveName, final InputStream in, final long uncompressedLength, + final Coder coder, final byte[] 
password, final int maxMemoryLimitInKb) throws IOException { + try { + return opts.getInputStream(in); + } catch (final AssertionError e) { + throw new IOException("BCJ filter used in " + archiveName + + " needs XZ for Java > 1.4 - see " + + "https://commons.apache.org/proper/commons-compress/limitations.html#7Z", + e); + } + } + + @SuppressWarnings("resource") + @Override + OutputStream encode(final OutputStream out, final Object options) { + return new FlushShieldFilterOutputStream(opts.getOutputStream(new FinishableWrapperOutputStream(out))); + } + } + + static class DeflateDecoder extends CoderBase { + private static final byte[] ONE_ZERO_BYTE = new byte[1]; + DeflateDecoder() { + super(Number.class); + } + + @SuppressWarnings("resource") // caller must close the InputStream + @Override + InputStream decode(final String archiveName, final InputStream in, final long uncompressedLength, + final Coder coder, final byte[] password, final int maxMemoryLimitInKb) + throws IOException { + final Inflater inflater = new Inflater(true); + // Inflater with nowrap=true has this odd contract for a zero padding + // byte following the data stream; this used to be zlib's requirement + // and has been fixed a long time ago, but the contract persists so + // we comply. 
+ // https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean) + final InflaterInputStream inflaterInputStream = new InflaterInputStream(new SequenceInputStream(in, + new ByteArrayInputStream(ONE_ZERO_BYTE)), inflater); + return new DeflateDecoderInputStream(inflaterInputStream, inflater); + } + + @Override + OutputStream encode(final OutputStream out, final Object options) { + final int level = numberOptionOrDefault(options, 9); + final Deflater deflater = new Deflater(level, true); + final DeflaterOutputStream deflaterOutputStream = new DeflaterOutputStream(out, deflater); + return new DeflateDecoderOutputStream(deflaterOutputStream, deflater); + } + + static class DeflateDecoderInputStream extends InputStream { + + final InflaterInputStream inflaterInputStream; + Inflater inflater; + + public DeflateDecoderInputStream(final InflaterInputStream inflaterInputStream, + final Inflater inflater) { + this.inflaterInputStream = inflaterInputStream; + this.inflater = inflater; + } + + @Override + public int read() throws IOException { + return inflaterInputStream.read(); + } + + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + return inflaterInputStream.read(b, off, len); + } + + @Override + public int read(final byte[] b) throws IOException { + return inflaterInputStream.read(b); + } + + @Override + public void close() throws IOException { + try { + inflaterInputStream.close(); + } finally { + inflater.end(); + } + } + } + + static class DeflateDecoderOutputStream extends OutputStream { + + final DeflaterOutputStream deflaterOutputStream; + Deflater deflater; + + public DeflateDecoderOutputStream(final DeflaterOutputStream deflaterOutputStream, + final Deflater deflater) { + this.deflaterOutputStream = deflaterOutputStream; + this.deflater = deflater; + } + + @Override + public void write(final int b) throws IOException { + deflaterOutputStream.write(b); + } + + @Override + public void 
write(final byte[] b) throws IOException { + deflaterOutputStream.write(b); + } + + @Override + public void write(final byte[] b, final int off, final int len) throws IOException { + deflaterOutputStream.write(b, off, len); + } + + @Override + public void close() throws IOException { + try { + deflaterOutputStream.close(); + } finally { + deflater.end(); + } + } + } + } + + +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/DeltaDecoder.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/DeltaDecoder.java new file mode 100644 index 0000000..c2747f8 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/DeltaDecoder.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.tukaani.xz.DeltaOptions; +import org.tukaani.xz.FinishableWrapperOutputStream; +import org.tukaani.xz.UnsupportedOptionsException; + +class DeltaDecoder extends CoderBase { + DeltaDecoder() { + super(Number.class); + } + + @Override + InputStream decode(final String archiveName, final InputStream in, final long uncompressedLength, + final Coder coder, final byte[] password, final int maxMemoryLimitInKb) throws IOException { + return new DeltaOptions(getOptionsFromCoder(coder)).getInputStream(in); + } + + @SuppressWarnings("resource") + @Override + OutputStream encode(final OutputStream out, final Object options) throws IOException { + final int distance = numberOptionOrDefault(options, 1); + try { + return new DeltaOptions(distance).getOutputStream(new FinishableWrapperOutputStream(out)); + } catch (final UnsupportedOptionsException ex) { // NOSONAR + throw new IOException(ex.getMessage()); + } + } + + @Override + byte[] getOptionsAsProperties(final Object options) { + return new byte[] { + (byte) (numberOptionOrDefault(options, 1) - 1) + }; + } + + @Override + Object getOptionsFromCoder(final Coder coder, final InputStream in) { + return getOptionsFromCoder(coder); + } + + private int getOptionsFromCoder(final Coder coder) { + if (coder.properties == null || coder.properties.length == 0) { + return 1; + } + return (0xff & coder.properties[0]) + 1; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/Folder.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/Folder.java new file mode 100644 index 0000000..2642138 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/Folder.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.IOException; +import java.util.Collections; +import java.util.LinkedList; + +/** + * The unit of solid compression. + */ +class Folder { + /// List of coders used in this folder, eg. one for compression, one for encryption. + Coder[] coders; + /// Total number of input streams across all coders. + /// this field is currently unused but technically part of the 7z API + long totalInputStreams; + /// Total number of output streams across all coders. + long totalOutputStreams; + /// Mapping between input and output streams. + BindPair[] bindPairs; + /// Indices of input streams, one per input stream not listed in bindPairs. + long[] packedStreams; + /// Unpack sizes, per each output stream. + long[] unpackSizes; + /// Whether the folder has a CRC. + boolean hasCrc; + /// The CRC, if present. + long crc; + /// The number of unpack substreams, product of the number of + /// output streams and the number of non-empty files in this + /// folder. + int numUnpackSubStreams; + static final Folder[] EMPTY_FOLDER_ARRAY = new Folder[0]; + + /** + * Sorts Coders using bind pairs. + *

The first coder reads from the packed stream (we currently + * only support single input stream decoders), the second reads + * from the output of the first and so on.

+ */ + Iterable<Coder> getOrderedCoders() throws IOException { + if (packedStreams == null || coders == null || packedStreams.length == 0 || coders.length == 0) { + return Collections.emptyList(); + } + final LinkedList<Coder> l = new LinkedList<>(); + int current = (int) packedStreams[0]; // more than 2^31 coders? + while (current >= 0 && current < coders.length) { + if (l.contains(coders[current])) { + throw new IOException("folder uses the same coder more than once in coder chain"); + } + l.addLast(coders[current]); + final int pair = findBindPairForOutStream(current); + current = pair != -1 ? (int) bindPairs[pair].inIndex : -1; + } + return l; + } + + int findBindPairForInStream(final int index) { + if (bindPairs != null) { + for (int i = 0; i < bindPairs.length; i++) { + if (bindPairs[i].inIndex == index) { + return i; + } + } + } + return -1; + } + + int findBindPairForOutStream(final int index) { + if (bindPairs != null) { + for (int i = 0; i < bindPairs.length; i++) { + if (bindPairs[i].outIndex == index) { + return i; + } + } + } + return -1; + } + + long getUnpackSize() { + if (totalOutputStreams == 0) { + return 0; + } + for (int i = ((int) totalOutputStreams) - 1; i >= 0; i--) { + if (findBindPairForOutStream(i) < 0) { + return unpackSizes[i]; + } + } + return 0; + } + + long getUnpackSizeForCoder(final Coder coder) { + if (coders != null) { + for (int i = 0; i < coders.length; i++) { + if (coders[i] == coder) { + return unpackSizes[i]; + } + } + } + return 0; + } + + @Override + public String toString() { + return "Folder with " + coders.length + " coders, " + totalInputStreams + + " input streams, " + totalOutputStreams + " output streams, " + + bindPairs.length + " bind pairs, " + packedStreams.length + + " packed streams, " + unpackSizes.length + " unpack sizes, " + + (hasCrc ? 
"with CRC " + crc : "without CRC") + + " and " + numUnpackSubStreams + " unpack streams"; + } +} + diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/LZMA2Decoder.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/LZMA2Decoder.java new file mode 100644 index 0000000..13e498f --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/LZMA2Decoder.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.commons.compress.MemoryLimitException; +import org.tukaani.xz.FinishableOutputStream; +import org.tukaani.xz.FinishableWrapperOutputStream; +import org.tukaani.xz.LZMA2InputStream; +import org.tukaani.xz.LZMA2Options; + +class LZMA2Decoder extends CoderBase { + LZMA2Decoder() { + super(LZMA2Options.class, Number.class); + } + + @Override + InputStream decode(final String archiveName, final InputStream in, final long uncompressedLength, + final Coder coder, final byte[] password, final int maxMemoryLimitInKb) throws IOException { + try { + final int dictionarySize = getDictionarySize(coder); + final int memoryUsageInKb = LZMA2InputStream.getMemoryUsage(dictionarySize); + if (memoryUsageInKb > maxMemoryLimitInKb) { + throw new MemoryLimitException(memoryUsageInKb, maxMemoryLimitInKb); + } + return new LZMA2InputStream(in, dictionarySize); + } catch (final IllegalArgumentException ex) { // NOSONAR + throw new IOException(ex.getMessage()); + } + } + + @Override + OutputStream encode(final OutputStream out, final Object opts) + throws IOException { + final LZMA2Options options = getOptions(opts); + final FinishableOutputStream wrapped = new FinishableWrapperOutputStream(out); + return options.getOutputStream(wrapped); + } + + @Override + byte[] getOptionsAsProperties(final Object opts) { + final int dictSize = getDictSize(opts); + final int lead = Integer.numberOfLeadingZeros(dictSize); + final int secondBit = (dictSize >>> (30 - lead)) - 2; + return new byte[] { + (byte) ((19 - lead) * 2 + secondBit) + }; + } + + @Override + Object getOptionsFromCoder(final Coder coder, final InputStream in) + throws IOException { + return getDictionarySize(coder); + } + + private int getDictSize(final Object opts) { + if (opts instanceof LZMA2Options) { + return ((LZMA2Options) opts).getDictSize(); + } + return 
numberOptionOrDefault(opts); + } + + private int getDictionarySize(final Coder coder) throws IOException { + if (coder.properties == null) { + throw new IOException("Missing LZMA2 properties"); + } + if (coder.properties.length < 1) { + throw new IOException("LZMA2 properties too short"); + } + final int dictionarySizeBits = 0xff & coder.properties[0]; + if ((dictionarySizeBits & (~0x3f)) != 0) { + throw new IOException("Unsupported LZMA2 property bits"); + } + if (dictionarySizeBits > 40) { + throw new IOException("Dictionary larger than 4GiB maximum size"); + } + if (dictionarySizeBits == 40) { + return 0xFFFFffff; + } + return (2 | (dictionarySizeBits & 0x1)) << (dictionarySizeBits / 2 + 11); + } + + private LZMA2Options getOptions(final Object opts) throws IOException { + if (opts instanceof LZMA2Options) { + return (LZMA2Options) opts; + } + final LZMA2Options options = new LZMA2Options(); + options.setDictSize(numberOptionOrDefault(opts)); + return options; + } + + private int numberOptionOrDefault(final Object opts) { + return numberOptionOrDefault(opts, LZMA2Options.DICT_SIZE_DEFAULT); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/LZMADecoder.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/LZMADecoder.java new file mode 100644 index 0000000..68a2836 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/LZMADecoder.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.commons.compress.MemoryLimitException; +import org.apache.commons.compress.utils.ByteUtils; +import org.apache.commons.compress.utils.FlushShieldFilterOutputStream; +import org.tukaani.xz.LZMA2Options; +import org.tukaani.xz.LZMAInputStream; +import org.tukaani.xz.LZMAOutputStream; + +class LZMADecoder extends CoderBase { + LZMADecoder() { + super(LZMA2Options.class, Number.class); + } + + @Override + InputStream decode(final String archiveName, final InputStream in, final long uncompressedLength, + final Coder coder, final byte[] password, final int maxMemoryLimitInKb) throws IOException { + if (coder.properties == null) { + throw new IOException("Missing LZMA properties"); + } + if (coder.properties.length < 1) { + throw new IOException("LZMA properties too short"); + } + final byte propsByte = coder.properties[0]; + final int dictSize = getDictionarySize(coder); + if (dictSize > LZMAInputStream.DICT_SIZE_MAX) { + throw new IOException("Dictionary larger than 4GiB maximum size used in " + archiveName); + } + final int memoryUsageInKb = LZMAInputStream.getMemoryUsage(dictSize, propsByte); + if (memoryUsageInKb > maxMemoryLimitInKb) { + throw new MemoryLimitException(memoryUsageInKb, maxMemoryLimitInKb); + } + final LZMAInputStream lzmaIn = new LZMAInputStream(in, uncompressedLength, propsByte, dictSize); + lzmaIn.enableRelaxedEndCondition(); + return lzmaIn; + } + + 
@SuppressWarnings("resource") + @Override + OutputStream encode(final OutputStream out, final Object opts) + throws IOException { + // NOOP as LZMAOutputStream throws an exception in flush + return new FlushShieldFilterOutputStream(new LZMAOutputStream(out, getOptions(opts), false)); + } + + @Override + byte[] getOptionsAsProperties(final Object opts) throws IOException { + final LZMA2Options options = getOptions(opts); + final byte props = (byte) ((options.getPb() * 5 + options.getLp()) * 9 + options.getLc()); + final int dictSize = options.getDictSize(); + final byte[] o = new byte[5]; + o[0] = props; + ByteUtils.toLittleEndian(o, dictSize, 1, 4); + return o; + } + + @Override + Object getOptionsFromCoder(final Coder coder, final InputStream in) throws IOException { + if (coder.properties == null) { + throw new IOException("Missing LZMA properties"); + } + if (coder.properties.length < 1) { + throw new IOException("LZMA properties too short"); + } + final byte propsByte = coder.properties[0]; + int props = propsByte & 0xFF; + final int pb = props / (9 * 5); + props -= pb * 9 * 5; + final int lp = props / 9; + final int lc = props - lp * 9; + final LZMA2Options opts = new LZMA2Options(); + opts.setPb(pb); + opts.setLcLp(lc, lp); + opts.setDictSize(getDictionarySize(coder)); + return opts; + } + + private int getDictionarySize(final Coder coder) throws IllegalArgumentException { + return (int) ByteUtils.fromLittleEndian(coder.properties, 1, 4); + } + + private LZMA2Options getOptions(final Object opts) throws IOException { + if (opts instanceof LZMA2Options) { + return (LZMA2Options) opts; + } + final LZMA2Options options = new LZMA2Options(); + options.setDictSize(numberOptionOrDefault(opts)); + return options; + } + + private int numberOptionOrDefault(final Object opts) { + return numberOptionOrDefault(opts, LZMA2Options.DICT_SIZE_DEFAULT); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/NID.java 
b/src/main/java/org/apache/commons/compress/archivers/sevenz/NID.java new file mode 100644 index 0000000..89a813a --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/NID.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.sevenz; + +final class NID { + public static final int kEnd = 0x00; + public static final int kHeader = 0x01; + public static final int kArchiveProperties = 0x02; + public static final int kAdditionalStreamsInfo = 0x03; + public static final int kMainStreamsInfo = 0x04; + public static final int kFilesInfo = 0x05; + public static final int kPackInfo = 0x06; + public static final int kUnpackInfo = 0x07; + public static final int kSubStreamsInfo = 0x08; + public static final int kSize = 0x09; + public static final int kCRC = 0x0A; + public static final int kFolder = 0x0B; + public static final int kCodersUnpackSize = 0x0C; + public static final int kNumUnpackStream = 0x0D; + public static final int kEmptyStream = 0x0E; + public static final int kEmptyFile = 0x0F; + public static final int kAnti = 0x10; + public static final int kName = 0x11; + public static final int kCTime = 0x12; + public static final int kATime = 0x13; + public static final int kMTime = 0x14; + public static final int kWinAttributes = 0x15; + public static final int kComment = 0x16; + public static final int kEncodedHeader = 0x17; + public static final int kStartPos = 0x18; + public static final int kDummy = 0x19; +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZArchiveEntry.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZArchiveEntry.java new file mode 100644 index 0000000..74d4276 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZArchiveEntry.java @@ -0,0 +1,591 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.util.Arrays; +import java.util.Calendar; +import java.util.Collections; +import java.util.Date; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.Objects; +import java.util.TimeZone; + +import org.apache.commons.compress.archivers.ArchiveEntry; + +/** + * An entry in a 7z archive. + * + * @NotThreadSafe + * @since 1.6 + */ +public class SevenZArchiveEntry implements ArchiveEntry { + private String name; + private boolean hasStream; + private boolean isDirectory; + private boolean isAntiItem; + private boolean hasCreationDate; + private boolean hasLastModifiedDate; + private boolean hasAccessDate; + private long creationDate; + private long lastModifiedDate; + private long accessDate; + private boolean hasWindowsAttributes; + private int windowsAttributes; + private boolean hasCrc; + private long crc, compressedCrc; + private long size, compressedSize; + private Iterable contentMethods; + static final SevenZArchiveEntry[] EMPTY_SEVEN_Z_ARCHIVE_ENTRY_ARRAY = new SevenZArchiveEntry[0]; + + public SevenZArchiveEntry() { + } + + /** + * Get this entry's name. + * + *

This method returns the raw name as it is stored inside of the archive.

+ * + * @return This entry's name. + */ + @Override + public String getName() { + return name; + } + + /** + * Set this entry's name. + * + * @param name This entry's new name. + */ + public void setName(final String name) { + this.name = name; + } + + /** + * Whether there is any content associated with this entry. + * @return whether there is any content associated with this entry. + */ + public boolean hasStream() { + return hasStream; + } + + /** + * Sets whether there is any content associated with this entry. + * @param hasStream whether there is any content associated with this entry. + */ + public void setHasStream(final boolean hasStream) { + this.hasStream = hasStream; + } + + /** + * Return whether or not this entry represents a directory. + * + * @return True if this entry is a directory. + */ + @Override + public boolean isDirectory() { + return isDirectory; + } + + /** + * Sets whether or not this entry represents a directory. + * + * @param isDirectory True if this entry is a directory. + */ + public void setDirectory(final boolean isDirectory) { + this.isDirectory = isDirectory; + } + + /** + * Indicates whether this is an "anti-item" used in differential backups, + * meaning it should delete the same file from a previous backup. + * @return true if it is an anti-item, false otherwise + */ + public boolean isAntiItem() { + return isAntiItem; + } + + /** + * Sets whether this is an "anti-item" used in differential backups, + * meaning it should delete the same file from a previous backup. + * @param isAntiItem true if it is an anti-item, false otherwise + */ + public void setAntiItem(final boolean isAntiItem) { + this.isAntiItem = isAntiItem; + } + + /** + * Returns whether this entry has got a creation date at all. + * @return whether the entry has got a creation date + */ + public boolean getHasCreationDate() { + return hasCreationDate; + } + + /** + * Sets whether this entry has got a creation date at all. 
+ * @param hasCreationDate whether the entry has got a creation date + */ + public void setHasCreationDate(final boolean hasCreationDate) { + this.hasCreationDate = hasCreationDate; + } + + /** + * Gets the creation date. + * @throws UnsupportedOperationException if the entry hasn't got a + * creation date. + * @return the creation date + */ + public Date getCreationDate() { + if (hasCreationDate) { + return ntfsTimeToJavaTime(creationDate); + } + throw new UnsupportedOperationException( + "The entry doesn't have this timestamp"); + } + + /** + * Sets the creation date using NTFS time (100 nanosecond units + * since 1 January 1601) + * @param ntfsCreationDate the creation date + */ + public void setCreationDate(final long ntfsCreationDate) { + this.creationDate = ntfsCreationDate; + } + + /** + * Sets the creation date, + * @param creationDate the creation date + */ + public void setCreationDate(final Date creationDate) { + hasCreationDate = creationDate != null; + if (hasCreationDate) { + this.creationDate = javaTimeToNtfsTime(creationDate); + } + } + + /** + * Returns whether this entry has got a last modified date at all. + * @return whether this entry has got a last modified date at all + */ + public boolean getHasLastModifiedDate() { + return hasLastModifiedDate; + } + + /** + * Sets whether this entry has got a last modified date at all. + * @param hasLastModifiedDate whether this entry has got a last + * modified date at all + */ + public void setHasLastModifiedDate(final boolean hasLastModifiedDate) { + this.hasLastModifiedDate = hasLastModifiedDate; + } + + /** + * Gets the last modified date. + * @throws UnsupportedOperationException if the entry hasn't got a + * last modified date. 
+ * @return the last modified date + */ + @Override + public Date getLastModifiedDate() { + if (hasLastModifiedDate) { + return ntfsTimeToJavaTime(lastModifiedDate); + } + throw new UnsupportedOperationException( + "The entry doesn't have this timestamp"); + } + + /** + * Sets the last modified date using NTFS time (100 nanosecond + * units since 1 January 1601) + * @param ntfsLastModifiedDate the last modified date + */ + public void setLastModifiedDate(final long ntfsLastModifiedDate) { + this.lastModifiedDate = ntfsLastModifiedDate; + } + + /** + * Sets the last modified date, + * @param lastModifiedDate the last modified date + */ + public void setLastModifiedDate(final Date lastModifiedDate) { + hasLastModifiedDate = lastModifiedDate != null; + if (hasLastModifiedDate) { + this.lastModifiedDate = javaTimeToNtfsTime(lastModifiedDate); + } + } + + /** + * Returns whether this entry has got an access date at all. + * @return whether this entry has got an access date at all. + */ + public boolean getHasAccessDate() { + return hasAccessDate; + } + + /** + * Sets whether this entry has got an access date at all. + * @param hasAcessDate whether this entry has got an access date at all. + */ + public void setHasAccessDate(final boolean hasAcessDate) { + this.hasAccessDate = hasAcessDate; + } + + /** + * Gets the access date. + * @throws UnsupportedOperationException if the entry hasn't got a + * access date. 
+ * @return the access date + */ + public Date getAccessDate() { + if (hasAccessDate) { + return ntfsTimeToJavaTime(accessDate); + } + throw new UnsupportedOperationException( + "The entry doesn't have this timestamp"); + } + + /** + * Sets the access date using NTFS time (100 nanosecond units + * since 1 January 1601) + * @param ntfsAccessDate the access date + */ + public void setAccessDate(final long ntfsAccessDate) { + this.accessDate = ntfsAccessDate; + } + + /** + * Sets the access date, + * @param accessDate the access date + */ + public void setAccessDate(final Date accessDate) { + hasAccessDate = accessDate != null; + if (hasAccessDate) { + this.accessDate = javaTimeToNtfsTime(accessDate); + } + } + + /** + * Returns whether this entry has windows attributes. + * @return whether this entry has windows attributes. + */ + public boolean getHasWindowsAttributes() { + return hasWindowsAttributes; + } + + /** + * Sets whether this entry has windows attributes. + * @param hasWindowsAttributes whether this entry has windows attributes. + */ + public void setHasWindowsAttributes(final boolean hasWindowsAttributes) { + this.hasWindowsAttributes = hasWindowsAttributes; + } + + /** + * Gets the windows attributes. + * @return the windows attributes + */ + public int getWindowsAttributes() { + return windowsAttributes; + } + + /** + * Sets the windows attributes. + * @param windowsAttributes the windows attributes + */ + public void setWindowsAttributes(final int windowsAttributes) { + this.windowsAttributes = windowsAttributes; + } + + /** + * Returns whether this entry has got a crc. + * + *

In general entries without streams don't have a CRC either.

+ * @return whether this entry has got a crc. + */ + public boolean getHasCrc() { + return hasCrc; + } + + /** + * Sets whether this entry has got a crc. + * @param hasCrc whether this entry has got a crc. + */ + public void setHasCrc(final boolean hasCrc) { + this.hasCrc = hasCrc; + } + + /** + * Gets the CRC. + * @deprecated use getCrcValue instead. + * @return the CRC + */ + @Deprecated + public int getCrc() { + return (int) crc; + } + + /** + * Sets the CRC. + * @deprecated use setCrcValue instead. + * @param crc the CRC + */ + @Deprecated + public void setCrc(final int crc) { + this.crc = crc; + } + + /** + * Gets the CRC. + * @since 1.7 + * @return the CRC + */ + public long getCrcValue() { + return crc; + } + + /** + * Sets the CRC. + * @since 1.7 + * @param crc the CRC + */ + public void setCrcValue(final long crc) { + this.crc = crc; + } + + /** + * Gets the compressed CRC. + * @deprecated use getCompressedCrcValue instead. + * @return the compressed CRC + */ + @Deprecated + int getCompressedCrc() { + return (int) compressedCrc; + } + + /** + * Sets the compressed CRC. + * @deprecated use setCompressedCrcValue instead. + * @param crc the CRC + */ + @Deprecated + void setCompressedCrc(final int crc) { + this.compressedCrc = crc; + } + + /** + * Gets the compressed CRC. + * @since 1.7 + * @return the CRC + */ + long getCompressedCrcValue() { + return compressedCrc; + } + + /** + * Sets the compressed CRC. + * @since 1.7 + * @param crc the CRC + */ + void setCompressedCrcValue(final long crc) { + this.compressedCrc = crc; + } + + /** + * Get this entry's file size. + * + * @return This entry's file size. + */ + @Override + public long getSize() { + return size; + } + + /** + * Set this entry's file size. + * + * @param size This entry's new file size. + */ + public void setSize(final long size) { + this.size = size; + } + + /** + * Get this entry's compressed file size. + * + * @return This entry's compressed file size. 
+ */ + long getCompressedSize() { + return compressedSize; + } + + /** + * Set this entry's compressed file size. + * + * @param size This entry's new compressed file size. + */ + void setCompressedSize(final long size) { + this.compressedSize = size; + } + + /** + * Sets the (compression) methods to use for entry's content - the + * default is LZMA2. + * + *

Currently only {@link SevenZMethod#COPY}, {@link + * SevenZMethod#LZMA2}, {@link SevenZMethod#BZIP2} and {@link + * SevenZMethod#DEFLATE} are supported when writing archives.

+ * + *

The methods will be consulted in iteration order to create + * the final output.

+ * + * @param methods the methods to use for the content + * @since 1.8 + */ + public void setContentMethods(final Iterable methods) { + if (methods != null) { + final LinkedList l = new LinkedList<>(); + methods.forEach(l::addLast); + contentMethods = Collections.unmodifiableList(l); + } else { + contentMethods = null; + } + } + + /** + * Sets the (compression) methods to use for entry's content - the + * default is LZMA2. + * + *

Currently only {@link SevenZMethod#COPY}, {@link + * SevenZMethod#LZMA2}, {@link SevenZMethod#BZIP2} and {@link + * SevenZMethod#DEFLATE} are supported when writing archives.

+ * + *

The methods will be consulted in iteration order to create + * the final output.

+ * + * @param methods the methods to use for the content + * @since 1.22 + */ + public void setContentMethods(SevenZMethodConfiguration... methods) { + setContentMethods(Arrays.asList(methods)); + } + + /** + * Gets the (compression) methods to use for entry's content - the + * default is LZMA2. + * + *

Currently only {@link SevenZMethod#COPY}, {@link + * SevenZMethod#LZMA2}, {@link SevenZMethod#BZIP2} and {@link + * SevenZMethod#DEFLATE} are supported when writing archives.

+ * + *

The methods will be consulted in iteration order to create + * the final output.

+ * + * @since 1.8 + * @return the methods to use for the content + */ + public Iterable getContentMethods() { + return contentMethods; + } + + @Override + public int hashCode() { + final String n = getName(); + return n == null ? 0 : n.hashCode(); + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + final SevenZArchiveEntry other = (SevenZArchiveEntry) obj; + return + Objects.equals(name, other.name) && + hasStream == other.hasStream && + isDirectory == other.isDirectory && + isAntiItem == other.isAntiItem && + hasCreationDate == other.hasCreationDate && + hasLastModifiedDate == other.hasLastModifiedDate && + hasAccessDate == other.hasAccessDate && + creationDate == other.creationDate && + lastModifiedDate == other.lastModifiedDate && + accessDate == other.accessDate && + hasWindowsAttributes == other.hasWindowsAttributes && + windowsAttributes == other.windowsAttributes && + hasCrc == other.hasCrc && + crc == other.crc && + compressedCrc == other.compressedCrc && + size == other.size && + compressedSize == other.compressedSize && + equalSevenZMethods(contentMethods, other.contentMethods); + } + + /** + * Converts NTFS time (100 nanosecond units since 1 January 1601) + * to Java time. + * @param ntfsTime the NTFS time in 100 nanosecond units + * @return the Java time + */ + public static Date ntfsTimeToJavaTime(final long ntfsTime) { + final Calendar ntfsEpoch = Calendar.getInstance(); + ntfsEpoch.setTimeZone(TimeZone.getTimeZone("GMT+0")); + ntfsEpoch.set(1601, 0, 1, 0, 0, 0); + ntfsEpoch.set(Calendar.MILLISECOND, 0); + final long realTime = ntfsEpoch.getTimeInMillis() + (ntfsTime / (10*1000)); + return new Date(realTime); + } + + /** + * Converts Java time to NTFS time. 
+ * @param date the Java time + * @return the NTFS time + */ + public static long javaTimeToNtfsTime(final Date date) { + final Calendar ntfsEpoch = Calendar.getInstance(); + ntfsEpoch.setTimeZone(TimeZone.getTimeZone("GMT+0")); + ntfsEpoch.set(1601, 0, 1, 0, 0, 0); + ntfsEpoch.set(Calendar.MILLISECOND, 0); + return ((date.getTime() - ntfsEpoch.getTimeInMillis())* 1000 * 10); + } + + private boolean equalSevenZMethods(final Iterable c1, + final Iterable c2) { + if (c1 == null) { + return c2 == null; + } + if (c2 == null) { + return false; + } + final Iterator i2 = c2.iterator(); + for (SevenZMethodConfiguration element : c1) { + if (!i2.hasNext()) { + return false; + } + if (!element.equals(i2.next())) { + return false; + } + } + return !i2.hasNext(); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java new file mode 100644 index 0000000..8ec1de0 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java @@ -0,0 +1,2136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.sevenz; + +import static java.nio.charset.StandardCharsets.UTF_16LE; +import static org.apache.commons.compress.utils.ByteUtils.utf16Decode; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.Closeable; +import java.io.DataInputStream; +import java.io.EOFException; +import java.io.File; +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.CharBuffer; +import java.nio.channels.Channels; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.Files; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.EnumSet; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.zip.CRC32; +import java.util.zip.CheckedInputStream; + +import org.apache.commons.compress.MemoryLimitException; +import org.apache.commons.compress.utils.BoundedInputStream; +import org.apache.commons.compress.utils.ByteUtils; +import org.apache.commons.compress.utils.CRC32VerifyingInputStream; +import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.compress.utils.InputStreamStatistics; + +/** + * Reads a 7z file, using SeekableByteChannel under + * the covers. + *

+ * The 7z file format is a flexible container + * that can contain many compression and + * encryption types, but at the moment only + * only Copy, LZMA, LZMA2, BZIP2, Deflate and AES-256 + SHA-256 + * are supported. + *

+ * The format is very Windows/Intel specific, + * so it uses little-endian byte order, + * doesn't store user/group or permission bits, + * and represents times using NTFS timestamps + * (100 nanosecond units since 1 January 1601). + * Hence the official tools recommend against + * using it for backup purposes on *nix, and + * recommend .tar.7z or .tar.lzma or .tar.xz + * instead. + *

+ * Both the header and file contents may be + * compressed and/or encrypted. With both + * encrypted, neither file names nor file + * contents can be read, but the use of + * encryption isn't plausibly deniable. + * + *

Multi volume archives can be read by concatenating the parts in + * correct order - either manually or by using {link + * org.apache.commons.compress.utils.MultiReadOnlySeekableByteChannel} + * for example.

+ * + * @NotThreadSafe + * @since 1.6 + */ +public class SevenZFile implements Closeable { + static final int SIGNATURE_HEADER_SIZE = 32; + + private static final String DEFAULT_FILE_NAME = "unknown archive"; + + private final String fileName; + private SeekableByteChannel channel; + private final Archive archive; + private int currentEntryIndex = -1; + private int currentFolderIndex = -1; + private InputStream currentFolderInputStream; + private byte[] password; + private final SevenZFileOptions options; + + private long compressedBytesReadFromCurrentEntry; + private long uncompressedBytesReadFromCurrentEntry; + + private final ArrayList deferredBlockStreams = new ArrayList<>(); + + // shared with SevenZOutputFile and tests, neither mutates it + static final byte[] sevenZSignature = { //NOSONAR + (byte)'7', (byte)'z', (byte)0xBC, (byte)0xAF, (byte)0x27, (byte)0x1C + }; + + /** + * Reads a file as 7z archive + * + * @param fileName the file to read + * @param password optional password if the archive is encrypted + * @throws IOException if reading the archive fails + * @since 1.17 + */ + public SevenZFile(final File fileName, final char[] password) throws IOException { + this(fileName, password, SevenZFileOptions.DEFAULT); + } + + /** + * Reads a file as 7z archive with additional options. 
+ * + * @param fileName the file to read + * @param password optional password if the archive is encrypted + * @param options the options to apply + * @throws IOException if reading the archive fails or the memory limit (if set) is too small + * @since 1.19 + */ + public SevenZFile(final File fileName, final char[] password, final SevenZFileOptions options) throws IOException { + this(Files.newByteChannel(fileName.toPath(), EnumSet.of(StandardOpenOption.READ)), // NOSONAR + fileName.getAbsolutePath(), utf16Decode(password), true, options); + } + + /** + * Reads a file as 7z archive + * + * @param fileName the file to read + * @param password optional password if the archive is encrypted - + * the byte array is supposed to be the UTF16-LE encoded + * representation of the password. + * @throws IOException if reading the archive fails + * @deprecated use the char[]-arg version for the password instead + */ + @Deprecated + public SevenZFile(final File fileName, final byte[] password) throws IOException { + this(Files.newByteChannel(fileName.toPath(), EnumSet.of(StandardOpenOption.READ)), + fileName.getAbsolutePath(), password, true, SevenZFileOptions.DEFAULT); + } + + /** + * Reads a SeekableByteChannel as 7z archive + * + *

{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.

+ * + * @param channel the channel to read + * @throws IOException if reading the archive fails + * @since 1.13 + */ + public SevenZFile(final SeekableByteChannel channel) throws IOException { + this(channel, SevenZFileOptions.DEFAULT); + } + + /** + * Reads a SeekableByteChannel as 7z archive with addtional options. + * + *

{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.

+ * + * @param channel the channel to read + * @param options the options to apply + * @throws IOException if reading the archive fails or the memory limit (if set) is too small + * @since 1.19 + */ + public SevenZFile(final SeekableByteChannel channel, final SevenZFileOptions options) throws IOException { + this(channel, DEFAULT_FILE_NAME, null, options); + } + + /** + * Reads a SeekableByteChannel as 7z archive + * + *

{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.

+ * + * @param channel the channel to read + * @param password optional password if the archive is encrypted + * @throws IOException if reading the archive fails + * @since 1.17 + */ + public SevenZFile(final SeekableByteChannel channel, + final char[] password) throws IOException { + this(channel, password, SevenZFileOptions.DEFAULT); + } + + /** + * Reads a SeekableByteChannel as 7z archive with additional options. + * + *

{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.

+ * + * @param channel the channel to read + * @param password optional password if the archive is encrypted + * @param options the options to apply + * @throws IOException if reading the archive fails or the memory limit (if set) is too small + * @since 1.19 + */ + public SevenZFile(final SeekableByteChannel channel, final char[] password, final SevenZFileOptions options) + throws IOException { + this(channel, DEFAULT_FILE_NAME, password, options); + } + + /** + * Reads a SeekableByteChannel as 7z archive + * + *

{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.

+ * + * @param channel the channel to read + * @param fileName name of the archive - only used for error reporting + * @param password optional password if the archive is encrypted + * @throws IOException if reading the archive fails + * @since 1.17 + */ + public SevenZFile(final SeekableByteChannel channel, final String fileName, + final char[] password) throws IOException { + this(channel, fileName, password, SevenZFileOptions.DEFAULT); + } + + /** + * Reads a SeekableByteChannel as 7z archive with addtional options. + * + *

{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.

+ * + * @param channel the channel to read + * @param fileName name of the archive - only used for error reporting + * @param password optional password if the archive is encrypted + * @param options the options to apply + * @throws IOException if reading the archive fails or the memory limit (if set) is too small + * @since 1.19 + */ + public SevenZFile(final SeekableByteChannel channel, final String fileName, final char[] password, + final SevenZFileOptions options) throws IOException { + this(channel, fileName, utf16Decode(password), false, options); + } + + /** + * Reads a SeekableByteChannel as 7z archive + * + *

{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.

+ * + * @param channel the channel to read + * @param fileName name of the archive - only used for error reporting + * @throws IOException if reading the archive fails + * @since 1.17 + */ + public SevenZFile(final SeekableByteChannel channel, final String fileName) + throws IOException { + this(channel, fileName, SevenZFileOptions.DEFAULT); + } + + /** + * Reads a SeekableByteChannel as 7z archive with additional options. + * + *

{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.

+ * + * @param channel the channel to read + * @param fileName name of the archive - only used for error reporting + * @param options the options to apply + * @throws IOException if reading the archive fails or the memory limit (if set) is too small + * @since 1.19 + */ + public SevenZFile(final SeekableByteChannel channel, final String fileName, final SevenZFileOptions options) + throws IOException { + this(channel, fileName, null, false, options); + } + + /** + * Reads a SeekableByteChannel as 7z archive + * + *

{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.

+ * + * @param channel the channel to read + * @param password optional password if the archive is encrypted - + * the byte array is supposed to be the UTF16-LE encoded + * representation of the password. + * @throws IOException if reading the archive fails + * @since 1.13 + * @deprecated use the char[]-arg version for the password instead + */ + @Deprecated + public SevenZFile(final SeekableByteChannel channel, + final byte[] password) throws IOException { + this(channel, DEFAULT_FILE_NAME, password); + } + + /** + * Reads a SeekableByteChannel as 7z archive + * + *

{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to read from an in-memory archive.

+ * + * @param channel the channel to read + * @param fileName name of the archive - only used for error reporting + * @param password optional password if the archive is encrypted - + * the byte array is supposed to be the UTF16-LE encoded + * representation of the password. + * @throws IOException if reading the archive fails + * @since 1.13 + * @deprecated use the char[]-arg version for the password instead + */ + @Deprecated + public SevenZFile(final SeekableByteChannel channel, final String fileName, + final byte[] password) throws IOException { + this(channel, fileName, password, false, SevenZFileOptions.DEFAULT); + } + + private SevenZFile(final SeekableByteChannel channel, final String filename, + final byte[] password, final boolean closeOnError, final SevenZFileOptions options) throws IOException { + boolean succeeded = false; + this.channel = channel; + this.fileName = filename; + this.options = options; + try { + archive = readHeaders(password); + if (password != null) { + this.password = Arrays.copyOf(password, password.length); + } else { + this.password = null; + } + succeeded = true; + } finally { + if (!succeeded && closeOnError) { + this.channel.close(); + } + } + } + + /** + * Reads a file as unencrypted 7z archive + * + * @param fileName the file to read + * @throws IOException if reading the archive fails + */ + public SevenZFile(final File fileName) throws IOException { + this(fileName, SevenZFileOptions.DEFAULT); + } + + /** + * Reads a file as unencrypted 7z archive + * + * @param fileName the file to read + * @param options the options to apply + * @throws IOException if reading the archive fails or the memory limit (if set) is too small + * @since 1.19 + */ + public SevenZFile(final File fileName, final SevenZFileOptions options) throws IOException { + this(fileName, null, options); + } + + /** + * Closes the archive. 
+ * @throws IOException if closing the file fails + */ + @Override + public void close() throws IOException { + if (channel != null) { + try { + channel.close(); + } finally { + channel = null; + if (password != null) { + Arrays.fill(password, (byte) 0); + } + password = null; + } + } + } + + /** + * Returns the next Archive Entry in this archive. + * + * @return the next entry, + * or {@code null} if there are no more entries + * @throws IOException if the next entry could not be read + */ + public SevenZArchiveEntry getNextEntry() throws IOException { + if (currentEntryIndex >= archive.files.length - 1) { + return null; + } + ++currentEntryIndex; + final SevenZArchiveEntry entry = archive.files[currentEntryIndex]; + if (entry.getName() == null && options.getUseDefaultNameForUnnamedEntries()) { + entry.setName(getDefaultName()); + } + buildDecodingStream(currentEntryIndex, false); + uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0; + return entry; + } + + /** + * Returns a copy of meta-data of all archive entries. + * + *

This method only provides meta-data, the entries can not be + * used to read the contents, you still need to process all + * entries in order using {@link #getNextEntry} for that.

+ * + *

The content methods are only available for entries that have + * already been reached via {@link #getNextEntry}.

+ * + * @return a copy of meta-data of all archive entries. + * @since 1.11 + */ + public Iterable getEntries() { + return new ArrayList<>(Arrays.asList(archive.files)); + } + + private Archive readHeaders(final byte[] password) throws IOException { + final ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */) + .order(ByteOrder.LITTLE_ENDIAN); + readFully(buf); + final byte[] signature = new byte[6]; + buf.get(signature); + if (!Arrays.equals(signature, sevenZSignature)) { + throw new IOException("Bad 7z signature"); + } + // 7zFormat.txt has it wrong - it's first major then minor + final byte archiveVersionMajor = buf.get(); + final byte archiveVersionMinor = buf.get(); + if (archiveVersionMajor != 0) { + throw new IOException(String.format("Unsupported 7z version (%d,%d)", + archiveVersionMajor, archiveVersionMinor)); + } + + boolean headerLooksValid = false; // See https://www.7-zip.org/recover.html - "There is no correct End Header at the end of archive" + final long startHeaderCrc = 0xffffFFFFL & buf.getInt(); + if (startHeaderCrc == 0) { + // This is an indication of a corrupt header - peek the next 20 bytes + final long currentPosition = channel.position(); + final ByteBuffer peekBuf = ByteBuffer.allocate(20); + readFully(peekBuf); + channel.position(currentPosition); + // Header invalid if all data is 0 + while (peekBuf.hasRemaining()) { + if (peekBuf.get()!=0) { + headerLooksValid = true; + break; + } + } + } else { + headerLooksValid = true; + } + + if (headerLooksValid) { + return initializeArchive(readStartHeader(startHeaderCrc), password, true); + } + // No valid header found - probably first file of multipart archive was removed too early. Scan for end header. 
+ if (options.getTryToRecoverBrokenArchives()) { + return tryToLocateEndHeader(password); + } + throw new IOException("archive seems to be invalid.\nYou may want to retry and enable the" + + " tryToRecoverBrokenArchives if the archive could be a multi volume archive that has been closed" + + " prematurely."); + } + + private Archive tryToLocateEndHeader(final byte[] password) throws IOException { + final ByteBuffer nidBuf = ByteBuffer.allocate(1); + final long searchLimit = 1024L * 1024 * 1; + // Main header, plus bytes that readStartHeader would read + final long previousDataSize = channel.position() + 20; + final long minPos; + // Determine minimal position - can't start before current position + if (channel.position() + searchLimit > channel.size()) { + minPos = channel.position(); + } else { + minPos = channel.size() - searchLimit; + } + long pos = channel.size() - 1; + // Loop: Try from end of archive + while (pos > minPos) { + pos--; + channel.position(pos); + nidBuf.rewind(); + if (channel.read(nidBuf) < 1) { + throw new EOFException(); + } + final int nid = nidBuf.array()[0]; + // First indicator: Byte equals one of these header identifiers + if (nid == NID.kEncodedHeader || nid == NID.kHeader) { + try { + // Try to initialize Archive structure from here + final StartHeader startHeader = new StartHeader(); + startHeader.nextHeaderOffset = pos - previousDataSize; + startHeader.nextHeaderSize = channel.size() - pos; + final Archive result = initializeArchive(startHeader, password, false); + // Sanity check: There must be some data... + if (result.packSizes.length > 0 && result.files.length > 0) { + return result; + } + } catch (final Exception ignore) { + // Wrong guess... 
+ } + } + } + throw new IOException("Start header corrupt and unable to guess end header"); + } + + private Archive initializeArchive(final StartHeader startHeader, final byte[] password, final boolean verifyCrc) throws IOException { + assertFitsIntoNonNegativeInt("nextHeaderSize", startHeader.nextHeaderSize); + final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize; + channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset); + if (verifyCrc) { + final long position = channel.position(); + CheckedInputStream cis = new CheckedInputStream(Channels.newInputStream(channel), new CRC32()); + if (cis.skip(nextHeaderSizeInt) != nextHeaderSizeInt) { + throw new IOException("Problem computing NextHeader CRC-32"); + } + if (startHeader.nextHeaderCrc != cis.getChecksum().getValue()) { + throw new IOException("NextHeader CRC-32 mismatch"); + } + channel.position(position); + } + Archive archive = new Archive(); + ByteBuffer buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN); + readFully(buf); + int nid = getUnsignedByte(buf); + if (nid == NID.kEncodedHeader) { + buf = readEncodedHeader(buf, archive, password); + // Archive gets rebuilt with the new header + archive = new Archive(); + nid = getUnsignedByte(buf); + } + if (nid != NID.kHeader) { + throw new IOException("Broken or unsupported archive: no Header"); + } + readHeader(buf, archive); + archive.subStreamsInfo = null; + return archive; + } + + private StartHeader readStartHeader(final long startHeaderCrc) throws IOException { + final StartHeader startHeader = new StartHeader(); + // using Stream rather than ByteBuffer for the benefit of the + // built-in CRC check + try (DataInputStream dataInputStream = new DataInputStream(new CRC32VerifyingInputStream( + new BoundedSeekableByteChannelInputStream(channel, 20), 20, startHeaderCrc))) { + startHeader.nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong()); + if (startHeader.nextHeaderOffset < 0 + || 
startHeader.nextHeaderOffset + SIGNATURE_HEADER_SIZE > channel.size()) { + throw new IOException("nextHeaderOffset is out of bounds"); + } + + startHeader.nextHeaderSize = Long.reverseBytes(dataInputStream.readLong()); + final long nextHeaderEnd = startHeader.nextHeaderOffset + startHeader.nextHeaderSize; + if (nextHeaderEnd < startHeader.nextHeaderOffset + || nextHeaderEnd + SIGNATURE_HEADER_SIZE > channel.size()) { + throw new IOException("nextHeaderSize is out of bounds"); + } + + startHeader.nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt()); + + return startHeader; + } + } + + private void readHeader(final ByteBuffer header, final Archive archive) throws IOException { + final int pos = header.position(); + final ArchiveStatistics stats = sanityCheckAndCollectStatistics(header); + stats.assertValidity(options.getMaxMemoryLimitInKb()); + header.position(pos); + + int nid = getUnsignedByte(header); + + if (nid == NID.kArchiveProperties) { + readArchiveProperties(header); + nid = getUnsignedByte(header); + } + + if (nid == NID.kAdditionalStreamsInfo) { + throw new IOException("Additional streams unsupported"); + //nid = getUnsignedByte(header); + } + + if (nid == NID.kMainStreamsInfo) { + readStreamsInfo(header, archive); + nid = getUnsignedByte(header); + } + + if (nid == NID.kFilesInfo) { + readFilesInfo(header, archive); + nid = getUnsignedByte(header); + } + } + + private ArchiveStatistics sanityCheckAndCollectStatistics(final ByteBuffer header) + throws IOException { + final ArchiveStatistics stats = new ArchiveStatistics(); + + int nid = getUnsignedByte(header); + + if (nid == NID.kArchiveProperties) { + sanityCheckArchiveProperties(header); + nid = getUnsignedByte(header); + } + + if (nid == NID.kAdditionalStreamsInfo) { + throw new IOException("Additional streams unsupported"); + //nid = getUnsignedByte(header); + } + + if (nid == NID.kMainStreamsInfo) { + sanityCheckStreamsInfo(header, stats); + nid = getUnsignedByte(header); + 
} + + if (nid == NID.kFilesInfo) { + sanityCheckFilesInfo(header, stats); + nid = getUnsignedByte(header); + } + + if (nid != NID.kEnd) { + throw new IOException("Badly terminated header, found " + nid); + } + + return stats; + } + + private void readArchiveProperties(final ByteBuffer input) throws IOException { + // FIXME: the reference implementation just throws them away? + int nid = getUnsignedByte(input); + while (nid != NID.kEnd) { + final long propertySize = readUint64(input); + final byte[] property = new byte[(int)propertySize]; + get(input, property); + nid = getUnsignedByte(input); + } + } + + private void sanityCheckArchiveProperties(final ByteBuffer header) + throws IOException { + int nid = getUnsignedByte(header); + while (nid != NID.kEnd) { + final int propertySize = + assertFitsIntoNonNegativeInt("propertySize", readUint64(header)); + if (skipBytesFully(header, propertySize) < propertySize) { + throw new IOException("invalid property size"); + } + nid = getUnsignedByte(header); + } + } + + private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive archive, + final byte[] password) throws IOException { + final int pos = header.position(); + final ArchiveStatistics stats = new ArchiveStatistics(); + sanityCheckStreamsInfo(header, stats); + stats.assertValidity(options.getMaxMemoryLimitInKb()); + header.position(pos); + + readStreamsInfo(header, archive); + + if (archive.folders == null || archive.folders.length == 0) { + throw new IOException("no folders, can't read encoded header"); + } + if (archive.packSizes == null || archive.packSizes.length == 0) { + throw new IOException("no packed streams, can't read encoded header"); + } + + // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage? 
+ final Folder folder = archive.folders[0]; + final int firstPackStreamIndex = 0; + final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + + 0; + + channel.position(folderOffset); + InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel, + archive.packSizes[firstPackStreamIndex]); + for (final Coder coder : folder.getOrderedCoders()) { + if (coder.numInStreams != 1 || coder.numOutStreams != 1) { + throw new IOException("Multi input/output stream coders are not yet supported"); + } + inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, //NOSONAR + folder.getUnpackSizeForCoder(coder), coder, password, options.getMaxMemoryLimitInKb()); + } + if (folder.hasCrc) { + inputStreamStack = new CRC32VerifyingInputStream(inputStreamStack, + folder.getUnpackSize(), folder.crc); + } + final int unpackSize = assertFitsIntoNonNegativeInt("unpackSize", folder.getUnpackSize()); + final byte[] nextHeader = IOUtils.readRange(inputStreamStack, unpackSize); + if (nextHeader.length < unpackSize) { + throw new IOException("premature end of stream"); + } + inputStreamStack.close(); + return ByteBuffer.wrap(nextHeader).order(ByteOrder.LITTLE_ENDIAN); + } + + private void sanityCheckStreamsInfo(final ByteBuffer header, + final ArchiveStatistics stats) throws IOException { + int nid = getUnsignedByte(header); + + if (nid == NID.kPackInfo) { + sanityCheckPackInfo(header, stats); + nid = getUnsignedByte(header); + } + + if (nid == NID.kUnpackInfo) { + sanityCheckUnpackInfo(header, stats); + nid = getUnsignedByte(header); + } + + if (nid == NID.kSubStreamsInfo) { + sanityCheckSubStreamsInfo(header, stats); + nid = getUnsignedByte(header); + } + + if (nid != NID.kEnd) { + throw new IOException("Badly terminated StreamsInfo"); + } + } + + private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { + int nid = getUnsignedByte(header); + + if (nid == NID.kPackInfo) { + readPackInfo(header, archive); + nid = 
getUnsignedByte(header); + } + + if (nid == NID.kUnpackInfo) { + readUnpackInfo(header, archive); + nid = getUnsignedByte(header); + } else { + // archive without unpack/coders info + archive.folders = Folder.EMPTY_FOLDER_ARRAY; + } + + if (nid == NID.kSubStreamsInfo) { + readSubStreamsInfo(header, archive); + nid = getUnsignedByte(header); + } + } + + private void sanityCheckPackInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { + final long packPos = readUint64(header); + if (packPos < 0 || SIGNATURE_HEADER_SIZE + packPos > channel.size() + || SIGNATURE_HEADER_SIZE + packPos < 0) { + throw new IOException("packPos (" + packPos + ") is out of range"); + } + final long numPackStreams = readUint64(header); + stats.numberOfPackedStreams = assertFitsIntoNonNegativeInt("numPackStreams", numPackStreams); + int nid = getUnsignedByte(header); + if (nid == NID.kSize) { + long totalPackSizes = 0; + for (int i = 0; i < stats.numberOfPackedStreams; i++) { + final long packSize = readUint64(header); + totalPackSizes += packSize; + final long endOfPackStreams = SIGNATURE_HEADER_SIZE + packPos + totalPackSizes; + if (packSize < 0 + || endOfPackStreams > channel.size() + || endOfPackStreams < packPos) { + throw new IOException("packSize (" + packSize + ") is out of range"); + } + } + nid = getUnsignedByte(header); + } + + if (nid == NID.kCRC) { + final int crcsDefined = readAllOrBits(header, stats.numberOfPackedStreams) + .cardinality(); + if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { + throw new IOException("invalid number of CRCs in PackInfo"); + } + nid = getUnsignedByte(header); + } + + if (nid != NID.kEnd) { + throw new IOException("Badly terminated PackInfo (" + nid + ")"); + } + } + + private void readPackInfo(final ByteBuffer header, final Archive archive) throws IOException { + archive.packPos = readUint64(header); + final int numPackStreamsInt = (int) readUint64(header); + int nid = getUnsignedByte(header); + if (nid == 
NID.kSize) { + archive.packSizes = new long[numPackStreamsInt]; + for (int i = 0; i < archive.packSizes.length; i++) { + archive.packSizes[i] = readUint64(header); + } + nid = getUnsignedByte(header); + } + + if (nid == NID.kCRC) { + archive.packCrcsDefined = readAllOrBits(header, numPackStreamsInt); + archive.packCrcs = new long[numPackStreamsInt]; + for (int i = 0; i < numPackStreamsInt; i++) { + if (archive.packCrcsDefined.get(i)) { + archive.packCrcs[i] = 0xffffFFFFL & getInt(header); + } + } + + nid = getUnsignedByte(header); + } + } + + private void sanityCheckUnpackInfo(final ByteBuffer header, final ArchiveStatistics stats) + throws IOException { + int nid = getUnsignedByte(header); + if (nid != NID.kFolder) { + throw new IOException("Expected kFolder, got " + nid); + } + final long numFolders = readUint64(header); + stats.numberOfFolders = assertFitsIntoNonNegativeInt("numFolders", numFolders); + final int external = getUnsignedByte(header); + if (external != 0) { + throw new IOException("External unsupported"); + } + + final List numberOfOutputStreamsPerFolder = new LinkedList<>(); + for (int i = 0; i < stats.numberOfFolders; i++) { + numberOfOutputStreamsPerFolder.add(sanityCheckFolder(header, stats)); + } + + final long totalNumberOfBindPairs = stats.numberOfOutStreams - stats.numberOfFolders; + final long packedStreamsRequiredByFolders = stats.numberOfInStreams - totalNumberOfBindPairs; + if (packedStreamsRequiredByFolders < stats.numberOfPackedStreams) { + throw new IOException("archive doesn't contain enough packed streams"); + } + + nid = getUnsignedByte(header); + if (nid != NID.kCodersUnpackSize) { + throw new IOException("Expected kCodersUnpackSize, got " + nid); + } + + for (final int numberOfOutputStreams : numberOfOutputStreamsPerFolder) { + for (int i = 0; i < numberOfOutputStreams; i++) { + final long unpackSize = readUint64(header); + if (unpackSize < 0) { + throw new IllegalArgumentException("negative unpackSize"); + } + } + } + + nid = 
getUnsignedByte(header); + if (nid == NID.kCRC) { + stats.folderHasCrc = readAllOrBits(header, stats.numberOfFolders); + final int crcsDefined = stats.folderHasCrc.cardinality(); + if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { + throw new IOException("invalid number of CRCs in UnpackInfo"); + } + nid = getUnsignedByte(header); + } + + if (nid != NID.kEnd) { + throw new IOException("Badly terminated UnpackInfo"); + } + } + + private void readUnpackInfo(final ByteBuffer header, final Archive archive) throws IOException { + int nid = getUnsignedByte(header); + final int numFoldersInt = (int) readUint64(header); + final Folder[] folders = new Folder[numFoldersInt]; + archive.folders = folders; + /* final int external = */ getUnsignedByte(header); + for (int i = 0; i < numFoldersInt; i++) { + folders[i] = readFolder(header); + } + + nid = getUnsignedByte(header); + for (final Folder folder : folders) { + assertFitsIntoNonNegativeInt("totalOutputStreams", folder.totalOutputStreams); + folder.unpackSizes = new long[(int)folder.totalOutputStreams]; + for (int i = 0; i < folder.totalOutputStreams; i++) { + folder.unpackSizes[i] = readUint64(header); + } + } + + nid = getUnsignedByte(header); + if (nid == NID.kCRC) { + final BitSet crcsDefined = readAllOrBits(header, numFoldersInt); + for (int i = 0; i < numFoldersInt; i++) { + if (crcsDefined.get(i)) { + folders[i].hasCrc = true; + folders[i].crc = 0xffffFFFFL & getInt(header); + } else { + folders[i].hasCrc = false; + } + } + + nid = getUnsignedByte(header); + } + } + + private void sanityCheckSubStreamsInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { + + int nid = getUnsignedByte(header); + final List numUnpackSubStreamsPerFolder = new LinkedList<>(); + if (nid == NID.kNumUnpackStream) { + for (int i = 0; i < stats.numberOfFolders; i++) { + numUnpackSubStreamsPerFolder.add(assertFitsIntoNonNegativeInt("numStreams", readUint64(header))); + } + 
stats.numberOfUnpackSubStreams = numUnpackSubStreamsPerFolder.stream().mapToLong(Integer::longValue).sum(); + nid = getUnsignedByte(header); + } else { + stats.numberOfUnpackSubStreams = stats.numberOfFolders; + } + + assertFitsIntoNonNegativeInt("totalUnpackStreams", stats.numberOfUnpackSubStreams); + + if (nid == NID.kSize) { + for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { + if (numUnpackSubStreams == 0) { + continue; + } + for (int i = 0; i < numUnpackSubStreams - 1; i++) { + final long size = readUint64(header); + if (size < 0) { + throw new IOException("negative unpackSize"); + } + } + } + nid = getUnsignedByte(header); + } + + int numDigests = 0; + if (numUnpackSubStreamsPerFolder.isEmpty()) { + numDigests = stats.folderHasCrc == null ? stats.numberOfFolders + : stats.numberOfFolders - stats.folderHasCrc.cardinality(); + } else { + int folderIdx = 0; + for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { + if (numUnpackSubStreams != 1 || stats.folderHasCrc == null + || !stats.folderHasCrc.get(folderIdx++)) { + numDigests += numUnpackSubStreams; + } + } + } + + if (nid == NID.kCRC) { + assertFitsIntoNonNegativeInt("numDigests", numDigests); + final int missingCrcs = readAllOrBits(header, numDigests) + .cardinality(); + if (skipBytesFully(header, 4 * missingCrcs) < 4 * missingCrcs) { + throw new IOException("invalid number of missing CRCs in SubStreamInfo"); + } + nid = getUnsignedByte(header); + } + + if (nid != NID.kEnd) { + throw new IOException("Badly terminated SubStreamsInfo"); + } + } + + private void readSubStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { + for (final Folder folder : archive.folders) { + folder.numUnpackSubStreams = 1; + } + long unpackStreamsCount = archive.folders.length; + + int nid = getUnsignedByte(header); + if (nid == NID.kNumUnpackStream) { + unpackStreamsCount = 0; + for (final Folder folder : archive.folders) { + final long numStreams = readUint64(header); 
+ folder.numUnpackSubStreams = (int)numStreams; + unpackStreamsCount += numStreams; + } + nid = getUnsignedByte(header); + } + + final int totalUnpackStreams = (int) unpackStreamsCount; + final SubStreamsInfo subStreamsInfo = new SubStreamsInfo(); + subStreamsInfo.unpackSizes = new long[totalUnpackStreams]; + subStreamsInfo.hasCrc = new BitSet(totalUnpackStreams); + subStreamsInfo.crcs = new long[totalUnpackStreams]; + + int nextUnpackStream = 0; + for (final Folder folder : archive.folders) { + if (folder.numUnpackSubStreams == 0) { + continue; + } + long sum = 0; + if (nid == NID.kSize) { + for (int i = 0; i < folder.numUnpackSubStreams - 1; i++) { + final long size = readUint64(header); + subStreamsInfo.unpackSizes[nextUnpackStream++] = size; + sum += size; + } + } + if (sum > folder.getUnpackSize()) { + throw new IOException("sum of unpack sizes of folder exceeds total unpack size"); + } + subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum; + } + if (nid == NID.kSize) { + nid = getUnsignedByte(header); + } + + int numDigests = 0; + for (final Folder folder : archive.folders) { + if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) { + numDigests += folder.numUnpackSubStreams; + } + } + + if (nid == NID.kCRC) { + final BitSet hasMissingCrc = readAllOrBits(header, numDigests); + final long[] missingCrcs = new long[numDigests]; + for (int i = 0; i < numDigests; i++) { + if (hasMissingCrc.get(i)) { + missingCrcs[i] = 0xffffFFFFL & getInt(header); + } + } + int nextCrc = 0; + int nextMissingCrc = 0; + for (final Folder folder: archive.folders) { + if (folder.numUnpackSubStreams == 1 && folder.hasCrc) { + subStreamsInfo.hasCrc.set(nextCrc, true); + subStreamsInfo.crcs[nextCrc] = folder.crc; + ++nextCrc; + } else { + for (int i = 0; i < folder.numUnpackSubStreams; i++) { + subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc)); + subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc]; + ++nextCrc; + ++nextMissingCrc; + } 
+ } + } + + nid = getUnsignedByte(header); + } + + archive.subStreamsInfo = subStreamsInfo; + } + + private int sanityCheckFolder(final ByteBuffer header, final ArchiveStatistics stats) + throws IOException { + + final int numCoders = assertFitsIntoNonNegativeInt("numCoders", readUint64(header)); + if (numCoders == 0) { + throw new IOException("Folder without coders"); + } + stats.numberOfCoders += numCoders; + + long totalOutStreams = 0; + long totalInStreams = 0; + for (int i = 0; i < numCoders; i++) { + final int bits = getUnsignedByte(header); + final int idSize = bits & 0xf; + get(header, new byte[idSize]); + + final boolean isSimple = (bits & 0x10) == 0; + final boolean hasAttributes = (bits & 0x20) != 0; + final boolean moreAlternativeMethods = (bits & 0x80) != 0; + if (moreAlternativeMethods) { + throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR + "The reference implementation doesn't support them either."); + } + + if (isSimple) { + totalInStreams++; + totalOutStreams++; + } else { + totalInStreams += + assertFitsIntoNonNegativeInt("numInStreams", readUint64(header)); + totalOutStreams += + assertFitsIntoNonNegativeInt("numOutStreams", readUint64(header)); + } + + if (hasAttributes) { + final int propertiesSize = + assertFitsIntoNonNegativeInt("propertiesSize", readUint64(header)); + if (skipBytesFully(header, propertiesSize) < propertiesSize) { + throw new IOException("invalid propertiesSize in folder"); + } + } + } + assertFitsIntoNonNegativeInt("totalInStreams", totalInStreams); + assertFitsIntoNonNegativeInt("totalOutStreams", totalOutStreams); + stats.numberOfOutStreams += totalOutStreams; + stats.numberOfInStreams += totalInStreams; + + if (totalOutStreams == 0) { + throw new IOException("Total output streams can't be 0"); + } + + final int numBindPairs = + assertFitsIntoNonNegativeInt("numBindPairs", totalOutStreams - 1); + if (totalInStreams < numBindPairs) { + throw new IOException("Total input streams can't 
be less than the number of bind pairs"); + } + final BitSet inStreamsBound = new BitSet((int) totalInStreams); + for (int i = 0; i < numBindPairs; i++) { + final int inIndex = assertFitsIntoNonNegativeInt("inIndex", readUint64(header)); + if (totalInStreams <= inIndex) { + throw new IOException("inIndex is bigger than number of inStreams"); + } + inStreamsBound.set(inIndex); + final int outIndex = assertFitsIntoNonNegativeInt("outIndex", readUint64(header)); + if (totalOutStreams <= outIndex) { + throw new IOException("outIndex is bigger than number of outStreams"); + } + } + + final int numPackedStreams = + assertFitsIntoNonNegativeInt("numPackedStreams", totalInStreams - numBindPairs); + + if (numPackedStreams == 1) { + if (inStreamsBound.nextClearBit(0) == -1) { + throw new IOException("Couldn't find stream's bind pair index"); + } + } else { + for (int i = 0; i < numPackedStreams; i++) { + final int packedStreamIndex = + assertFitsIntoNonNegativeInt("packedStreamIndex", readUint64(header)); + if (packedStreamIndex >= totalInStreams) { + throw new IOException("packedStreamIndex is bigger than number of totalInStreams"); + } + } + } + + return (int) totalOutStreams; + } + + private Folder readFolder(final ByteBuffer header) throws IOException { + final Folder folder = new Folder(); + + final long numCoders = readUint64(header); + final Coder[] coders = new Coder[(int)numCoders]; + long totalInStreams = 0; + long totalOutStreams = 0; + for (int i = 0; i < coders.length; i++) { + coders[i] = new Coder(); + final int bits = getUnsignedByte(header); + final int idSize = bits & 0xf; + final boolean isSimple = (bits & 0x10) == 0; + final boolean hasAttributes = (bits & 0x20) != 0; + final boolean moreAlternativeMethods = (bits & 0x80) != 0; + + coders[i].decompressionMethodId = new byte[idSize]; + get(header, coders[i].decompressionMethodId); + if (isSimple) { + coders[i].numInStreams = 1; + coders[i].numOutStreams = 1; + } else { + coders[i].numInStreams = 
readUint64(header); + coders[i].numOutStreams = readUint64(header); + } + totalInStreams += coders[i].numInStreams; + totalOutStreams += coders[i].numOutStreams; + if (hasAttributes) { + final long propertiesSize = readUint64(header); + coders[i].properties = new byte[(int)propertiesSize]; + get(header, coders[i].properties); + } + // would need to keep looping as above: + if (moreAlternativeMethods) { + throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR + "The reference implementation doesn't support them either."); + } + } + folder.coders = coders; + folder.totalInputStreams = totalInStreams; + folder.totalOutputStreams = totalOutStreams; + + final long numBindPairs = totalOutStreams - 1; + final BindPair[] bindPairs = new BindPair[(int)numBindPairs]; + for (int i = 0; i < bindPairs.length; i++) { + bindPairs[i] = new BindPair(); + bindPairs[i].inIndex = readUint64(header); + bindPairs[i].outIndex = readUint64(header); + } + folder.bindPairs = bindPairs; + + final long numPackedStreams = totalInStreams - numBindPairs; + final long[] packedStreams = new long[(int)numPackedStreams]; + if (numPackedStreams == 1) { + int i; + for (i = 0; i < (int)totalInStreams; i++) { + if (folder.findBindPairForInStream(i) < 0) { + break; + } + } + packedStreams[0] = i; + } else { + for (int i = 0; i < (int)numPackedStreams; i++) { + packedStreams[i] = readUint64(header); + } + } + folder.packedStreams = packedStreams; + + return folder; + } + + private BitSet readAllOrBits(final ByteBuffer header, final int size) throws IOException { + final int areAllDefined = getUnsignedByte(header); + final BitSet bits; + if (areAllDefined != 0) { + bits = new BitSet(size); + for (int i = 0; i < size; i++) { + bits.set(i, true); + } + } else { + bits = readBits(header, size); + } + return bits; + } + + private BitSet readBits(final ByteBuffer header, final int size) throws IOException { + final BitSet bits = new BitSet(size); + int mask = 0; + int cache = 
0; + for (int i = 0; i < size; i++) { + if (mask == 0) { + mask = 0x80; + cache = getUnsignedByte(header); + } + bits.set(i, (cache & mask) != 0); + mask >>>= 1; + } + return bits; + } + + private void sanityCheckFilesInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { + stats.numberOfEntries = assertFitsIntoNonNegativeInt("numFiles", readUint64(header)); + + int emptyStreams = -1; + while (true) { + final int propertyType = getUnsignedByte(header); + if (propertyType == 0) { + break; + } + final long size = readUint64(header); + switch (propertyType) { + case NID.kEmptyStream: { + emptyStreams = readBits(header, stats.numberOfEntries).cardinality(); + break; + } + case NID.kEmptyFile: { + if (emptyStreams == -1) { + throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile"); + } + readBits(header, emptyStreams); + break; + } + case NID.kAnti: { + if (emptyStreams == -1) { + throw new IOException("Header format error: kEmptyStream must appear before kAnti"); + } + readBits(header, emptyStreams); + break; + } + case NID.kName: { + final int external = getUnsignedByte(header); + if (external != 0) { + throw new IOException("Not implemented"); + } + final int namesLength = + assertFitsIntoNonNegativeInt("file names length", size - 1); + if ((namesLength & 1) != 0) { + throw new IOException("File names length invalid"); + } + + int filesSeen = 0; + for (int i = 0; i < namesLength; i += 2) { + final char c = getChar(header); + if (c == 0) { + filesSeen++; + } + } + if (filesSeen != stats.numberOfEntries) { + throw new IOException("Invalid number of file names (" + filesSeen + " instead of " + + stats.numberOfEntries + ")"); + } + break; + } + case NID.kCTime: { + final int timesDefined = readAllOrBits(header, stats.numberOfEntries) + .cardinality(); + final int external = getUnsignedByte(header); + if (external != 0) { + throw new IOException("Not implemented"); + } + if (skipBytesFully(header, 8 * 
timesDefined) < 8 * timesDefined) { + throw new IOException("invalid creation dates size"); + } + break; + } + case NID.kATime: { + final int timesDefined = readAllOrBits(header, stats.numberOfEntries) + .cardinality(); + final int external = getUnsignedByte(header); + if (external != 0) { + throw new IOException("Not implemented"); + } + if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { + throw new IOException("invalid access dates size"); + } + break; + } + case NID.kMTime: { + final int timesDefined = readAllOrBits(header, stats.numberOfEntries) + .cardinality(); + final int external = getUnsignedByte(header); + if (external != 0) { + throw new IOException("Not implemented"); + } + if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) { + throw new IOException("invalid modification dates size"); + } + break; + } + case NID.kWinAttributes: { + final int attributesDefined = readAllOrBits(header, stats.numberOfEntries) + .cardinality(); + final int external = getUnsignedByte(header); + if (external != 0) { + throw new IOException("Not implemented"); + } + if (skipBytesFully(header, 4 * attributesDefined) < 4 * attributesDefined) { + throw new IOException("invalid windows attributes size"); + } + break; + } + case NID.kStartPos: { + throw new IOException("kStartPos is unsupported, please report"); + } + case NID.kDummy: { + // 7z 9.20 asserts the content is all zeros and ignores the property + // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 + + if (skipBytesFully(header, size) < size) { + throw new IOException("Incomplete kDummy property"); + } + break; + } + + default: { + // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 + if (skipBytesFully(header, size) < size) { + throw new IOException("Incomplete property of type " + propertyType); + } + break; + } + } + } + stats.numberOfEntriesWithStream = stats.numberOfEntries - Math.max(emptyStreams, 0); + } + + private void 
readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException {
    final int numFilesInt = (int) readUint64(header);
    // Entries are created lazily, the first time a property referencing them is seen.
    // NOTE: generic parameters restored here — extraction had stripped the
    // <Integer, SevenZArchiveEntry> type arguments, leaving a raw Map that cannot compile.
    final Map<Integer, SevenZArchiveEntry> fileMap = new LinkedHashMap<>();
    BitSet isEmptyStream = null;
    BitSet isEmptyFile = null;
    BitSet isAnti = null;
    while (true) {
        final int propertyType = getUnsignedByte(header);
        if (propertyType == 0) {
            break;
        }
        final long size = readUint64(header);
        switch (propertyType) {
            case NID.kEmptyStream: {
                isEmptyStream = readBits(header, numFilesInt);
                break;
            }
            case NID.kEmptyFile: {
                isEmptyFile = readBits(header, isEmptyStream.cardinality());
                break;
            }
            case NID.kAnti: {
                isAnti = readBits(header, isEmptyStream.cardinality());
                break;
            }
            case NID.kName: {
                /* final int external = */ getUnsignedByte(header);
                // Names are stored as one blob of NUL-terminated UTF-16LE strings.
                final byte[] names = new byte[(int) (size - 1)];
                final int namesLength = names.length;
                get(header, names);
                int nextFile = 0;
                int nextName = 0;
                for (int i = 0; i < namesLength; i += 2) {
                    if (names[i] == 0 && names[i + 1] == 0) {
                        checkEntryIsInitialized(fileMap, nextFile);
                        fileMap.get(nextFile).setName(new String(names, nextName, i - nextName, UTF_16LE));
                        nextName = i + 2;
                        nextFile++;
                    }
                }
                if (nextName != namesLength || nextFile != numFilesInt) {
                    throw new IOException("Error parsing file names");
                }
                break;
            }
            case NID.kCTime: {
                final BitSet timesDefined = readAllOrBits(header, numFilesInt);
                /* final int external = */ getUnsignedByte(header);
                for (int i = 0; i < numFilesInt; i++) {
                    checkEntryIsInitialized(fileMap, i);
                    final SevenZArchiveEntry entryAtIndex = fileMap.get(i);
                    entryAtIndex.setHasCreationDate(timesDefined.get(i));
                    if (entryAtIndex.getHasCreationDate()) {
                        entryAtIndex.setCreationDate(getLong(header));
                    }
                }
                break;
            }
            case NID.kATime: {
                final BitSet timesDefined = readAllOrBits(header, numFilesInt);
                /* final int external = */ getUnsignedByte(header);
                for (int i = 0; i < numFilesInt; i++) {
                    checkEntryIsInitialized(fileMap, i);
                    final SevenZArchiveEntry entryAtIndex = fileMap.get(i);
                    entryAtIndex.setHasAccessDate(timesDefined.get(i));
                    if (entryAtIndex.getHasAccessDate()) {
                        entryAtIndex.setAccessDate(getLong(header));
                    }
                }
                break;
            }
            case NID.kMTime: {
                final BitSet timesDefined = readAllOrBits(header, numFilesInt);
                /* final int external = */ getUnsignedByte(header);
                for (int i = 0; i < numFilesInt; i++) {
                    checkEntryIsInitialized(fileMap, i);
                    final SevenZArchiveEntry entryAtIndex = fileMap.get(i);
                    entryAtIndex.setHasLastModifiedDate(timesDefined.get(i));
                    if (entryAtIndex.getHasLastModifiedDate()) {
                        entryAtIndex.setLastModifiedDate(getLong(header));
                    }
                }
                break;
            }
            case NID.kWinAttributes: {
                final BitSet attributesDefined = readAllOrBits(header, numFilesInt);
                /* final int external = */ getUnsignedByte(header);
                for (int i = 0; i < numFilesInt; i++) {
                    checkEntryIsInitialized(fileMap, i);
                    final SevenZArchiveEntry entryAtIndex = fileMap.get(i);
                    entryAtIndex.setHasWindowsAttributes(attributesDefined.get(i));
                    if (entryAtIndex.getHasWindowsAttributes()) {
                        entryAtIndex.setWindowsAttributes(getInt(header));
                    }
                }
                break;
            }
            case NID.kDummy: {
                // 7z 9.20 asserts the content is all zeros and ignores the property.
                // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287).
                skipBytesFully(header, size);
                break;
            }
            default: {
                // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287).
                skipBytesFully(header, size);
                break;
            }
        }
    }
    // Second pass: wire empty-stream/empty-file/anti flags and the per-substream
    // metadata (CRC, size) into each entry that was actually created.
    int nonEmptyFileCounter = 0;
    int emptyFileCounter = 0;
    for (int i = 0; i < numFilesInt; i++) {
        final SevenZArchiveEntry entryAtIndex = fileMap.get(i);
        if (entryAtIndex == null) {
            continue;
        }
        entryAtIndex.setHasStream(isEmptyStream == null || !isEmptyStream.get(i));
        if (entryAtIndex.hasStream()) {
            if (archive.subStreamsInfo == null) {
                throw new IOException("Archive contains file with streams but no subStreamsInfo");
            }
            entryAtIndex.setDirectory(false);
            entryAtIndex.setAntiItem(false);
            entryAtIndex.setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter));
            entryAtIndex.setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]);
            entryAtIndex.setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]);
            if (entryAtIndex.getSize() < 0) {
                throw new IOException("broken archive, entry with negative size");
            }
            ++nonEmptyFileCounter;
        } else {
            // A stream-less entry is a directory unless it is explicitly marked
            // as an empty file.
            entryAtIndex.setDirectory(isEmptyFile == null || !isEmptyFile.get(emptyFileCounter));
            entryAtIndex.setAntiItem(isAnti != null && isAnti.get(emptyFileCounter));
            entryAtIndex.setHasCrc(false);
            entryAtIndex.setSize(0);
            ++emptyFileCounter;
        }
    }
    archive.files = fileMap.values().stream().filter(Objects::nonNull).toArray(SevenZArchiveEntry[]::new);
    calculateStreamMap(archive);
}

/**
 * Creates the entry for {@code index} on first use so later properties can
 * populate it.
 */
private void checkEntryIsInitialized(final Map<Integer, SevenZArchiveEntry> archiveEntries, final int index) {
    if (archiveEntries.get(index) == null) {
        archiveEntries.put(index, new SevenZArchiveEntry());
    }
}

/**
 * Derives the stream map (pack-stream offsets, first file per folder,
 * file-to-folder index) from the parsed folders and files.
 */
private void calculateStreamMap(final Archive archive) throws IOException {
    final StreamMap streamMap = new StreamMap();

    int nextFolderPackStreamIndex = 0;
    final int numFolders = archive.folders != null ? archive.folders.length : 0;
    streamMap.folderFirstPackStreamIndex = new int[numFolders];
    for (int i = 0; i < numFolders; i++) {
        streamMap.folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex;
        nextFolderPackStreamIndex += archive.folders[i].packedStreams.length;
    }

    long nextPackStreamOffset = 0;
    final int numPackSizes = archive.packSizes.length;
    streamMap.packStreamOffsets = new long[numPackSizes];
    for (int i = 0; i < numPackSizes; i++) {
        streamMap.packStreamOffsets[i] = nextPackStreamOffset;
        nextPackStreamOffset += archive.packSizes[i];
    }

    streamMap.folderFirstFileIndex = new int[numFolders];
    streamMap.fileFolderIndex = new int[archive.files.length];
    int nextFolderIndex = 0;
    int nextFolderUnpackStreamIndex = 0;
    for (int i = 0; i < archive.files.length; i++) {
        if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) {
            // Stream-less entry outside any folder.
            streamMap.fileFolderIndex[i] = -1;
            continue;
        }
        if (nextFolderUnpackStreamIndex == 0) {
            // Advance to the next folder that actually holds substreams.
            for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) {
                streamMap.folderFirstFileIndex[nextFolderIndex] = i;
                if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) {
                    break;
                }
            }
            if (nextFolderIndex >= archive.folders.length) {
                throw new IOException("Too few folders in archive");
            }
        }
        streamMap.fileFolderIndex[i] = nextFolderIndex;
        if (!archive.files[i].hasStream()) {
            continue;
        }
        ++nextFolderUnpackStreamIndex;
        if (nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) {
            ++nextFolderIndex;
            nextFolderUnpackStreamIndex = 0;
        }
    }

    archive.streamMap = streamMap;
}

/**
 * Build the decoding stream for the entry to be read.
 * This method may be called from a random access(getInputStream) or
 * sequential access(getNextEntry).
 * If this method is called from a random access, some entries may
 * need to be skipped(we put them to the deferredBlockStreams and
 * skip them when actually needed to improve the performance)
 *
 * @param entryIndex the index of the entry to be read
 * @param isRandomAccess is this called in a random access
 * @throws IOException if there are exceptions when reading the file
 */
private void buildDecodingStream(final int entryIndex, final boolean isRandomAccess) throws IOException {
    if (archive.streamMap == null) {
        throw new IOException("Archive doesn't contain stream information to read entries");
    }
    final int folderIndex = archive.streamMap.fileFolderIndex[entryIndex];
    if (folderIndex < 0) {
        deferredBlockStreams.clear();
        // TODO: previously it'd return an empty stream?
        // new BoundedInputStream(new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY), 0);
        return;
    }
    final SevenZArchiveEntry file = archive.files[entryIndex];
    boolean isInSameFolder = false;
    if (currentFolderIndex == folderIndex) {
        // (COMPRESS-320).
        // The current entry is within the same (potentially opened) folder. The
        // previous stream has to be fully decoded before we can start reading
        // but don't do it eagerly -- if the user skips over the entire folder nothing
        // is effectively decompressed.
        if (entryIndex > 0) {
            file.setContentMethods(archive.files[entryIndex - 1].getContentMethods());
        }

        // if this is called in a random access, then the content methods of previous entry may be null
        // the content methods should be set to methods of the first entry as it must not be null,
        // and the content methods would only be set if the content methods was not set
        if (isRandomAccess && file.getContentMethods() == null) {
            final int folderFirstFileIndex = archive.streamMap.folderFirstFileIndex[folderIndex];
            final SevenZArchiveEntry folderFirstFile = archive.files[folderFirstFileIndex];
            file.setContentMethods(folderFirstFile.getContentMethods());
        }
        isInSameFolder = true;
    } else {
        currentFolderIndex = folderIndex;
        // We're opening a new folder. Discard any queued streams/ folder stream.
        reopenFolderInputStream(folderIndex, file);
    }

    boolean haveSkippedEntries = false;
    if (isRandomAccess) {
        // entries will only need to be skipped if it's a random access
        haveSkippedEntries = skipEntriesWhenNeeded(entryIndex, isInSameFolder, folderIndex);
    }

    if (isRandomAccess && currentEntryIndex == entryIndex && !haveSkippedEntries) {
        // we don't need to add another entry to the deferredBlockStreams when :
        // 1. If this method is called in a random access and the entry index
        // to be read equals to the current entry index, the input stream
        // has already been put in the deferredBlockStreams
        // 2. If this entry has not been read(which means no entries are skipped)
        return;
    }

    InputStream fileStream = new BoundedInputStream(currentFolderInputStream, file.getSize());
    if (file.getHasCrc()) {
        fileStream = new CRC32VerifyingInputStream(fileStream, file.getSize(), file.getCrcValue());
    }

    deferredBlockStreams.add(fileStream);
}

/**
 * Discard any queued streams/ folder stream, and reopen the current folder input stream.
 *
 * @param folderIndex the index of the folder to reopen
 * @param file the 7z entry to read
 * @throws IOException if exceptions occur when reading the 7z file
 */
private void reopenFolderInputStream(final int folderIndex, final SevenZArchiveEntry file) throws IOException {
    deferredBlockStreams.clear();
    if (currentFolderInputStream != null) {
        currentFolderInputStream.close();
        currentFolderInputStream = null;
    }
    final Folder folder = archive.folders[folderIndex];
    final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex];
    final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos +
            archive.streamMap.packStreamOffsets[firstPackStreamIndex];

    currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file);
}

/**
 * Skip all the entries if needed.
 * Entries need to be skipped when:
 *

 * 1. it's a random access
 * 2. one of these 2 condition is meet :
 * <p>
 * 2.1 currentEntryIndex != entryIndex : this means there are some entries
 * to be skipped(currentEntryIndex &lt; entryIndex) or the entry has already
 * been read(currentEntryIndex &gt; entryIndex)
 * <p>
 * 2.2 currentEntryIndex == entryIndex &amp;&amp; !hasCurrentEntryBeenRead:
 * if the entry to be read is the current entry, but some data of it has
 * been read before, then we need to reopen the stream of the folder and
 * skip all the entries before the current entries
 *
 * @param entryIndex the entry to be read
 * @param isInSameFolder are the entry to be read and the current entry in the same folder
 * @param folderIndex the index of the folder which contains the entry
 * @return true if there are entries actually skipped
 * @throws IOException there are exceptions when skipping entries
 * @since 1.21
 */
private boolean skipEntriesWhenNeeded(final int entryIndex, final boolean isInSameFolder, final int folderIndex) throws IOException {
    final SevenZArchiveEntry file = archive.files[entryIndex];
    // if the entry to be read is the current entry, and the entry has not
    // been read yet, then there's nothing we need to do
    if (currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead()) {
        return false;
    }

    // 1. if currentEntryIndex < entryIndex :
    // this means there are some entries to be skipped(currentEntryIndex < entryIndex)
    // 2. if currentEntryIndex > entryIndex || (currentEntryIndex == entryIndex && hasCurrentEntryBeenRead) :
    // this means the entry has already been read before, and we need to reopen the
    // stream of the folder and skip all the entries before the current entries
    int filesToSkipStartIndex = archive.streamMap.folderFirstFileIndex[currentFolderIndex];
    if (isInSameFolder) {
        if (currentEntryIndex < entryIndex) {
            // the entries between filesToSkipStartIndex and currentEntryIndex had already been skipped
            filesToSkipStartIndex = currentEntryIndex + 1;
        } else {
            // the entry is in the same folder of current entry, but it has already been read before, we need to reset
            // the position of the currentFolderInputStream to the beginning of folder, and then skip the files
            // from the start entry of the folder again
            reopenFolderInputStream(folderIndex, file);
        }
    }

    // Queue (but do not yet consume) a bounded stream for every entry that
    // precedes the requested one; getCurrentStream() drains them lazily.
    for (int i = filesToSkipStartIndex; i < entryIndex; i++) {
        final SevenZArchiveEntry fileToSkip = archive.files[i];
        InputStream fileStreamToSkip = new BoundedInputStream(currentFolderInputStream, fileToSkip.getSize());
        if (fileToSkip.getHasCrc()) {
            fileStreamToSkip = new CRC32VerifyingInputStream(fileStreamToSkip, fileToSkip.getSize(), fileToSkip.getCrcValue());
        }
        deferredBlockStreams.add(fileStreamToSkip);

        // set the content methods as well, it equals to file.getContentMethods() because they are in same folder
        fileToSkip.setContentMethods(file.getContentMethods());
    }
    return true;
}

/**
 * Find out if any data of current entry has been read or not.
 * This is achieved by comparing the bytes remaining to read
 * and the size of the file.
+ * + * @return true if any data of current entry has been read + * @since 1.21 + */ + private boolean hasCurrentEntryBeenRead() { + boolean hasCurrentEntryBeenRead = false; + if (!deferredBlockStreams.isEmpty()) { + final InputStream currentEntryInputStream = deferredBlockStreams.get(deferredBlockStreams.size() - 1); + // get the bytes remaining to read, and compare it with the size of + // the file to figure out if the file has been read + if (currentEntryInputStream instanceof CRC32VerifyingInputStream) { + hasCurrentEntryBeenRead = ((CRC32VerifyingInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); + } + + if (currentEntryInputStream instanceof BoundedInputStream) { + hasCurrentEntryBeenRead = ((BoundedInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); + } + } + return hasCurrentEntryBeenRead; + } + + private InputStream buildDecoderStack(final Folder folder, final long folderOffset, + final int firstPackStreamIndex, final SevenZArchiveEntry entry) throws IOException { + channel.position(folderOffset); + InputStream inputStreamStack = new FilterInputStream(new BufferedInputStream( + new BoundedSeekableByteChannelInputStream(channel, + archive.packSizes[firstPackStreamIndex]))) { + @Override + public int read() throws IOException { + final int r = in.read(); + if (r >= 0) { + count(1); + } + return r; + } + @Override + public int read(final byte[] b) throws IOException { + return read(b, 0, b.length); + } + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + if (len == 0) { + return 0; + } + final int r = in.read(b, off, len); + if (r >= 0) { + count(r); + } + return r; + } + private void count(final int c) { + compressedBytesReadFromCurrentEntry += c; + } + }; + final LinkedList methods = new LinkedList<>(); + for (final Coder coder : folder.getOrderedCoders()) { + if (coder.numInStreams != 1 || 
coder.numOutStreams != 1) { + throw new IOException("Multi input/output stream coders are not yet supported"); + } + final SevenZMethod method = SevenZMethod.byId(coder.decompressionMethodId); + inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, + folder.getUnpackSizeForCoder(coder), coder, password, options.getMaxMemoryLimitInKb()); + methods.addFirst(new SevenZMethodConfiguration(method, + Coders.findByMethod(method).getOptionsFromCoder(coder, inputStreamStack))); + } + entry.setContentMethods(methods); + if (folder.hasCrc) { + return new CRC32VerifyingInputStream(inputStreamStack, + folder.getUnpackSize(), folder.crc); + } + return inputStreamStack; + } + + /** + * Reads a byte of data. + * + * @return the byte read, or -1 if end of input is reached + * @throws IOException + * if an I/O error has occurred + */ + public int read() throws IOException { + final int b = getCurrentStream().read(); + if (b >= 0) { + uncompressedBytesReadFromCurrentEntry++; + } + return b; + } + + private InputStream getCurrentStream() throws IOException { + if (archive.files[currentEntryIndex].getSize() == 0) { + return new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY); + } + if (deferredBlockStreams.isEmpty()) { + throw new IllegalStateException("No current 7z entry (call getNextEntry() first)."); + } + + while (deferredBlockStreams.size() > 1) { + // In solid compression mode we need to decompress all leading folder' + // streams to get access to an entry. We defer this until really needed + // so that entire blocks can be skipped without wasting time for decompression. + try (final InputStream stream = deferredBlockStreams.remove(0)) { + IOUtils.skip(stream, Long.MAX_VALUE); + } + compressedBytesReadFromCurrentEntry = 0; + } + + return deferredBlockStreams.get(0); + } + + /** + * Returns an InputStream for reading the contents of the given entry. + * + *

For archives using solid compression randomly accessing
 * entries will be significantly slower than reading the archive
 * sequentially.</p>
 *
 * @param entry the entry to get the stream for.
 * @return a stream to read the entry from.
 * @throws IOException if unable to create an input stream from the zipentry
 * @since 1.20
 */
public InputStream getInputStream(final SevenZArchiveEntry entry) throws IOException {
    // Identity comparison on purpose: the caller must pass an entry object
    // obtained from this very archive instance.
    int entryIndex = -1;
    for (int i = 0; i < this.archive.files.length;i++) {
        if (entry == this.archive.files[i]) {
            entryIndex = i;
            break;
        }
    }

    if (entryIndex < 0) {
        throw new IllegalArgumentException("Can not find " + entry.getName() + " in " + this.fileName);
    }

    buildDecodingStream(entryIndex, true);
    currentEntryIndex = entryIndex;
    currentFolderIndex = archive.streamMap.fileFolderIndex[entryIndex];
    return getCurrentStream();
}

/**
 * Reads data into an array of bytes.
 *
 * @param b the array to write data to
 * @return the number of bytes read, or -1 if end of input is reached
 * @throws IOException
 *             if an I/O error has occurred
 */
public int read(final byte[] b) throws IOException {
    return read(b, 0, b.length);
}

/**
 * Reads data into an array of bytes.
 *
 * @param b the array to write data to
 * @param off offset into the buffer to start filling at
 * @param len of bytes to read
 * @return the number of bytes read, or -1 if end of input is reached
 * @throws IOException
 *             if an I/O error has occurred
 */
public int read(final byte[] b, final int off, final int len) throws IOException {
    if (len == 0) {
        return 0;
    }
    final int cnt = getCurrentStream().read(b, off, len);
    if (cnt > 0) {
        uncompressedBytesReadFromCurrentEntry += cnt;
    }
    return cnt;
}

/**
 * Provides statistics for bytes read from the current entry.
 *
 * @return statistics for bytes read from the current entry
 * @since 1.17
 */
public InputStreamStatistics getStatisticsForCurrentEntry() {
    return new InputStreamStatistics() {
        @Override
        public long getCompressedCount() {
            return compressedBytesReadFromCurrentEntry;
        }
        @Override
        public long getUncompressedCount() {
            return uncompressedBytesReadFromCurrentEntry;
        }
    };
}

/**
 * Decodes a 7z variable-length unsigned integer: the leading bits of the
 * first byte say how many extra little-endian bytes follow.
 */
private static long readUint64(final ByteBuffer in) throws IOException {
    // long rather than int as it might get shifted beyond the range of an int
    final long firstByte = getUnsignedByte(in);
    int mask = 0x80;
    long value = 0;
    for (int i = 0; i < 8; i++) {
        if ((firstByte & mask) == 0) {
            return value | (firstByte & mask - 1) << 8 * i;
        }
        final long nextByte = getUnsignedByte(in);
        value |= nextByte << 8 * i;
        mask >>>= 1;
    }
    return value;
}

// Bounds-checked ByteBuffer accessors: a plain get past the limit would throw
// BufferUnderflowException; a truncated archive should surface as EOFException.

private static char getChar(final ByteBuffer buf) throws IOException {
    if (buf.remaining() < 2) {
        throw new EOFException();
    }
    return buf.getChar();
}

private static int getInt(final ByteBuffer buf) throws IOException {
    if (buf.remaining() < 4) {
        throw new EOFException();
    }
    return buf.getInt();
}

private static long getLong(final ByteBuffer buf) throws IOException {
    if (buf.remaining() < 8) {
        throw new EOFException();
    }
    return buf.getLong();
}

private static void get(final ByteBuffer buf, final byte[] to) throws IOException {
    if (buf.remaining() < to.length) {
        throw new EOFException();
    }
    buf.get(to);
}

private static int getUnsignedByte(final ByteBuffer buf) throws IOException {
    if (!buf.hasRemaining()) {
        throw new EOFException();
    }
    return buf.get() & 0xff;
}

/**
 * Checks if the signature matches what is expected for a 7z file.
 *
 * @param signature
 *            the bytes to check
 * @param length
 *            the number of bytes to check
 * @return true, if this is the signature of a 7z archive.
 * @since 1.8
 */
public static boolean matches(final byte[] signature, final int length) {
    if (length < sevenZSignature.length) {
        return false;
    }

    for (int i = 0; i < sevenZSignature.length; i++) {
        if (signature[i] != sevenZSignature[i]) {
            return false;
        }
    }
    return true;
}

/**
 * Advances the buffer position by up to {@code bytesToSkip} bytes, clamped
 * to what remains; returns the number of bytes actually skipped.
 */
private static long skipBytesFully(final ByteBuffer input, long bytesToSkip) {
    if (bytesToSkip < 1) {
        return 0;
    }
    final int current = input.position();
    final int maxSkip = input.remaining();
    if (maxSkip < bytesToSkip) {
        bytesToSkip = maxSkip;
    }
    input.position(current + (int) bytesToSkip);
    return bytesToSkip;
}

// Fills the whole buffer from the channel and flips it ready for reading.
private void readFully(final ByteBuffer buf) throws IOException {
    buf.rewind();
    IOUtils.readFully(channel, buf);
    buf.flip();
}

@Override
public String toString() {
    return archive.toString();
}

/**
 * Derives a default file name from the archive name - if known.
 *
 * <p>

This implements the same heuristics the 7z tools use. In
 * 7z's case if an archive contains entries without a name -
 * i.e. {@link SevenZArchiveEntry#getName} returns {@code null} -
 * then its command line and GUI tools will use this default name
 * when extracting the entries.</p>
 *
 * @return null if the name of the archive is unknown. Otherwise
 * if the name of the archive has got any extension, it is
 * stripped and the remainder returned. Finally if the name of the
 * archive hasn't got any extension then a {@code ~} character is
 * appended to the archive name.
 *
 * @since 1.19
 */
public String getDefaultName() {
    if (DEFAULT_FILE_NAME.equals(fileName) || fileName == null) {
        return null;
    }

    final String lastSegment = new File(fileName).getName();
    final int dotPos = lastSegment.lastIndexOf(".");
    if (dotPos > 0) { // if the file starts with a dot then this is not an extension
        return lastSegment.substring(0, dotPos);
    }
    return lastSegment + "~";
}

// Guards casts of header-supplied 64-bit counts down to int array sizes.
private static int assertFitsIntoNonNegativeInt(final String what, final long value) throws IOException {
    if (value > Integer.MAX_VALUE || value < 0) {
        throw new IOException("Cannot handle " + what + " " + value);
    }
    return (int) value;
}

/**
 * Counts gathered while sanity-checking the header so the memory the parsed
 * structures would need can be estimated before allocating them.
 */
private static class ArchiveStatistics {
    private int numberOfPackedStreams;
    private long numberOfCoders;
    private long numberOfOutStreams;
    private long numberOfInStreams;
    private long numberOfUnpackSubStreams;
    private int numberOfFolders;
    private BitSet folderHasCrc;
    private int numberOfEntries;
    private int numberOfEntriesWithStream;

    @Override
    public String toString() {
        return "Archive with " + numberOfEntries + " entries in " + numberOfFolders
            + " folders. Estimated size " + estimateSize()/ 1024L + " kB.";
    }

    // Rough upper bound (in bytes) of the in-memory representation; doubled
    // at the end as a conservative guess.
    long estimateSize() {
        final long lowerBound = 16L * numberOfPackedStreams /* packSizes, packCrcs in Archive */
                + numberOfPackedStreams / 8 /* packCrcsDefined in Archive */
                + numberOfFolders * folderSize() /* folders in Archive */
                + numberOfCoders * coderSize() /* coders in Folder */
                + (numberOfOutStreams - numberOfFolders) * bindPairSize() /* bindPairs in Folder */
                + 8L * (numberOfInStreams - numberOfOutStreams + numberOfFolders) /* packedStreams in Folder */
                + 8L * numberOfOutStreams /* unpackSizes in Folder */
                + numberOfEntries * entrySize() /* files in Archive */
                + streamMapSize()
            ;
        return 2 * lowerBound /* conservative guess */;
    }

    void assertValidity(final int maxMemoryLimitInKb) throws IOException {
        if (numberOfEntriesWithStream > 0 && numberOfFolders == 0) {
            throw new IOException("archive with entries but no folders");
        }
        if (numberOfEntriesWithStream > numberOfUnpackSubStreams) {
            throw new IOException("archive doesn't contain enough substreams for entries");
        }

        final long memoryNeededInKb = estimateSize() / 1024;
        if (maxMemoryLimitInKb < memoryNeededInKb) {
            throw new MemoryLimitException(memoryNeededInKb, maxMemoryLimitInKb);
        }
    }

    private long folderSize() {
        return 30; /* nested arrays are accounted for separately */
    }

    private long coderSize() {
        return 2 /* methodId is between 1 and four bytes currently, COPY and LZMA2 are the most common with 1 */
            + 16
            + 4 /* properties, guess */
            ;
    }

    private long bindPairSize() {
        return 16;
    }

    private long entrySize() {
        return 100; /* real size depends on name length, everything without name is about 70 bytes */
    }

    private long streamMapSize() {
        return 8 * numberOfFolders /* folderFirstPackStreamIndex, folderFirstFileIndex */
            + 8 * numberOfPackedStreams /* packStreamOffsets */
            + 4 * numberOfEntries /* fileFolderIndex */
            ;
    }
}
}
diff --git
a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFileOptions.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFileOptions.java new file mode 100644 index 0000000..d886091 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFileOptions.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +/** + * Collects options for reading 7z archives. 
+ * + * @since 1.19 + * @Immutable + */ +public class SevenZFileOptions { + private static final int DEFAUL_MEMORY_LIMIT_IN_KB = Integer.MAX_VALUE; + private static final boolean DEFAULT_USE_DEFAULTNAME_FOR_UNNAMED_ENTRIES= false; + private static final boolean DEFAULT_TRY_TO_RECOVER_BROKEN_ARCHIVES = false; + + private final int maxMemoryLimitInKb; + private final boolean useDefaultNameForUnnamedEntries; + private final boolean tryToRecoverBrokenArchives; + + private SevenZFileOptions(final int maxMemoryLimitInKb, final boolean useDefaultNameForUnnamedEntries, + final boolean tryToRecoverBrokenArchives) { + this.maxMemoryLimitInKb = maxMemoryLimitInKb; + this.useDefaultNameForUnnamedEntries = useDefaultNameForUnnamedEntries; + this.tryToRecoverBrokenArchives = tryToRecoverBrokenArchives; + } + + /** + * The default options. + * + *
    + *
  • no memory limit
  • + *
  • don't modify the name of unnamed entries
  • + *
+ */ + public static final SevenZFileOptions DEFAULT = new SevenZFileOptions(DEFAUL_MEMORY_LIMIT_IN_KB, + DEFAULT_USE_DEFAULTNAME_FOR_UNNAMED_ENTRIES, + DEFAULT_TRY_TO_RECOVER_BROKEN_ARCHIVES); + + /** + * Obtains a builder for SevenZFileOptions. + * @return a builder for SevenZFileOptions. + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Gets the maximum amount of memory to use for parsing the + * archive and during extraction. + * + *

Not all codecs will honor this setting. Currently only lzma + * and lzma2 are supported.

+ * + * @return the maximum amount of memory to use for extraction + */ + public int getMaxMemoryLimitInKb() { + return maxMemoryLimitInKb; + } + + /** + * Gets whether entries without a name should get their names set + * to the archive's default file name. + * @return whether entries without a name should get their names + * set to the archive's default file name + */ + public boolean getUseDefaultNameForUnnamedEntries() { + return useDefaultNameForUnnamedEntries; + } + + /** + * Whether {@link SevenZFile} shall try to recover from a certain type of broken archive. + * @return whether SevenZFile shall try to recover from a certain type of broken archive. + * @since 1.21 + */ + public boolean getTryToRecoverBrokenArchives() { + return tryToRecoverBrokenArchives; + } + + /** + * Mutable builder for the immutable {@link SevenZFileOptions}. + * + * @since 1.19 + */ + public static class Builder { + private int maxMemoryLimitInKb = DEFAUL_MEMORY_LIMIT_IN_KB; + private boolean useDefaultNameForUnnamedEntries = DEFAULT_USE_DEFAULTNAME_FOR_UNNAMED_ENTRIES; + private boolean tryToRecoverBrokenArchives = DEFAULT_TRY_TO_RECOVER_BROKEN_ARCHIVES; + + /** + * Sets the maximum amount of memory to use for parsing the + * archive and during extraction. + * + *

Not all codecs will honor this setting. Currently only lzma + * and lzma2 are supported.

+ * + * @param maxMemoryLimitInKb limit of the maximum amount of memory to use + * @return the reconfigured builder + */ + public Builder withMaxMemoryLimitInKb(final int maxMemoryLimitInKb) { + this.maxMemoryLimitInKb = maxMemoryLimitInKb; + return this; + } + + /** + * Sets whether entries without a name should get their names + * set to the archive's default file name. + * + * @param useDefaultNameForUnnamedEntries if true the name of + * unnamed entries will be set to the archive's default name + * @return the reconfigured builder + */ + public Builder withUseDefaultNameForUnnamedEntries(final boolean useDefaultNameForUnnamedEntries) { + this.useDefaultNameForUnnamedEntries = useDefaultNameForUnnamedEntries; + return this; + } + + /** + * Sets whether {@link SevenZFile} will try to revover broken archives where the CRC of the file's metadata is + * 0. + * + *

This special kind of broken archive is encountered when mutli volume archives are closed prematurely. If + * you enable this option SevenZFile will trust data that looks as if it could contain metadata of an archive + * and allocate big amounts of memory. It is strongly recommended to not enable this option without setting + * {@link #withMaxMemoryLimitInKb} at the same time. + * + * @param tryToRecoverBrokenArchives if true SevenZFile will try to recover archives that are broken in the + * specific way + * @return the reconfigured builder + * @since 1.21 + */ + public Builder withTryToRecoverBrokenArchives(final boolean tryToRecoverBrokenArchives) { + this.tryToRecoverBrokenArchives = tryToRecoverBrokenArchives; + return this; + } + + /** + * Create the {@link SevenZFileOptions}. + * + * @return configured {@link SevenZFileOptions}. + */ + public SevenZFileOptions build() { + return new SevenZFileOptions(maxMemoryLimitInKb, useDefaultNameForUnnamedEntries, + tryToRecoverBrokenArchives); + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZMethod.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZMethod.java new file mode 100644 index 0000000..03d1166 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZMethod.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.util.Arrays; + +/** + * The (partially) supported compression/encryption methods used in 7z archives. + * + *
<p>All methods with a _FILTER suffix are used as preprocessors with + * the goal of creating a better compression ratio with the compressor + * that comes next in the chain of methods. 7z will in general only + * allow them to be used together with a "real" compression method but + * Commons Compress doesn't enforce this.</p>
+ * + *
<p>The BCJ_ filters work on executable files for the given platform + * and convert relative addresses to absolute addresses in CALL + * instructions. This means they are only useful when applied to + * executables of the chosen platform.</p>
+ */ +public enum SevenZMethod { + /** no compression at all */ + COPY(new byte[] { (byte)0x00 }), + /** LZMA - only supported when reading */ + LZMA(new byte[] { (byte)0x03, (byte)0x01, (byte)0x01 }), + /** LZMA2 */ + LZMA2(new byte[] { (byte)0x21 }), + /** Deflate */ + DEFLATE(new byte[] { (byte)0x04, (byte)0x01, (byte)0x08 }), + /** + * Deflate64 + * @since 1.16 + */ + DEFLATE64(new byte[] { (byte)0x04, (byte)0x01, (byte)0x09 }), + /** BZIP2 */ + BZIP2(new byte[] { (byte)0x04, (byte)0x02, (byte)0x02 }), + /** + * AES encryption with a key length of 256 bit using SHA256 for + * hashes - only supported when reading + */ + AES256SHA256(new byte[] { (byte)0x06, (byte)0xf1, (byte)0x07, (byte)0x01 }), + /** + * BCJ x86 platform version 1. + * @since 1.8 + */ + BCJ_X86_FILTER(new byte[] { 0x03, 0x03, 0x01, 0x03 }), + /** + * BCJ PowerPC platform. + * @since 1.8 + */ + BCJ_PPC_FILTER(new byte[] { 0x03, 0x03, 0x02, 0x05 }), + /** + * BCJ I64 platform. + * @since 1.8 + */ + BCJ_IA64_FILTER(new byte[] { 0x03, 0x03, 0x04, 0x01 }), + /** + * BCJ ARM platform. + * @since 1.8 + */ + BCJ_ARM_FILTER(new byte[] { 0x03, 0x03, 0x05, 0x01 }), + /** + * BCJ ARM Thumb platform. + * @since 1.8 + */ + BCJ_ARM_THUMB_FILTER(new byte[] { 0x03, 0x03, 0x07, 0x01 }), + /** + * BCJ Sparc platform. + * @since 1.8 + */ + BCJ_SPARC_FILTER(new byte[] { 0x03, 0x03, 0x08, 0x05 }), + /** + * Delta filter. 
+ * @since 1.8 + */ + DELTA_FILTER(new byte[] { 0x03 }); + + private final byte[] id; + + SevenZMethod(final byte[] id) { + this.id = id; + } + + byte[] getId() { + final int idLength = id.length; + final byte[] copy = new byte[idLength]; + System.arraycopy(id, 0, copy, 0, idLength); + return copy; + } + + static SevenZMethod byId(final byte[] id) { + for (final SevenZMethod m : SevenZMethod.class.getEnumConstants()) { + if (Arrays.equals(m.id, id)) { + return m; + } + } + return null; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZMethodConfiguration.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZMethodConfiguration.java new file mode 100644 index 0000000..a93bf7d --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZMethodConfiguration.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.util.Objects; + +/** + * Combines a SevenZMethod with configuration options for the method. + * + *
<p>The exact type and interpretation of options depends on the + * method being configured. Currently supported are:</p> + * + * <table> + * <caption>Options</caption> + * <tr><th>Method</th><th>Option Type</th><th>Description</th></tr> + * <tr><td>BZIP2</td><td>Number</td><td>Block Size - a number between 1 and 9</td></tr> + * <tr><td>DEFLATE</td><td>Number</td><td>Compression Level - a number between 1 and 9</td></tr> + * <tr><td>LZMA2</td><td>Number</td><td>Dictionary Size - a number between 4096 and 768 MiB (768 << 20)</td></tr> + * <tr><td>LZMA2</td><td>org.tukaani.xz.LZMA2Options</td><td>Whole set of LZMA2 options.</td></tr> + * <tr><td>DELTA_FILTER</td><td>Number</td><td>Delta Distance - a number between 1 and 256</td></tr> + * </table>
+ * + * @Immutable + * @since 1.8 + */ +public class SevenZMethodConfiguration { + private final SevenZMethod method; + private final Object options; + + /** + * Doesn't configure any additional options. + * @param method the method to use + */ + public SevenZMethodConfiguration(final SevenZMethod method) { + this(method, null); + } + + /** + * Specifies and method plus configuration options. + * @param method the method to use + * @param options the options to use + * @throws IllegalArgumentException if the method doesn't understand the options specified. + */ + public SevenZMethodConfiguration(final SevenZMethod method, final Object options) { + this.method = method; + this.options = options; + if (options != null && !Coders.findByMethod(method).canAcceptOptions(options)) { + throw new IllegalArgumentException("The " + method + " method doesn't support options of type " + + options.getClass()); + } + } + + /** + * The specified method. + * @return the method + */ + public SevenZMethod getMethod() { + return method; + } + + /** + * The specified options. + * @return the options + */ + public Object getOptions() { + return options; + } + + @Override + public int hashCode() { + return method == null ? 
0 : method.hashCode(); + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + final SevenZMethodConfiguration other = (SevenZMethodConfiguration) obj; + return Objects.equals(method, other.method) + && Objects.equals(options, other.options); + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZOutputFile.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZOutputFile.java new file mode 100644 index 0000000..68cf90f --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZOutputFile.java @@ -0,0 +1,899 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.commons.compress.archivers.sevenz; + +import static java.nio.charset.StandardCharsets.UTF_16LE; +import static org.apache.commons.compress.utils.ByteUtils.utf16Decode; + +import java.io.BufferedInputStream; +import java.io.ByteArrayOutputStream; +import java.io.Closeable; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.Files; +import java.nio.file.LinkOption; +import java.nio.file.OpenOption; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.BitSet; +import java.util.Collections; +import java.util.Date; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.zip.CRC32; +import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.utils.CountingOutputStream; + +/** + * Writes a 7z file. + * @since 1.6 + */ +public class SevenZOutputFile implements Closeable { + + private final SeekableByteChannel channel; + private final List files = new ArrayList<>(); + private int numNonEmptyStreams; + private final CRC32 crc32 = new CRC32(); + private final CRC32 compressedCrc32 = new CRC32(); + private long fileBytesWritten; + private boolean finished; + private CountingOutputStream currentOutputStream; + private CountingOutputStream[] additionalCountingStreams; + private Iterable contentMethods = Collections.singletonList( + new SevenZMethodConfiguration(SevenZMethod.LZMA2) + ); + private final Map additionalSizes = new HashMap<>(); + private byte[] password; + + /** + * Opens file to write a 7z archive to. 
+ * + * @param fileName the file to write to + * @param password optional password if the archive have to be encrypted + * @throws IOException if opening the file fails + */ + public SevenZOutputFile(final File fileName) throws IOException { + this(fileName, null); + } + + /** + * Opens file to write a 7z archive to. + * + * @param fileName the file to write to + * @param password optional password if the archive have to be encrypted + * @throws IOException if opening the file fails + */ + public SevenZOutputFile(final File fileName, char[] password) throws IOException { + this( + Files.newByteChannel( + fileName.toPath(), + EnumSet.of(StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING) + ), + password + ); + } + + /** + * Prepares channel to write a 7z archive to. + * + *
<p>{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to write to an in-memory archive.</p>
+ * + * @param channel the channel to write to + * @throws IOException if the channel cannot be positioned properly + * @since 1.13 + */ + public SevenZOutputFile(final SeekableByteChannel channel) throws IOException { + this(channel, null); + } + + /** + * Prepares channel to write a 7z archive to. + * + *
<p>{@link + * org.apache.commons.compress.utils.SeekableInMemoryByteChannel} + * allows you to write to an in-memory archive.</p>
+ * + * @param channel the channel to write to + * @param password optional password if the archive have to be encrypted + * @throws IOException if the channel cannot be positioned properly + * @since 1.13 + */ + public SevenZOutputFile(final SeekableByteChannel channel, char[] password) throws IOException { + this.channel = channel; + channel.position(SevenZFile.SIGNATURE_HEADER_SIZE); + if (password != null) { + this.password = utf16Decode(password); + this.contentMethods = + Arrays.asList( + new SevenZMethodConfiguration(SevenZMethod.AES256SHA256, new AES256Options(this.password)), + new SevenZMethodConfiguration(SevenZMethod.LZMA2) + ); + } + } + + /** + * Sets the default compression method to use for entry contents - the + * default is LZMA2. + * + *
<p>Currently only {@link SevenZMethod#COPY}, {@link + * SevenZMethod#LZMA2}, {@link + * SevenZMethod#BZIP2} and {@link + * SevenZMethod#DEFLATE} are supported.</p>
+ * + *
<p>This is a short form for passing a single-element iterable + * to {@link #setContentMethods}.</p>
+ * @param method the default compression method + */ + public void setContentCompression(final SevenZMethod method) { + setContentMethods(Collections.singletonList(new SevenZMethodConfiguration(method))); + } + + /** + * Sets the default (compression) methods to use for entry contents - the + * default is LZMA2. + * + *
<p>Currently only {@link SevenZMethod#COPY}, {@link + * SevenZMethod#LZMA2}, {@link + * SevenZMethod#BZIP2} and {@link + * SevenZMethod#DEFLATE} are supported.</p>
+ * + *
<p>The methods will be consulted in iteration order to create + * the final output.</p>
+ * + * @since 1.8 + * @param methods the default (compression) methods + */ + public void setContentMethods(final Iterable methods) { + this.contentMethods = reverse(methods); + } + + /** + * Closes the archive, calling {@link #finish} if necessary. + * + * @throws IOException on error + */ + @Override + public void close() throws IOException { + try { + if (!finished) { + finish(); + } + } finally { + channel.close(); + } + } + + /** + * Create an archive entry using the inputFile and entryName provided. + * + * @param inputFile file to create an entry from + * @param entryName the name to use + * @return the ArchiveEntry set up with details from the file + */ + public SevenZArchiveEntry createArchiveEntry(final File inputFile, final String entryName) { + final SevenZArchiveEntry entry = new SevenZArchiveEntry(); + entry.setDirectory(inputFile.isDirectory()); + entry.setName(entryName); + entry.setLastModifiedDate(new Date(inputFile.lastModified())); + return entry; + } + + /** + * Create an archive entry using the inputPath and entryName provided. + * + * @param inputPath path to create an entry from + * @param entryName the name to use + * @param options options indicating how symbolic links are handled. + * @return the ArchiveEntry set up with details from the file + * + * @throws IOException on error + * @since 1.21 + */ + public SevenZArchiveEntry createArchiveEntry(final Path inputPath, final String entryName, final LinkOption... options) + throws IOException { + final SevenZArchiveEntry entry = new SevenZArchiveEntry(); + entry.setDirectory(Files.isDirectory(inputPath, options)); + entry.setName(entryName); + entry.setLastModifiedDate(new Date(Files.getLastModifiedTime(inputPath, options).toMillis())); + return entry; + } + + /** + * Records an archive entry to add. + * + * The caller must then write the content to the archive and call + * {@link #closeArchiveEntry()} to complete the process. 
+ * + * @param archiveEntry describes the entry + */ + public void putArchiveEntry(final ArchiveEntry archiveEntry) { + final SevenZArchiveEntry entry = (SevenZArchiveEntry) archiveEntry; + files.add(entry); + } + + /** + * Closes the archive entry. + * @throws IOException on error + */ + public void closeArchiveEntry() throws IOException { + if (currentOutputStream != null) { + currentOutputStream.flush(); + currentOutputStream.close(); + } + + final SevenZArchiveEntry entry = files.get(files.size() - 1); + if (fileBytesWritten > 0) { // this implies currentOutputStream != null + entry.setHasStream(true); + ++numNonEmptyStreams; + entry.setSize(currentOutputStream.getBytesWritten()); //NOSONAR + entry.setCompressedSize(fileBytesWritten); + entry.setCrcValue(crc32.getValue()); + entry.setCompressedCrcValue(compressedCrc32.getValue()); + entry.setHasCrc(true); + if (additionalCountingStreams != null) { + final long[] sizes = new long[additionalCountingStreams.length]; + Arrays.setAll(sizes, i -> additionalCountingStreams[i].getBytesWritten()); + additionalSizes.put(entry, sizes); + } + } else { + entry.setHasStream(false); + entry.setSize(0); + entry.setCompressedSize(0); + entry.setHasCrc(false); + } + currentOutputStream = null; + additionalCountingStreams = null; + crc32.reset(); + compressedCrc32.reset(); + fileBytesWritten = 0; + } + + /** + * Writes a byte to the current archive entry. + * @param b The byte to be written. + * @throws IOException on error + */ + public void write(final int b) throws IOException { + getCurrentOutputStream().write(b); + } + + /** + * Writes a byte array to the current archive entry. + * @param b The byte array to be written. + * @throws IOException on error + */ + public void write(final byte[] b) throws IOException { + write(b, 0, b.length); + } + + /** + * Writes part of a byte array to the current archive entry. + * @param b The byte array to be written. 
+ * @param off offset into the array to start writing from + * @param len number of bytes to write + * @throws IOException on error + */ + public void write(final byte[] b, final int off, final int len) throws IOException { + if (len > 0) { + getCurrentOutputStream().write(b, off, len); + } + } + + /** + * Writes all of the given input stream to the current archive entry. + * @param inputStream the data source. + * @throws IOException if an I/O error occurs. + * @since 1.21 + */ + public void write(final InputStream inputStream) throws IOException { + final byte[] buffer = new byte[8024]; + int n = 0; + while (-1 != (n = inputStream.read(buffer))) { + write(buffer, 0, n); + } + } + + /** + * Writes all of the given input stream to the current archive entry. + * @param path the data source. + * @param options options specifying how the file is opened. + * @throws IOException if an I/O error occurs. + * @since 1.21 + */ + public void write(final Path path, final OpenOption... options) throws IOException { + try (InputStream in = new BufferedInputStream(Files.newInputStream(path, options))) { + write(in); + } + } + + /** + * Finishes the addition of entries to this archive, without closing it. + * + * @throws IOException if archive is already closed. 
+ */ + public void finish() throws IOException { + if (finished) { + throw new IOException("This archive has already been finished"); + } + finished = true; + + final long headerPosition = channel.position(); + + final ByteArrayOutputStream headerBaos = new ByteArrayOutputStream(); + final DataOutputStream header = new DataOutputStream(headerBaos); + + writeHeader(header); + header.flush(); + final byte[] headerBytes = headerBaos.toByteArray(); + channel.write(ByteBuffer.wrap(headerBytes)); + + final CRC32 crc32 = new CRC32(); + crc32.update(headerBytes); + + final ByteBuffer bb = ByteBuffer + .allocate( + SevenZFile.sevenZSignature.length + + 2/* version */ + + 4/* start header CRC */ + + 8/* next header position */ + + 8/* next header length */ + + 4 + /* next header CRC */ + ) + .order(ByteOrder.LITTLE_ENDIAN); + // signature header + channel.position(0); + bb.put(SevenZFile.sevenZSignature); + // version + bb.put((byte) 0).put((byte) 2); + + // placeholder for start header CRC + bb.putInt(0); + + // start header + bb + .putLong(headerPosition - SevenZFile.SIGNATURE_HEADER_SIZE) + .putLong(0xffffFFFFL & headerBytes.length) + .putInt((int) crc32.getValue()); + crc32.reset(); + crc32.update(bb.array(), SevenZFile.sevenZSignature.length + 6, 20); + bb.putInt(SevenZFile.sevenZSignature.length + 2, (int) crc32.getValue()); + bb.flip(); + channel.write(bb); + } + + /* + * Creation of output stream is deferred until data is actually + * written as some codecs might write header information even for + * empty streams and directories otherwise. 
+ */ + private OutputStream getCurrentOutputStream() throws IOException { + if (currentOutputStream == null) { + currentOutputStream = setupFileOutputStream(); + } + return currentOutputStream; + } + + private CountingOutputStream setupFileOutputStream() throws IOException { + if (files.isEmpty()) { + throw new IllegalStateException("No current 7z entry"); + } + + // doesn't need to be closed, just wraps the instance field channel + OutputStream out = new OutputStreamWrapper(); // NOSONAR + final ArrayList moreStreams = new ArrayList<>(); + boolean first = true; + for (final SevenZMethodConfiguration m : getContentMethods(files.get(files.size() - 1))) { + if (!first) { + final CountingOutputStream cos = new CountingOutputStream(out); + moreStreams.add(cos); + out = cos; + } + out = Coders.addEncoder(out, m.getMethod(), m.getOptions()); + first = false; + } + if (!moreStreams.isEmpty()) { + additionalCountingStreams = moreStreams.toArray(new CountingOutputStream[0]); + } + return new CountingOutputStream(out) { + @Override + public void write(final int b) throws IOException { + super.write(b); + crc32.update(b); + } + + @Override + public void write(final byte[] b) throws IOException { + super.write(b); + crc32.update(b); + } + + @Override + public void write(final byte[] b, final int off, final int len) throws IOException { + super.write(b, off, len); + crc32.update(b, off, len); + } + }; + } + + private Iterable getContentMethods(final SevenZArchiveEntry entry) { + final Iterable ms = entry.getContentMethods(); + return ms == null ? 
contentMethods : ms; + } + + private void writeHeader(final DataOutput header) throws IOException { + header.write(NID.kHeader); + + header.write(NID.kMainStreamsInfo); + writeStreamsInfo(header); + writeFilesInfo(header); + header.write(NID.kEnd); + } + + private void writeStreamsInfo(final DataOutput header) throws IOException { + if (numNonEmptyStreams > 0) { + writePackInfo(header); + writeUnpackInfo(header); + } + + writeSubStreamsInfo(header); + + header.write(NID.kEnd); + } + + private void writePackInfo(final DataOutput header) throws IOException { + header.write(NID.kPackInfo); + + writeUint64(header, 0); + writeUint64(header, 0xffffFFFFL & numNonEmptyStreams); + + header.write(NID.kSize); + for (final SevenZArchiveEntry entry : files) { + if (entry.hasStream()) { + writeUint64(header, entry.getCompressedSize()); + } + } + + header.write(NID.kCRC); + header.write(1); // "allAreDefined" == true + for (final SevenZArchiveEntry entry : files) { + if (entry.hasStream()) { + header.writeInt(Integer.reverseBytes((int) entry.getCompressedCrcValue())); + } + } + + header.write(NID.kEnd); + } + + private void writeUnpackInfo(final DataOutput header) throws IOException { + header.write(NID.kUnpackInfo); + + header.write(NID.kFolder); + writeUint64(header, numNonEmptyStreams); + header.write(0); + for (final SevenZArchiveEntry entry : files) { + if (entry.hasStream()) { + writeFolder(header, entry); + } + } + + header.write(NID.kCodersUnpackSize); + for (final SevenZArchiveEntry entry : files) { + if (entry.hasStream()) { + final long[] moreSizes = additionalSizes.get(entry); + if (moreSizes != null) { + for (final long s : moreSizes) { + writeUint64(header, s); + } + } + writeUint64(header, entry.getSize()); + } + } + + header.write(NID.kCRC); + header.write(1); // "allAreDefined" == true + for (final SevenZArchiveEntry entry : files) { + if (entry.hasStream()) { + header.writeInt(Integer.reverseBytes((int) entry.getCrcValue())); + } + } + + header.write(NID.kEnd); 
+ } + + private void writeFolder(final DataOutput header, final SevenZArchiveEntry entry) throws IOException { + final ByteArrayOutputStream bos = new ByteArrayOutputStream(); + int numCoders = 0; + for (final SevenZMethodConfiguration m : getContentMethods(entry)) { + numCoders++; + writeSingleCodec(m, bos); + } + + writeUint64(header, numCoders); + header.write(bos.toByteArray()); + for (long i = 0; i < numCoders - 1; i++) { + writeUint64(header, i + 1); + writeUint64(header, i); + } + } + + private void writeSingleCodec(final SevenZMethodConfiguration m, final OutputStream bos) throws IOException { + final byte[] id = m.getMethod().getId(); + final byte[] properties = Coders.findByMethod(m.getMethod()).getOptionsAsProperties(m.getOptions()); + + int codecFlags = id.length; + if (properties.length > 0) { + codecFlags |= 0x20; + } + bos.write(codecFlags); + bos.write(id); + + if (properties.length > 0) { + bos.write(properties.length); + bos.write(properties); + } + } + + private void writeSubStreamsInfo(final DataOutput header) throws IOException { + header.write(NID.kSubStreamsInfo); + // + // header.write(NID.kCRC); + // header.write(1); + // for (final SevenZArchiveEntry entry : files) { + // if (entry.getHasCrc()) { + // header.writeInt(Integer.reverseBytes(entry.getCrc())); + // } + // } + // + header.write(NID.kEnd); + } + + private void writeFilesInfo(final DataOutput header) throws IOException { + header.write(NID.kFilesInfo); + + writeUint64(header, files.size()); + + writeFileEmptyStreams(header); + writeFileEmptyFiles(header); + writeFileAntiItems(header); + writeFileNames(header); + writeFileCTimes(header); + writeFileATimes(header); + writeFileMTimes(header); + writeFileWindowsAttributes(header); + header.write(NID.kEnd); + } + + private void writeFileEmptyStreams(final DataOutput header) throws IOException { + final boolean hasEmptyStreams = files.stream().anyMatch(entry -> !entry.hasStream()); + if (hasEmptyStreams) { + 
header.write(NID.kEmptyStream); + final BitSet emptyStreams = new BitSet(files.size()); + for (int i = 0; i < files.size(); i++) { + emptyStreams.set(i, !files.get(i).hasStream()); + } + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(baos); + writeBits(out, emptyStreams, files.size()); + out.flush(); + final byte[] contents = baos.toByteArray(); + writeUint64(header, contents.length); + header.write(contents); + } + } + + private void writeFileEmptyFiles(final DataOutput header) throws IOException { + boolean hasEmptyFiles = false; + int emptyStreamCounter = 0; + final BitSet emptyFiles = new BitSet(0); + for (final SevenZArchiveEntry file1 : files) { + if (!file1.hasStream()) { + final boolean isDir = file1.isDirectory(); + emptyFiles.set(emptyStreamCounter++, !isDir); + hasEmptyFiles |= !isDir; + } + } + if (hasEmptyFiles) { + header.write(NID.kEmptyFile); + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(baos); + writeBits(out, emptyFiles, emptyStreamCounter); + out.flush(); + final byte[] contents = baos.toByteArray(); + writeUint64(header, contents.length); + header.write(contents); + } + } + + private void writeFileAntiItems(final DataOutput header) throws IOException { + boolean hasAntiItems = false; + final BitSet antiItems = new BitSet(0); + int antiItemCounter = 0; + for (final SevenZArchiveEntry file1 : files) { + if (!file1.hasStream()) { + final boolean isAnti = file1.isAntiItem(); + antiItems.set(antiItemCounter++, isAnti); + hasAntiItems |= isAnti; + } + } + if (hasAntiItems) { + header.write(NID.kAnti); + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(baos); + writeBits(out, antiItems, antiItemCounter); + out.flush(); + final byte[] contents = baos.toByteArray(); + writeUint64(header, contents.length); + header.write(contents); + } + } + + private 
void writeFileNames(final DataOutput header) throws IOException { + header.write(NID.kName); + + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(baos); + out.write(0); + for (final SevenZArchiveEntry entry : files) { + out.write(entry.getName().getBytes(UTF_16LE)); + out.writeShort(0); + } + out.flush(); + final byte[] contents = baos.toByteArray(); + writeUint64(header, contents.length); + header.write(contents); + } + + private void writeFileCTimes(final DataOutput header) throws IOException { + int numCreationDates = 0; + for (final SevenZArchiveEntry entry : files) { + if (entry.getHasCreationDate()) { + ++numCreationDates; + } + } + if (numCreationDates > 0) { + header.write(NID.kCTime); + + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(baos); + if (numCreationDates != files.size()) { + out.write(0); + final BitSet cTimes = new BitSet(files.size()); + for (int i = 0; i < files.size(); i++) { + cTimes.set(i, files.get(i).getHasCreationDate()); + } + writeBits(out, cTimes, files.size()); + } else { + out.write(1); // "allAreDefined" == true + } + out.write(0); + for (final SevenZArchiveEntry entry : files) { + if (entry.getHasCreationDate()) { + out.writeLong(Long.reverseBytes(SevenZArchiveEntry.javaTimeToNtfsTime(entry.getCreationDate()))); + } + } + out.flush(); + final byte[] contents = baos.toByteArray(); + writeUint64(header, contents.length); + header.write(contents); + } + } + + private void writeFileATimes(final DataOutput header) throws IOException { + int numAccessDates = 0; + for (final SevenZArchiveEntry entry : files) { + if (entry.getHasAccessDate()) { + ++numAccessDates; + } + } + if (numAccessDates > 0) { + header.write(NID.kATime); + + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(baos); + if (numAccessDates != files.size()) { + 
out.write(0); + final BitSet aTimes = new BitSet(files.size()); + for (int i = 0; i < files.size(); i++) { + aTimes.set(i, files.get(i).getHasAccessDate()); + } + writeBits(out, aTimes, files.size()); + } else { + out.write(1); // "allAreDefined" == true + } + out.write(0); + for (final SevenZArchiveEntry entry : files) { + if (entry.getHasAccessDate()) { + out.writeLong(Long.reverseBytes(SevenZArchiveEntry.javaTimeToNtfsTime(entry.getAccessDate()))); + } + } + out.flush(); + final byte[] contents = baos.toByteArray(); + writeUint64(header, contents.length); + header.write(contents); + } + } + + private void writeFileMTimes(final DataOutput header) throws IOException { + int numLastModifiedDates = 0; + for (final SevenZArchiveEntry entry : files) { + if (entry.getHasLastModifiedDate()) { + ++numLastModifiedDates; + } + } + if (numLastModifiedDates > 0) { + header.write(NID.kMTime); + + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(baos); + if (numLastModifiedDates != files.size()) { + out.write(0); + final BitSet mTimes = new BitSet(files.size()); + for (int i = 0; i < files.size(); i++) { + mTimes.set(i, files.get(i).getHasLastModifiedDate()); + } + writeBits(out, mTimes, files.size()); + } else { + out.write(1); // "allAreDefined" == true + } + out.write(0); + for (final SevenZArchiveEntry entry : files) { + if (entry.getHasLastModifiedDate()) { + out.writeLong(Long.reverseBytes(SevenZArchiveEntry.javaTimeToNtfsTime(entry.getLastModifiedDate()))); + } + } + out.flush(); + final byte[] contents = baos.toByteArray(); + writeUint64(header, contents.length); + header.write(contents); + } + } + + private void writeFileWindowsAttributes(final DataOutput header) throws IOException { + int numWindowsAttributes = 0; + for (final SevenZArchiveEntry entry : files) { + if (entry.getHasWindowsAttributes()) { + ++numWindowsAttributes; + } + } + if (numWindowsAttributes > 0) { + 
header.write(NID.kWinAttributes); + + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(baos); + if (numWindowsAttributes != files.size()) { + out.write(0); + final BitSet attributes = new BitSet(files.size()); + for (int i = 0; i < files.size(); i++) { + attributes.set(i, files.get(i).getHasWindowsAttributes()); + } + writeBits(out, attributes, files.size()); + } else { + out.write(1); // "allAreDefined" == true + } + out.write(0); + for (final SevenZArchiveEntry entry : files) { + if (entry.getHasWindowsAttributes()) { + out.writeInt(Integer.reverseBytes(entry.getWindowsAttributes())); + } + } + out.flush(); + final byte[] contents = baos.toByteArray(); + writeUint64(header, contents.length); + header.write(contents); + } + } + + private void writeUint64(final DataOutput header, long value) throws IOException { + int firstByte = 0; + int mask = 0x80; + int i; + for (i = 0; i < 8; i++) { + if (value < ((1L << (7 * (i + 1))))) { + firstByte |= (value >>> (8 * i)); + break; + } + firstByte |= mask; + mask >>>= 1; + } + header.write(firstByte); + for (; i > 0; i--) { + header.write((int) (0xff & value)); + value >>>= 8; + } + } + + private void writeBits(final DataOutput header, final BitSet bits, final int length) throws IOException { + int cache = 0; + int shift = 7; + for (int i = 0; i < length; i++) { + cache |= ((bits.get(i) ? 
1 : 0) << shift); + if (--shift < 0) { + header.write(cache); + shift = 7; + cache = 0; + } + } + if (shift != 7) { + header.write(cache); + } + } + + private static Iterable reverse(final Iterable i) { + final LinkedList l = new LinkedList<>(); + for (final T t : i) { + l.addFirst(t); + } + return l; + } + + private class OutputStreamWrapper extends OutputStream { + + private static final int BUF_SIZE = 8192; + private final ByteBuffer buffer = ByteBuffer.allocate(BUF_SIZE); + + @Override + public void write(final int b) throws IOException { + buffer.clear(); + buffer.put((byte) b).flip(); + channel.write(buffer); + compressedCrc32.update(b); + fileBytesWritten++; + } + + @Override + public void write(final byte[] b) throws IOException { + OutputStreamWrapper.this.write(b, 0, b.length); + } + + @Override + public void write(final byte[] b, final int off, final int len) throws IOException { + if (len > BUF_SIZE) { + channel.write(ByteBuffer.wrap(b, off, len)); + } else { + buffer.clear(); + buffer.put(b, off, len).flip(); + channel.write(buffer); + } + compressedCrc32.update(b, off, len); + fileBytesWritten += len; + } + + @Override + public void flush() throws IOException { + // no reason to flush the channel + } + + @Override + public void close() throws IOException { + // the file will be closed by the containing class's close method + } + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java new file mode 100644 index 0000000..a33aca7 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/StartHeader.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +class StartHeader { + long nextHeaderOffset; + long nextHeaderSize; + long nextHeaderCrc; +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/StreamMap.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/StreamMap.java new file mode 100644 index 0000000..9a10e1e --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/StreamMap.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +/// Map between folders, files and streams. 
+class StreamMap { + /// The first Archive.packStream index of each folder. + int[] folderFirstPackStreamIndex; + /// Offset to beginning of this pack stream's data, relative to the beginning of the first pack stream. + long[] packStreamOffsets; + /// Index of first file for each folder. + int[] folderFirstFileIndex; + /// Index of folder for each file. + int[] fileFolderIndex; + + @Override + public String toString() { + return "StreamMap with indices of " + folderFirstPackStreamIndex.length + + " folders, offsets of " + packStreamOffsets.length + " packed streams," + + " first files of " + folderFirstFileIndex.length + " folders and" + + " folder indices for " + fileFolderIndex.length + " files"; + } +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/SubStreamsInfo.java b/src/main/java/org/apache/commons/compress/archivers/sevenz/SubStreamsInfo.java new file mode 100644 index 0000000..95fabc6 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/SubStreamsInfo.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.archivers.sevenz; + +import java.util.BitSet; + +/// Properties for non-empty files. 
+class SubStreamsInfo { + /// Unpacked size of each unpacked stream. + long[] unpackSizes; + /// Whether CRC is present for each unpacked stream. + BitSet hasCrc; + /// CRCs of unpacked streams, if present. + long[] crcs; +} diff --git a/src/main/java/org/apache/commons/compress/archivers/sevenz/package.html b/src/main/java/org/apache/commons/compress/archivers/sevenz/package.html new file mode 100644 index 0000000..c5756f2 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/archivers/sevenz/package.html @@ -0,0 +1,28 @@ + + + + + 7z package + + +

Provides classes for reading and writing archives using + the 7z format.

+ + diff --git a/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java b/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java new file mode 100644 index 0000000..2da6a2f --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/ArchiveUtils.java @@ -0,0 +1,278 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.utils; + +import static java.nio.charset.StandardCharsets.US_ASCII; + +import java.util.Arrays; + +import org.apache.commons.compress.archivers.ArchiveEntry; + +/** + * Generic Archive utilities + */ +public class ArchiveUtils { + + private static final int MAX_SANITIZED_NAME_LENGTH = 255; + + /** Private constructor to prevent instantiation of this utility class. */ + private ArchiveUtils(){ + } + + /** + * Generates a string containing the name, isDirectory setting and size of an entry. + *

+ * For example: + *

+     * -    2000 main.c
+     * d     100 testfiles
+     * 
+ * + * @param entry the entry + * @return the representation of the entry + */ + public static String toString(final ArchiveEntry entry){ + final StringBuilder sb = new StringBuilder(); + sb.append(entry.isDirectory()? 'd' : '-');// c.f. "ls -l" output + final String size = Long.toString(entry.getSize()); + sb.append(' '); + // Pad output to 7 places, leading spaces + for(int i=7; i > size.length(); i--){ + sb.append(' '); + } + sb.append(size); + sb.append(' ').append(entry.getName()); + return sb.toString(); + } + + /** + * Check if buffer contents matches Ascii String. + * + * @param expected expected string + * @param buffer the buffer + * @param offset offset to read from + * @param length length of the buffer + * @return {@code true} if buffer is the same as the expected string + */ + public static boolean matchAsciiBuffer( + final String expected, final byte[] buffer, final int offset, final int length){ + final byte[] buffer1; + buffer1 = expected.getBytes(US_ASCII); + return isEqual(buffer1, 0, buffer1.length, buffer, offset, length, false); + } + + /** + * Check if buffer contents matches Ascii String. + * + * @param expected the expected strin + * @param buffer the buffer + * @return {@code true} if buffer is the same as the expected string + */ + public static boolean matchAsciiBuffer(final String expected, final byte[] buffer){ + return matchAsciiBuffer(expected, buffer, 0, buffer.length); + } + + /** + * Convert a string to Ascii bytes. + * Used for comparing "magic" strings which need to be independent of the default Locale. + * + * @param inputString string to convert + * @return the bytes + */ + public static byte[] toAsciiBytes(final String inputString){ + return inputString.getBytes(US_ASCII); + } + + /** + * Convert an input byte array to a String using the ASCII character set. 
+ * + * @param inputBytes bytes to convert + * @return the bytes, interpreted as an Ascii string + */ + public static String toAsciiString(final byte[] inputBytes){ + return new String(inputBytes, US_ASCII); + } + + /** + * Convert an input byte array to a String using the ASCII character set. + * + * @param inputBytes input byte array + * @param offset offset within array + * @param length length of array + * @return the bytes, interpreted as an Ascii string + */ + public static String toAsciiString(final byte[] inputBytes, final int offset, final int length){ + return new String(inputBytes, offset, length, US_ASCII); + } + + /** + * Compare byte buffers, optionally ignoring trailing nulls + * + * @param buffer1 first buffer + * @param offset1 first offset + * @param length1 first length + * @param buffer2 second buffer + * @param offset2 second offset + * @param length2 second length + * @param ignoreTrailingNulls whether to ignore trailing nulls + * @return {@code true} if buffer1 and buffer2 have same contents, having regard to trailing nulls + */ + public static boolean isEqual( + final byte[] buffer1, final int offset1, final int length1, + final byte[] buffer2, final int offset2, final int length2, + final boolean ignoreTrailingNulls){ + final int minLen= Math.min(length1, length2); + for (int i=0; i < minLen; i++){ + if (buffer1[offset1+i] != buffer2[offset2+i]){ + return false; + } + } + if (length1 == length2){ + return true; + } + if (ignoreTrailingNulls){ + if (length1 > length2){ + for(int i = length2; i < length1; i++){ + if (buffer1[offset1+i] != 0){ + return false; + } + } + } else { + for(int i = length1; i < length2; i++){ + if (buffer2[offset2+i] != 0){ + return false; + } + } + } + return true; + } + return false; + } + + /** + * Compare byte buffers + * + * @param buffer1 the first buffer + * @param offset1 the first offset + * @param length1 the first length + * @param buffer2 the second buffer + * @param offset2 the second offset + * @param 
length2 the second length + * @return {@code true} if buffer1 and buffer2 have same contents + */ + public static boolean isEqual( + final byte[] buffer1, final int offset1, final int length1, + final byte[] buffer2, final int offset2, final int length2){ + return isEqual(buffer1, offset1, length1, buffer2, offset2, length2, false); + } + + /** + * Compare byte buffers + * + * @param buffer1 the first buffer + * @param buffer2 the second buffer + * @return {@code true} if buffer1 and buffer2 have same contents + */ + public static boolean isEqual(final byte[] buffer1, final byte[] buffer2 ){ + return isEqual(buffer1, 0, buffer1.length, buffer2, 0, buffer2.length, false); + } + + /** + * Compare byte buffers, optionally ignoring trailing nulls + * + * @param buffer1 the first buffer + * @param buffer2 the second buffer + * @param ignoreTrailingNulls whether to ignore trailing nulls + * @return {@code true} if buffer1 and buffer2 have same contents + */ + public static boolean isEqual(final byte[] buffer1, final byte[] buffer2, final boolean ignoreTrailingNulls){ + return isEqual(buffer1, 0, buffer1.length, buffer2, 0, buffer2.length, ignoreTrailingNulls); + } + + /** + * Compare byte buffers, ignoring trailing nulls + * + * @param buffer1 the first buffer + * @param offset1 the first offset + * @param length1 the first length + * @param buffer2 the second buffer + * @param offset2 the second offset + * @param length2 the second length + * @return {@code true} if buffer1 and buffer2 have same contents, having regard to trailing nulls + */ + public static boolean isEqualWithNull( + final byte[] buffer1, final int offset1, final int length1, + final byte[] buffer2, final int offset2, final int length2){ + return isEqual(buffer1, offset1, length1, buffer2, offset2, length2, true); + } + + /** + * Returns true if the first N bytes of an array are all zero + * + * @param a + * The array to check + * @param size + * The number of characters to check (not the size of the 
array) + * @return true if the first N bytes are zero + */ + public static boolean isArrayZero(final byte[] a, final int size) { + for (int i = 0; i < size; i++) { + if (a[i] != 0) { + return false; + } + } + return true; + } + + /** + * Returns a "sanitized" version of the string given as arguments, + * where sanitized means non-printable characters have been + * replaced with a question mark and the outcome is not longer + * than 255 chars. + * + *

This method is used to clean up file names when they are + * used in exception messages as they may end up in log files or + * as console output and may have been read from a corrupted + * input.

+ * + * @param s the string to sanitize + * @return a sanitized version of the argument + * @since 1.12 + */ + public static String sanitize(final String s) { + final char[] cs = s.toCharArray(); + final char[] chars = cs.length <= MAX_SANITIZED_NAME_LENGTH ? cs : Arrays.copyOf(cs, MAX_SANITIZED_NAME_LENGTH); + if (cs.length > MAX_SANITIZED_NAME_LENGTH) { + Arrays.fill(chars, MAX_SANITIZED_NAME_LENGTH - 3, MAX_SANITIZED_NAME_LENGTH, '.'); + } + final StringBuilder sb = new StringBuilder(); + for (final char c : chars) { + if (!Character.isISOControl(c)) { + final Character.UnicodeBlock block = Character.UnicodeBlock.of(c); + if (block != null && block != Character.UnicodeBlock.SPECIALS) { + sb.append(c); + continue; + } + } + sb.append('?'); + } + return sb.toString(); + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/BitInputStream.java b/src/main/java/org/apache/commons/compress/utils/BitInputStream.java new file mode 100644 index 0000000..201d3e7 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/BitInputStream.java @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.utils; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteOrder; + +/** + * Reads bits from an InputStream. + * @since 1.10 + * @NotThreadSafe + */ +public class BitInputStream implements Closeable { + private static final int MAXIMUM_CACHE_SIZE = 63; // bits in long minus sign bit + private static final long[] MASKS = new long[MAXIMUM_CACHE_SIZE + 1]; + + static { + for (int i = 1; i <= MAXIMUM_CACHE_SIZE; i++) { + MASKS[i] = (MASKS[i - 1] << 1) + 1; + } + } + + private final CountingInputStream in; + private final ByteOrder byteOrder; + private long bitsCached; + private int bitsCachedSize; + + /** + * Constructor taking an InputStream and its bit arrangement. + * @param in the InputStream + * @param byteOrder the bit arrangement across byte boundaries, + * either BIG_ENDIAN (aaaaabbb bb000000) or LITTLE_ENDIAN (bbbaaaaa 000000bb) + */ + public BitInputStream(final InputStream in, final ByteOrder byteOrder) { + this.in = new CountingInputStream(in); + this.byteOrder = byteOrder; + } + + @Override + public void close() throws IOException { + in.close(); + } + + /** + * Clears the cache of bits that have been read from the + * underlying stream but not yet provided via {@link #readBits}. + */ + public void clearBitCache() { + bitsCached = 0; + bitsCachedSize = 0; + } + + /** + * Returns at most 63 bits read from the underlying stream. + * + * @param count the number of bits to read, must be a positive + * number not bigger than 63. + * @return the bits concatenated as a long using the stream's byte order. 
+ * -1 if the end of the underlying stream has been reached before reading + * the requested number of bits + * @throws IOException on error + */ + public long readBits(final int count) throws IOException { + if (count < 0 || count > MAXIMUM_CACHE_SIZE) { + throw new IOException("count must not be negative or greater than " + MAXIMUM_CACHE_SIZE); + } + if (ensureCache(count)) { + return -1; + } + + if (bitsCachedSize < count) { + return processBitsGreater57(count); + } + return readCachedBits(count); + } + + /** + * Returns the number of bits that can be read from this input + * stream without reading from the underlying input stream at all. + * @return estimate of the number of bits that can be read without reading from the underlying stream + * @since 1.16 + */ + public int bitsCached() { + return bitsCachedSize; + } + + /** + * Returns an estimate of the number of bits that can be read from + * this input stream without blocking by the next invocation of a + * method for this input stream. + * @throws IOException if the underlying stream throws one when calling available + * @return estimate of the number of bits that can be read without blocking + * @since 1.16 + */ + public long bitsAvailable() throws IOException { + return bitsCachedSize + ((long) Byte.SIZE) * in.available(); + } + + /** + * Drops bits until the next bits will be read from a byte boundary. + * @since 1.16 + */ + public void alignWithByteBoundary() { + final int toSkip = bitsCachedSize % Byte.SIZE; + if (toSkip > 0) { + readCachedBits(toSkip); + } + } + + /** + * Returns the number of bytes read from the underlying stream. + * + *

This includes the bytes read to fill the current cache and + * not read as bits so far.

+ * @return the number of bytes read from the underlying stream + * @since 1.17 + */ + public long getBytesRead() { + return in.getBytesRead(); + } + + private long processBitsGreater57(final int count) throws IOException { + final long bitsOut; + final int overflowBits; + long overflow = 0L; + + // bitsCachedSize >= 57 and left-shifting it 8 bits would cause an overflow + final int bitsToAddCount = count - bitsCachedSize; + overflowBits = Byte.SIZE - bitsToAddCount; + final long nextByte = in.read(); + if (nextByte < 0) { + return nextByte; + } + if (byteOrder == ByteOrder.LITTLE_ENDIAN) { + final long bitsToAdd = nextByte & MASKS[bitsToAddCount]; + bitsCached |= (bitsToAdd << bitsCachedSize); + overflow = (nextByte >>> bitsToAddCount) & MASKS[overflowBits]; + } else { + bitsCached <<= bitsToAddCount; + final long bitsToAdd = (nextByte >>> (overflowBits)) & MASKS[bitsToAddCount]; + bitsCached |= bitsToAdd; + overflow = nextByte & MASKS[overflowBits]; + } + bitsOut = bitsCached & MASKS[count]; + bitsCached = overflow; + bitsCachedSize = overflowBits; + return bitsOut; + } + + private long readCachedBits(final int count) { + final long bitsOut; + if (byteOrder == ByteOrder.LITTLE_ENDIAN) { + bitsOut = (bitsCached & MASKS[count]); + bitsCached >>>= count; + } else { + bitsOut = (bitsCached >> (bitsCachedSize - count)) & MASKS[count]; + } + bitsCachedSize -= count; + return bitsOut; + } + + /** + * Fills the cache up to 56 bits + * @param count + * @return return true, when EOF + * @throws IOException + */ + private boolean ensureCache(final int count) throws IOException { + while (bitsCachedSize < count && bitsCachedSize < 57) { + final long nextByte = in.read(); + if (nextByte < 0) { + return true; + } + if (byteOrder == ByteOrder.LITTLE_ENDIAN) { + bitsCached |= (nextByte << bitsCachedSize); + } else { + bitsCached <<= Byte.SIZE; + bitsCached |= nextByte; + } + bitsCachedSize += Byte.SIZE; + } + return false; + } + +} diff --git 
a/src/main/java/org/apache/commons/compress/utils/BoundedArchiveInputStream.java b/src/main/java/org/apache/commons/compress/utils/BoundedArchiveInputStream.java new file mode 100644 index 0000000..a72aa15 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/BoundedArchiveInputStream.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.utils; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; + +/** + * NIO backed bounded input stream for reading a predefined amount of data from. + * @ThreadSafe this base class is thread safe but implementations must not be. + * @since 1.21 + */ +public abstract class BoundedArchiveInputStream extends InputStream { + + private final long end; + private ByteBuffer singleByteBuffer; + private long loc; + + /** + * Create a new bounded input stream. + * + * @param start position in the stream from where the reading of this bounded stream starts. + * @param remaining amount of bytes which are allowed to read from the bounded stream. 
+ */ + public BoundedArchiveInputStream(final long start, final long remaining) { + this.end = start + remaining; + if (this.end < start) { + // check for potential vulnerability due to overflow + throw new IllegalArgumentException("Invalid length of stream at offset=" + start + ", length=" + remaining); + } + loc = start; + } + + @Override + public synchronized int read() throws IOException { + if (loc >= end) { + return -1; + } + if (singleByteBuffer == null) { + singleByteBuffer = ByteBuffer.allocate(1); + } else { + singleByteBuffer.rewind(); + } + int read = read(loc, singleByteBuffer); + if (read < 1) { + return -1; + } + loc++; + return singleByteBuffer.get() & 0xff; + } + + @Override + public synchronized int read(final byte[] b, final int off, int len) throws IOException { + if (loc >= end) { + return -1; + } + final long maxLen = Math.min(len, end - loc); + if (maxLen <= 0) { + return 0; + } + if (off < 0 || off > b.length || maxLen > b.length - off) { + throw new IndexOutOfBoundsException("offset or len are out of bounds"); + } + + ByteBuffer buf = ByteBuffer.wrap(b, off, (int) maxLen); + int ret = read(loc, buf); + if (ret > 0) { + loc += ret; + } + return ret; + } + + /** + * Read content of the stream into a {@link ByteBuffer}. + * @param pos position to start the read. + * @param buf buffer to add the read content. + * @return number of read bytes. + * @throws IOException if I/O fails. + */ + protected abstract int read(long pos, ByteBuffer buf) throws IOException; +} diff --git a/src/main/java/org/apache/commons/compress/utils/BoundedInputStream.java b/src/main/java/org/apache/commons/compress/utils/BoundedInputStream.java new file mode 100644 index 0000000..2e874bb --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/BoundedInputStream.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.utils; + +import java.io.IOException; +import java.io.InputStream; + +/** + * A stream that limits reading from a wrapped stream to a given number of bytes. + * @NotThreadSafe + * @since 1.6 + */ +public class BoundedInputStream extends InputStream { + private final InputStream in; + private long bytesRemaining; + + /** + * Creates the stream that will at most read the given amount of + * bytes from the given stream. 
+ * @param in the stream to read from + * @param size the maximum amount of bytes to read + */ + public BoundedInputStream(final InputStream in, final long size) { + this.in = in; + bytesRemaining = size; + } + + @Override + public int read() throws IOException { + if (bytesRemaining > 0) { + --bytesRemaining; + return in.read(); + } + return -1; + } + + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + if (len == 0) { + return 0; + } + if (bytesRemaining == 0) { + return -1; + } + int bytesToRead = len; + if (bytesToRead > bytesRemaining) { + bytesToRead = (int) bytesRemaining; + } + final int bytesRead = in.read(b, off, bytesToRead); + if (bytesRead >= 0) { + bytesRemaining -= bytesRead; + } + return bytesRead; + } + + @Override + public void close() { + // there isn't anything to close in this stream and the nested + // stream is controlled externally + } + + /** + * @since 1.20 + */ + @Override + public long skip(final long n) throws IOException { + final long bytesToSkip = Math.min(bytesRemaining, n); + final long bytesSkipped = in.skip(bytesToSkip); + bytesRemaining -= bytesSkipped; + + return bytesSkipped; + } + + /** + * @return bytes remaining to read + * @since 1.21 + */ + public long getBytesRemaining() { + return bytesRemaining; + } +} diff --git a/src/main/java/org/apache/commons/compress/utils/BoundedSeekableByteChannelInputStream.java b/src/main/java/org/apache/commons/compress/utils/BoundedSeekableByteChannelInputStream.java new file mode 100644 index 0000000..75d352e --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/BoundedSeekableByteChannelInputStream.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.utils; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.SeekableByteChannel; + +/** + * InputStream that delegates requests to the underlying SeekableByteChannel, making sure that only bytes from a certain + * range can be read. + * @ThreadSafe + * @since 1.21 + */ +public class BoundedSeekableByteChannelInputStream extends BoundedArchiveInputStream { + + private final SeekableByteChannel channel; + + /** + * Create a bounded stream on the underlying {@link SeekableByteChannel} + * + * @param start Position in the stream from where the reading of this bounded stream starts + * @param remaining Amount of bytes which are allowed to read from the bounded stream + * @param channel Channel which the reads will be delegated to + */ + public BoundedSeekableByteChannelInputStream(final long start, final long remaining, + final SeekableByteChannel channel) { + super(start, remaining); + this.channel = channel; + } + + @Override + protected int read(long pos, ByteBuffer buf) throws IOException { + int read; + synchronized (channel) { + channel.position(pos); + read = channel.read(buf); + } + buf.flip(); + return read; + } +} diff --git a/src/main/java/org/apache/commons/compress/utils/ByteUtils.java b/src/main/java/org/apache/commons/compress/utils/ByteUtils.java new file mode 100644 index 0000000..0c64a24 --- 
/dev/null +++ b/src/main/java/org/apache/commons/compress/utils/ByteUtils.java @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.utils; + +import static java.nio.charset.StandardCharsets.UTF_16LE; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; + +/** + * Utility methods for reading and writing bytes. + * @since 1.14 + */ +public final class ByteUtils { + + /** + * Empty array. + * + * @since 1.21 + */ + public static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; + + private ByteUtils() { /* no instances */ } + + /** + * Used to supply bytes. + * @since 1.14 + */ + public interface ByteSupplier { + /** + * The contract is similar to {@link InputStream#read()}, return + * the byte as an unsigned int, -1 if there are no more bytes. + * @return the supplied byte or -1 if there are no more bytes + * @throws IOException if supplying fails + */ + int getAsByte() throws IOException; + } + + /** + * Used to consume bytes. 
+ * @since 1.14 + */ + public interface ByteConsumer { + /** + * The contract is similar to {@link OutputStream#write(int)}, + * consume the lower eight bytes of the int as a byte. + * @param b the byte to consume + * @throws IOException if consuming fails + */ + void accept(int b) throws IOException; + } + + /** + * Reads the given byte array as a little endian long. + * @param bytes the byte array to convert + * @return the number read + */ + public static long fromLittleEndian(final byte[] bytes) { + return fromLittleEndian(bytes, 0, bytes.length); + } + + /** + * Reads the given byte array as a little endian long. + * @param bytes the byte array to convert + * @param off the offset into the array that starts the value + * @param length the number of bytes representing the value + * @return the number read + * @throws IllegalArgumentException if len is bigger than eight + */ + public static long fromLittleEndian(final byte[] bytes, final int off, final int length) { + checkReadLength(length); + long l = 0; + for (int i = 0; i < length; i++) { + l |= (bytes[off + i] & 0xffL) << (8 * i); + } + return l; + } + + /** + * Reads the given number of bytes from the given stream as a little endian long. 
+ * @param in the stream to read from + * @param length the number of bytes representing the value + * @return the number read + * @throws IllegalArgumentException if len is bigger than eight + * @throws IOException if reading fails or the stream doesn't + * contain the given number of bytes anymore + */ + public static long fromLittleEndian(final InputStream in, final int length) throws IOException { + // somewhat duplicates the ByteSupplier version in order to save the creation of a wrapper object + checkReadLength(length); + long l = 0; + for (int i = 0; i < length; i++) { + final long b = in.read(); + if (b == -1) { + throw new IOException("Premature end of data"); + } + l |= (b << (i * 8)); + } + return l; + } + + /** + * Reads the given number of bytes from the given supplier as a little endian long. + * + *

Typically used by our InputStreams that need to count the + * bytes read as well.

+ * + * @param supplier the supplier for bytes + * @param length the number of bytes representing the value + * @return the number read + * @throws IllegalArgumentException if len is bigger than eight + * @throws IOException if the supplier fails or doesn't supply the + * given number of bytes anymore + */ + public static long fromLittleEndian(final ByteSupplier supplier, final int length) throws IOException { + checkReadLength(length); + long l = 0; + for (int i = 0; i < length; i++) { + final long b = supplier.getAsByte(); + if (b == -1) { + throw new IOException("Premature end of data"); + } + l |= (b << (i * 8)); + } + return l; + } + + /** + * Reads the given number of bytes from the given input as little endian long. + * @param in the input to read from + * @param length the number of bytes representing the value + * @return the number read + * @throws IllegalArgumentException if len is bigger than eight + * @throws IOException if reading fails or the stream doesn't + * contain the given number of bytes anymore + */ + public static long fromLittleEndian(final DataInput in, final int length) throws IOException { + // somewhat duplicates the ByteSupplier version in order to save the creation of a wrapper object + checkReadLength(length); + long l = 0; + for (int i = 0; i < length; i++) { + final long b = in.readUnsignedByte(); + l |= (b << (i * 8)); + } + return l; + } + + /** + * Inserts the given value into the array as a little endian + * sequence of the given length starting at the given offset. 
+ * @param b the array to write into + * @param value the value to insert + * @param off the offset into the array that receives the first byte + * @param length the number of bytes to use to represent the value + */ + public static void toLittleEndian(final byte[] b, final long value, final int off, final int length) { + long num = value; + for (int i = 0; i < length; i++) { + b[off + i] = (byte) (num & 0xff); + num >>= 8; + } + } + + /** + * Writes the given value to the given stream as a little endian + * array of the given length. + * @param out the stream to write to + * @param value the value to write + * @param length the number of bytes to use to represent the value + * @throws IOException if writing fails + */ + public static void toLittleEndian(final OutputStream out, final long value, final int length) + throws IOException { + // somewhat duplicates the ByteConsumer version in order to save the creation of a wrapper object + long num = value; + for (int i = 0; i < length; i++) { + out.write((int) (num & 0xff)); + num >>= 8; + } + } + + /** + * Provides the given value to the given consumer as a little endian + * sequence of the given length. + * @param consumer the consumer to provide the bytes to + * @param value the value to provide + * @param length the number of bytes to use to represent the value + * @throws IOException if writing fails + */ + public static void toLittleEndian(final ByteConsumer consumer, final long value, final int length) + throws IOException { + long num = value; + for (int i = 0; i < length; i++) { + consumer.accept((int) (num & 0xff)); + num >>= 8; + } + } + + /** + * Writes the given value to the given stream as a little endian + * array of the given length. 
+ * @param out the output to write to + * @param value the value to write + * @param length the number of bytes to use to represent the value + * @throws IOException if writing fails + */ + public static void toLittleEndian(final DataOutput out, final long value, final int length) + throws IOException { + // somewhat duplicates the ByteConsumer version in order to save the creation of a wrapper object + long num = value; + for (int i = 0; i < length; i++) { + out.write((int) (num & 0xff)); + num >>= 8; + } + } + + /** + * {@link ByteSupplier} based on {@link InputStream}. + * @since 1.14 + */ + public static class InputStreamByteSupplier implements ByteSupplier { + private final InputStream is; + public InputStreamByteSupplier(final InputStream is) { + this.is = is; + } + @Override + public int getAsByte() throws IOException { + return is.read(); + } + } + + /** + * {@link ByteConsumer} based on {@link OutputStream}. + * @since 1.14 + */ + public static class OutputStreamByteConsumer implements ByteConsumer { + private final OutputStream os; + public OutputStreamByteConsumer(final OutputStream os) { + this.os = os; + } + @Override + public void accept(final int b) throws IOException { + os.write(b); + } + } + + private static void checkReadLength(final int length) { + if (length > 8) { + throw new IllegalArgumentException("Can't read more than eight bytes into a long value"); + } + } + + public static byte[] utf16Decode(final char[] chars) { + if (chars == null) { + return null; + } + final ByteBuffer encoded = UTF_16LE.encode(CharBuffer.wrap(chars)); + if (encoded.hasArray()) { + return encoded.array(); + } + final byte[] e = new byte[encoded.remaining()]; + encoded.get(e); + return e; + } +} diff --git a/src/main/java/org/apache/commons/compress/utils/CRC32VerifyingInputStream.java b/src/main/java/org/apache/commons/compress/utils/CRC32VerifyingInputStream.java new file mode 100644 index 0000000..0e5bf50 --- /dev/null +++ 
b/src/main/java/org/apache/commons/compress/utils/CRC32VerifyingInputStream.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.utils; + +import java.io.InputStream; +import java.util.zip.CRC32; + +/** + * A stream that verifies the CRC of the data read once the stream is + * exhausted. + * @NotThreadSafe + * @since 1.6 + */ +public class CRC32VerifyingInputStream extends ChecksumVerifyingInputStream { + + /** + * Constructs a new instance. + * + * @param in the stream to wrap + * @param size the of the stream's content + * @param expectedCrc32 the expected checksum + */ + public CRC32VerifyingInputStream(final InputStream in, final long size, final int expectedCrc32) { + this(in, size, expectedCrc32 & 0xFFFFffffL); + } + + /** + * Constructs a new instance. 
+ * + * @param in the stream to wrap + * @param size the of the stream's content + * @param expectedCrc32 the expected checksum + * @since 1.7 + */ + public CRC32VerifyingInputStream(final InputStream in, final long size, final long expectedCrc32) { + super(new CRC32(), in, size, expectedCrc32); + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/CharsetNames.java b/src/main/java/org/apache/commons/compress/utils/CharsetNames.java new file mode 100644 index 0000000..7a49133 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/CharsetNames.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.utils; + +/** + * Character encoding names required of every implementation of the Java platform. + * + * From the Java documentation Standard + * charsets: + *

+ * Every implementation of the Java platform is required to support the following character encodings. Consult the + * release documentation for your implementation to see if any other encodings are supported. Consult the release + * documentation for your implementation to see if any other encodings are supported. + *

+ * + *
+ *
{@code US-ASCII}
+ *
Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.
+ *
{@code ISO-8859-1}
+ *
ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
+ *
{@code UTF-8}
+ *
Eight-bit Unicode Transformation Format.
+ *
{@code UTF-16BE}
+ *
Sixteen-bit Unicode Transformation Format, big-endian byte order.
+ *
{@code UTF-16LE}
+ *
Sixteen-bit Unicode Transformation Format, little-endian byte order.
+ *
{@code UTF-16}
+ *
Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order + * accepted on input, big-endian used on output.)
+ *
+ * + *

This perhaps would best belong in the [lang] project. Even if a similar interface is defined in [lang], it is not + * foreseen that [compress] would be made to depend on [lang].

+ * + * @see Standard charsets + * @since 1.4 + */ +public class CharsetNames { + /** + * CharEncodingISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. + *

+ * Every implementation of the Java platform is required to support this character encoding. + *

+ * + * @see Standard charsets + */ + public static final String ISO_8859_1 = "ISO-8859-1"; + + /** + *

+ * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set. + *

+ *

+ * Every implementation of the Java platform is required to support this character encoding. + *

+ * + * @see Standard charsets + */ + public static final String US_ASCII = "US-ASCII"; + + /** + *

+ * Sixteen-bit Unicode Transformation Format, The byte order specified by a mandatory initial byte-order mark + * (either order accepted on input, big-endian used on output) + *

+ *

+ * Every implementation of the Java platform is required to support this character encoding. + *

+ * + * @see Standard charsets + */ + public static final String UTF_16 = "UTF-16"; + + /** + *

+ * Sixteen-bit Unicode Transformation Format, big-endian byte order. + *

+ *

+ * Every implementation of the Java platform is required to support this character encoding. + *

+ * + * @see Standard charsets + */ + public static final String UTF_16BE = "UTF-16BE"; + + /** + *

+ * Sixteen-bit Unicode Transformation Format, little-endian byte order. + *

+ *

+ * Every implementation of the Java platform is required to support this character encoding. + *

+ * + * @see Standard charsets + */ + public static final String UTF_16LE = "UTF-16LE"; + + /** + *

+ * Eight-bit Unicode Transformation Format. + *

+ *

+ * Every implementation of the Java platform is required to support this character encoding. + *

+ * + * @see Standard charsets + */ + public static final String UTF_8 = "UTF-8"; +} diff --git a/src/main/java/org/apache/commons/compress/utils/Charsets.java b/src/main/java/org/apache/commons/compress/utils/Charsets.java new file mode 100644 index 0000000..f446654 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/Charsets.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.utils; + +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; + +/** + * Charsets required of every implementation of the Java platform. + * + * From the Java documentation Standard + * charsets: + *

+ * Every implementation of the Java platform is required to support the following character encodings. Consult the + * release documentation for your implementation to see if any other encodings are supported. Consult the release + * documentation for your implementation to see if any other encodings are supported. + *

+ * + *
+ *
{@code US-ASCII}
+ *
Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.
+ *
{@code ISO-8859-1}
+ *
ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
+ *
{@code UTF-8}
+ *
Eight-bit Unicode Transformation Format.
+ *
{@code UTF-16BE}
+ *
Sixteen-bit Unicode Transformation Format, big-endian byte order.
+ *
{@code UTF-16LE}
+ *
Sixteen-bit Unicode Transformation Format, little-endian byte order.
+ *
{@code UTF-16}
+ *
Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order + * accepted on input, big-endian used on output.)
+ *
+ * + *

This class best belongs in the Commons Lang or IO project. Even if a similar class is defined in another Commons + * component, it is not foreseen that Commons Compress would be made to depend on another Commons component.

+ * + * @see Standard charsets + * @see StandardCharsets + * @since 1.4 + */ +public class Charsets { + + // + // This class should only contain Charset instances for required encodings. This guarantees that it will load correctly and + // without delay on all Java platforms. + // + + /** + * Returns the given Charset or the default Charset if the given Charset is null. + * + * @param charset + * A charset or null. + * @return the given Charset or the default Charset if the given Charset is null + */ + public static Charset toCharset(final Charset charset) { + return charset == null ? Charset.defaultCharset() : charset; + } + + /** + * Returns a Charset for the named charset. If the name is null, return the default Charset. + * + * @param charset + * The name of the requested charset, may be null. + * @return a Charset for the named charset + * @throws java.nio.charset.UnsupportedCharsetException + * If the named charset is unavailable + * @throws java.nio.charset.IllegalCharsetNameException + * If the given charset name is illegal + */ + public static Charset toCharset(final String charset) { + return charset == null ? Charset.defaultCharset() : Charset.forName(charset); + } + + /** + * CharsetNamesISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1. + *

+ * Every implementation of the Java platform is required to support this character encoding. + *

+ * + * @see Standard charsets + * @deprecated replaced by {@link StandardCharsets} in Java 7 + */ + @Deprecated + public static final Charset ISO_8859_1 = StandardCharsets.ISO_8859_1; + + /** + *

+ * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set. + *

+ *

+ * Every implementation of the Java platform is required to support this character encoding. + *

+ * + * @see Standard charsets + * @deprecated replaced by {@link StandardCharsets} in Java 7 + */ + @Deprecated + public static final Charset US_ASCII = StandardCharsets.US_ASCII; + + /** + *

+ * Sixteen-bit Unicode Transformation Format, The byte order specified by a mandatory initial byte-order mark + * (either order accepted on input, big-endian used on output) + *

+ *

+ * Every implementation of the Java platform is required to support this character encoding. + *

+ * + * @see Standard charsets + * @deprecated replaced by {@link StandardCharsets} in Java 7 + */ + @Deprecated + public static final Charset UTF_16 = StandardCharsets.UTF_16; + + /** + *

+ * Sixteen-bit Unicode Transformation Format, big-endian byte order. + *

+ *

+ * Every implementation of the Java platform is required to support this character encoding. + *

+ * + * @see Standard charsets + * @deprecated replaced by {@link StandardCharsets} in Java 7 + */ + @Deprecated + public static final Charset UTF_16BE = StandardCharsets.UTF_16BE; + + /** + *

+ * Sixteen-bit Unicode Transformation Format, little-endian byte order. + *

+ *

+ * Every implementation of the Java platform is required to support this character encoding. + *

+ * + * @see Standard charsets + * @deprecated replaced by {@link StandardCharsets} in Java 7 + */ + @Deprecated + public static final Charset UTF_16LE = StandardCharsets.UTF_16LE; + + /** + *

+ * Eight-bit Unicode Transformation Format. + *

+ *

+ * Every implementation of the Java platform is required to support this character encoding. + *

+ * + * @see Standard charsets + * @deprecated replaced by {@link StandardCharsets} in Java 7 + */ + @Deprecated + public static final Charset UTF_8 = StandardCharsets.UTF_8; +} diff --git a/src/main/java/org/apache/commons/compress/utils/ChecksumCalculatingInputStream.java b/src/main/java/org/apache/commons/compress/utils/ChecksumCalculatingInputStream.java new file mode 100644 index 0000000..e397ae2 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/ChecksumCalculatingInputStream.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.utils; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Objects; +import java.util.zip.Checksum; + +/** + * A stream that calculates the checksum of the data read. 
+ * @NotThreadSafe + * @since 1.14 + */ +public class ChecksumCalculatingInputStream extends InputStream { + private final InputStream in; + private final Checksum checksum; + + public ChecksumCalculatingInputStream(final Checksum checksum, final InputStream inputStream) { + + Objects.requireNonNull(checksum, "checksum"); + Objects.requireNonNull(inputStream, "in"); + + this.checksum = checksum; + this.in = inputStream; + } + + /** + * Reads a single byte from the stream + * @throws IOException if the underlying stream throws or the + * stream is exhausted and the Checksum doesn't match the expected + * value + */ + @Override + public int read() throws IOException { + final int ret = in.read(); + if (ret >= 0) { + checksum.update(ret); + } + return ret; + } + + /** + * Reads a byte array from the stream + * @throws IOException if the underlying stream throws or the + * stream is exhausted and the Checksum doesn't match the expected + * value + */ + @Override + public int read(final byte[] b) throws IOException { + return read(b, 0, b.length); + } + + /** + * Reads from the stream into a byte array. + * @throws IOException if the underlying stream throws or the + * stream is exhausted and the Checksum doesn't match the expected + * value + */ + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + if (len == 0) { + return 0; + } + final int ret = in.read(b, off, len); + if (ret >= 0) { + checksum.update(b, off, ret); + } + return ret; + } + + @Override + public long skip(final long n) throws IOException { + // Can't really skip, we have to hash everything to verify the checksum + if (read() >= 0) { + return 1; + } + return 0; + } + + /** + * Returns the calculated checksum. + * @return the calculated checksum. 
+ */ + public long getValue() { + return checksum.getValue(); + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/ChecksumVerifyingInputStream.java b/src/main/java/org/apache/commons/compress/utils/ChecksumVerifyingInputStream.java new file mode 100644 index 0000000..62eca31 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/ChecksumVerifyingInputStream.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.utils; + +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.Checksum; + +/** + * A stream that verifies the checksum of the data read once the stream is + * exhausted. + * @NotThreadSafe + * @since 1.7 + */ +public class ChecksumVerifyingInputStream extends InputStream { + + private final InputStream in; + private long bytesRemaining; + private final long expectedChecksum; + private final Checksum checksum; + + /** + * Constructs a new instance. + * + * @param checksum Checksum implementation. 
+ * @param in the stream to wrap + * @param size the of the stream's content + * @param expectedChecksum the expected checksum + */ + public ChecksumVerifyingInputStream(final Checksum checksum, final InputStream in, + final long size, final long expectedChecksum) { + this.checksum = checksum; + this.in = in; + this.expectedChecksum = expectedChecksum; + this.bytesRemaining = size; + } + + @Override + public void close() throws IOException { + in.close(); + } + + /** + * @return bytes remaining to read + * @since 1.21 + */ + public long getBytesRemaining() { + return bytesRemaining; + } + + /** + * Reads a single byte from the stream + * @throws IOException if the underlying stream throws or the + * stream is exhausted and the Checksum doesn't match the expected + * value + */ + @Override + public int read() throws IOException { + if (bytesRemaining <= 0) { + return -1; + } + final int ret = in.read(); + if (ret >= 0) { + checksum.update(ret); + --bytesRemaining; + } + verify(); + return ret; + } + + /** + * Reads a byte array from the stream + * @throws IOException if the underlying stream throws or the + * stream is exhausted and the Checksum doesn't match the expected + * value + */ + @Override + public int read(final byte[] b) throws IOException { + return read(b, 0, b.length); + } + + /** + * Reads from the stream into a byte array. + * @throws IOException if the underlying stream throws or the + * stream is exhausted and the Checksum doesn't match the expected + * value + */ + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + if (len == 0) { + return 0; + } + final int ret = in.read(b, off, len); + if (ret >= 0) { + checksum.update(b, off, ret); + bytesRemaining -= ret; + } + verify(); + return ret; + } + + @Override + public long skip(final long n) throws IOException { + // Can't really skip, we have to hash everything to verify the checksum + return read() >= 0 ? 
1 : 0; + } + + private void verify() throws IOException { + if (bytesRemaining <= 0 && expectedChecksum != checksum.getValue()) { + throw new IOException("Checksum verification failed"); + } + } +} diff --git a/src/main/java/org/apache/commons/compress/utils/CloseShieldFilterInputStream.java b/src/main/java/org/apache/commons/compress/utils/CloseShieldFilterInputStream.java new file mode 100644 index 0000000..c1ce797 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/CloseShieldFilterInputStream.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.utils; + +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * Re-implements {@link FilterInputStream#close()} to do nothing. + * @since 1.14 + */ +public class CloseShieldFilterInputStream extends FilterInputStream { + + public CloseShieldFilterInputStream(final InputStream in) { + super(in); + } + + @Override + public void close() throws IOException { + // NO IMPLEMENTATION. 
+ } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/CountingInputStream.java b/src/main/java/org/apache/commons/compress/utils/CountingInputStream.java new file mode 100644 index 0000000..cd6478c --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/CountingInputStream.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.utils; + +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * Input stream that tracks the number of bytes read. 
+ * @since 1.3 + * @NotThreadSafe + */ +public class CountingInputStream extends FilterInputStream { + private long bytesRead; + + public CountingInputStream(final InputStream in) { + super(in); + } + + @Override + public int read() throws IOException { + final int r = in.read(); + if (r >= 0) { + count(1); + } + return r; + } + + @Override + public int read(final byte[] b) throws IOException { + return read(b, 0, b.length); + } + + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + if (len == 0) { + return 0; + } + final int r = in.read(b, off, len); + if (r >= 0) { + count(r); + } + return r; + } + + /** + * Increments the counter of already read bytes. + * Doesn't increment if the EOF has been hit (read == -1) + * + * @param read the number of bytes read + */ + protected final void count(final long read) { + if (read != -1) { + bytesRead += read; + } + } + + /** + * Returns the current number of bytes read from this stream. + * @return the number of read bytes + */ + public long getBytesRead() { + return bytesRead; + } +} diff --git a/src/main/java/org/apache/commons/compress/utils/CountingOutputStream.java b/src/main/java/org/apache/commons/compress/utils/CountingOutputStream.java new file mode 100644 index 0000000..9802272 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/CountingOutputStream.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.utils; + +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +/** + * Stream that tracks the number of bytes read. + * @since 1.3 + * @NotThreadSafe + */ +public class CountingOutputStream extends FilterOutputStream { + private long bytesWritten; + + public CountingOutputStream(final OutputStream out) { + super(out); + } + + @Override + public void write(final int b) throws IOException { + out.write(b); + count(1); + } + @Override + public void write(final byte[] b) throws IOException { + write(b, 0, b.length); + } + @Override + public void write(final byte[] b, final int off, final int len) throws IOException { + out.write(b, off, len); + count(len); + } + + /** + * Increments the counter of already written bytes. + * Doesn't increment if the EOF has been hit (written == -1) + * + * @param written the number of bytes written + */ + protected void count(final long written) { + if (written != -1) { + bytesWritten += written; + } + } + + /** + * Returns the current number of bytes written to this stream. 
+ * @return the number of written bytes + */ + public long getBytesWritten() { + return bytesWritten; + } +} diff --git a/src/main/java/org/apache/commons/compress/utils/ExactMath.java b/src/main/java/org/apache/commons/compress/utils/ExactMath.java new file mode 100644 index 0000000..eab9c71 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/ExactMath.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.utils; + +/** + * PRIVATE. + * + * Performs exact math through {@link Math} "exact" APIs. + */ +public class ExactMath { + + private ExactMath() { + // no instances + } + + /** + * Returns the int result of adding an int and a long, and throws an exception if the result overflows an int. + * + * @param x the first value, an int. + * @param y the second value, a long, + * @return the addition of both values. + * @throws ArithmeticException when y overflow an int. + * @throws ArithmeticException if the result overflows an int. 
+ */ + public static int add(final int x, final long y) { + return Math.addExact(x, Math.toIntExact(y)); + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/FileNameUtils.java b/src/main/java/org/apache/commons/compress/utils/FileNameUtils.java new file mode 100644 index 0000000..127de93 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/FileNameUtils.java @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.utils; + +import java.io.File; +import java.nio.file.Path; + +/** + * Generic file name utilities. + * @since 1.20 + */ +public class FileNameUtils { + + private static String fileNameToBaseName(final String name) { + final int extensionIndex = name.lastIndexOf('.'); + return extensionIndex < 0 ? name : name.substring(0, extensionIndex); + } + + private static String fileNameToExtension(final String name) { + final int extensionIndex = name.lastIndexOf('.'); + return extensionIndex < 0 ? "" : name.substring(extensionIndex + 1); + } + + /** + * Gets the basename (i.e. the part up to and not including the + * last ".") of the last path segment of a filename. + *

Will return the file name itself if it doesn't contain any + * dots. All leading directories of the {@code filename} parameter + * are skipped.

+ * @return the basename of filename + * @param path the path of the file to obtain the basename of. + * @since 1.22 + */ + public static String getBaseName(final Path path) { + if (path == null) { + return null; + } + return fileNameToBaseName(path.getFileName().toString()); + } + + /** + * Gets the basename (i.e. the part up to and not including the + * last ".") of the last path segment of a filename. + * + *

Will return the file name itself if it doesn't contain any + * dots. All leading directories of the {@code filename} parameter + * are skipped.

+ * + * @return the basename of filename + * @param filename the name of the file to obtain the basename of. + */ + public static String getBaseName(final String filename) { + if (filename == null) { + return null; + } + return fileNameToBaseName(new File(filename).getName()); + } + + /** + * Gets the extension (i.e. the part after the last ".") of a file. + *

Will return an empty string if the file name doesn't contain + * any dots. Only the last segment of the file name is consulted + * - i.e. all leading directories of the {@code filename} + * parameter are skipped.

+ * @return the extension of filename + * @param path the path of the file to obtain the extension of. + * @since 1.22 + */ + public static String getExtension(final Path path) { + if (path == null) { + return null; + } + return fileNameToExtension(path.getFileName().toString()); + } + + /** + * Gets the extension (i.e. the part after the last ".") of a file. + * + *

Will return an empty string if the file name doesn't contain + * any dots. Only the last segment of the file name is consulted + * - i.e. all leading directories of the {@code filename} + * parameter are skipped.

+ * + * @return the extension of filename + * @param filename the name of the file to obtain the extension of. + */ + public static String getExtension(final String filename) { + if (filename == null) { + return null; + } + return fileNameToExtension(new File(filename).getName()); + } +} diff --git a/src/main/java/org/apache/commons/compress/utils/FixedLengthBlockOutputStream.java b/src/main/java/org/apache/commons/compress/utils/FixedLengthBlockOutputStream.java new file mode 100644 index 0000000..1de269e --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/FixedLengthBlockOutputStream.java @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.commons.compress.utils; + +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.ClosedChannelException; +import java.nio.channels.WritableByteChannel; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * This class supports writing to an OutputStream or WritableByteChannel in fixed length blocks. + *

It can be used to support output to devices such as tape drives that require output in this + * format. If the final block does not have enough content to fill an entire block, the output will + * be padded to a full block size.

+ * + *

This class can be used to support TAR, PAX, and CPIO blocked output to character special devices. + * It is not recommended that this class be used unless writing to such devices, as the padding + * serves no useful purpose in such cases.

+ * + *

This class should normally wrap a FileOutputStream or associated WritableByteChannel directly. + * If there is an intervening filter that modifies the output, such as a CompressorOutputStream, or + * performs its own buffering, such as BufferedOutputStream, output to the device may + * no longer be of the specified size.

+ * + *

Any content written to this stream should be self-delimiting and should tolerate any padding + * added to fill the last block.

+ * + * @since 1.15 + */ +public class FixedLengthBlockOutputStream extends OutputStream implements WritableByteChannel { + + private final WritableByteChannel out; + private final int blockSize; + private final ByteBuffer buffer; + private final AtomicBoolean closed = new AtomicBoolean(false); + + /** + * Create a fixed length block output stream with given destination stream and block size + * @param os The stream to wrap. + * @param blockSize The block size to use. + */ + public FixedLengthBlockOutputStream(final OutputStream os, final int blockSize) { + if (os instanceof FileOutputStream) { + final FileOutputStream fileOutputStream = (FileOutputStream) os; + out = fileOutputStream.getChannel(); + buffer = ByteBuffer.allocateDirect(blockSize); + } else { + out = new BufferAtATimeOutputChannel(os); + buffer = ByteBuffer.allocate(blockSize); + } + this.blockSize = blockSize; + } + /** + * Create a fixed length block output stream with given destination writable byte channel and block size + * @param out The writable byte channel to wrap. + * @param blockSize The block size to use. + */ + public FixedLengthBlockOutputStream(final WritableByteChannel out, final int blockSize) { + this.out = out; + this.blockSize = blockSize; + this.buffer = ByteBuffer.allocateDirect(blockSize); + } + + private void maybeFlush() throws IOException { + if (!buffer.hasRemaining()) { + writeBlock(); + } + } + + private void writeBlock() throws IOException { + buffer.flip(); + final int i = out.write(buffer); + final boolean hasRemaining = buffer.hasRemaining(); + if (i != blockSize || hasRemaining) { + final String msg = String + .format("Failed to write %,d bytes atomically. 
Only wrote %,d", + blockSize, i); + throw new IOException(msg); + } + buffer.clear(); + } + + @Override + public void write(final int b) throws IOException { + if (!isOpen()) { + throw new ClosedChannelException(); + } + buffer.put((byte) b); + maybeFlush(); + } + + @Override + public void write(final byte[] b, final int offset, final int length) throws IOException { + if (!isOpen()) { + throw new ClosedChannelException(); + } + int off = offset; + int len = length; + while (len > 0) { + final int n = Math.min(len, buffer.remaining()); + buffer.put(b, off, n); + maybeFlush(); + len -= n; + off += n; + } + } + + @Override + public int write(final ByteBuffer src) throws IOException { + if (!isOpen()) { + throw new ClosedChannelException(); + } + final int srcRemaining = src.remaining(); + + if (srcRemaining < buffer.remaining()) { + // if don't have enough bytes in src to fill up a block we must buffer + buffer.put(src); + } else { + int srcLeft = srcRemaining; + final int savedLimit = src.limit(); + // If we're not at the start of buffer, we have some bytes already buffered + // fill up the reset of buffer and write the block. + if (buffer.position() != 0) { + final int n = buffer.remaining(); + src.limit(src.position() + n); + buffer.put(src); + writeBlock(); + srcLeft -= n; + } + // whilst we have enough bytes in src for complete blocks, + // write them directly from src without copying them to buffer + while (srcLeft >= blockSize) { + src.limit(src.position() + blockSize); + out.write(src); + srcLeft -= blockSize; + } + // copy any remaining bytes into buffer + src.limit(savedLimit); + buffer.put(src); + } + return srcRemaining; + } + + @Override + public boolean isOpen() { + if (!out.isOpen()) { + closed.set(true); + } + return !closed.get(); + } + + /** + * Potentially pads and then writes the current block to the underlying stream. 
+ * @throws IOException if writing fails + */ + public void flushBlock() throws IOException { + if (buffer.position() != 0) { + padBlock(); + writeBlock(); + } + } + + @Override + public void close() throws IOException { + if (closed.compareAndSet(false, true)) { + try { + flushBlock(); + } finally { + out.close(); + } + } + } + + private void padBlock() { + buffer.order(ByteOrder.nativeOrder()); + int bytesToWrite = buffer.remaining(); + if (bytesToWrite > 8) { + final int align = buffer.position() & 7; + if (align != 0) { + final int limit = 8 - align; + for (int i = 0; i < limit; i++) { + buffer.put((byte) 0); + } + bytesToWrite -= limit; + } + + while (bytesToWrite >= 8) { + buffer.putLong(0L); + bytesToWrite -= 8; + } + } + while (buffer.hasRemaining()) { + buffer.put((byte) 0); + } + } + + /** + * Helper class to provide channel wrapper for arbitrary output stream that doesn't alter the + * size of writes. We can't use Channels.newChannel, because for non FileOutputStreams, it + * breaks up writes into 8KB max chunks. Since the purpose of this class is to always write + * complete blocks, we need to write a simple class to take care of it. 
+ */ + private static class BufferAtATimeOutputChannel implements WritableByteChannel { + + private final OutputStream out; + private final AtomicBoolean closed = new AtomicBoolean(false); + + private BufferAtATimeOutputChannel(final OutputStream out) { + this.out = out; + } + + @Override + public int write(final ByteBuffer buffer) throws IOException { + if (!isOpen()) { + throw new ClosedChannelException(); + } + if (!buffer.hasArray()) { + throw new IOException("Direct buffer somehow written to BufferAtATimeOutputChannel"); + } + + try { + final int pos = buffer.position(); + final int len = buffer.limit() - pos; + out.write(buffer.array(), buffer.arrayOffset() + pos, len); + buffer.position(buffer.limit()); + return len; + } catch (final IOException e) { + try { + close(); + } catch (final IOException ignored) { //NOSONAR + } + throw e; + } + } + + @Override + public boolean isOpen() { + return !closed.get(); + } + + @Override + public void close() throws IOException { + if (closed.compareAndSet(false, true)) { + out.close(); + } + } + + } + + +} diff --git a/src/main/java/org/apache/commons/compress/utils/FlushShieldFilterOutputStream.java b/src/main/java/org/apache/commons/compress/utils/FlushShieldFilterOutputStream.java new file mode 100644 index 0000000..07108b5 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/FlushShieldFilterOutputStream.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.utils; + +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +/** + * Re-implements {@link FilterOutputStream#flush()} to do nothing. + */ +public class FlushShieldFilterOutputStream extends FilterOutputStream { + + public FlushShieldFilterOutputStream(final OutputStream out) { + super(out); + } + + @Override + public void flush() throws IOException { + // NO IMPLEMENTATION. + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/IOUtils.java b/src/main/java/org/apache/commons/compress/utils/IOUtils.java new file mode 100644 index 0000000..ebcef55 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/IOUtils.java @@ -0,0 +1,387 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.utils; + +import java.io.ByteArrayOutputStream; +import java.io.Closeable; +import java.io.EOFException; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.channels.ReadableByteChannel; +import java.nio.file.Files; +import java.nio.file.LinkOption; + +/** + * Utility functions + * @Immutable (has mutable data but it is write-only) + */ +public final class IOUtils { + + private static final int COPY_BUF_SIZE = 8024; + private static final int SKIP_BUF_SIZE = 4096; + + /** + * Empty array of of type {@link LinkOption}. + * + * @since 1.21 + */ + public static final LinkOption[] EMPTY_LINK_OPTIONS = {}; + + // This buffer does not need to be synchronized because it is write only; the contents are ignored + // Does not affect Immutability + private static final byte[] SKIP_BUF = new byte[SKIP_BUF_SIZE]; + + /** Private constructor to prevent instantiation of this utility class. */ + private IOUtils(){ + } + + /** + * Copies the content of a InputStream into an OutputStream. + * Uses a default buffer size of 8024 bytes. 
+ * + * @param input + * the InputStream to copy + * @param output + * the target, may be null to simulate output to dev/null on Linux and NUL on Windows + * @return the number of bytes copied + * @throws IOException + * if an error occurs + */ + public static long copy(final InputStream input, final OutputStream output) throws IOException { + return copy(input, output, COPY_BUF_SIZE); + } + + /** + * Copies the content of a InputStream into an OutputStream + * + * @param input + * the InputStream to copy + * @param output + * the target, may be null to simulate output to dev/null on Linux and NUL on Windows + * @param buffersize + * the buffer size to use, must be bigger than 0 + * @return the number of bytes copied + * @throws IOException + * if an error occurs + * @throws IllegalArgumentException + * if buffersize is smaller than or equal to 0 + */ + public static long copy(final InputStream input, final OutputStream output, final int buffersize) throws IOException { + if (buffersize < 1) { + throw new IllegalArgumentException("buffersize must be bigger than 0"); + } + final byte[] buffer = new byte[buffersize]; + int n = 0; + long count=0; + while (-1 != (n = input.read(buffer))) { + if (output != null) { + output.write(buffer, 0, n); + } + count += n; + } + return count; + } + + /** + * Skips the given number of bytes by repeatedly invoking skip on + * the given input stream if necessary. + * + *

In a case where the stream's skip() method returns 0 before + * the requested number of bytes has been skipped, this implementation + * will fall back to using the read() method.

+ * + *

This method will only skip less than the requested number of + * bytes if the end of the input stream has been reached.

+ * + * @param input stream to skip bytes in + * @param numToSkip the number of bytes to skip + * @return the number of bytes actually skipped + * @throws IOException on error + */ + public static long skip(final InputStream input, long numToSkip) throws IOException { + final long available = numToSkip; + while (numToSkip > 0) { + final long skipped = input.skip(numToSkip); + if (skipped == 0) { + break; + } + numToSkip -= skipped; + } + + while (numToSkip > 0) { + final int read = readFully(input, SKIP_BUF, 0, + (int) Math.min(numToSkip, SKIP_BUF_SIZE)); + if (read < 1) { + break; + } + numToSkip -= read; + } + return available - numToSkip; + } + + /** + * Reads as much from the file as possible to fill the given array. + * + *

This method may invoke read repeatedly to fill the array and + * only read less bytes than the length of the array if the end of + * the stream has been reached.

+ * + * @param file file to read + * @param array buffer to fill + * @return the number of bytes actually read + * @throws IOException on error + * @since 1.20 + */ + public static int read(final File file, final byte[] array) throws IOException { + try (InputStream inputStream = Files.newInputStream(file.toPath())) { + return readFully(inputStream, array, 0, array.length); + } + } + + /** + * Reads as much from input as possible to fill the given array. + * + *

This method may invoke read repeatedly to fill the array and + * only read less bytes than the length of the array if the end of + * the stream has been reached.

+ * + * @param input stream to read from + * @param array buffer to fill + * @return the number of bytes actually read + * @throws IOException on error + */ + public static int readFully(final InputStream input, final byte[] array) throws IOException { + return readFully(input, array, 0, array.length); + } + + /** + * Reads as much from input as possible to fill the given array + * with the given amount of bytes. + * + *

This method may invoke read repeatedly to read the bytes and + * only read less bytes than the requested length if the end of + * the stream has been reached.

+ * + * @param input stream to read from + * @param array buffer to fill + * @param offset offset into the buffer to start filling at + * @param len of bytes to read + * @return the number of bytes actually read + * @throws IOException + * if an I/O error has occurred + */ + public static int readFully(final InputStream input, final byte[] array, final int offset, final int len) + throws IOException { + if (len < 0 || offset < 0 || len + offset > array.length || len + offset < 0) { + throw new IndexOutOfBoundsException(); + } + int count = 0, x = 0; + while (count != len) { + x = input.read(array, offset + count, len - count); + if (x == -1) { + break; + } + count += x; + } + return count; + } + + /** + * Reads {@code b.remaining()} bytes from the given channel + * starting at the current channel's position. + * + *

This method reads repeatedly from the channel until the + * requested number of bytes are read. This method blocks until + * the requested number of bytes are read, the end of the channel + * is detected, or an exception is thrown.

+ * + * @param channel the channel to read from + * @param byteBuffer the buffer into which the data is read. + * @throws IOException - if an I/O error occurs. + * @throws EOFException - if the channel reaches the end before reading all the bytes. + */ + public static void readFully(final ReadableByteChannel channel, final ByteBuffer byteBuffer) throws IOException { + final int expectedLength = byteBuffer.remaining(); + int read = 0; + while (read < expectedLength) { + final int readNow = channel.read(byteBuffer); + if (readNow <= 0) { + break; + } + read += readNow; + } + if (read < expectedLength) { + throw new EOFException(); + } + } + + // toByteArray(InputStream) copied from: + // commons/proper/io/trunk/src/main/java/org/apache/commons/io/IOUtils.java?revision=1428941 + // January 8th, 2013 + // + // Assuming our copy() works just as well as theirs! :-) + + /** + * Gets the contents of an {@code InputStream} as a {@code byte[]}. + *

+ * This method buffers the input internally, so there is no need to use a + * {@code BufferedInputStream}. + * + * @param input the {@code InputStream} to read from + * @return the requested byte array + * @throws NullPointerException if the input is null + * @throws IOException if an I/O error occurs + * @since 1.5 + */ + public static byte[] toByteArray(final InputStream input) throws IOException { + final ByteArrayOutputStream output = new ByteArrayOutputStream(); + copy(input, output); + return output.toByteArray(); + } + + /** + * Closes the given Closeable and swallows any IOException that may occur. + * @param c Closeable to close, can be null + * @since 1.7 + */ + public static void closeQuietly(final Closeable c) { + if (c != null) { + try { + c.close(); + } catch (final IOException ignored) { // NOPMD NOSONAR + } + } + } + + /** + * Copies the source file to the given output stream. + * @param sourceFile The file to read. + * @param outputStream The output stream to write. + * @throws IOException if an I/O error occurs when reading or writing. + * @since 1.21 + */ + public static void copy(final File sourceFile, final OutputStream outputStream) throws IOException { + Files.copy(sourceFile.toPath(), outputStream); + } + + /** + * Copies part of the content of a InputStream into an OutputStream. + * Uses a default buffer size of 8024 bytes. 
+ * + * @param input + * the InputStream to copy + * @param output + * the target Stream + * @param len + * maximum amount of bytes to copy + * @return the number of bytes copied + * @throws IOException + * if an error occurs + * @since 1.21 + */ + public static long copyRange(final InputStream input, final long len, final OutputStream output) + throws IOException { + return copyRange(input, len, output, COPY_BUF_SIZE); + } + + /** + * Copies part of the content of a InputStream into an OutputStream + * + * @param input + * the InputStream to copy + * @param len + * maximum amount of bytes to copy + * @param output + * the target, may be null to simulate output to dev/null on Linux and NUL on Windows + * @param buffersize + * the buffer size to use, must be bigger than 0 + * @return the number of bytes copied + * @throws IOException + * if an error occurs + * @throws IllegalArgumentException + * if buffersize is smaller than or equal to 0 + * @since 1.21 + */ + public static long copyRange(final InputStream input, final long len, final OutputStream output, + final int buffersize) throws IOException { + if (buffersize < 1) { + throw new IllegalArgumentException("buffersize must be bigger than 0"); + } + final byte[] buffer = new byte[(int) Math.min(buffersize, len)]; + int n = 0; + long count = 0; + while (count < len && -1 != (n = input.read(buffer, 0, (int) Math.min(len - count, buffer.length)))) { + if (output != null) { + output.write(buffer, 0, n); + } + count += n; + } + return count; + } + + /** + * Gets part of the contents of an {@code InputStream} as a {@code byte[]}. 
+ * + * @param input the {@code InputStream} to read from + * @param len + * maximum amount of bytes to copy + * @return the requested byte array + * @throws NullPointerException if the input is null + * @throws IOException if an I/O error occurs + * @since 1.21 + */ + public static byte[] readRange(final InputStream input, final int len) throws IOException { + final ByteArrayOutputStream output = new ByteArrayOutputStream(); + copyRange(input, len, output); + return output.toByteArray(); + } + + /** + * Gets part of the contents of an {@code ReadableByteChannel} as a {@code byte[]}. + * + * @param input the {@code ReadableByteChannel} to read from + * @param len + * maximum amount of bytes to copy + * @return the requested byte array + * @throws NullPointerException if the input is null + * @throws IOException if an I/O error occurs + * @since 1.21 + */ + public static byte[] readRange(final ReadableByteChannel input, final int len) throws IOException { + final ByteArrayOutputStream output = new ByteArrayOutputStream(); + final ByteBuffer b = ByteBuffer.allocate(Math.min(len, COPY_BUF_SIZE)); + int read = 0; + while (read < len) { + // Make sure we never read more than len bytes + b.limit(Math.min(len - read, b.capacity())); + final int readNow = input.read(b); + if (readNow <= 0) { + break; + } + output.write(b.array(), 0, readNow); + b.rewind(); + read += readNow; + } + return output.toByteArray(); + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/InputStreamStatistics.java b/src/main/java/org/apache/commons/compress/utils/InputStreamStatistics.java new file mode 100644 index 0000000..569ab36 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/InputStreamStatistics.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.utils; + +/** + * This interface provides statistics on the current decompression stream. + * The stream consumer can use that statistics to handle abnormal + * compression ratios, i.e. to prevent zip bombs. + * + * @since 1.17 + */ +public interface InputStreamStatistics { + /** + * @return the amount of raw or compressed bytes read by the stream + */ + long getCompressedCount(); + + /** + * @return the amount of decompressed bytes returned by the stream + */ + long getUncompressedCount(); +} diff --git a/src/main/java/org/apache/commons/compress/utils/Iterators.java b/src/main/java/org/apache/commons/compress/utils/Iterators.java new file mode 100644 index 0000000..0db0c36 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/Iterators.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.utils; + +import java.util.Collection; +import java.util.Iterator; +import java.util.Objects; + +/** + * Iterator utilities. + * + * @since 1.13. + */ +public class Iterators { + + /** + * Adds all the elements in the source {@code iterator} to the target + * {@code collection}. + * + *

+ * When this method returns, the {@code iterator} will be "empty": its + * {@code hasNext()} method returns {@code false}. + *

+ * + * @param type of the elements contained inside the collection + * @param collection target collection + * @param iterator source + * @return {@code true} if the target {@code collection} was modified as a + * result of this operation + */ + public static boolean addAll(final Collection collection, final Iterator iterator) { + Objects.requireNonNull(collection); + Objects.requireNonNull(iterator); + boolean wasModified = false; + while (iterator.hasNext()) { + wasModified |= collection.add(iterator.next()); + } + return wasModified; + } + + private Iterators() { + // do not instantiate + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/Lists.java b/src/main/java/org/apache/commons/compress/utils/Lists.java new file mode 100644 index 0000000..e7a82dc --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/Lists.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.utils; + +import java.util.ArrayList; +import java.util.Iterator; + +/** + * List utilities + * + * @since 1.13 + */ +public class Lists { + + /** + * Creates a new {@link ArrayList}. 
+ * + * @param type of elements contained in new list + * @return a new {@link ArrayList} + */ + public static ArrayList newArrayList() { + return new ArrayList<>(); + } + + /** + * Creates a new {@link ArrayList} filled with the contents of the given + * {@code iterator}. + * + * @param iterator + * the source iterator + * @param type of elements contained in new list + * @return a new {@link ArrayList} + */ + public static ArrayList newArrayList(final Iterator iterator) { + final ArrayList list = newArrayList(); + Iterators.addAll(list, iterator); + return list; + } + + private Lists() { + // do not instantiate + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/MultiReadOnlySeekableByteChannel.java b/src/main/java/org/apache/commons/compress/utils/MultiReadOnlySeekableByteChannel.java new file mode 100644 index 0000000..4edcd40 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/MultiReadOnlySeekableByteChannel.java @@ -0,0 +1,271 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.commons.compress.utils; + +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.ClosedChannelException; +import java.nio.channels.NonWritableChannelException; +import java.nio.channels.SeekableByteChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +/** + * Read-Only Implementation of {@link SeekableByteChannel} that + * concatenates a collection of other {@link SeekableByteChannel}s. + * + *

This is a loose port of MultiReadOnlySeekableByteChannel + * by Tim Underwood.

+ * + * @since 1.19 + */ +public class MultiReadOnlySeekableByteChannel implements SeekableByteChannel { + + private static final Path[] EMPTY_PATH_ARRAY = {}; + private final List channels; + private long globalPosition; + private int currentChannelIdx; + + /** + * Concatenates the given channels. + * + * @param channels the channels to concatenate + * @throws NullPointerException if channels is null + */ + public MultiReadOnlySeekableByteChannel(final List channels) { + this.channels = Collections.unmodifiableList(new ArrayList<>( + Objects.requireNonNull(channels, "channels must not be null"))); + } + + @Override + public synchronized int read(final ByteBuffer dst) throws IOException { + if (!isOpen()) { + throw new ClosedChannelException(); + } + if (!dst.hasRemaining()) { + return 0; + } + + int totalBytesRead = 0; + while (dst.hasRemaining() && currentChannelIdx < channels.size()) { + final SeekableByteChannel currentChannel = channels.get(currentChannelIdx); + final int newBytesRead = currentChannel.read(dst); + if (newBytesRead == -1) { + // EOF for this channel -- advance to next channel idx + currentChannelIdx += 1; + continue; + } + if (currentChannel.position() >= currentChannel.size()) { + // we are at the end of the current channel + currentChannelIdx++; + } + totalBytesRead += newBytesRead; + } + if (totalBytesRead > 0) { + globalPosition += totalBytesRead; + return totalBytesRead; + } + return -1; + } + + @Override + public void close() throws IOException { + IOException first = null; + for (final SeekableByteChannel ch : channels) { + try { + ch.close(); + } catch (final IOException ex) { + if (first == null) { + first = ex; + } + } + } + if (first != null) { + throw new IOException("failed to close wrapped channel", first); + } + } + + @Override + public boolean isOpen() { + return channels.stream().allMatch(SeekableByteChannel::isOpen); + } + + /** + * Returns this channel's position. + * + *

This method violates the contract of {@link SeekableByteChannel#position()} as it will not throw any exception + * when invoked on a closed channel. Instead it will return the position the channel had when close has been + * called.

+ */ + @Override + public long position() { + return globalPosition; + } + + /** + * set the position based on the given channel number and relative offset + * + * @param channelNumber the channel number + * @param relativeOffset the relative offset in the corresponding channel + * @return global position of all channels as if they are a single channel + * @throws IOException if positioning fails + */ + public synchronized SeekableByteChannel position(final long channelNumber, final long relativeOffset) throws IOException { + if (!isOpen()) { + throw new ClosedChannelException(); + } + long globalPosition = relativeOffset; + for (int i = 0; i < channelNumber; i++) { + globalPosition += channels.get(i).size(); + } + + return position(globalPosition); + } + + @Override + public long size() throws IOException { + if (!isOpen()) { + throw new ClosedChannelException(); + } + long acc = 0; + for (final SeekableByteChannel ch : channels) { + acc += ch.size(); + } + return acc; + } + + /** + * @throws NonWritableChannelException since this implementation is read-only. + */ + @Override + public SeekableByteChannel truncate(final long size) { + throw new NonWritableChannelException(); + } + + /** + * @throws NonWritableChannelException since this implementation is read-only. 
+ */ + @Override + public int write(final ByteBuffer src) { + throw new NonWritableChannelException(); + } + + @Override + public synchronized SeekableByteChannel position(final long newPosition) throws IOException { + if (newPosition < 0) { + throw new IOException("Negative position: " + newPosition); + } + if (!isOpen()) { + throw new ClosedChannelException(); + } + + globalPosition = newPosition; + + long pos = newPosition; + + for (int i = 0; i < channels.size(); i++) { + final SeekableByteChannel currentChannel = channels.get(i); + final long size = currentChannel.size(); + + final long newChannelPos; + if (pos == -1L) { + // Position is already set for the correct channel, + // the rest of the channels get reset to 0 + newChannelPos = 0; + } else if (pos <= size) { + // This channel is where we want to be + currentChannelIdx = i; + final long tmp = pos; + pos = -1L; // Mark pos as already being set + newChannelPos = tmp; + } else { + // newPosition is past this channel. Set channel + // position to the end and subtract channel size from + // pos + pos -= size; + newChannelPos = size; + } + + currentChannel.position(newChannelPos); + } + return this; + } + + /** + * Concatenates the given channels. + * + * @param channels the channels to concatenate + * @throws NullPointerException if channels is null + * @return SeekableByteChannel that concatenates all provided channels + */ + public static SeekableByteChannel forSeekableByteChannels(final SeekableByteChannel... channels) { + if (Objects.requireNonNull(channels, "channels must not be null").length == 1) { + return channels[0]; + } + return new MultiReadOnlySeekableByteChannel(Arrays.asList(channels)); + } + + /** + * Concatenates the given files. 
+ * + * @param files the files to concatenate + * @throws NullPointerException if files is null + * @throws IOException if opening a channel for one of the files fails + * @return SeekableByteChannel that concatenates all provided files + */ + public static SeekableByteChannel forFiles(final File... files) throws IOException { + final List paths = new ArrayList<>(); + for (final File f : Objects.requireNonNull(files, "files must not be null")) { + paths.add(f.toPath()); + } + + return forPaths(paths.toArray(EMPTY_PATH_ARRAY)); + } + + /** + * Concatenates the given file paths. + * @param paths the file paths to concatenate, note that the LAST FILE of files should be the LAST SEGMENT(.zip) + * and these files should be added in correct order (e.g.: .z01, .z02... .z99, .zip) + * @return SeekableByteChannel that concatenates all provided files + * @throws NullPointerException if files is null + * @throws IOException if opening a channel for one of the files fails + * @throws IOException if the first channel doesn't seem to hold + * the beginning of a split archive + * @since 1.22 + */ + public static SeekableByteChannel forPaths(final Path... paths) throws IOException { + final List channels = new ArrayList<>(); + for (final Path path : Objects.requireNonNull(paths, "paths must not be null")) { + channels.add(Files.newByteChannel(path, StandardOpenOption.READ)); + } + if (channels.size() == 1) { + return channels.get(0); + } + return new MultiReadOnlySeekableByteChannel(channels); + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/OsgiUtils.java b/src/main/java/org/apache/commons/compress/utils/OsgiUtils.java new file mode 100644 index 0000000..3553c17 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/OsgiUtils.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.utils; + +/** + * Utilities for dealing with OSGi environments. + * + * @since 1.21 + */ +public class OsgiUtils { + + private static final boolean inOsgiEnvironment; + + static { + final Class classloaderClass = OsgiUtils.class.getClassLoader().getClass(); + inOsgiEnvironment = isBundleReference(classloaderClass); + } + + private static boolean isBundleReference(final Class clazz) { + Class c = clazz; + while (c != null) { + if (c.getName().equals("org.osgi.framework.BundleReference")) { + return true; + } + for (Class ifc : c.getInterfaces()) { + if (isBundleReference(ifc)) { + return true; + } + } + c = c.getSuperclass(); + } + return false; + } + + /** + * Tests if Commons Compress running as an OSGi bundle? + * @return true if Commons Compress running as an OSGi bundle. 
+ */ + public static boolean isRunningInOsgiEnvironment() { + return inOsgiEnvironment; + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/SeekableInMemoryByteChannel.java b/src/main/java/org/apache/commons/compress/utils/SeekableInMemoryByteChannel.java new file mode 100644 index 0000000..749e50c --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/SeekableInMemoryByteChannel.java @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.commons.compress.utils; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.ClosedChannelException; +import java.nio.channels.SeekableByteChannel; +import java.util.Arrays; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * A {@link SeekableByteChannel} implementation that wraps a byte[]. + * + *

When this channel is used for writing, an internal buffer grows to accommodate incoming data. The natural size + * limit is the value of {@link Integer#MAX_VALUE} and it is not possible to {@link #position(long) set the position} or + * {@link #truncate truncate} to a value bigger than that. The internal buffer can be accessed via {@link + * SeekableInMemoryByteChannel#array()}.

+ * + * @since 1.13 + * @NotThreadSafe + */ +public class SeekableInMemoryByteChannel implements SeekableByteChannel { + + private static final int NAIVE_RESIZE_LIMIT = Integer.MAX_VALUE >> 1; + + private byte[] data; + private final AtomicBoolean closed = new AtomicBoolean(); + private int position, size; + + /** + * Constructor taking a byte array. + * + *

This constructor is intended to be used with a pre-allocated buffer or when + * reading from a given byte array.

+ * + * @param data input data or pre-allocated array. + */ + public SeekableInMemoryByteChannel(final byte[] data) { + this.data = data; + size = data.length; + } + + /** + * Parameterless constructor - allocates internal buffer by itself. + */ + public SeekableInMemoryByteChannel() { + this(ByteUtils.EMPTY_BYTE_ARRAY); + } + + /** + * Constructor taking a size of storage to be allocated. + * + *

Creates a channel and allocates internal storage of a given size.

+ * + * @param size size of internal buffer to allocate, in bytes. + */ + public SeekableInMemoryByteChannel(final int size) { + this(new byte[size]); + } + + /** + * Returns this channel's position. + * + *

This method violates the contract of {@link SeekableByteChannel#position()} as it will not throw any exception + * when invoked on a closed channel. Instead it will return the position the channel had when close has been + * called.

+ */ + @Override + public long position() { + return position; + } + + @Override + public SeekableByteChannel position(final long newPosition) throws IOException { + ensureOpen(); + if (newPosition < 0L || newPosition > Integer.MAX_VALUE) { + throw new IOException("Position has to be in range 0.. " + Integer.MAX_VALUE); + } + position = (int) newPosition; + return this; + } + + /** + * Returns the current size of entity to which this channel is connected. + * + *

This method violates the contract of {@link SeekableByteChannel#size} as it will not throw any exception when + * invoked on a closed channel. Instead it will return the size the channel had when close has been called.

+ */ + @Override + public long size() { + return size; + } + + /** + * Truncates the entity, to which this channel is connected, to the given size. + * + *

This method violates the contract of {@link SeekableByteChannel#truncate} as it will not throw any exception when + * invoked on a closed channel.

+ * + * @throws IllegalArgumentException if size is negative or bigger than the maximum of a Java integer + */ + @Override + public SeekableByteChannel truncate(final long newSize) { + if (newSize < 0L || newSize > Integer.MAX_VALUE) { + throw new IllegalArgumentException("Size has to be in range 0.. " + Integer.MAX_VALUE); + } + if (size > newSize) { + size = (int) newSize; + } + if (position > newSize) { + position = (int) newSize; + } + return this; + } + + @Override + public int read(final ByteBuffer buf) throws IOException { + ensureOpen(); + int wanted = buf.remaining(); + final int possible = size - position; + if (possible <= 0) { + return -1; + } + if (wanted > possible) { + wanted = possible; + } + buf.put(data, position, wanted); + position += wanted; + return wanted; + } + + @Override + public void close() { + closed.set(true); + } + + @Override + public boolean isOpen() { + return !closed.get(); + } + + @Override + public int write(final ByteBuffer b) throws IOException { + ensureOpen(); + int wanted = b.remaining(); + final int possibleWithoutResize = size - position; + if (wanted > possibleWithoutResize) { + final int newSize = position + wanted; + if (newSize < 0) { // overflow + resize(Integer.MAX_VALUE); + wanted = Integer.MAX_VALUE - position; + } else { + resize(newSize); + } + } + b.get(data, position, wanted); + position += wanted; + if (size < position) { + size = position; + } + return wanted; + } + + /** + * Obtains the array backing this channel. + * + *

NOTE: + * The returned buffer may be larger than the data it contains; use + * {@link #size()} to obtain the size of the data stored in the buffer.

+ * + * @return internal byte array. + */ + public byte[] array() { + return data; + } + + private void resize(final int newLength) { + int len = data.length; + if (len <= 0) { + len = 1; + } + if (newLength < NAIVE_RESIZE_LIMIT) { + while (len < newLength) { + len <<= 1; + } + } else { // avoid overflow + len = newLength; + } + data = Arrays.copyOf(data, len); + } + + private void ensureOpen() throws ClosedChannelException { + if (!isOpen()) { + throw new ClosedChannelException(); + } + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/ServiceLoaderIterator.java b/src/main/java/org/apache/commons/compress/utils/ServiceLoaderIterator.java new file mode 100644 index 0000000..08856bb --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/ServiceLoaderIterator.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.utils; + +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.ServiceConfigurationError; +import java.util.ServiceLoader; + +/** + * Iterates all services for a given class through the standard + * {@link ServiceLoader} mechanism. 
+ * + * @param + * The service to load + * @since 1.13 + * @deprecated No longer needed. + */ +@Deprecated +public class ServiceLoaderIterator implements Iterator { + + private E nextServiceLoader; + private final Class service; + private final Iterator serviceLoaderIterator; + + public ServiceLoaderIterator(final Class service) { + this(service, ClassLoader.getSystemClassLoader()); + } + + public ServiceLoaderIterator(final Class service, final ClassLoader classLoader) { + this.service = service; + this.serviceLoaderIterator = ServiceLoader.load(service, classLoader).iterator(); + } + + @Override + public boolean hasNext() { + while (nextServiceLoader == null) { + try { + if (!serviceLoaderIterator.hasNext()) { + return false; + } + nextServiceLoader = serviceLoaderIterator.next(); + } catch (final ServiceConfigurationError e) { + if (e.getCause() instanceof SecurityException) { + // Ignore security exceptions + // TODO Log? + continue; + } + throw e; + } + } + return true; + } + + @Override + public E next() { + if (!hasNext()) { + throw new NoSuchElementException("No more elements for service " + service.getName()); + } + final E tempNext = nextServiceLoader; + nextServiceLoader = null; + return tempNext; + } + + @Override + public void remove() { + throw new UnsupportedOperationException("service=" + service.getName()); + } + +} diff --git a/src/main/java/org/apache/commons/compress/utils/Sets.java b/src/main/java/org/apache/commons/compress/utils/Sets.java new file mode 100644 index 0000000..329b6b8 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/Sets.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.commons.compress.utils; + +import java.util.Collections; +import java.util.HashSet; + +/** + * Set utilities + * + * @since 1.13 + */ +public class Sets { + + private Sets() { + // Do not instantiate + } + + /** + * Creates a new HashSet filled with the given elements + * + * @param elements + * the elements to fill the new set + * @param type of elements contained in new set + * @return A new HasSet + */ + @SafeVarargs + public static HashSet newHashSet(final E... elements) { + final HashSet set = new HashSet<>(elements.length); + Collections.addAll(set, elements); + return set; + } +} diff --git a/src/main/java/org/apache/commons/compress/utils/SkipShieldingInputStream.java b/src/main/java/org/apache/commons/compress/utils/SkipShieldingInputStream.java new file mode 100644 index 0000000..edb5289 --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/SkipShieldingInputStream.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.commons.compress.utils; + +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; + +/** + * A wrapper that overwrites {@link #skip} and delegates to {@link #read} instead. + * + *

Some implementations of {@link InputStream} implement {@link + * InputStream#skip} in a way that throws an exception if the stream + * is not seekable - {@link System#in System.in} is known to behave + * that way. For such a stream it is impossible to invoke skip at all + * and you have to read from the stream (and discard the data read) + * instead. Skipping is potentially much faster than reading so we do + * want to invoke {@code skip} when possible. We provide this class so + * you can wrap your own {@link InputStream} in it if you encounter + * problems with {@code skip} throwing an exception.

+ * + * @since 1.17 + */ +public class SkipShieldingInputStream extends FilterInputStream { + private static final int SKIP_BUFFER_SIZE = 8192; + // we can use a shared buffer as the content is discarded anyway + private static final byte[] SKIP_BUFFER = new byte[SKIP_BUFFER_SIZE]; + public SkipShieldingInputStream(final InputStream in) { + super(in); + } + + @Override + public long skip(final long n) throws IOException { + return n < 0 ? 0 : read(SKIP_BUFFER, 0, (int) Math.min(n, SKIP_BUFFER_SIZE)); + } +} diff --git a/src/main/java/org/apache/commons/compress/utils/package.html b/src/main/java/org/apache/commons/compress/utils/package.html new file mode 100644 index 0000000..985999d --- /dev/null +++ b/src/main/java/org/apache/commons/compress/utils/package.html @@ -0,0 +1,27 @@ + + + + + Utility package + + +

Contains utilities used internally by the compress library.

+ + diff --git a/src/test/java/nc/opt/uil/j7zip/Tests.java b/src/test/java/nc/opt/uil/j7zip/Tests.java index 1b3c9ed..7cbd2be 100644 --- a/src/test/java/nc/opt/uil/j7zip/Tests.java +++ b/src/test/java/nc/opt/uil/j7zip/Tests.java @@ -2,6 +2,7 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.ByteArrayOutputStream; @@ -60,14 +61,13 @@ public void testCompressDecompress() throws IOException { exitCode = new CommandLine(new J7zip()).execute(new String[] { "e", "target/archive.7z", "target" }); restoreStreams(); + assertEquals(0, exitCode); err = new String(this.err.toByteArray()); if (!err.isEmpty()) { System.err.println(err); throw new AssertionFailedError(err); } - assertEquals(0, exitCode); - // check (compare decompressed file with initial) assertTrue(Paths.get("target/poem.txt").toFile().exists()); assertArrayEquals(Files.readAllBytes(Paths.get("src/test/resources/poem.txt")), Files.readAllBytes(Paths.get("target/poem.txt"))); @@ -76,6 +76,9 @@ public void testCompressDecompress() throws IOException { Paths.get("target/archive.7z").toFile().delete(); } + /** + * Test with a archive file compressed by 7z tool + */ @Test public void testDecompressWithPassword() throws IOException { // compress @@ -83,14 +86,13 @@ public void testDecompressWithPassword() throws IOException { .execute(new String[] { "x", "-p", "poem", "src/test/resources/poem-with-password.7z", "target" }); restoreStreams(); + assertEquals(0, exitCode); String err = new String(this.err.toByteArray()); if (!err.isEmpty()) { System.err.println(err); throw new AssertionFailedError(err); } - assertEquals(0, exitCode); - // check (compare decompressed file with initial) assertTrue(Paths.get("target/src/test/resources/poem.txt").toFile().exists()); assertArrayEquals( @@ -100,4 +102,49 @@ 
public void testDecompressWithPassword() throws IOException { Paths.get("target/src/test/resources/poem.txt").toFile().delete(); } + + @Test + public void testCompressDecompressWithPassowrd() throws IOException { + // compress + int exitCode = new CommandLine(new J7zip()) + .execute(new String[] { "a", "-p", "frog", "target/archive.7z", "src/test/resources/poem.txt" }); + restoreStreams(); + + assertEquals(0, exitCode); + String err = new String(this.err.toByteArray()); + if (!err.isEmpty()) { + System.err.println(err); + throw new AssertionFailedError(err); + } + + // checks (compare file size original > compressed) + assertTrue(Paths.get("target/archive.7z").toFile().exists()); + assertTrue(Paths.get("target/archive.7z").toFile().length() < Paths.get("src/test/resources/poem.txt").toFile().length()); + + // decompress without password : should fail + setUpStreams(); + exitCode = new CommandLine(new J7zip()).execute(new String[] { "e", "target/archive.7z", "target" }); + restoreStreams(); + + assertNotEquals(0, exitCode); + + // decompress + setUpStreams(); + exitCode = new CommandLine(new J7zip()).execute(new String[] { "e", "-p", "frog", "target/archive.7z", "target" }); + restoreStreams(); + + assertEquals(0, exitCode); + err = new String(this.err.toByteArray()); + if (!err.isEmpty()) { + System.err.println(err); + throw new AssertionFailedError(err); + } + + // check (compare decompressed file with initial) + assertTrue(Paths.get("target/poem.txt").toFile().exists()); + assertArrayEquals(Files.readAllBytes(Paths.get("src/test/resources/poem.txt")), Files.readAllBytes(Paths.get("target/poem.txt"))); + + Paths.get("target/poem.txt").toFile().delete(); + // Paths.get("target/archive.7z").toFile().delete(); + } }