From d9fb8a46f7735f17e8de5f5300006c0a144a9ecc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?nils=20m=C3=A5s=C3=A9n?= Date: Tue, 27 Apr 2021 21:23:24 +0200 Subject: [PATCH] PR 351: Add support for Filename field in GZip --- .../GZip/GZipConstants.cs | 73 +++++++---- .../GZip/GzipInputStream.cs | 117 +++++++----------- .../GZip/GzipOutputStream.cs | 47 ++++++- .../Streams/InflaterInputStream.cs | 2 +- .../GZip/GZipTests.cs | 37 ++++++ 5 files changed, 171 insertions(+), 105 deletions(-) diff --git a/src/ICSharpCode.SharpZipLib/GZip/GZipConstants.cs b/src/ICSharpCode.SharpZipLib/GZip/GZipConstants.cs index 6930f113d..a59799278 100644 --- a/src/ICSharpCode.SharpZipLib/GZip/GZipConstants.cs +++ b/src/ICSharpCode.SharpZipLib/GZip/GZipConstants.cs @@ -1,3 +1,6 @@ +using System; +using System.Text; + namespace ICSharpCode.SharpZipLib.GZip { /// @@ -7,53 +10,69 @@ namespace ICSharpCode.SharpZipLib.GZip sealed public class GZipConstants { /// - /// Magic number found at start of GZIP header + /// First GZip identification byte /// - public const int GZIP_MAGIC = 0x1F8B; + public const byte ID1 = 0x1F; - /* The flag byte is divided into individual bits as follows: + /// + /// Second GZip identification byte + /// + public const byte ID2 = 0x8B; - bit 0 FTEXT - bit 1 FHCRC - bit 2 FEXTRA - bit 3 FNAME - bit 4 FCOMMENT - bit 5 reserved - bit 6 reserved - bit 7 reserved - */ + /// + /// Deflate compression method + /// + public const byte CompressionMethodDeflate = 0x8; /// - /// Flag bit mask for text + /// Get the GZip specified encoding (CP-1252 if supported, otherwise ASCII) /// - public const int FTEXT = 0x1; + public static Encoding Encoding + { + get + { + try + { + return Encoding.GetEncoding(1252); + } + catch + { + return Encoding.ASCII; + } + } + } + } + + /// + /// GZip header flags + /// + [Flags] + public enum GZipFlags: byte + { /// - /// Flag bitmask for Crc + /// Text flag hinting that the file is in ASCII /// - public const int FHCRC = 0x2; + FTEXT = 0x1 << 0, /// - 
/// Flag bit mask for extra + /// CRC flag indicating that a CRC16 precedes the data /// - public const int FEXTRA = 0x4; + FHCRC = 0x1 << 1, /// - /// flag bitmask for name + /// Extra flag indicating that extra fields are present /// - public const int FNAME = 0x8; + FEXTRA = 0x1 << 2, /// - /// flag bit mask indicating comment is present + /// Filename flag indicating that the original filename is present /// - public const int FCOMMENT = 0x10; + FNAME = 0x1 << 3, /// - /// Initialise default instance. + /// Flag bit mask indicating that a comment is present /// - /// Constructor is private to prevent instances being created. - private GZipConstants() - { - } + FCOMMENT = 0x1 << 4, } } diff --git a/src/ICSharpCode.SharpZipLib/GZip/GzipInputStream.cs b/src/ICSharpCode.SharpZipLib/GZip/GzipInputStream.cs index a924a7ffc..20a4ded17 100644 --- a/src/ICSharpCode.SharpZipLib/GZip/GzipInputStream.cs +++ b/src/ICSharpCode.SharpZipLib/GZip/GzipInputStream.cs @@ -3,6 +3,7 @@ using ICSharpCode.SharpZipLib.Zip.Compression.Streams; using System; using System.IO; +using System.Text; namespace ICSharpCode.SharpZipLib.GZip { @@ -54,6 +55,8 @@ public class GZipInputStream : InflaterInputStream /// private bool completedLastBlock; + private string fileName; + #endregion Instance Fields #region Constructors @@ -149,6 +152,15 @@ public override int Read(byte[] buffer, int offset, int count) } } + /// + /// Retrieves the filename header field for the block last read + /// + /// + public string GetFilename() + { + return fileName; + } + #endregion Stream overrides #region Support routines @@ -170,132 +182,96 @@ private bool ReadHeader() } } - // 1. Check the two magic bytes var headCRC = new Crc32(); - int magic = inputBuffer.ReadLeByte(); - if (magic < 0) - { - throw new EndOfStreamException("EOS reading GZIP header"); - } + // 1. 
Check the two magic bytes + var magic = inputBuffer.ReadLeByte(); headCRC.Update(magic); - if (magic != (GZipConstants.GZIP_MAGIC >> 8)) + if (magic != GZipConstants.ID1) { throw new GZipException("Error GZIP header, first magic byte doesn't match"); } - //magic = baseInputStream.ReadByte(); magic = inputBuffer.ReadLeByte(); - - if (magic < 0) - { - throw new EndOfStreamException("EOS reading GZIP header"); - } - - if (magic != (GZipConstants.GZIP_MAGIC & 0xFF)) + if (magic != GZipConstants.ID2) { throw new GZipException("Error GZIP header, second magic byte doesn't match"); } - headCRC.Update(magic); // 2. Check the compression type (must be 8) - int compressionType = inputBuffer.ReadLeByte(); - - if (compressionType < 0) - { - throw new EndOfStreamException("EOS reading GZIP header"); - } + var compressionType = inputBuffer.ReadLeByte(); - if (compressionType != 8) + if (compressionType != GZipConstants.CompressionMethodDeflate) { throw new GZipException("Error GZIP header, data not in deflate format"); } headCRC.Update(compressionType); // 3. Check the flags - int flags = inputBuffer.ReadLeByte(); - if (flags < 0) - { - throw new EndOfStreamException("EOS reading GZIP header"); - } - headCRC.Update(flags); - - /* This flag byte is divided into individual bits as follows: + var flagsByte = inputBuffer.ReadLeByte(); - bit 0 FTEXT - bit 1 FHCRC - bit 2 FEXTRA - bit 3 FNAME - bit 4 FCOMMENT - bit 5 reserved - bit 6 reserved - bit 7 reserved - */ + headCRC.Update(flagsByte); // 3.1 Check the reserved bits are zero - if ((flags & 0xE0) != 0) + if ((flagsByte & 0xE0) != 0) { throw new GZipException("Reserved flag bits in GZIP header != 0"); } + var flags = (GZipFlags)flagsByte; + // 4.-6. 
Skip the modification time, extra flags, and OS type for (int i = 0; i < 6; i++) { - int readByte = inputBuffer.ReadLeByte(); - if (readByte < 0) - { - throw new EndOfStreamException("EOS reading GZIP header"); - } - headCRC.Update(readByte); + headCRC.Update(inputBuffer.ReadLeByte()); } // 7. Read extra field - if ((flags & GZipConstants.FEXTRA) != 0) + if (flags.HasFlag(GZipFlags.FEXTRA)) { // XLEN is total length of extra subfields, we will skip them all - int len1, len2; - len1 = inputBuffer.ReadLeByte(); - len2 = inputBuffer.ReadLeByte(); - if ((len1 < 0) || (len2 < 0)) - { - throw new EndOfStreamException("EOS reading GZIP header"); - } + var len1 = inputBuffer.ReadLeByte(); + var len2 = inputBuffer.ReadLeByte(); + headCRC.Update(len1); headCRC.Update(len2); int extraLen = (len2 << 8) | len1; // gzip is LSB first for (int i = 0; i < extraLen; i++) { - int readByte = inputBuffer.ReadLeByte(); - if (readByte < 0) - { - throw new EndOfStreamException("EOS reading GZIP header"); - } - headCRC.Update(readByte); + headCRC.Update(inputBuffer.ReadLeByte()); } } // 8. Read file name - if ((flags & GZipConstants.FNAME) != 0) + if (flags.HasFlag(GZipFlags.FNAME)) { + var fname = new byte[1024]; + var fnamePos = 0; int readByte; while ((readByte = inputBuffer.ReadLeByte()) > 0) { + if (fnamePos < 1024) + { + fname[fnamePos++] = (byte)readByte; + } headCRC.Update(readByte); } - if (readByte < 0) - { - throw new EndOfStreamException("EOS reading GZIP header"); - } headCRC.Update(readByte); + + fileName = GZipConstants.Encoding.GetString(fname, 0, fnamePos); + } + else + { + fileName = null; } // 9. Read comment - if ((flags & GZipConstants.FCOMMENT) != 0) + if (flags.HasFlag(GZipFlags.FCOMMENT)) { int readByte; while ((readByte = inputBuffer.ReadLeByte()) > 0) @@ -303,16 +279,11 @@ bit 7 reserved headCRC.Update(readByte); } - if (readByte < 0) - { - throw new EndOfStreamException("EOS reading GZIP header"); - } - headCRC.Update(readByte); } // 10. 
Read header CRC - if ((flags & GZipConstants.FHCRC) != 0) + if (flags.HasFlag(GZipFlags.FHCRC)) { int tempByte; int crcval = inputBuffer.ReadLeByte(); diff --git a/src/ICSharpCode.SharpZipLib/GZip/GzipOutputStream.cs b/src/ICSharpCode.SharpZipLib/GZip/GzipOutputStream.cs index afa43d7fd..31985f93b 100644 --- a/src/ICSharpCode.SharpZipLib/GZip/GzipOutputStream.cs +++ b/src/ICSharpCode.SharpZipLib/GZip/GzipOutputStream.cs @@ -3,6 +3,7 @@ using ICSharpCode.SharpZipLib.Zip.Compression.Streams; using System; using System.IO; +using System.Text; namespace ICSharpCode.SharpZipLib.GZip { @@ -53,6 +54,10 @@ private enum OutputState private OutputState state_ = OutputState.Header; + private string fileName; + + private GZipFlags flags = 0; + #endregion Instance Fields #region Constructors @@ -111,6 +116,26 @@ public int GetLevel() return deflater_.GetLevel(); } + /// + /// Original filename + /// + public string FileName + { + get => fileName; + set + { + fileName = CleanFilename(value); + if (string.IsNullOrEmpty(fileName)) + { + flags &= ~GZipFlags.FNAME; + } + else + { + flags |= GZipFlags.FNAME; + } + } + } + #endregion Public API #region Stream overrides @@ -218,6 +243,9 @@ public override void Finish() #region Support Routines + private string CleanFilename(string path) + => path.Substring(path.LastIndexOf('/') + 1); + private void WriteHeader() { if (state_ == OutputState.Header) @@ -227,13 +255,14 @@ private void WriteHeader() var mod_time = (int)((DateTime.Now.Ticks - new DateTime(1970, 1, 1).Ticks) / 10000000L); // Ticks give back 100ns intervals byte[] gzipHeader = { // The two magic bytes - (byte) (GZipConstants.GZIP_MAGIC >> 8), (byte) (GZipConstants.GZIP_MAGIC & 0xff), + GZipConstants.ID1, + GZipConstants.ID2, // The compression type - (byte) Deflater.DEFLATED, + GZipConstants.CompressionMethodDeflate, // The flags (not set) - 0, + (byte)flags, // The modification time (byte) mod_time, (byte) (mod_time >> 8), @@ -243,9 +272,19 @@ private void WriteHeader() 0, 
// The OS type (unknown) - (byte) 255 + 255 }; + baseOutputStream_.Write(gzipHeader, 0, gzipHeader.Length); + + if (flags.HasFlag(GZipFlags.FNAME)) + { + var fname = GZipConstants.Encoding.GetBytes(fileName); + baseOutputStream_.Write(fname, 0, fname.Length); + + // End filename string with a \0 + baseOutputStream_.Write(new byte[] { 0 }, 0, 1); + } } } diff --git a/src/ICSharpCode.SharpZipLib/Zip/Compression/Streams/InflaterInputStream.cs b/src/ICSharpCode.SharpZipLib/Zip/Compression/Streams/InflaterInputStream.cs index 3fb257906..7790474d2 100644 --- a/src/ICSharpCode.SharpZipLib/Zip/Compression/Streams/InflaterInputStream.cs +++ b/src/ICSharpCode.SharpZipLib/Zip/Compression/Streams/InflaterInputStream.cs @@ -226,7 +226,7 @@ public int ReadClearTextBuffer(byte[] outBuffer, int offset, int length) /// Read a byte from the input stream. /// /// Returns the byte read. - public int ReadLeByte() + public byte ReadLeByte() { if (available <= 0) { diff --git a/test/ICSharpCode.SharpZipLib.Tests/GZip/GZipTests.cs b/test/ICSharpCode.SharpZipLib.Tests/GZip/GZipTests.cs index 5846b0d5b..8a9f61d69 100644 --- a/test/ICSharpCode.SharpZipLib.Tests/GZip/GZipTests.cs +++ b/test/ICSharpCode.SharpZipLib.Tests/GZip/GZipTests.cs @@ -3,6 +3,7 @@ using NUnit.Framework; using System; using System.IO; +using System.Text; namespace ICSharpCode.SharpZipLib.Tests.GZip { @@ -514,5 +515,41 @@ public void ReadWriteThroughput() output: w => new GZipOutputStream(w) ); } + + /// + /// Verifies that the original filename set on a GZipOutputStream is written without its directory path and read back by GZipInputStream + /// + [Test] + [Category("GZip")] + public void OriginalFilename() + { + var content = "FileContents"; + + + using (var ms = new MemoryStream()) + { + using (var outStream = new GZipOutputStream(ms) { IsStreamOwner = false }) + { + outStream.FileName = "/path/to/file.ext"; + + var writeBuffer = Encoding.ASCII.GetBytes(content); + outStream.Write(writeBuffer, 0, writeBuffer.Length); + outStream.Flush(); + outStream.Finish(); + } + + ms.Seek(0, SeekOrigin.Begin); + + using (var inStream = 
new GZipInputStream(ms)) + { + var readBuffer = new byte[content.Length]; + inStream.Read(readBuffer, 0, readBuffer.Length); + Assert.AreEqual(content, Encoding.ASCII.GetString(readBuffer)); + Assert.AreEqual("file.ext", inStream.GetFilename()); + } + + } + + } } }