diff --git a/build.zig b/build.zig index faf14cc40522..f75efeb8b490 100644 --- a/build.zig +++ b/build.zig @@ -113,8 +113,11 @@ pub fn build(b: *std.Build) !void { ".gz", ".z.0", ".z.9", + ".zstd.3", + ".zstd.19", "rfc1951.txt", "rfc1952.txt", + "rfc8478.txt", // exclude files from lib/std/compress/deflate/testdata ".expect", ".expect-noinput", diff --git a/lib/std/RingBuffer.zig b/lib/std/RingBuffer.zig new file mode 100644 index 000000000000..857775b5a0a6 --- /dev/null +++ b/lib/std/RingBuffer.zig @@ -0,0 +1,136 @@ +//! This ring buffer stores read and write indices while being able to utilise +//! the full backing slice by incrementing the indices modulo twice the slice's +//! length and reducing indices modulo the slice's length on slice access. This +//! means that whether the ring buffer is full or empty can be distinguished by +//! looking at the difference between the read and write indices without adding +//! an extra boolean flag or having to reserve a slot in the buffer. +//! +//! This ring buffer has not been implemented with thread safety in mind, and +//! therefore should not be assumed to be suitable for use cases involving +//! separate reader and writer threads. + +const Allocator = @import("std").mem.Allocator; +const assert = @import("std").debug.assert; + +const RingBuffer = @This(); + +data: []u8, +read_index: usize, +write_index: usize, + +pub const Error = error{Full}; + +/// Allocate a new `RingBuffer` backed by `capacity` bytes; `deinit()` should be called to free the buffer. +pub fn init(allocator: Allocator, capacity: usize) Allocator.Error!RingBuffer { + const bytes = try allocator.alloc(u8, capacity); + return RingBuffer{ + .data = bytes, + .write_index = 0, + .read_index = 0, + }; +} + +/// Free the data backing a `RingBuffer`; must be passed the same `Allocator` as +/// `init()`. Sets `self.*` to `undefined`, so it must not be used afterwards. +pub fn deinit(self: *RingBuffer, allocator: Allocator) void { + allocator.free(self.data); + self.* = undefined; +} + +/// Returns `index` modulo the length of the backing slice. 
+pub fn mask(self: RingBuffer, index: usize) usize { + return index % self.data.len; +} + +/// Returns `index` modulo twice the length of the backing slice; used to wrap `read_index` and `write_index` after they are incremented. +pub fn mask2(self: RingBuffer, index: usize) usize { + return index % (2 * self.data.len); +} + +/// Write `byte` into the ring buffer. Returns `error.Full` if the ring +/// buffer is full. +pub fn write(self: *RingBuffer, byte: u8) Error!void { + if (self.isFull()) return error.Full; + self.writeAssumeCapacity(byte); +} + +/// Write `byte` into the ring buffer. If the ring buffer is full, the +/// oldest byte is overwritten; `read_index` is not advanced in that case. +pub fn writeAssumeCapacity(self: *RingBuffer, byte: u8) void { + self.data[self.mask(self.write_index)] = byte; + self.write_index = self.mask2(self.write_index + 1); +} + +/// Write `bytes` into the ring buffer. Returns `error.Full` if the ring +/// buffer does not have enough space, without writing any data. +pub fn writeSlice(self: *RingBuffer, bytes: []const u8) Error!void { + if (self.len() + bytes.len > self.data.len) return error.Full; + self.writeSliceAssumeCapacity(bytes); +} + +/// Write `bytes` into the ring buffer. If there is not enough space, older +/// bytes will be overwritten; `read_index` is not advanced in that case. +pub fn writeSliceAssumeCapacity(self: *RingBuffer, bytes: []const u8) void { + for (bytes) |b| self.writeAssumeCapacity(b); +} + +/// Consume a byte from the ring buffer and return it. Returns `null` if the +/// ring buffer is empty. +pub fn read(self: *RingBuffer) ?u8 { + if (self.isEmpty()) return null; + return self.readAssumeLength(); +} + +/// Consume a byte from the ring buffer and return it; asserts that the buffer +/// is not empty. +pub fn readAssumeLength(self: *RingBuffer) u8 { + assert(!self.isEmpty()); + const byte = self.data[self.mask(self.read_index)]; + self.read_index = self.mask2(self.read_index + 1); + return byte; +} + +/// Returns `true` if the ring buffer is empty and `false` otherwise. 
+pub fn isEmpty(self: RingBuffer) bool { + return self.write_index == self.read_index; +} + +/// Returns `true` if the ring buffer is full and `false` otherwise; full means `write_index` is one backing-slice length ahead of `read_index`, modulo twice that length. +pub fn isFull(self: RingBuffer) bool { + return self.mask2(self.write_index + self.data.len) == self.read_index; +} + +/// Returns the number of bytes written but not yet read, i.e. the distance from `read_index` to `write_index` modulo twice the backing slice's length. +pub fn len(self: RingBuffer) usize { + const wrap_offset = 2 * self.data.len * @boolToInt(self.write_index < self.read_index); + const adjusted_write_index = self.write_index + wrap_offset; + return adjusted_write_index - self.read_index; +} + +/// A `Slice` represents a region of a ring buffer. The region is split into two +/// sections as the ring buffer data will not be contiguous if the desired +/// region wraps to the start of the backing slice. +pub const Slice = struct { + first: []u8, + second: []u8, +}; + +/// Returns a `Slice` for the region of the ring buffer starting at +/// `self.mask(start_unmasked)` with the specified length. Asserts that `length` does not exceed the length of the backing slice. +pub fn sliceAt(self: RingBuffer, start_unmasked: usize, length: usize) Slice { + assert(length <= self.data.len); + const slice1_start = self.mask(start_unmasked); + const slice1_end = @min(self.data.len, slice1_start + length); + const slice1 = self.data[slice1_start..slice1_end]; + const slice2 = self.data[0 .. length - slice1.len]; + return Slice{ + .first = slice1, + .second = slice2, + }; +} + +/// Returns a `Slice` for the last `length` bytes written to the ring buffer. +/// Does not check that any bytes have been written into the region. 
+pub fn sliceLast(self: RingBuffer, length: usize) Slice { + return self.sliceAt(self.write_index + self.data.len - length, length); +} diff --git a/lib/std/compress.zig b/lib/std/compress.zig index 9af1b30259a2..7e81d9deba26 100644 --- a/lib/std/compress.zig +++ b/lib/std/compress.zig @@ -6,6 +6,7 @@ pub const lzma = @import("compress/lzma.zig"); pub const lzma2 = @import("compress/lzma2.zig"); pub const xz = @import("compress/xz.zig"); pub const zlib = @import("compress/zlib.zig"); +pub const zstd = @import("compress/zstandard.zig"); pub fn HashedReader( comptime ReaderType: anytype, @@ -44,4 +45,5 @@ test { _ = lzma2; _ = xz; _ = zlib; + _ = zstd; } diff --git a/lib/std/compress/testdata/rfc8478.txt b/lib/std/compress/testdata/rfc8478.txt new file mode 100644 index 000000000000..e4ac22a302bd --- /dev/null +++ b/lib/std/compress/testdata/rfc8478.txt @@ -0,0 +1,3027 @@ + + + + + + +Internet Engineering Task Force (IETF) Y. Collet +Request for Comments: 8478 M. Kucherawy, Ed. +Category: Informational Facebook +ISSN: 2070-1721 October 2018 + + + Zstandard Compression and the application/zstd Media Type + +Abstract + + Zstandard, or "zstd" (pronounced "zee standard"), is a data + compression mechanism. This document describes the mechanism and + registers a media type and content encoding to be used when + transporting zstd-compressed content via Multipurpose Internet Mail + Extensions (MIME). + + Despite use of the word "standard" as part of its name, readers are + advised that this document is not an Internet Standards Track + specification; it is being published for informational purposes only. + +Status of This Memo + + This document is not an Internet Standards Track specification; it is + published for informational purposes. + + This document is a product of the Internet Engineering Task Force + (IETF). It represents the consensus of the IETF community. 
It has + received public review and has been approved for publication by the + Internet Engineering Steering Group (IESG). Not all documents + approved by the IESG are candidates for any level of Internet + Standard; see Section 2 of RFC 7841. + + Information about the current status of this document, any errata, + and how to provide feedback on it may be obtained at + https://www.rfc-editor.org/info/rfc8478. + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 1] + +RFC 8478 application/zstd October 2018 + + +Copyright Notice + + Copyright (c) 2018 IETF Trust and the persons identified as the + document authors. All rights reserved. + + This document is subject to BCP 78 and the IETF Trust's Legal + Provisions Relating to IETF Documents + (https://trustee.ietf.org/license-info) in effect on the date of + publication of this document. Please review these documents + carefully, as they describe your rights and restrictions with respect + to this document. Code Components extracted from this document must + include Simplified BSD License text as described in Section 4.e of + the Trust Legal Provisions and are provided without warranty as + described in the Simplified BSD License. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 2] + +RFC 8478 application/zstd October 2018 + + +Table of Contents + + 1. Introduction . . . . . . . . . . . . . . . . . . . . . . . . 4 + 2. Definitions . . . . . . . . . . . . . . . . . . . . . . . . . 4 + 3. Compression Algorithm . . . . . . . . . . . . . . . . . . . . 5 + 3.1. Frames . . . . . . . . . . . . . . . . . . . . . . . . . 6 + 3.1.1. Zstandard Frames . . . . . . . . . . . . . . . . . . 6 + 3.1.1.1. Frame Header . . . . . . . . . . . . . . . . . . 7 + 3.1.1.2. Blocks . . . . . . . . . . . . . . . . . . . . . 12 + 3.1.1.3. Compressed Blocks . . . . . . . . . . . . . . . . 14 + 3.1.1.4. Sequence Execution . . . . . . . . . . . . . . . 28 + 3.1.1.5. 
Repeat Offsets . . . . . . . . . . . . . . . . . 29 + 3.1.2. Skippable Frames . . . . . . . . . . . . . . . . . . 30 + 4. Entropy Encoding . . . . . . . . . . . . . . . . . . . . . . 30 + 4.1. FSE . . . . . . . . . . . . . . . . . . . . . . . . . . . 31 + 4.1.1. FSE Table Description . . . . . . . . . . . . . . . . 31 + 4.2. Huffman Coding . . . . . . . . . . . . . . . . . . . . . 34 + 4.2.1. Huffman Tree Description . . . . . . . . . . . . . . 35 + 4.2.1.1. Huffman Tree Header . . . . . . . . . . . . . . . 36 + 4.2.1.2. FSE Compression of Huffman Weights . . . . . . . 37 + 4.2.1.3. Conversion from Weights to Huffman Prefix Codes . 38 + 4.2.2. Huffman-Coded Streams . . . . . . . . . . . . . . . . 39 + 5. Dictionary Format . . . . . . . . . . . . . . . . . . . . . . 40 + 6. IANA Considerations . . . . . . . . . . . . . . . . . . . . . 42 + 6.1. The 'application/zstd' Media Type . . . . . . . . . . . . 42 + 6.2. Content Encoding . . . . . . . . . . . . . . . . . . . . 43 + 6.3. Dictionaries . . . . . . . . . . . . . . . . . . . . . . 43 + 7. Security Considerations . . . . . . . . . . . . . . . . . . . 43 + 8. Implementation Status . . . . . . . . . . . . . . . . . . . . 44 + 9. References . . . . . . . . . . . . . . . . . . . . . . . . . 45 + 9.1. Normative References . . . . . . . . . . . . . . . . . . 45 + 9.2. Informative References . . . . . . . . . . . . . . . . . 45 + Appendix A. Decoding Tables for Predefined Codes . . . . . . . . 46 + A.1. Literal Length Code Table . . . . . . . . . . . . . . . . 46 + A.2. Match Length Code Table . . . . . . . . . . . . . . . . . 49 + A.3. Offset Code Table . . . . . . . . . . . . . . . . . . . . 52 + Acknowledgments . . . . . . . . . . . . . . . . . . . . . . . . . 53 + Authors' Addresses . . . . . . . . . . . . . . . . . . . . . . . 54 + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 3] + +RFC 8478 application/zstd October 2018 + + +1. 
Introduction + + Zstandard, or "zstd" (pronounced "zee standard"), is a data + compression mechanism, akin to gzip [RFC1952]. + + Despite use of the word "standard" as part of its name, readers are + advised that this document is not an Internet Standards Track + specification; it is being published for informational purposes only. + + This document describes the Zstandard format. Also, to enable the + transport of a data object compressed with Zstandard, this document + registers a media type that can be used to identify such content when + it is used in a payload encoded using Multipurpose Internet Mail + Extensions (MIME). + +2. Definitions + + Some terms used elsewhere in this document are defined here for + clarity. + + uncompressed: Describes an arbitrary set of bytes in their original + form, prior to being subjected to compression. + + compress, compression: The act of processing a set of bytes via the + compression mechanism described here. + + compressed: Describes the result of passing a set of bytes through + this mechanism. The original input has thus been compressed. + + decompress, decompression: The act of processing a set of bytes + through the inverse of the compression mechanism described here, + in an attempt to recover the original set of bytes prior to + compression. + + decompressed: Describes the result of passing a set of bytes through + the reverse of this mechanism. When this is successful, the + decompressed payload and the uncompressed payload are + indistinguishable. + + encode: The process of translating data from one form to another; + this may include compression or it may refer to other translations + done as part of this specification. + + decode: The reverse of "encode"; describes a process of reversing a + prior encoding to recover the original content. 
+ + + + + + +Collet & Kucherawy Informational [Page 4] + +RFC 8478 application/zstd October 2018 + + + frame: Content compressed by Zstandard is transformed into a + Zstandard frame. Multiple frames can be appended into a single + file or stream. A frame is completely independent, has a defined + beginning and end, and has a set of parameters that tells the + decoder how to decompress it. + + block: A frame encapsulates one or multiple blocks. Each block + contains arbitrary content, which is described by its header, and + has a guaranteed maximum content size that depends upon frame + parameters. Unlike frames, each block depends on previous blocks + for proper decoding. However, each block can be decompressed + without waiting for its successor, allowing streaming operations. + + natural order: A sequence or ordering of objects or values that is + typical of that type of object or value. A set of unique + integers, for example, is in "natural order" if when progressing + from one element in the set or sequence to the next, there is + never a decrease in value. + + The naming convention for identifiers within the specification is + Mixed_Case_With_Underscores. Identifiers inside square brackets + indicate that the identifier is optional in the presented context. + +3. Compression Algorithm + + This section describes the Zstandard algorithm. + + The purpose of this document is to define a lossless compressed data + format that is a) independent of the CPU type, operating system, file + system, and character set and b) is suitable for file compression and + pipe and streaming compression, using the Zstandard algorithm. The + text of the specification assumes a basic background in programming + at the level of bits and other primitive data representations. 
+ + The data can be produced or consumed, even for an arbitrarily long + sequentially presented input data stream, using only an a priori + bounded amount of intermediate storage, and hence can be used in data + communications. The format uses the Zstandard compression method, + and an optional xxHash-64 checksum method [XXHASH], for detection of + data corruption. + + The data format defined by this specification does not attempt to + allow random access to compressed data. + + Unless otherwise indicated below, a compliant compressor must produce + data sets that conform to the specifications presented here. + However, it does not need to support all options. + + + + +Collet & Kucherawy Informational [Page 5] + +RFC 8478 application/zstd October 2018 + + + A compliant decompressor must be able to decompress at least one + working set of parameters that conforms to the specifications + presented here. It may also ignore informative fields, such as the + checksum. Whenever it does not support a parameter defined in the + compressed stream, it must produce a non-ambiguous error code and + associated error message explaining which parameter is unsupported. + + This specification is intended for use by implementers of software to + compress data into Zstandard format and/or decompress data from + Zstandard format. The Zstandard format is supported by an open + source reference implementation, written in portable C, and available + at [ZSTD]. + +3.1. Frames + + Zstandard compressed data is made up of one or more frames. Each + frame is independent and can be decompressed independently of other + frames. The decompressed content of multiple concatenated frames is + the concatenation of each frame's decompressed content. + + There are two frame formats defined for Zstandard: Zstandard frames + and skippable frames. Zstandard frames contain compressed data, + while skippable frames contain custom user metadata. + +3.1.1. 
Zstandard Frames + + The structure of a single Zstandard frame is as follows: + + +--------------------+------------+ + | Magic_Number | 4 bytes | + +--------------------+------------+ + | Frame_Header | 2-14 bytes | + +--------------------+------------+ + | Data_Block | n bytes | + +--------------------+------------+ + | [More Data_Blocks] | | + +--------------------+------------+ + | [Content_Checksum] | 0-4 bytes | + +--------------------+------------+ + + Magic_Number: 4 bytes, little-endian format. Value: 0xFD2FB528. + + Frame_Header: 2 to 14 bytes, detailed in Section 3.1.1.1. + + Data_Block: Detailed in Section 3.1.1.2. This is where data + appears. + + + + + +Collet & Kucherawy Informational [Page 6] + +RFC 8478 application/zstd October 2018 + + + Content_Checksum: An optional 32-bit checksum, only present if + Content_Checksum_Flag is set. The content checksum is the result + of the XXH64() hash function [XXHASH] digesting the original + (decoded) data as input, and a seed of zero. The low 4 bytes of + the checksum are stored in little-endian format. + + The magic number was selected to be less probable to find at the + beginning of an arbitrary file. It avoids trivial patterns (0x00, + 0xFF, repeated bytes, increasing bytes, etc.), contains byte values + outside of ASCII range, and doesn't map into UTF-8 space, all of + which reduce the likelihood of its appearance at the top of a text + file. + +3.1.1.1. Frame Header + + The frame header has a variable size, with a minimum of 2 bytes and + up to 14 bytes depending on optional parameters. 
The structure of + Frame_Header is as follows: + + +-------------------------+-----------+ + | Frame_Header_Descriptor | 1 byte | + +-------------------------+-----------+ + | [Window_Descriptor] | 0-1 byte | + +-------------------------+-----------+ + | [Dictionary_ID] | 0-4 bytes | + +-------------------------+-----------+ + | [Frame_Content_Size] | 0-8 bytes | + +-------------------------+-----------+ + + + + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 7] + +RFC 8478 application/zstd October 2018 + + +3.1.1.1.1. Frame_Header_Descriptor + + The first header's byte is called the Frame_Header_Descriptor. It + describes which other fields are present. Decoding this byte is + enough to tell the size of Frame_Header. + + +------------+-------------------------+ + | Bit Number | Field Name | + +------------+-------------------------+ + | 7-6 | Frame_Content_Size_Flag | + +------------+-------------------------+ + | 5 | Single_Segment_Flag | + +------------+-------------------------+ + | 4 | (unused) | + +------------+-------------------------+ + | 3 | (reserved) | + +------------+-------------------------+ + | 2 | Content_Checksum_Flag | + +------------+-------------------------+ + | 1-0 | Dictionary_ID_Flag | + +------------+-------------------------+ + + In this table, bit 7 is the highest bit, while bit 0 is the lowest + one. + +3.1.1.1.1.1. Frame_Content_Size_Flag + + This is a 2-bit flag (equivalent to Frame_Header_Descriptor right- + shifted 6 bits) specifying whether Frame_Content_Size (the + decompressed data size) is provided within the header. 
Flag_Value + provides FCS_Field_Size, which is the number of bytes used by + Frame_Content_Size according to the following table: + + +----------------+--------+---+---+---+ + | Flag_Value | 0 | 1 | 2 | 3 | + +----------------+--------+---+---+---+ + | FCS_Field_Size | 0 or 1 | 2 | 4 | 8 | + +----------------+--------+---+---+---+ + + When Flag_Value is 0, FCS_Field_Size depends on Single_Segment_Flag: + If Single_Segment_Flag is set, FCS_Field_Size is 1. Otherwise, + FCS_Field_Size is 0; Frame_Content_Size is not provided. + + + + + + + + + +Collet & Kucherawy Informational [Page 8] + +RFC 8478 application/zstd October 2018 + + +3.1.1.1.1.2. Single_Segment_Flag + + If this flag is set, data must be regenerated within a single + continuous memory segment. + + In this case, Window_Descriptor byte is skipped, but + Frame_Content_Size is necessarily present. As a consequence, the + decoder must allocate a memory segment of size equal or larger than + Frame_Content_Size. + + In order to protect the decoder from unreasonable memory + requirements, a decoder is allowed to reject a compressed frame that + requests a memory size beyond the decoder's authorized range. + + For broader compatibility, decoders are recommended to support memory + sizes of at least 8 MB. This is only a recommendation; each decoder + is free to support higher or lower limits, depending on local + limitations. + +3.1.1.1.1.3. Unused Bit + + A decoder compliant with this specification version shall not + interpret this bit. It might be used in a future version, to signal + a property that is not mandatory to properly decode the frame. An + encoder compliant with this specification must set this bit to zero. + +3.1.1.1.1.4. Reserved Bit + + This bit is reserved for some future feature. Its value must be + zero. A decoder compliant with this specification version must + ensure it is not set. 
This bit may be used in a future revision, to + signal a feature that must be interpreted to decode the frame + correctly. + +3.1.1.1.1.5. Content_Checksum_Flag + + If this flag is set, a 32-bit Content_Checksum will be present at the + frame's end. See the description of Content_Checksum above. + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 9] + +RFC 8478 application/zstd October 2018 + + +3.1.1.1.1.6. Dictionary_ID_Flag + + This is a 2-bit flag (= Frame_Header_Descriptor & 0x3) indicating + whether a dictionary ID is provided within the header. It also + specifies the size of this field as DID_Field_Size: + + +----------------+---+---+---+---+ + | Flag_Value | 0 | 1 | 2 | 3 | + +----------------+---+---+---+---+ + | DID_Field_Size | 0 | 1 | 2 | 4 | + +----------------+---+---+---+---+ + +3.1.1.1.2. Window Descriptor + + This provides guarantees about the minimum memory buffer required to + decompress a frame. This information is important for decoders to + allocate enough memory. + + The Window_Descriptor byte is optional. When Single_Segment_Flag is + set, Window_Descriptor is not present. In this case, Window_Size is + Frame_Content_Size, which can be any value from 0 to 2^64-1 bytes (16 + ExaBytes). + + +------------+----------+----------+ + | Bit Number | 7-3 | 2-0 | + +------------+----------+----------+ + | Field Name | Exponent | Mantissa | + +------------+----------+----------+ + + The minimum memory buffer size is called Window_Size. It is + described by the following formulae: + + windowLog = 10 + Exponent; + windowBase = 1 << windowLog; + windowAdd = (windowBase / 8) * Mantissa; + Window_Size = windowBase + windowAdd; + + The minimum Window_Size is 1 KB. The maximum Window_Size is (1<<41) + + 7*(1<<38) bytes, which is 3.75 TB. + + In general, larger Window_Size values tend to improve the compression + ratio, but at the cost of increased memory usage. 
+ + To properly decode compressed data, a decoder will need to allocate a + buffer of at least Window_Size bytes. + + + + + + +Collet & Kucherawy Informational [Page 10] + +RFC 8478 application/zstd October 2018 + + + In order to protect decoders from unreasonable memory requirements, a + decoder is allowed to reject a compressed frame that requests a + memory size beyond decoder's authorized range. + + For improved interoperability, it's recommended for decoders to + support values of Window_Size up to 8 MB and for encoders not to + generate frames requiring a Window_Size larger than 8 MB. It's + merely a recommendation though, and decoders are free to support + larger or lower limits, depending on local limitations. + +3.1.1.1.3. Dictionary_ID + + This is a variable size field, which contains the ID of the + dictionary required to properly decode the frame. This field is + optional. When it's not present, it's up to the decoder to know + which dictionary to use. + + Dictionary_ID field size is provided by DID_Field_Size. + DID_Field_Size is directly derived from the value of + Dictionary_ID_Flag. One byte can represent an ID 0-255; 2 bytes can + represent an ID 0-65535; 4 bytes can represent an ID 0-4294967295. + Format is little-endian. + + It is permitted to represent a small ID (for example, 13) with a + large 4-byte dictionary ID, even if it is less efficient. + + Within private environments, any dictionary ID can be used. However, + for frames and dictionaries distributed in public space, + Dictionary_ID must be attributed carefully. The following ranges are + reserved for use only with dictionaries that have been registered + with IANA (see Section 6.3): + + low range: <= 32767 + high range: >= (1 << 31) + + Any other value for Dictionary_ID can be used by private arrangement + between participants. + + Any payload presented for decompression that references an + unregistered reserved dictionary ID results in an error. 
+ + + + + + + + + + + +Collet & Kucherawy Informational [Page 11] + +RFC 8478 application/zstd October 2018 + + +3.1.1.1.4. Frame Content Size + + This is the original (uncompressed) size. This information is + optional. Frame_Content_Size uses a variable number of bytes, + provided by FCS_Field_Size. FCS_Field_Size is provided by the value + of Frame_Content_Size_Flag. FCS_Field_Size can be equal to 0 (not + present), 1, 2, 4, or 8 bytes. + + +----------------+--------------+ + | FCS Field Size | Range | + +----------------+--------------+ + | 0 | unknown | + +----------------+--------------+ + | 1 | 0 - 255 | + +----------------+--------------+ + | 2 | 256 - 65791 | + +----------------+--------------+ + | 4 | 0 - 2^32 - 1 | + +----------------+--------------+ + | 8 | 0 - 2^64 - 1 | + +----------------+--------------+ + + Frame_Content_Size format is little-endian. When FCS_Field_Size is + 1, 4, or 8 bytes, the value is read directly. When FCS_Field_Size is + 2, the offset of 256 is added. It's allowed to represent a small + size (for example 18) using any compatible variant. + +3.1.1.2. Blocks + + After Magic_Number and Frame_Header, there are some number of blocks. + Each frame must have at least 1 block, but there is no upper limit on + the number of blocks per frame. + + The structure of a block is as follows: + + +--------------+---------------+ + | Block_Header | Block_Content | + +--------------+---------------+ + | 3 bytes | n bytes | + +--------------+---------------+ + + + + + + + + + + + +Collet & Kucherawy Informational [Page 12] + +RFC 8478 application/zstd October 2018 + + + Block_Header uses 3 bytes, written using little-endian convention. + It contains three fields: + + +------------+------------+------------+ + | Last_Block | Block_Type | Block_Size | + +------------+------------+------------+ + | bit 0 | bits 1-2 | bits 3-23 | + +------------+------------+------------+ + +3.1.1.2.1. 
Last_Block + + The lowest bit (Last_Block) signals whether this block is the last + one. The frame will end after this last block. It may be followed + by an optional Content_Checksum (see Section 3.1.1). + +3.1.1.2.2. Block_Type + + The next 2 bits represent the Block_Type. There are four block + types: + + +-----------+------------------+ + | Value | Block_Type | + +-----------+------------------+ + | 0 | Raw_Block | + +-----------+------------------+ + | 1 | RLE_Block | + +-----------+------------------+ + | 2 | Compressed_Block | + +-----------+------------------+ + | 3 | Reserved | + +-----------+------------------+ + + Raw_Block: This is an uncompressed block. Block_Content contains + Block_Size bytes. + + RLE_Block: This is a single byte, repeated Block_Size times. + Block_Content consists of a single byte. On the decompression + side, this byte must be repeated Block_Size times. + + Compressed_Block: This is a compressed block as described in + Section 3.1.1.3. Block_Size is the length of Block_Content, + namely the compressed data. The decompressed size is not known, + but its maximum possible value is guaranteed (see below). + + Reserved: This is not a block. This value cannot be used with the + current specification. If such a value is present, it is + considered to be corrupt data. + + + + +Collet & Kucherawy Informational [Page 13] + +RFC 8478 application/zstd October 2018 + + +3.1.1.2.3. Block_Size + + The upper 21 bits of Block_Header represent the Block_Size. + Block_Size is the size of the block excluding the header. A block + can contain any number of bytes (even zero), up to + Block_Maximum_Decompressed_Size, which is the smallest of: + + o Window_Size + + o 128 KB + + A Compressed_Block has the extra restriction that Block_Size is + always strictly less than the decompressed size. If this condition + cannot be respected, the block must be sent uncompressed instead + (i.e., treated as a Raw_Block). + +3.1.1.3. 
Compressed Blocks + + To decompress a compressed block, the compressed size must be + provided from the Block_Size field within Block_Header. + + A compressed block consists of two sections: a Literals + Section (Section 3.1.1.3.1) and a + Sequences_Section (Section 3.1.1.3.2). The results of the two + sections are then combined to produce the decompressed data in + Sequence Execution (Section 3.1.1.4). + + To decode a compressed block, the following elements are necessary: + + o Previous decoded data, up to a distance of Window_Size, or the + beginning of the Frame, whichever is smaller. Single_Segment_Flag + will be set in the latter case. + + o List of "recent offsets" from the previous Compressed_Block. + + o The previous Huffman tree, required by Treeless_Literals_Block + type. + + o Previous Finite State Entropy (FSE) decoding tables, required by + Repeat_Mode, for each symbol type (literals lengths, match + lengths, offsets). + + Note that decoding tables are not always from the previous + Compressed_Block: + + o Every decoding table can come from a dictionary. + + + + + +Collet & Kucherawy Informational [Page 14] + +RFC 8478 application/zstd October 2018 + + + o The Huffman tree comes from the previous + Compressed_Literals_Block. + +3.1.1.3.1. Literals_Section_Header + + All literals are regrouped in the first part of the block. They can + be decoded first and then copied during Sequence Execution (see + Section 3.1.1.4), or they can be decoded on the flow during Sequence + Execution. + + Literals can be stored uncompressed or compressed using Huffman + prefix codes. When compressed, an optional tree description can be + present, followed by 1 or 4 streams. 
+ + +----------------------------+ + | Literals_Section_Header | + +----------------------------+ + | [Huffman_Tree_Description] | + +----------------------------+ + | [Jump_Table] | + +----------------------------+ + | Stream_1 | + +----------------------------+ + | [Stream_2] | + +----------------------------+ + | [Stream_3] | + +----------------------------+ + | [Stream_4] | + +----------------------------+ + +3.1.1.3.1.1. Literals_Section_Header + + This field describes how literals are packed. It's a byte-aligned + variable-size bit field, ranging from 1 to 5 bytes, using little- + endian convention. + + +---------------------+-----------+ + | Literals_Block_Type | 2 bits | + +---------------------+-----------+ + | Size_Format | 1-2 bits | + +---------------------+-----------+ + | Regenerated_Size | 5-20 bits | + +---------------------+-----------+ + | [Compressed_Size] | 0-18 bits | + +---------------------+-----------+ + + In this representation, bits at the top are the lowest bits. + + + + +Collet & Kucherawy Informational [Page 15] + +RFC 8478 application/zstd October 2018 + + + The Literals_Block_Type field uses the two lowest bits of the first + byte, describing four different block types: + + +---------------------------+-------+ + | Literals_Block_Type | Value | + +---------------------------+-------+ + | Raw_Literals_Block | 0 | + +---------------------------+-------+ + | RLE_Literals_Block | 1 | + +---------------------------+-------+ + | Compressed_Literals_Block | 2 | + +---------------------------+-------+ + | Treeless_Literals_Block | 3 | + +---------------------------+-------+ + + Raw_Literals_Block: Literals are stored uncompressed. + Literals_Section_Content is Regenerated_Size. + + RLE_Literals_Block: Literals consist of a single-byte value repeated + Regenerated_Size times. Literals_Section_Content is 1. + + Compressed_Literals_Block: This is a standard Huffman-compressed + block, starting with a Huffman tree description. 
See details + below. Literals_Section_Content is Compressed_Size. + + Treeless_Literals_Block: This is a Huffman-compressed block, using + the Huffman tree from the previous Compressed_Literals_Block, or a + dictionary if there is no previous Huffman-compressed literals + block. Huffman_Tree_Description will be skipped. Note that if + this mode is triggered without any previous Huffman-table in the + frame (or dictionary, per Section 5), it should be treated as data + corruption. Literals_Section_Content is Compressed_Size. + + The Size_Format is divided into two families: + + o For Raw_Literals_Block and RLE_Literals_Block, it's only necessary + to decode Regenerated_Size. There is no Compressed_Size field. + + o For Compressed_Literals_Block and Treeless_Literals_Block, it's required to + decode both Compressed_Size and Regenerated_Size (the decompressed + size). It's also necessary to decode the number of streams (1 or + 4). + + For values spanning several bytes, the convention is little endian. + + Size_Format for Raw_Literals_Block and RLE_Literals_Block uses 1 or 2 + bits. Its value is (Literals_Section_Header[0]>>2) & 0x3. + + + + +Collet & Kucherawy Informational [Page 16] + +RFC 8478 application/zstd October 2018 + + + Size_Format == 00 or 10: Size_Format uses 1 bit. Regenerated_Size + uses 5 bits (values 0-31). Literals_Section_Header uses 1 byte. + Regenerated_Size = Literals_Section_Header[0]>>3. + + Size_Format == 01: Size_Format uses 2 bits. Regenerated_Size uses + 12 bits (values 0-4095). Literals_Section_Header uses 2 bytes. + Regenerated_Size = (Literals_Section_Header[0]>>4) + + (Literals_Section_Header[1]<<4). + + Size_Format == 11: Size_Format uses 2 bits. Regenerated_Size uses + 20 bits (values 0-1048575). Literals_Section_Header uses 3 bytes. + Regenerated_Size = (Literals_Section_Header[0]>>4) + + (Literals_Section_Header[1]<<4) + (Literals_Section_Header[2]<<12) + + Only Stream_1 is present for these cases.
Note that it is permitted + to represent a short value (for example, 13) using a long format, + even if it's less efficient. + + Size_Format for Compressed_Literals_Block and Treeless_Literals_Block + always uses 2 bits. + + Size_Format == 00: A single stream. Both Regenerated_Size and + Compressed_Size use 10 bits (values 0-1023). + Literals_Section_Header uses 3 bytes. + + Size_Format == 01: 4 streams. Both Regenerated_Size and + Compressed_Size use 10 bits (values 0-1023). + Literals_Section_Header uses 3 bytes. + + Size_Format == 10: 4 streams. Both Regenerated_Size and + Compressed_Size use 14 bits (values 0-16383). + Literals_Section_Header uses 4 bytes. + + Size_Format == 11: 4 streams. Both Regenerated_Size and + Compressed_Size use 18 bits (values 0-262143). + Literals_Section_Header uses 5 bytes. + + Both the Compressed_Size and Regenerated_Size fields follow little- + endian convention. Note that Compressed_Size includes the size of + the Huffman_Tree_Description when it is present. + +3.1.1.3.1.2. Raw_Literals_Block + + The data in Stream_1 is Regenerated_Size bytes long. It contains the + raw literals data to be used during Sequence Execution + (Section 3.1.1.4). + + + + + +Collet & Kucherawy Informational [Page 17] + +RFC 8478 application/zstd October 2018 + + +3.1.1.3.1.3. RLE_Literals_Block + + Stream_1 consists of a single byte that should be repeated + Regenerated_Size times to generate the decoded literals. + +3.1.1.3.1.4. Compressed_Literals_Block and Treeless_Literals_Block + + Both of these modes contain Huffman-encoded data. For + Treeless_Literals_Block, the Huffman table comes from the previously + compressed literals block, or from a dictionary; see Section 5. + +3.1.1.3.1.5. Huffman_Tree_Description + + This section is only present when the Literals_Block_Type is + Compressed_Literals_Block (2). The format of + Huffman_Tree_Description can be found in Section 4.2.1.
The size of + Huffman_Tree_Description is determined during the decoding process. + It must be used to determine where streams begin. + + Total_Streams_Size = Compressed_Size + - Huffman_Tree_Description_Size + +3.1.1.3.1.6. Jump_Table + + The Jump_Table is only present when there are 4 Huffman-coded + streams. + + (Reminder: Huffman-compressed data consists of either 1 or 4 Huffman- + coded streams.) + + If only 1 stream is present, it is a single bitstream occupying the + entire remaining portion of the literals block, encoded as described + within Section 4.2.2. + + If there are 4 streams, Literals_Section_Header only provides enough + information to know the decompressed and compressed sizes of all 4 + streams combined. The decompressed size of each stream is equal to + (Regenerated_Size+3)/4, except for the last stream, which may be up + to 3 bytes smaller, to reach a total decompressed size as specified + in Regenerated_Size. + + The compressed size of each stream is provided explicitly in the + Jump_Table. The Jump_Table is 6 bytes long and consists of three + 2-byte little-endian fields, describing the compressed sizes of the + first 3 streams. Stream4_Size is computed from Total_Streams_Size + minus sizes of other streams. + + + + + +Collet & Kucherawy Informational [Page 18] + +RFC 8478 application/zstd October 2018 + + + Stream4_Size = Total_Streams_Size - 6 + - Stream1_Size - Stream2_Size + - Stream3_Size + + Note that if Stream1_Size + Stream2_Size + Stream3_Size exceeds + Total_Streams_Size, the data are considered corrupted. + + Each of these 4 bitstreams is then decoded independently as a + Huffman-Coded stream, as described in Section 4.2.2. + +3.1.1.3.2. Sequences_Section + + A compressed block is a succession of sequences. A sequence is a + literal copy command, followed by a match copy command. A literal + copy command specifies a length. It is the number of bytes to be + copied (or extracted) from the Literals Section. 
A match copy + command specifies an offset and a length. + + When all sequences are decoded, if there are literals left in the + literals section, these bytes are added at the end of the block. + + This is described in more detail in Section 3.1.1.4. + + The Sequences_Section regroups all symbols required to decode + commands. There are three symbol types: literals lengths, offsets, + and match lengths. They are encoded together, interleaved, in a + single "bitstream". + + The Sequences_Section starts by a header, followed by optional + probability tables for each symbol type, followed by the bitstream. + + Sequences_Section_Header + [Literals_Length_Table] + [Offset_Table] + [Match_Length_Table] + bitStream + + To decode the Sequences_Section, it's necessary to know its size. + This size is deduced from the size of the Literals_Section: + Sequences_Section_Size = Block_Size - Literals_Section_Header - + Literals_Section_Content + + + + + + + + + + +Collet & Kucherawy Informational [Page 19] + +RFC 8478 application/zstd October 2018 + + +3.1.1.3.2.1. Sequences_Section_Header + + This header consists of two items: + + o Number_of_Sequences + + o Symbol_Compression_Modes + + Number_of_Sequences is a variable size field using between 1 and 3 + bytes. If the first byte is "byte0": + + o if (byte0 == 0): there are no sequences. The sequence section + stops here. Decompressed content is defined entirely as Literals + Section content. The FSE tables used in Repeat_Mode are not + updated. + + o if (byte0 < 128): Number_of_Sequences = byte0. Uses 1 byte. + + o if (byte0 < 255): Number_of_Sequences = ((byte0 - 128) << 8) + + byte1. Uses 2 bytes. + + o if (byte0 == 255): Number_of_Sequences = byte1 + (byte2 << 8) + + 0x7F00. Uses 3 bytes. + + Symbol_Compression_Modes is a single byte, defining the compression + mode of each symbol type. 
+ + +-------------+----------------------+ + | Bit Number | Field Name | + +-------------+----------------------+ + | 7-6 | Literal_Lengths_Mode | + +-------------+----------------------+ + | 5-4 | Offsets_Mode | + +-------------+----------------------+ + | 3-2 | Match_Lengths_Mode | + +-------------+----------------------+ + | 1-0 | Reserved | + +-------------+----------------------+ + + The last field, Reserved, must be all zeroes. + + + + + + + + + + + +Collet & Kucherawy Informational [Page 20] + +RFC 8478 application/zstd October 2018 + + + Literals_Lengths_Mode, Offsets_Mode, and Match_Lengths_Mode define + the Compression_Mode of literals lengths, offsets, and match lengths + symbols, respectively. They follow the same enumeration: + + +-------+---------------------+ + | Value | Compression_Mode | + +-------+---------------------+ + | 0 | Predefined_Mode | + +-------+---------------------+ + | 1 | RLE_Mode | + +-------+---------------------+ + | 2 | FSE_Compressed_Mode | + +-------+---------------------+ + | 3 | Repeat_Mode | + +-------+---------------------+ + + Predefined_Mode: A predefined FSE (see Section 4.1) distribution + table is used, as defined in Section 3.1.1.3.2.2. No distribution + table will be present. + + RLE_Mode: The table description consists of a single byte, which + contains the symbol's value. This symbol will be used for all + sequences. + + FSE_Compressed_Mode: Standard FSE compression. A distribution table + will be present. The format of this distribution table is + described in Section 4.1.1. Note that the maximum allowed + accuracy log for literals length and match length tables is 9, and + the maximum accuracy log for the offsets table is 8. This mode + must not be used when only one symbol is present; RLE_Mode should + be used instead (although any other mode will work). 
+ + Repeat_Mode: The table used in the previous Compressed_Block with + Number_Of_Sequences > 0 will be used again, or if this is the + first block, the table in the dictionary will be used. Note that + this includes RLE_Mode, so if Repeat_Mode follows RLE_Mode, the + same symbol will be repeated. It also includes Predefined_Mode, + in which case Repeat_Mode will have the same outcome as + Predefined_Mode. No distribution table will be present. If this + mode is used without any previous sequence table in the frame (or + dictionary; see Section 5) to repeat, this should be treated as + corruption. + + + + + + + + + +Collet & Kucherawy Informational [Page 21] + +RFC 8478 application/zstd October 2018 + + +3.1.1.3.2.1.1. Sequence Codes for Lengths and Offsets + + Each symbol is a code in its own context, which specifies Baseline + and Number_of_Bits to add. Codes are FSE compressed and interleaved + with raw additional bits in the same bitstream. + + Literals length codes are values ranging from 0 to 35 inclusive. + They define lengths from 0 to 131071 bytes. The literals length is + equal to the decoded Baseline plus the result of reading + Number_of_Bits bits from the bitstream, as a little-endian value. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 22] + +RFC 8478 application/zstd October 2018 + + + +----------------------+----------+----------------+ + | Literals_Length_Code | Baseline | Number_of_Bits | + +----------------------+----------+----------------+ + | 0-15 | length | 0 | + +----------------------+----------+----------------+ + | 16 | 16 | 1 | + +----------------------+----------+----------------+ + | 17 | 18 | 1 | + +----------------------+----------+----------------+ + | 18 | 20 | 1 | + +----------------------+----------+----------------+ + | 19 | 22 | 1 | + +----------------------+----------+----------------+ + | 20 | 24 | 2 | + +----------------------+----------+----------------+ + | 21 | 28 | 2 | + +----------------------+----------+----------------+ + | 22 | 32 | 3 | + +----------------------+----------+----------------+ + | 23 | 40 | 3 | + +----------------------+----------+----------------+ + | 24 | 48 | 4 | + +----------------------+----------+----------------+ + | 25 | 64 | 6 | + +----------------------+----------+----------------+ + | 26 | 128 | 7 | + +----------------------+----------+----------------+ + | 27 | 256 | 8 | + +----------------------+----------+----------------+ + | 28 | 512 | 9 | + +----------------------+----------+----------------+ + | 29 | 1024 | 10 | + +----------------------+----------+----------------+ + | 30 | 2048 | 11 | + +----------------------+----------+----------------+ + | 31 | 4096 | 12 | + +----------------------+----------+----------------+ + | 32 | 8192 | 13 | + +----------------------+----------+----------------+ + | 33 | 16384 | 14 | + +----------------------+----------+----------------+ + | 34 | 32768 | 15 | + +----------------------+----------+----------------+ + | 35 | 65536 | 16 | + +----------------------+----------+----------------+ + + + + + + +Collet & Kucherawy Informational [Page 23] + +RFC 8478 application/zstd October 
2018 + + + Match length codes are values ranging from 0 to 52 inclusive. They + define lengths from 3 to 131074 bytes. The match length is equal to + the decoded Baseline plus the result of reading Number_of_Bits bits + from the bitstream, as a little-endian value. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 24] + +RFC 8478 application/zstd October 2018 + + + +-------------------+-----------------------+----------------+ + | Match_Length_Code | Baseline | Number_of_Bits | + +-------------------+-----------------------+----------------+ + | 0-31 | Match_Length_Code + 3 | 0 | + +-------------------+-----------------------+----------------+ + | 32 | 35 | 1 | + +-------------------+-----------------------+----------------+ + | 33 | 37 | 1 | + +-------------------+-----------------------+----------------+ + | 34 | 39 | 1 | + +-------------------+-----------------------+----------------+ + | 35 | 41 | 1 | + +-------------------+-----------------------+----------------+ + | 36 | 43 | 2 | + +-------------------+-----------------------+----------------+ + | 37 | 47 | 2 | + +-------------------+-----------------------+----------------+ + | 38 | 51 | 3 | + +-------------------+-----------------------+----------------+ + | 39 | 59 | 3 | + +-------------------+-----------------------+----------------+ + | 40 | 67 | 4 | + +-------------------+-----------------------+----------------+ + | 41 | 83 | 4 | + +-------------------+-----------------------+----------------+ + | 42 | 99 | 5 | + +-------------------+-----------------------+----------------+ + | 43 | 131 | 7 | + +-------------------+-----------------------+----------------+ + | 44 | 259 | 8 | + +-------------------+-----------------------+----------------+ + | 45 | 515 | 9 | + +-------------------+-----------------------+----------------+ + | 46 | 1027 | 10 | + +-------------------+-----------------------+----------------+ + | 47 | 
2051 | 11 | + +-------------------+-----------------------+----------------+ + | 48 | 4099 | 12 | + +-------------------+-----------------------+----------------+ + | 49 | 8195 | 13 | + +-------------------+-----------------------+----------------+ + | 50 | 16387 | 14 | + +-------------------+-----------------------+----------------+ + | 51 | 32771 | 15 | + +-------------------+-----------------------+----------------+ + | 52 | 65539 | 16 | + +-------------------+-----------------------+----------------+ + + + + +Collet & Kucherawy Informational [Page 25] + +RFC 8478 application/zstd October 2018 + + + Offset codes are values ranging from 0 to N. + + A decoder is free to limit its maximum supported value for N. + Support for values of at least 22 is recommended. At the time of + this writing, the reference decoder supports a maximum N value of 31. + + An offset code is also the number of additional bits to read in + little-endian fashion and can be translated into an Offset_Value + using the following formulas: + + Offset_Value = (1 << offsetCode) + readNBits(offsetCode); + if (Offset_Value > 3) Offset = Offset_Value - 3; + + This means that maximum Offset_Value is (2^(N+1))-1, supporting back- + reference distance up to (2^(N+1))-4, but it is limited by the + maximum back-reference distance (see Section 3.1.1.1.2). + + Offset_Value from 1 to 3 are special: they define "repeat codes". + This is described in more detail in Section 3.1.1.5. + +3.1.1.3.2.1.2. Decoding Sequences + + FSE bitstreams are read in reverse of the direction they are written. + In zstd, the compressor writes bits forward into a block, and the + decompressor must read the bitstream backwards. + + To find the start of the bitstream, it is therefore necessary to know + the offset of the last byte of the block, which can be found by + counting Block_Size bytes after the block header. 
+ + After writing the last bit containing information, the compressor + writes a single 1 bit and then fills the byte with 0-7 zero bits of + padding. The last byte of the compressed bitstream cannot be zero + for that reason. + + When decompressing, the last byte containing the padding is the first + byte to read. The decompressor needs to skip 0-7 initial zero bits + and the first 1 bit that occurs. Afterwards, the useful part of the + bitstream begins. + + FSE decoding requires a 'state' to be carried from symbol to symbol. + For more explanation on FSE decoding, see Section 4.1. + + For sequence decoding, a separate state keeps track of each symbol + type: literals lengths, offsets, and match lengths. Some FSE primitives are + also used. For more details on the operation of these primitives, + see Section 4.1. + + + + +Collet & Kucherawy Informational [Page 26] + +RFC 8478 application/zstd October 2018 + + + The bitstream starts with initial FSE state values, each using the + required number of bits in their respective accuracy, decoded + previously from their normalized distribution. It starts with + Literals_Length_State, followed by Offset_State, and finally + Match_Length_State. + + Note that all values are read backward, so the 'start' of the + bitstream is at the highest position in memory, immediately before + the last 1 bit for padding. + + After decoding the starting states, a single sequence is decoded + Number_of_Sequences times. These sequences are decoded in order from + first to last. Since the compressor writes the bitstream in the + forward direction, this means the compressor must encode the + sequences starting with the last one and ending with the first. + + For each of the symbol types, the FSE state can be used to determine + the appropriate code. The code then defines the Baseline and + Number_of_Bits to read for each type. The description of the codes + for how to determine these values can be found in + Section 3.1.1.3.2.1.1.
+ + Decoding starts by reading the Number_of_Bits required to decode + offset. It does the same for Match_Length and then for + Literals_Length. This sequence is then used for Sequence Execution + (see Section 3.1.1.4). + + If it is not the last sequence in the block, the next operation is to + update states. Using the rules pre-calculated in the decoding + tables, Literals_Length_State is updated, followed by + Match_Length_State, and then Offset_State. See Section 4.1 for + details on how to update states from the bitstream. + + This operation will be repeated Number_of_Sequences times. At the + end, the bitstream shall be entirely consumed; otherwise, the + bitstream is considered corrupted. + +3.1.1.3.2.2. Default Distributions + + If Predefined_Mode is selected for a symbol type, its FSE decoding + table is generated from a predefined distribution table defined here. + For details on how to convert this distribution into a decoding + table, see Section 4.1. + + + + + + + + +Collet & Kucherawy Informational [Page 27] + +RFC 8478 application/zstd October 2018 + + +3.1.1.3.2.2.1. Literals Length + + The decoding table uses an accuracy log of 6 bits (64 states). + + short literalsLength_defaultDistribution[36] = + { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, + -1,-1,-1,-1 + }; + +3.1.1.3.2.2.2. Match Length + + The decoding table uses an accuracy log of 6 bits (64 states). + + short matchLengths_defaultDistribution[53] = + { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1, + -1,-1,-1,-1,-1 + }; + +3.1.1.3.2.2.3. Offset Codes + + The decoding table uses an accuracy log of 5 bits (32 states), and + supports a maximum N value of 28, allowing offset values up to + 536,870,908. 
+ + If any sequence in the compressed block requires a larger offset than + this, it's not possible to use the default distribution to represent + it. + + short offsetCodes_defaultDistribution[29] = + { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 + }; + +3.1.1.4. Sequence Execution + + Once literals and sequences have been decoded, they are combined to + produce the decoded content of a block. + + Each sequence consists of a tuple of (literals_length, offset_value, + match_length), decoded as described in the + Sequences_Section (Section 3.1.1.3.2). To execute a sequence, first + copy literals_length bytes from the decoded literals to the output. + + + + + + +Collet & Kucherawy Informational [Page 28] + +RFC 8478 application/zstd October 2018 + + + Then, match_length bytes are copied from previous decoded data. The + offset to copy from is determined by offset_value: + + o if Offset_Value > 3, then the offset is Offset_Value - 3; + + o if Offset_Value is from 1-3, the offset is a special repeat offset + value. See Section 3.1.1.5 for how the offset is determined in + this case. + + The offset is defined as from the current position (after copying the + literals), so an offset of 6 and a match length of 3 means that 3 + bytes should be copied from 6 bytes back. Note that all offsets + leading to previously decoded data must be smaller than Window_Size + defined in Frame_Header_Descriptor (Section 3.1.1.1.1). + +3.1.1.5. Repeat Offsets + + As seen above, the first three values define a repeated offset; we + will call them Repeated_Offset1, Repeated_Offset2, and + Repeated_Offset3. They are sorted in recency order, with + Repeated_Offset1 meaning "most recent one". + + If offset_value is 1, then the offset used is Repeated_Offset1, etc. 
+ + There is one exception: When the current sequence's literals_length + is 0, repeated offsets are shifted by 1, so an offset_value of 1 + means Repeated_Offset2, an offset_value of 2 means Repeated_Offset3, + and an offset_value of 3 means Repeated_Offset1 - 1_byte. + + For the first block, the starting offset history is populated with + the following values: Repeated_Offset1 (1), Repeated_Offset2 (4), and + Repeated_Offset3 (8), unless a dictionary is used, in which case they + come from the dictionary. + + Then each block gets its starting offset history from the ending + values of the most recent Compressed_Block. Note that blocks that + are not Compressed_Block are skipped; they do not contribute to + offset history. + + The newest offset takes the lead in offset history, shifting others + back (up to its previous place if it was already present). This + means that when Repeated_Offset1 (most recent) is used, history is + unmodified. When Repeated_Offset2 is used, it is swapped with + Repeated_Offset1. If any other offset is used, it becomes + Repeated_Offset1, and the rest are shifted back by 1. + + + + + + +Collet & Kucherawy Informational [Page 29] + +RFC 8478 application/zstd October 2018 + + +3.1.2. Skippable Frames + + +--------------+------------+-----------+ + | Magic_Number | Frame_Size | User_Data | + +--------------+------------+-----------+ + | 4 bytes | 4 bytes | n bytes | + +--------------+------------+-----------+ + + Skippable frames allow the insertion of user-defined metadata into a + flow of concatenated frames. + + Skippable frames defined in this specification are compatible with + skippable frames in [LZ4]. + + From a compliant decoder perspective, skippable frames simply need to + be skipped, and their content ignored, resuming decoding after the + skippable frame. 
+ + It should be noted that a skippable frame can be used to watermark a + stream of concatenated frames embedding any kind of tracking + information (even just a Universally Unique Identifier (UUID)). + Users wary of such possibility should scan the stream of concatenated + frames in an attempt to detect such frames for analysis or removal. + + The fields are: + + Magic_Number: 4 bytes, little-endian format. Value: 0x184D2A5?, + which means any value from 0x184D2A50 to 0x184D2A5F. All 16 + values are valid to identify a skippable frame. This + specification does not detail any specific tagging methods for + skippable frames. + + Frame_Size: This is the size, in bytes, of the following User_Data + (without including the magic number nor the size field itself). + This field is represented using 4 bytes, little-endian format, + unsigned 32 bits. This means User_Data can't be bigger than + (2^32-1) bytes. + + User_Data: This field can be anything. Data will just be skipped by + the decoder. + +4. Entropy Encoding + + Two types of entropy encoding are used by the Zstandard format: FSE + and Huffman coding. Huffman is used to compress literals, while FSE + is used for all other symbols (Literals_Length_Code, + Match_Length_Code, and offset codes) and to compress Huffman headers. + + + + +Collet & Kucherawy Informational [Page 30] + +RFC 8478 application/zstd October 2018 + + +4.1. FSE + + FSE, short for Finite State Entropy, is an entropy codec based on + [ANS]. FSE encoding/decoding involves a state that is carried over + between symbols, so decoding must be done in the opposite direction + as encoding. Therefore, all FSE bitstreams are read from end to + beginning. Note that the order of the bits in the stream is not + reversed; they are simply read in the reverse order from which they + were written. + + For additional details on FSE, see Finite State Entropy [FSE]. 
+ + FSE decoding involves a decoding table that has a power of 2 size and + contains three elements: Symbol, Num_Bits, and Baseline. The base 2 + logarithm of the table size is its Accuracy_Log. An FSE state value + represents an index in this table. + + To obtain the initial state value, consume Accuracy_Log bits from the + stream as a little-endian value. The next symbol in the stream is + the Symbol indicated in the table for that state. To obtain the next + state value, the decoder should consume Num_Bits bits from the stream + as a little-endian value and add it to Baseline. + +4.1.1. FSE Table Description + + To decode FSE streams, it is necessary to construct the decoding + table. The Zstandard format encodes FSE table descriptions as + described here. + + An FSE distribution table describes the probabilities of all symbols + from 0 to the last present one (included) on a normalized scale of + (1 << Accuracy_Log). Note that there must be two or more symbols + with non-zero probability. + + A bitstream is read forward, in little-endian fashion. It is not + necessary to know its exact size, since the size will be discovered + and reported by the decoding process. The bitstream starts by + reporting on which scale it operates. If low4bits designates the + lowest 4 bits of the first byte, then Accuracy_Log = low4bits + 5. + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 31] + +RFC 8478 application/zstd October 2018 + + + This is followed by each symbol value, from 0 to the last present + one. The number of bits used by each field is variable and depends + on: + + Remaining probabilities + 1: For example, presuming an Accuracy_Log + of 8, and presuming 100 probabilities points have already been + distributed, the decoder may read any value from 0 to + (256 - 100 + 1) == 157, inclusive. Therefore, it must read + log2sup(157) == 8 bits. + + Value decoded: Small values use 1 fewer bit. 
For example, presuming + values from 0 to 157 (inclusive) are possible, 255 - 157 = 98 + values are remaining in an 8-bit field. The first 98 values + (hence from 0 to 97) use only 7 bits, and values from 98 to 157 + use 8 bits. This is achieved through this scheme: + + +------------+---------------+-----------+ + | Value Read | Value Decoded | Bits Used | + +------------+---------------+-----------+ + | 0 - 97 | 0 - 97 | 7 | + +------------+---------------+-----------+ + | 98 - 127 | 98 - 127 | 8 | + +------------+---------------+-----------+ + | 128 - 225 | 0 - 97 | 7 | + +------------+---------------+-----------+ + | 226 - 255 | 128 - 157 | 8 | + +------------+---------------+-----------+ + + Symbol probabilities are read one by one, in order. The probability + is obtained from Value decoded using the formula P = Value - 1. This + means the value 0 becomes the negative probability -1. This is a + special probability that means "less than 1". Its effect on the + distribution table is described below. For the purpose of + calculating total allocated probability points, it counts as 1. + + When a symbol has a probability of zero, it is followed by a 2-bit + repeat flag. This repeat flag tells how many probabilities of zeroes + follow the current one. It provides a number ranging from 0 to 3. + If it is a 3, another 2-bit repeat flag follows, and so on. + + When the last symbol reaches a cumulated total of + (1 << Accuracy_Log), decoding is complete. If the last symbol makes + the cumulated total go above (1 << Accuracy_Log), distribution is + considered corrupted. + + + + + + + +Collet & Kucherawy Informational [Page 32] + +RFC 8478 application/zstd October 2018 + + + Finally, the decoder can tell how many bytes were used in this + process and how many symbols are present. The bitstream consumes a + round number of bytes. Any remaining bit within the last byte is + simply unused. 
+ + The distribution of normalized probabilities is enough to create a + unique decoding table. The table has a size of (1 << Accuracy_Log). + Each cell describes the symbol decoded and instructions to get the + next state. + + Symbols are scanned in their natural order for "less than 1" + probabilities as described above. Symbols with this probability are + being attributed a single cell, starting from the end of the table + and retreating. These symbols define a full state reset, reading + Accuracy_Log bits. + + All remaining symbols are allocated in their natural order. Starting + from symbol 0 and table position 0, each symbol gets allocated as + many cells as its probability. Cell allocation is spread, not + linear; each successor position follows this rule: + + position += (tableSize >> 1) + (tableSize >> 3) + 3; + position &= tableSize - 1; + + A position is skipped if it is already occupied by a "less than 1" + probability symbol. Position does not reset between symbols; it + simply iterates through each position in the table, switching to the + next symbol when enough states have been allocated to the current + one. + + The result is a list of state values. Each state will decode the + current symbol. + + To get the Number_of_Bits and Baseline required for the next state, + it is first necessary to sort all states in their natural order. The + lower states will need 1 more bit than higher ones. The process is + repeated for each symbol. + + For example, presuming a symbol has a probability of 5, it receives + five state values. States are sorted in natural order. The next + power of 2 is 8. The space of probabilities is divided into 8 equal + parts. Presuming the Accuracy_Log is 7, this defines 128 states, and + each share (divided by 8) is 16 in size. In order to reach 8, 8 - 5 + = 3 lowest states will count "double", doubling the number of shares + (32 in width), requiring 1 more bit in the process. 
+ + + + + + +Collet & Kucherawy Informational [Page 33] + +RFC 8478 application/zstd October 2018 + + + Baseline is assigned starting from the higher states using fewer + bits, and proceeding naturally, then resuming at the first state, + each taking its allocated width from Baseline. + + +----------------+-------+-------+--------+------+-------+ + | state order | 0 | 1 | 2 | 3 | 4 | + +----------------+-------+-------+--------+------+-------+ + | width | 32 | 32 | 32 | 16 | 16 | + +----------------+-------+-------+--------+------+-------+ + | Number_of_Bits | 5 | 5 | 5 | 4 | 4 | + +----------------+-------+-------+--------+------+-------+ + | range number | 2 | 4 | 6 | 0 | 1 | + +----------------+-------+-------+--------+------+-------+ + | Baseline | 32 | 64 | 96 | 0 | 16 | + +----------------+-------+-------+--------+------+-------+ + | range | 32-63 | 64-95 | 96-127 | 0-15 | 16-31 | + +----------------+-------+-------+--------+------+-------+ + + The next state is determined from the current state by reading the + required Number_of_Bits and adding the specified Baseline. + + See Appendix A for the results of this process that are applied to + the default distributions. + +4.2. Huffman Coding + + Zstandard Huffman-coded streams are read backwards, similar to the + FSE bitstreams. Therefore, to find the start of the bitstream, it is + necessary to know the offset of the last byte of the Huffman-coded + stream. + + After writing the last bit containing information, the compressor + writes a single 1 bit and then fills the byte with 0-7 0 bits of + padding. The last byte of the compressed bitstream cannot be 0 for + that reason. + + When decompressing, the last byte containing the padding is the first + byte to read. The decompressor needs to skip 0-7 initial 0 bits and + the first 1 bit that occurs. Afterwards, the useful part of the + bitstream begins. 
+ + The bitstream contains Huffman-coded symbols in little-endian order, + with the codes defined by the method below. + + + + + + + + +Collet & Kucherawy Informational [Page 34] + +RFC 8478 application/zstd October 2018 + + +4.2.1. Huffman Tree Description + + Prefix coding represents symbols from an a priori known alphabet by + bit sequences (codewords), one codeword for each symbol, in a manner + such that different symbols may be represented by bit sequences of + different lengths, but a parser can always parse an encoded string + unambiguously symbol by symbol. + + Given an alphabet with known symbol frequencies, the Huffman + algorithm allows the construction of an optimal prefix code using the + fewest bits of any possible prefix codes for that alphabet. + + The prefix code must not exceed a maximum code length. More bits + improve accuracy but yield a larger header size and require more + memory or more complex decoding operations. This specification + limits the maximum code length to 11 bits. + + All literal values from zero (included) to the last present one + (excluded) are represented by Weight with values from 0 to + Max_Number_of_Bits. Transformation from Weight to Number_of_Bits + follows this pseudocode: + + if Weight == 0 + Number_of_Bits = 0 + else + Number_of_Bits = Max_Number_of_Bits + 1 - Weight + + The last symbol's Weight is deduced from previously decoded ones, by + completing to the nearest power of 2. This power of 2 gives + Max_Number_of_Bits the depth of the current tree. 
+ + For example, presume the following Huffman tree must be described: + + +---------------+----------------+ + | Literal Value | Number_of_Bits | + +---------------+----------------+ + | 0 | 1 | + +---------------+----------------+ + | 1 | 2 | + +---------------+----------------+ + | 2 | 3 | + +---------------+----------------+ + | 3 | 0 | + +---------------+----------------+ + | 4 | 4 | + +---------------+----------------+ + | 5 | 4 | + +---------------+----------------+ + + + +Collet & Kucherawy Informational [Page 35] + +RFC 8478 application/zstd October 2018 + + + The tree depth is 4, since its longest element uses 4 bits. (The + longest elements are those with the smallest frequencies.) Value 5 + will not be listed as it can be determined from the values for 0-4, + nor will values above 5 as they are all 0. Values from 0 to 4 will + be listed using Weight instead of Number_of_Bits. The pseudocode to + determine Weight is: + + if Number_of_Bits == 0 + Weight = 0 + else + Weight = Max_Number_of_Bits + 1 - Number_of_Bits + + It gives the following series of weights: + + +---------------+--------+ + | Literal Value | Weight | + +---------------+--------+ + | 0 | 4 | + +---------------+--------+ + | 1 | 3 | + +---------------+--------+ + | 2 | 2 | + +---------------+--------+ + | 3 | 0 | + +---------------+--------+ + | 4 | 1 | + +---------------+--------+ + + The decoder will do the inverse operation: having collected weights + of literals from 0 to 4, it knows the last literal, 5, is present + with a non-zero Weight. The Weight of 5 can be determined by + advancing to the next power of 2. The sum of 2^(Weight-1) (excluding + 0's) is 15. The nearest power of 2 is 16. Therefore, + Max_Number_of_Bits = 4 and Weight[5] = 16 - 15 = 1. + +4.2.1.1. Huffman Tree Header + + This is a single byte value (0-255), which describes how the series + of weights is encoded. + + headerByte < 128: The series of weights is compressed using FSE (see + below). 
The length of the FSE-compressed series is equal to + headerByte (0-127). + + + + + + + + +Collet & Kucherawy Informational [Page 36] + +RFC 8478 application/zstd October 2018 + + + headerByte >= 128: This is a direct representation, where each + Weight is written directly as a 4-bit field (0-15). They are + encoded forward, 2 weights to a byte with the first weight taking + the top 4 bits and the second taking the bottom 4; for example, + the following operations could be used to read the weights: + + Weight[0] = (Byte[0] >> 4) + Weight[1] = (Byte[0] & 0xf), + etc. + + The full representation occupies ceiling(Number_of_Symbols/2) + bytes, meaning it uses only full bytes even if Number_of_Symbols + is odd. Number_of_Symbols = headerByte - 127. Note that maximum + Number_of_Symbols is 255 - 127 = 128. If any literal has a value + over 128, raw header mode is not possible, and it is necessary to + use FSE compression. + +4.2.1.2. FSE Compression of Huffman Weights + + In this case, the series of Huffman weights is compressed using FSE + compression. It is a single bitstream with two interleaved states, + sharing a single distribution table. + + To decode an FSE bitstream, it is necessary to know its compressed + size. Compressed size is provided by headerByte. It's also + necessary to know its maximum possible decompressed size, which is + 255, since literal values span from 0 to 255, and the last symbol's + Weight is not represented. + + An FSE bitstream starts by a header, describing probabilities + distribution. It will create a decoding table. For a list of + Huffman weights, the maximum accuracy log is 6 bits. For more + details, see Section 4.1.1. + + The Huffman header compression uses two states, which share the same + FSE distribution table. The first state (State1) encodes the even- + numbered index symbols, and the second (State2) encodes the odd- + numbered index symbols. 
State1 is initialized first, and then + State2, and they take turns decoding a single symbol and updating + their state. For more details on these FSE operations, see + Section 4.1. + + The number of symbols to be decoded is determined by tracking the + bitStream overflow condition: If updating state after decoding a + symbol would require more bits than remain in the stream, it is + assumed that extra bits are zero. Then, symbols for each of the + final states are decoded and the process is complete. + + + + +Collet & Kucherawy Informational [Page 37] + +RFC 8478 application/zstd October 2018 + + +4.2.1.3. Conversion from Weights to Huffman Prefix Codes + + All present symbols will now have a Weight value. It is possible to + transform weights into Number_of_Bits, using this formula: + + if Weight > 0 + Number_of_Bits = Max_Number_of_Bits + 1 - Weight + else + Number_of_Bits = 0 + + Symbols are sorted by Weight. Within the same Weight, symbols keep + natural sequential order. Symbols with a Weight of zero are removed. + Then, starting from the lowest Weight, prefix codes are distributed + in sequential order. 
+ + For example, assume the following list of weights has been decoded: + + +---------+--------+ + | Literal | Weight | + +---------+--------+ + | 0 | 4 | + +---------+--------+ + | 1 | 3 | + +---------+--------+ + | 2 | 2 | + +---------+--------+ + | 3 | 0 | + +---------+--------+ + | 4 | 1 | + +---------+--------+ + | 5 | 1 | + +---------+--------+ + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 38] + +RFC 8478 application/zstd October 2018 + + + Sorting by weight and then the natural sequential order yields the + following distribution: + + +---------+--------+----------------+--------------+ + | Literal | Weight | Number_Of_Bits | Prefix Codes | + +---------+--------+----------------|--------------+ + | 3 | 0 | 0 | N/A | + +---------+--------+----------------|--------------+ + | 4 | 1 | 4 | 0000 | + +---------+--------+----------------|--------------+ + | 5 | 1 | 4 | 0001 | + +---------+--------+----------------|--------------+ + | 2 | 2 | 3 | 001 | + +---------+--------+----------------|--------------+ + | 1 | 3 | 2 | 01 | + +---------+--------+----------------|--------------+ + | 0 | 4 | 1 | 1 | + +---------+--------+----------------|--------------+ + +4.2.2. Huffman-Coded Streams + + Given a Huffman decoding table, it is possible to decode a Huffman- + coded stream. + + Each bitstream must be read backward, which starts from the end and + goes up to the beginning. Therefore, it is necessary to know the + size of each bitstream. + + It is also necessary to know exactly which bit is the last. This is + detected by a final bit flag: the highest bit of the last byte is a + final-bit-flag. Consequently, a last byte of 0 is not possible. And + the final-bit-flag itself is not part of the useful bitstream. + Hence, the last byte contains between 0 and 7 useful bits. + + Starting from the end, it is possible to read the bitstream in a + little-endian fashion, keeping track of already used bits. 
Since the + bitstream is encoded in reverse order, starting from the end, read + symbols in forward order. + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 39] + +RFC 8478 application/zstd October 2018 + + + For example, if the literal sequence "0145" was encoded using the + above prefix code, it would be encoded (in reverse order) as: + + +---------+----------+ + | Symbol | Encoding | + +---------+----------+ + | 5 | 0000 | + +---------+----------+ + | 4 | 0001 | + +---------+----------+ + | 1 | 01 | + +---------+----------+ + | 0 | 1 | + +---------+----------+ + | Padding | 00001 | + +---------+----------+ + + This results in the following 2-byte bitstream: + + 00010000 00001101 + + Here is an alternative representation with the symbol codes separated + by underscores: + + 0001_0000 00001_1_01 + + Reading the highest Max_Number_of_Bits bits, it's possible to compare + the extracted value to the decoding table, determining the symbol to + decode and number of bits to discard. + + The process continues reading up to the required number of symbols + per stream. If a bitstream is not entirely and exactly consumed, + hence reaching exactly its beginning position with all bits consumed, + the decoding process is considered faulty. + +5. Dictionary Format + + Zstandard is compatible with "raw content" dictionaries, free of any + format restriction, except that they must be at least 8 bytes. These + dictionaries function as if they were just the content part of a + formatted dictionary. + + However, dictionaries created by "zstd --train" in the reference + implementation follow a specific format, described here. + + Dictionaries are not included in the compressed content but rather + are provided out of band. 
That is, the Dictionary_ID identifies + which should be used, but this specification does not describe the + + + +Collet & Kucherawy Informational [Page 40] + +RFC 8478 application/zstd October 2018 + + + mechanism by which the dictionary is obtained prior to use during + compression or decompression. + + A dictionary has a size, defined either by a buffer limit or a file + size. The general format is: + + +--------------+---------------+----------------+---------+ + | Magic_Number | Dictionary_ID | Entropy_Tables | Content | + +--------------+---------------+----------------+---------+ + + Magic_Number: 4 bytes ID, value 0xEC30A437, little-endian format. + + Dictionary_ID: 4 bytes, stored in little-endian format. + Dictionary_ID can be any value, except 0 (which means no + Dictionary_ID). It is used by decoders to check if they use the + correct dictionary. If the frame is going to be distributed in a + private environment, any Dictionary_ID can be used. However, for + public distribution of compressed frames, the following ranges are + reserved and shall not be used: + + low range: <= 32767 + high range: >= (2^31) + + Entropy_Tables: Follow the same format as the tables in compressed + blocks. See the relevant FSE and Huffman sections for how to + decode these tables. They are stored in the following order: + Huffman table for literals, FSE table for offsets, FSE table for + match lengths, and FSE table for literals lengths. These tables + populate the Repeat Stats literals mode and Repeat distribution + mode for sequence decoding. It is finally followed by 3 offset + values, populating repeat offsets (instead of using {1,4,8}), + stored in order, 4-bytes little-endian each, for a total of 12 + bytes. Each repeat offset must have a value less than the + dictionary size. + + Content: The rest of the dictionary is its content. The content + acts as a "past" in front of data to be compressed or + decompressed, so it can be referenced in sequence commands. 
As + long as the amount of data decoded from this frame is less than or + equal to Window_Size, sequence commands may specify offsets longer + than the total length of decoded output so far to reference back + to the dictionary, even parts of the dictionary with offsets + larger than Window_Size. After the total output has surpassed + Window_Size, however, this is no longer allowed, and the + dictionary is no longer accessible. + + + + + + +Collet & Kucherawy Informational [Page 41] + +RFC 8478 application/zstd October 2018 + + +6. IANA Considerations + + IANA has made two registrations, as described below. + +6.1. The 'application/zstd' Media Type + + The 'application/zstd' media type identifies a block of data that is + compressed using zstd compression. The data is a stream of bytes as + described in this document. IANA has added the following to the + "Media Types" registry: + + Type name: application + + Subtype name: zstd + + Required parameters: N/A + + Optional parameters: N/A + + Encoding considerations: binary + + Security considerations: See Section 7 of RFC 8478 + + Interoperability considerations: N/A + + Published specification: RFC 8478 + + Applications that use this media type: anywhere data size is an + issue + + Additional information: + + Magic number(s): 4 bytes, little-endian format. + Value: 0xFD2FB528 + + File extension(s): zst + + Macintosh file type code(s): N/A + + For further information: See [ZSTD] + + Intended usage: common + + Restrictions on usage: N/A + + Author: Murray S. Kucherawy + + Change Controller: IETF + + + +Collet & Kucherawy Informational [Page 42] + +RFC 8478 application/zstd October 2018 + + + Provisional registration: no + +6.2. 
Content Encoding + + IANA has added the following entry to the "HTTP Content Coding + Registry" within the "Hypertext Transfer Protocol (HTTP) Parameters" + registry: + + Name: zstd + + Description: A stream of bytes compressed using the Zstandard + protocol + + Pointer to specification text: RFC 8478 + +6.3. Dictionaries + + Work in progress includes development of dictionaries that will + optimize compression and decompression of particular types of data. + Specification of such dictionaries for public use will necessitate + registration of a code point from the reserved range described in + Section 3.1.1.1.3 and its association with a specific dictionary. + + However, there are at present no such dictionaries published for + public use, so this document makes no immediate request of IANA to + create such a registry. + +7. Security Considerations + + Any data compression method involves the reduction of redundancy in + the data. Zstandard is no exception, and the usual precautions + apply. + + One should never compress a message whose content must remain secret + with a message generated by a third party. Such a compression can be + used to guess the content of the secret message through analysis of + entropy reduction. This was demonstrated in the Compression Ratio + Info-leak Made Easy (CRIME) attack [CRIME], for example. + + A decoder has to demonstrate capabilities to detect and prevent any + kind of data tampering in the compressed frame from triggering system + faults, such as reading or writing beyond allowed memory ranges. + This can be guaranteed by either the implementation language or + careful bound checkings. Of particular note is the encoding of + Number_of_Sequences values that cause the decoder to read into the + block header (and beyond), as well as the indication of a + Frame_Content_Size that is smaller than the actual decompressed data, + in an attempt to trigger a buffer overflow. 
It is highly recommended + + + +Collet & Kucherawy Informational [Page 43] + +RFC 8478 application/zstd October 2018 + + + to fuzz-test (i.e., provide invalid, unexpected, or random input and + verify safe operation of) decoder implementations to test and harden + their capability to detect bad frames and deal with them without any + adverse system side effect. + + An attacker may provide correctly formed compressed frames with + unreasonable memory requirements. A decoder must always control + memory requirements and enforce some (system-specific) limits in + order to protect memory usage from such scenarios. + + Compression can be optimized by training a dictionary on a variety of + related content payloads. This dictionary must then be available at + the decoder for decompression of the payload to be possible. While + this document does not specify how to acquire a dictionary for a + given compressed payload, it is worth noting that third-party + dictionaries may interact unexpectedly with a decoder, leading to + possible memory or other resource exhaustion attacks. We expect such + topics to be discussed in further detail in the Security + Considerations section of a forthcoming RFC for dictionary + acquisition and transmission, but highlight this issue now out of an + abundance of caution. + + As discussed in Section 3.1.2, it is possible to store arbitrary user + metadata in skippable frames. While such frames are ignored during + decompression of the data, they can be used as a watermark to track + the path of the compressed payload. + +8. Implementation Status + + Source code for a C language implementation of a Zstandard-compliant + library is available at [ZSTD-GITHUB]. This implementation is + considered to be the reference implementation and is production + ready; it implements the full range of the specification. It is + routinely tested against security hazards and widely deployed within + Facebook infrastructure. 
+ + The reference version is optimized for speed and is highly portable. + It has been proven to run safely on multiple architectures (e.g., + x86, x64, ARM, MIPS, PowerPC, IA64) featuring 32- or 64-bit + addressing schemes, a little- or big-endian storage scheme, a number + of different operating systems (e.g., UNIX (including Linux, BSD, + OS-X, and Solaris) and Windows), and a number of compilers (e.g., + gcc, clang, visual, and icc). + + + + + + + + +Collet & Kucherawy Informational [Page 44] + +RFC 8478 application/zstd October 2018 + + +9. References + +9.1. Normative References + + [ZSTD] "Zstandard", <http://www.zstd.net>. + +9.2. Informative References + + [ANS] Duda, J., "Asymmetric numeral systems: entropy coding + combining speed of Huffman coding with compression rate of + arithmetic coding", January 2014, + <https://arxiv.org/pdf/1311.2540>. + + [CRIME] "CRIME", June 2018, <https://en.wikipedia.org/w/ + index.php?title=CRIME&oldid=844538656>. + + [FSE] "FiniteStateEntropy", commit 6efa78a, June 2018, + <https://github.com/Cyan4973/FiniteStateEntropy/>. + + [LZ4] "LZ4 Frame Format Description", commit d03224b, January + 2018, <https://github.com/lz4/lz4/blob/master/doc/ + lz4_Frame_format.md>. + + [RFC1952] Deutsch, P., "GZIP file format specification version 4.3", + RFC 1952, DOI 10.17487/RFC1952, May 1996, + <https://www.rfc-editor.org/info/rfc1952>. + + [XXHASH] "XXHASH Algorithm", <http://www.xxhash.org>. + + [ZSTD-GITHUB] + "zstd", commit 8514bd8, August 2018, + <https://github.com/facebook/zstd>. + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 45] + +RFC 8478 application/zstd October 2018 + + +Appendix A. Decoding Tables for Predefined Codes + + This appendix contains FSE decoding tables for the predefined literal + length, match length, and offset codes. The tables have been + constructed using the algorithm as given above in Section 4.1.1. The + tables here can be used as examples to crosscheck that an + implementation has built its decoding tables correctly. + +A.1.
Literal Length Code Table + + +-------+--------+----------------+------+ + | State | Symbol | Number_Of_Bits | Base | + +-------+--------+----------------+------+ + | 0 | 0 | 0 | 0 | + +-------+--------+----------------+------+ + | 0 | 0 | 4 | 0 | + +-------+--------+----------------+------+ + | 1 | 0 | 4 | 16 | + +-------+--------+----------------+------+ + | 2 | 1 | 5 | 32 | + +-------+--------+----------------+------+ + | 3 | 3 | 5 | 0 | + +-------+--------+----------------+------+ + | 4 | 4 | 5 | 0 | + +-------+--------+----------------+------+ + | 5 | 6 | 5 | 0 | + +-------+--------+----------------+------+ + | 6 | 7 | 5 | 0 | + +-------+--------+----------------+------+ + | 7 | 9 | 5 | 0 | + +-------+--------+----------------+------+ + | 8 | 10 | 5 | 0 | + +-------+--------+----------------+------+ + | 9 | 12 | 5 | 0 | + +-------+--------+----------------+------+ + | 10 | 14 | 6 | 0 | + +-------+--------+----------------+------+ + | 11 | 16 | 5 | 0 | + +-------+--------+----------------+------+ + | 12 | 18 | 5 | 0 | + +-------+--------+----------------+------+ + | 13 | 19 | 5 | 0 | + +-------+--------+----------------+------+ + | 14 | 21 | 5 | 0 | + +-------+--------+----------------+------+ + | 15 | 22 | 5 | 0 | + +-------+--------+----------------+------+ + | 16 | 24 | 5 | 0 | + + + +Collet & Kucherawy Informational [Page 46] + +RFC 8478 application/zstd October 2018 + + + +-------+--------+----------------+------+ + | 17 | 25 | 5 | 32 | + +-------+--------+----------------+------+ + | 18 | 26 | 5 | 0 | + +-------+--------+----------------+------+ + | 19 | 27 | 6 | 0 | + +-------+--------+----------------+------+ + | 20 | 29 | 6 | 0 | + +-------+--------+----------------+------+ + | 21 | 31 | 6 | 0 | + +-------+--------+----------------+------+ + | 22 | 0 | 4 | 32 | + +-------+--------+----------------+------+ + | 23 | 1 | 4 | 0 | + +-------+--------+----------------+------+ + | 24 | 2 | 5 | 0 | + +-------+--------+----------------+------+ + | 25 | 4 | 5 | 
32 | + +-------+--------+----------------+------+ + | 26 | 5 | 5 | 0 | + +-------+--------+----------------+------+ + | 27 | 7 | 5 | 32 | + +-------+--------+----------------+------+ + | 28 | 8 | 5 | 0 | + +-------+--------+----------------+------+ + | 29 | 10 | 5 | 32 | + +-------+--------+----------------+------+ + | 30 | 11 | 5 | 0 | + +-------+--------+----------------+------+ + | 31 | 13 | 6 | 0 | + +-------+--------+----------------+------+ + | 32 | 16 | 5 | 32 | + +-------+--------+----------------+------+ + | 33 | 17 | 5 | 0 | + +-------+--------+----------------+------+ + | 34 | 19 | 5 | 32 | + +-------+--------+----------------+------+ + | 35 | 20 | 5 | 0 | + +-------+--------+----------------+------+ + | 36 | 22 | 5 | 32 | + +-------+--------+----------------+------+ + | 37 | 23 | 5 | 0 | + +-------+--------+----------------+------+ + | 38 | 25 | 4 | 0 | + +-------+--------+----------------+------+ + | 39 | 25 | 4 | 16 | + +-------+--------+----------------+------+ + | 40 | 26 | 5 | 32 | + + + +Collet & Kucherawy Informational [Page 47] + +RFC 8478 application/zstd October 2018 + + + +-------+--------+----------------+------+ + | 41 | 28 | 6 | 0 | + +-------+--------+----------------+------+ + | 42 | 30 | 6 | 0 | + +-------+--------+----------------+------+ + | 43 | 0 | 4 | 48 | + +-------+--------+----------------+------+ + | 44 | 1 | 4 | 16 | + +-------+--------+----------------+------+ + | 45 | 2 | 5 | 32 | + +-------+--------+----------------+------+ + | 46 | 3 | 5 | 32 | + +-------+--------+----------------+------+ + | 47 | 5 | 5 | 32 | + +-------+--------+----------------+------+ + | 48 | 6 | 5 | 32 | + +-------+--------+----------------+------+ + | 49 | 8 | 5 | 32 | + +-------+--------+----------------+------+ + | 50 | 9 | 5 | 32 | + +-------+--------+----------------+------+ + | 51 | 11 | 5 | 32 | + +-------+--------+----------------+------+ + | 52 | 12 | 5 | 32 | + +-------+--------+----------------+------+ + | 53 | 15 | 6 | 0 | + 
+-------+--------+----------------+------+ + | 54 | 17 | 5 | 32 | + +-------+--------+----------------+------+ + | 55 | 18 | 5 | 32 | + +-------+--------+----------------+------+ + | 56 | 20 | 5 | 32 | + +-------+--------+----------------+------+ + | 57 | 21 | 5 | 32 | + +-------+--------+----------------+------+ + | 58 | 23 | 5 | 32 | + +-------+--------+----------------+------+ + | 59 | 24 | 5 | 32 | + +-------+--------+----------------+------+ + | 60 | 35 | 6 | 0 | + +-------+--------+----------------+------+ + | 61 | 34 | 6 | 0 | + +-------+--------+----------------+------+ + | 62 | 33 | 6 | 0 | + +-------+--------+----------------+------+ + | 63 | 32 | 6 | 0 | + +-------+--------+----------------+------+ + + + + +Collet & Kucherawy Informational [Page 48] + +RFC 8478 application/zstd October 2018 + + +A.2. Match Length Code Table + + +-------+--------+----------------+------+ + | State | Symbol | Number_Of_Bits | Base | + +-------+--------+----------------+------+ + | 0 | 0 | 0 | 0 | + +-------+--------+----------------+------+ + | 0 | 0 | 6 | 0 | + +-------+--------+----------------+------+ + | 1 | 1 | 4 | 0 | + +-------+--------+----------------+------+ + | 2 | 2 | 5 | 32 | + +-------+--------+----------------+------+ + | 3 | 3 | 5 | 0 | + +-------+--------+----------------+------+ + | 4 | 5 | 5 | 0 | + +-------+--------+----------------+------+ + | 5 | 6 | 5 | 0 | + +-------+--------+----------------+------+ + | 6 | 8 | 5 | 0 | + +-------+--------+----------------+------+ + | 7 | 10 | 6 | 0 | + +-------+--------+----------------+------+ + | 8 | 13 | 6 | 0 | + +-------+--------+----------------+------+ + | 9 | 16 | 6 | 0 | + +-------+--------+----------------+------+ + | 10 | 19 | 6 | 0 | + +-------+--------+----------------+------+ + | 11 | 22 | 6 | 0 | + +-------+--------+----------------+------+ + | 12 | 25 | 6 | 0 | + +-------+--------+----------------+------+ + | 13 | 28 | 6 | 0 | + +-------+--------+----------------+------+ + | 14 | 31 | 6 | 0 | + 
+-------+--------+----------------+------+ + | 15 | 33 | 6 | 0 | + +-------+--------+----------------+------+ + | 16 | 35 | 6 | 0 | + +-------+--------+----------------+------+ + | 17 | 37 | 6 | 0 | + +-------+--------+----------------+------+ + | 18 | 39 | 6 | 0 | + +-------+--------+----------------+------+ + | 19 | 41 | 6 | 0 | + +-------+--------+----------------+------+ + | 20 | 43 | 6 | 0 | + + + +Collet & Kucherawy Informational [Page 49] + +RFC 8478 application/zstd October 2018 + + + +-------+--------+----------------+------+ + | 21 | 45 | 6 | 0 | + +-------+--------+----------------+------+ + | 22 | 1 | 4 | 16 | + +-------+--------+----------------+------+ + | 23 | 2 | 4 | 0 | + +-------+--------+----------------+------+ + | 24 | 3 | 5 | 32 | + +-------+--------+----------------+------+ + | 25 | 4 | 5 | 0 | + +-------+--------+----------------+------+ + | 26 | 6 | 5 | 32 | + +-------+--------+----------------+------+ + | 27 | 7 | 5 | 0 | + +-------+--------+----------------+------+ + | 28 | 9 | 6 | 0 | + +-------+--------+----------------+------+ + | 29 | 12 | 6 | 0 | + +-------+--------+----------------+------+ + | 30 | 15 | 6 | 0 | + +-------+--------+----------------+------+ + | 31 | 18 | 6 | 0 | + +-------+--------+----------------+------+ + | 32 | 21 | 6 | 0 | + +-------+--------+----------------+------+ + | 33 | 24 | 6 | 0 | + +-------+--------+----------------+------+ + | 34 | 27 | 6 | 0 | + +-------+--------+----------------+------+ + | 35 | 30 | 6 | 0 | + +-------+--------+----------------+------+ + | 36 | 32 | 6 | 0 | + +-------+--------+----------------+------+ + | 37 | 34 | 6 | 0 | + +-------+--------+----------------+------+ + | 38 | 36 | 6 | 0 | + +-------+--------+----------------+------+ + | 39 | 38 | 6 | 0 | + +-------+--------+----------------+------+ + | 40 | 40 | 6 | 0 | + +-------+--------+----------------+------+ + | 41 | 42 | 6 | 0 | + +-------+--------+----------------+------+ + | 42 | 44 | 6 | 0 | + 
+-------+--------+----------------+------+ + | 43 | 1 | 4 | 32 | + +-------+--------+----------------+------+ + | 44 | 1 | 4 | 48 | + + + +Collet & Kucherawy Informational [Page 50] + +RFC 8478 application/zstd October 2018 + + + +-------+--------+----------------+------+ + | 45 | 2 | 4 | 16 | + +-------+--------+----------------+------+ + | 46 | 4 | 5 | 32 | + +-------+--------+----------------+------+ + | 47 | 5 | 5 | 32 | + +-------+--------+----------------+------+ + | 48 | 7 | 5 | 32 | + +-------+--------+----------------+------+ + | 49 | 8 | 5 | 32 | + +-------+--------+----------------+------+ + | 50 | 11 | 6 | 0 | + +-------+--------+----------------+------+ + | 51 | 14 | 6 | 0 | + +-------+--------+----------------+------+ + | 52 | 17 | 6 | 0 | + +-------+--------+----------------+------+ + | 53 | 20 | 6 | 0 | + +-------+--------+----------------+------+ + | 54 | 23 | 6 | 0 | + +-------+--------+----------------+------+ + | 55 | 26 | 6 | 0 | + +-------+--------+----------------+------+ + | 56 | 29 | 6 | 0 | + +-------+--------+----------------+------+ + | 57 | 52 | 6 | 0 | + +-------+--------+----------------+------+ + | 58 | 51 | 6 | 0 | + +-------+--------+----------------+------+ + | 59 | 50 | 6 | 0 | + +-------+--------+----------------+------+ + | 60 | 49 | 6 | 0 | + +-------+--------+----------------+------+ + | 61 | 48 | 6 | 0 | + +-------+--------+----------------+------+ + | 62 | 47 | 6 | 0 | + +-------+--------+----------------+------+ + | 63 | 46 | 6 | 0 | + +-------+--------+----------------+------+ + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 51] + +RFC 8478 application/zstd October 2018 + + +A.3. 
Offset Code Table + + +-------+--------+----------------+------+ + | State | Symbol | Number_Of_Bits | Base | + +-------+--------+----------------+------+ + | 0 | 0 | 0 | 0 | + +-------+--------+----------------+------+ + | 0 | 0 | 5 | 0 | + +-------+--------+----------------+------+ + | 1 | 6 | 4 | 0 | + +-------+--------+----------------+------+ + | 2 | 9 | 5 | 0 | + +-------+--------+----------------+------+ + | 3 | 15 | 5 | 0 | + +-------+--------+----------------+------+ + | 4 | 21 | 5 | 0 | + +-------+--------+----------------+------+ + | 5 | 3 | 5 | 0 | + +-------+--------+----------------+------+ + | 6 | 7 | 4 | 0 | + +-------+--------+----------------+------+ + | 7 | 12 | 5 | 0 | + +-------+--------+----------------+------+ + | 8 | 18 | 5 | 0 | + +-------+--------+----------------+------+ + | 9 | 23 | 5 | 0 | + +-------+--------+----------------+------+ + | 10 | 5 | 5 | 0 | + +-------+--------+----------------+------+ + | 11 | 8 | 4 | 0 | + +-------+--------+----------------+------+ + | 12 | 14 | 5 | 0 | + +-------+--------+----------------+------+ + | 13 | 20 | 5 | 0 | + +-------+--------+----------------+------+ + | 14 | 2 | 5 | 0 | + +-------+--------+----------------+------+ + | 15 | 7 | 4 | 16 | + +-------+--------+----------------+------+ + | 16 | 11 | 5 | 0 | + +-------+--------+----------------+------+ + | 17 | 17 | 5 | 0 | + +-------+--------+----------------+------+ + | 18 | 22 | 5 | 0 | + +-------+--------+----------------+------+ + | 19 | 4 | 5 | 0 | + +-------+--------+----------------+------+ + | 20 | 8 | 4 | 16 | + + + +Collet & Kucherawy Informational [Page 52] + +RFC 8478 application/zstd October 2018 + + + +-------+--------+----------------+------+ + | 21 | 13 | 5 | 0 | + +-------+--------+----------------+------+ + | 22 | 19 | 5 | 0 | + +-------+--------+----------------+------+ + | 23 | 1 | 5 | 0 | + +-------+--------+----------------+------+ + | 24 | 6 | 4 | 16 | + +-------+--------+----------------+------+ + | 25 | 10 | 5 | 0 | + 
+-------+--------+----------------+------+ + | 26 | 16 | 5 | 0 | + +-------+--------+----------------+------+ + | 27 | 28 | 5 | 0 | + +-------+--------+----------------+------+ + | 28 | 27 | 5 | 0 | + +-------+--------+----------------+------+ + | 29 | 26 | 5 | 0 | + +-------+--------+----------------+------+ + | 30 | 25 | 5 | 0 | + +-------+--------+----------------+------+ + | 31 | 24 | 5 | 0 | + +-------+--------+----------------+------+ + +Acknowledgments + + zstd was developed by Yann Collet. + + Bobo Bose-Kolanu, Felix Handte, Kyle Nekritz, Nick Terrell, and David + Schleimer provided helpful feedback during the development of this + document. + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 53] + +RFC 8478 application/zstd October 2018 + + +Authors' Addresses + + Yann Collet + Facebook + 1 Hacker Way + Menlo Park, CA 94025 + United States of America + + Email: cyan@fb.com + + + Murray S. Kucherawy (editor) + Facebook + 1 Hacker Way + Menlo Park, CA 94025 + United States of America + + Email: msk@fb.com + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Collet & Kucherawy Informational [Page 54] + diff --git a/lib/std/compress/testdata/rfc8478.txt.zst.19 b/lib/std/compress/testdata/rfc8478.txt.zst.19 new file mode 100644 index 000000000000..e0cf325af238 Binary files /dev/null and b/lib/std/compress/testdata/rfc8478.txt.zst.19 differ diff --git a/lib/std/compress/testdata/rfc8478.txt.zst.3 b/lib/std/compress/testdata/rfc8478.txt.zst.3 new file mode 100644 index 000000000000..781601a8a2c9 Binary files /dev/null and b/lib/std/compress/testdata/rfc8478.txt.zst.3 differ diff --git a/lib/std/compress/zstandard.zig b/lib/std/compress/zstandard.zig new file mode 100644 index 000000000000..f59de87e6ba4 --- /dev/null +++ b/lib/std/compress/zstandard.zig @@ -0,0 +1,286 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const RingBuffer = std.RingBuffer; + +const types = @import("zstandard/types.zig"); 
+pub const frame = types.frame;
+pub const compressed_block = types.compressed_block;
+
+pub const decompress = @import("zstandard/decompress.zig");
+
+/// Compile-time options for `DecompressStream`.
+pub const DecompressStreamOptions = struct {
+    /// When `true`, a frame's content checksum (if the frame has one) is
+    /// compared against the checksum of the decompressed data and
+    /// `error.ChecksumFailure` is returned on mismatch.
+    verify_checksum: bool = true,
+    /// Maximum window size handed to `decompress.FrameContext.init()`; also
+    /// the size of the per-frame literals and sequence scratch buffers.
+    window_size_max: usize = 1 << 23, // 8MiB default maximum window size
+};
+
+/// Returns a streaming Zstandard decompressor type that pulls compressed bytes
+/// from a `ReaderType` and exposes the decompressed bytes through `read()` /
+/// `reader()`. Per-frame buffers are allocated when a frame header is decoded
+/// and released when the frame has been fully read (or by `deinit()`).
+pub fn DecompressStream(
+    comptime ReaderType: type,
+    comptime options: DecompressStreamOptions,
+) type {
+    return struct {
+        const Self = @This();
+
+        allocator: Allocator,
+        source: std.io.CountingReader(ReaderType),
+        state: enum { NewFrame, InFrame, LastBlock },
+        decode_state: decompress.block.DecodeState,
+        frame_context: decompress.FrameContext,
+        buffer: RingBuffer,
+        literal_fse_buffer: []types.compressed_block.Table.Fse,
+        match_fse_buffer: []types.compressed_block.Table.Fse,
+        offset_fse_buffer: []types.compressed_block.Table.Fse,
+        literals_buffer: []u8,
+        sequence_buffer: []u8,
+        checksum: if (options.verify_checksum) ?u32 else void,
+        current_frame_decompressed_size: usize,
+
+        pub const Error = ReaderType.Error || error{
+            ChecksumFailure,
+            DictionaryIdFlagUnsupported,
+            MalformedBlock,
+            MalformedFrame,
+            OutOfMemory,
+        };
+
+        pub const Reader = std.io.Reader(*Self, Error, read);
+
+        /// Create a stream reading from `source`. Nothing is allocated until
+        /// the first frame header is decoded by `read()`; every per-frame
+        /// field stays `undefined` while `state == .NewFrame`.
+        pub fn init(allocator: Allocator, source: ReaderType) Self {
+            return Self{
+                .allocator = allocator,
+                .source = std.io.countingReader(source),
+                .state = .NewFrame,
+                .decode_state = undefined,
+                .frame_context = undefined,
+                .buffer = undefined,
+                .literal_fse_buffer = undefined,
+                .match_fse_buffer = undefined,
+                .offset_fse_buffer = undefined,
+                .literals_buffer = undefined,
+                .sequence_buffer = undefined,
+                .checksum = undefined,
+                .current_frame_decompressed_size = undefined,
+            };
+        }
+
+        /// Decode the next frame header. A skippable frame is skipped and the
+        /// state stays `.NewFrame`; for a Zstandard frame all per-frame
+        /// buffers are allocated and the state becomes `.InFrame`. On error
+        /// nothing allocated here is left behind.
+        fn frameInit(self: *Self) !void {
+            const source_reader = self.source.reader();
+            switch (try decompress.decodeFrameHeader(source_reader)) {
+                .skippable => |header| {
+                    try source_reader.skipBytes(header.frame_size, .{});
+                    self.state = .NewFrame;
+                },
+                .zstandard => |header| {
+                    const frame_context = try decompress.FrameContext.init(
+                        header,
+                        options.window_size_max,
+                        options.verify_checksum,
+                    );
+
+                    const literal_fse_buffer = try self.allocator.alloc(
+                        types.compressed_block.Table.Fse,
+                        types.compressed_block.table_size_max.literal,
+                    );
+                    errdefer self.allocator.free(literal_fse_buffer);
+
+                    const match_fse_buffer = try self.allocator.alloc(
+                        types.compressed_block.Table.Fse,
+                        types.compressed_block.table_size_max.match,
+                    );
+                    errdefer self.allocator.free(match_fse_buffer);
+
+                    const offset_fse_buffer = try self.allocator.alloc(
+                        types.compressed_block.Table.Fse,
+                        types.compressed_block.table_size_max.offset,
+                    );
+                    errdefer self.allocator.free(offset_fse_buffer);
+
+                    const decode_state = decompress.block.DecodeState.init(
+                        literal_fse_buffer,
+                        match_fse_buffer,
+                        offset_fse_buffer,
+                    );
+
+                    // The ring buffer must be released too if one of the
+                    // allocations below fails.
+                    var buffer = try RingBuffer.init(self.allocator, frame_context.window_size);
+                    errdefer buffer.deinit(self.allocator);
+
+                    const literals_data = try self.allocator.alloc(u8, options.window_size_max);
+                    errdefer self.allocator.free(literals_data);
+
+                    const sequence_data = try self.allocator.alloc(u8, options.window_size_max);
+                    errdefer self.allocator.free(sequence_data);
+
+                    self.literal_fse_buffer = literal_fse_buffer;
+                    self.match_fse_buffer = match_fse_buffer;
+                    self.offset_fse_buffer = offset_fse_buffer;
+                    self.literals_buffer = literals_data;
+                    self.sequence_buffer = sequence_data;
+
+                    self.buffer = buffer;
+
+                    self.decode_state = decode_state;
+                    self.frame_context = frame_context;
+
+                    self.checksum = if (options.verify_checksum) null else {};
+                    self.current_frame_decompressed_size = 0;
+
+                    self.state = .InFrame;
+                },
+            }
+        }
+
+        /// Release every buffer allocated by `frameInit()` for the current
+        /// frame. Must only be called while a frame is active.
+        fn frameDeinit(self: *Self) void {
+            self.allocator.free(self.literal_fse_buffer);
+            self.allocator.free(self.match_fse_buffer);
+            self.allocator.free(self.offset_fse_buffer);
+            self.allocator.free(self.literals_buffer);
+            self.allocator.free(self.sequence_buffer);
+            self.buffer.deinit(self.allocator);
+        }
+
+        /// Free the buffers of the frame in progress, if any. Between frames
+        /// (`state == .NewFrame`) nothing is allocated and this is a no-op.
+        pub fn deinit(self: *Self) void {
+            if (self.state == .NewFrame) return;
+            self.frameDeinit();
+        }
+
+        pub fn reader(self: *Self) Reader {
+            return .{ .context = self };
+        }
+
+        /// Read decompressed bytes into `buffer` and return how many were
+        /// written. Returns 0 if `buffer` is empty or if the source ends
+        /// exactly at a frame boundary; a source that ends part-way through a
+        /// frame header yields `error.MalformedFrame`.
+        pub fn read(self: *Self, buffer: []u8) Error!usize {
+            if (buffer.len == 0) return 0;
+
+            var size: usize = 0;
+            while (size == 0) {
+                while (self.state == .NewFrame) {
+                    const initial_count = self.source.bytes_read;
+                    self.frameInit() catch |err| switch (err) {
+                        error.DictionaryIdFlagUnsupported => return error.DictionaryIdFlagUnsupported,
+                        // End of stream is only clean if no byte of a new
+                        // frame header was consumed.
+                        error.EndOfStream => return if (self.source.bytes_read == initial_count)
+                            0
+                        else
+                            error.MalformedFrame,
+                        error.OutOfMemory => return error.OutOfMemory,
+                        else => return error.MalformedFrame,
+                    };
+                }
+                size = try self.readInner(buffer);
+            }
+            return size;
+        }
+
+        /// Decode blocks of the current frame until the ring buffer holds
+        /// data (or the last block has been decoded), then drain up to
+        /// `buffer.len` bytes into `buffer`. Frees the frame's buffers and
+        /// returns to `.NewFrame` once the last block is fully drained.
+        fn readInner(self: *Self, buffer: []u8) Error!usize {
+            std.debug.assert(self.state != .NewFrame);
+
+            const source_reader = self.source.reader();
+            while (self.buffer.isEmpty() and self.state != .LastBlock) {
+                const header_bytes = source_reader.readBytesNoEof(3) catch
+                    return error.MalformedFrame;
+                const block_header = decompress.block.decodeBlockHeader(&header_bytes);
+
+                decompress.block.decodeBlockReader(
+                    &self.buffer,
+                    source_reader,
+                    block_header,
+                    &self.decode_state,
+                    self.frame_context.block_size_max,
+                    self.literals_buffer,
+                    self.sequence_buffer,
+                ) catch
+                    return error.MalformedBlock;
+
+                // The ring buffer was empty before this block was decoded, so
+                // its length is this block's decompressed size.
+                const size = self.buffer.len();
+                self.current_frame_decompressed_size += size;
+
+                // Reject a frame as soon as it produces more data than its
+                // header declared, rather than one block later.
+                if (self.frame_context.content_size) |content_size| {
+                    if (self.current_frame_decompressed_size > content_size) return error.MalformedFrame;
+                }
+
+                if (self.frame_context.hasher_opt) |*hasher| {
+                    if (size > 0) {
+                        const written_slice = self.buffer.sliceLast(size);
+                        hasher.update(written_slice.first);
+                        hasher.update(written_slice.second);
+                    }
+                }
+                if (block_header.last_block) {
+                    self.state = .LastBlock;
+                    if (self.frame_context.has_checksum) {
+                        const checksum = source_reader.readIntLittle(u32) catch
+                            return error.MalformedFrame;
+                        if (comptime options.verify_checksum) {
+                            if (self.frame_context.hasher_opt) |*hasher| {
+                                if (checksum != decompress.computeChecksum(hasher))
+                                    return error.ChecksumFailure;
+                            }
+                        }
+                    }
+                    if (self.frame_context.content_size) |content_size| {
+                        if (content_size != self.current_frame_decompressed_size) {
+                            return error.MalformedFrame;
+                        }
+                    }
+                }
+            }
+
+            const size = @min(self.buffer.len(), buffer.len);
+            for (0..size) |i| {
+                buffer[i] = self.buffer.read().?;
+            }
+            if (self.state == .LastBlock and self.buffer.len() == 0) {
+                self.state = .NewFrame;
+                self.frameDeinit();
+            }
+            return size;
+        }
+    };
+}
+
+/// Create a `DecompressStream` over `reader` with explicit `options`.
+pub fn decompressStreamOptions(
+    allocator: Allocator,
+    reader: anytype,
+    comptime options: DecompressStreamOptions,
+) DecompressStream(@TypeOf(reader), options) {
+    return DecompressStream(@TypeOf(reader), options).init(allocator, reader);
+}
+
+/// Create a `DecompressStream` over `reader` with the default options.
+pub fn decompressStream(
+    allocator: Allocator,
+    reader: anytype,
+) DecompressStream(@TypeOf(reader), .{}) {
+    return DecompressStream(@TypeOf(reader), .{}).init(allocator, reader);
+}
+
+/// Decompress all of `data` through a default `DecompressStream`; the caller
+/// owns the returned slice (allocated with `std.testing.allocator`).
+fn testDecompress(data: []const u8) ![]u8 {
+    var in_stream = std.io.fixedBufferStream(data);
+    var zstd_stream = decompressStream(std.testing.allocator, in_stream.reader());
+    defer zstd_stream.deinit();
+    return zstd_stream.reader().readAllAlloc(std.testing.allocator, std.math.maxInt(usize));
+}
+
+fn testReader(data: []const u8, comptime expected: []const u8) !void {
+    const buf = try testDecompress(data);
+    defer std.testing.allocator.free(buf);
+    try std.testing.expectEqualSlices(u8, expected, buf);
+}
+
+test "zstandard decompressStreamOptions" {
+    // Instantiates `decompressStreamOptions` so its signature is analysed.
+    var in_stream = std.io.fixedBufferStream(@embedFile("testdata/rfc8478.txt.zst.3"));
+    var zstd_stream = decompressStreamOptions(std.testing.allocator, in_stream.reader(), .{});
+    defer zstd_stream.deinit();
+    const buf = try zstd_stream.reader().readAllAlloc(std.testing.allocator, std.math.maxInt(usize));
+    defer std.testing.allocator.free(buf);
+    try std.testing.expectEqualSlices(u8, @embedFile("testdata/rfc8478.txt"), buf);
+}
+
+test "zstandard decompression" {
+    const uncompressed = @embedFile("testdata/rfc8478.txt");
+    const compressed3 =
@embedFile("testdata/rfc8478.txt.zst.3");
+    const compressed19 = @embedFile("testdata/rfc8478.txt.zst.19");
+
+    var buffer = try std.testing.allocator.alloc(u8, uncompressed.len);
+    defer std.testing.allocator.free(buffer);
+
+    const res3 = try decompress.decode(buffer, compressed3, true);
+    try std.testing.expectEqual(uncompressed.len, res3);
+    try std.testing.expectEqualSlices(u8, uncompressed, buffer);
+
+    const res19 = try decompress.decode(buffer, compressed19, true);
+    try std.testing.expectEqual(uncompressed.len, res19);
+    try std.testing.expectEqualSlices(u8, uncompressed, buffer);
+
+    try testReader(compressed3, uncompressed);
+    try testReader(compressed19, uncompressed);
+}
diff --git a/lib/std/compress/zstandard/decode/block.zig b/lib/std/compress/zstandard/decode/block.zig
new file mode 100644
index 000000000000..4b7353f63c62
--- /dev/null
+++ b/lib/std/compress/zstandard/decode/block.zig
@@ -0,0 +1,1149 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const RingBuffer = std.RingBuffer;
+
+const types = @import("../types.zig");
+const frame = types.frame;
+const Table = types.compressed_block.Table;
+const LiteralsSection = types.compressed_block.LiteralsSection;
+const SequencesSection = types.compressed_block.SequencesSection;
+
+const huffman = @import("huffman.zig");
+const readers = @import("../readers.zig");
+
+const decodeFseTable = @import("fse.zig").decodeFseTable;
+
+const readInt = std.mem.readIntLittle;
+
+pub const Error = error{
+    BlockSizeOverMaximum,
+    MalformedBlockSize,
+    ReservedBlock,
+    MalformedRleBlock,
+    MalformedCompressedBlock,
+};
+
+/// State carried while decoding the blocks of one frame: the three repeat
+/// offsets, the FSE state/table for each sequence field, the literals stream
+/// being consumed and running byte counts.
+pub const DecodeState = struct {
+    repeat_offsets: [3]u32,
+
+    offset: StateData(8),
+    match: StateData(9),
+    literal: StateData(9),
+
+    offset_fse_buffer: []Table.Fse,
+    match_fse_buffer: []Table.Fse,
+    literal_fse_buffer: []Table.Fse,
+
+    fse_tables_undefined: bool,
+
+    literal_stream_reader: readers.ReverseBitReader,
+    literal_stream_index: usize,
+    literal_streams: LiteralsSection.Streams,
+    literal_header: LiteralsSection.Header,
+    huffman_tree: ?LiteralsSection.HuffmanTree,
+
+    literal_written_count: usize,
+    written_count: usize = 0,
+
+    /// FSE decoding state for one sequence field; `State` is an unsigned
+    /// integer `max_accuracy_log` bits wide.
+    fn StateData(comptime max_accuracy_log: comptime_int) type {
+        return struct {
+            state: State,
+            table: Table,
+            accuracy_log: u8,
+
+            const State = std.meta.Int(.unsigned, max_accuracy_log);
+        };
+    }
+
+    /// Create a decode state that stores decoded FSE tables in the given
+    /// buffers. The buffers are borrowed, not owned. Repeat offsets start at
+    /// the `start_repeated_offset_*` constants and no FSE table or Huffman
+    /// tree is defined yet.
+    pub fn init(
+        literal_fse_buffer: []Table.Fse,
+        match_fse_buffer: []Table.Fse,
+        offset_fse_buffer: []Table.Fse,
+    ) DecodeState {
+        return DecodeState{
+            .repeat_offsets = .{
+                types.compressed_block.start_repeated_offset_1,
+                types.compressed_block.start_repeated_offset_2,
+                types.compressed_block.start_repeated_offset_3,
+            },
+
+            .offset = undefined,
+            .match = undefined,
+            .literal = undefined,
+
+            .literal_fse_buffer = literal_fse_buffer,
+            .match_fse_buffer = match_fse_buffer,
+            .offset_fse_buffer = offset_fse_buffer,
+
+            .fse_tables_undefined = true,
+
+            .literal_written_count = 0,
+            .literal_header = undefined,
+            .literal_streams = undefined,
+            .literal_stream_reader = undefined,
+            .literal_stream_index = undefined,
+            .huffman_tree = null,
+
+            .written_count = 0,
+        };
+    }
+
+    /// Prepare the decoder to decode a compressed block. Loads the literals
+    /// stream and Huffman tree from `literals` and reads the FSE tables from
+    /// `source`.
+    ///
+    /// Errors returned:
+    ///   - `error.BitStreamHasNoStartBit` if the (reversed) literal bitstream's
+    ///     first byte does not have any bits set
+    ///   - `error.TreelessLiteralsFirst` if `literals` is a treeless literals
+    ///     section and the decode state does not have a Huffman tree from a
+    ///     previous block
+    ///   - `error.RepeatModeFirst` on the first call if one of the sequence FSE
+    ///     tables is set to repeat mode
+    ///   - `error.MalformedAccuracyLog` if an FSE table has an invalid accuracy
+    ///   - `error.MalformedFseTable` if there are errors decoding an FSE table
+    ///   - `error.EndOfStream` if `source` ends before all FSE tables are read
+    pub fn prepare(
+        self: *DecodeState,
+        source: anytype,
+        literals: LiteralsSection,
+        sequences_header: SequencesSection.Header,
+    ) !void {
+        self.literal_written_count = 0;
+        self.literal_header = literals.header;
+        self.literal_streams = literals.streams;
+
+        if (literals.huffman_tree) |tree| {
+            self.huffman_tree = tree;
+        } else if (literals.header.block_type == .treeless and self.huffman_tree == null) {
+            return error.TreelessLiteralsFirst;
+        }
+
+        switch (literals.header.block_type) {
+            .raw, .rle => {},
+            .compressed, .treeless => {
+                self.literal_stream_index = 0;
+                switch (literals.streams) {
+                    .one => |slice| try self.initLiteralStream(slice),
+                    .four => |streams| try self.initLiteralStream(streams[0]),
+                }
+            },
+        }
+
+        if (sequences_header.sequence_count > 0) {
+            try self.updateFseTable(source, .literal, sequences_header.literal_lengths);
+            try self.updateFseTable(source, .offset, sequences_header.offsets);
+            try self.updateFseTable(source, .match, sequences_header.match_lengths);
+            self.fse_tables_undefined = false;
+        }
+    }
+
+    /// Read initial FSE states for sequence decoding.
+    ///
+    /// Errors returned:
+    ///   - `error.EndOfStream` if `bit_reader` does not contain enough bits.
+    pub fn readInitialFseState(self: *DecodeState, bit_reader: *readers.ReverseBitReader) error{EndOfStream}!void {
+        self.literal.state = try bit_reader.readBitsNoEof(u9, self.literal.accuracy_log);
+        self.offset.state = try bit_reader.readBitsNoEof(u8, self.offset.accuracy_log);
+        self.match.state = try bit_reader.readBitsNoEof(u9, self.match.accuracy_log);
+    }
+
+    /// Push `offset` onto the front of the repeat-offset history, dropping
+    /// the oldest entry.
+    fn updateRepeatOffset(self: *DecodeState, offset: u32) void {
+        self.repeat_offsets[2] = self.repeat_offsets[1];
+        self.repeat_offsets[1] = self.repeat_offsets[0];
+        self.repeat_offsets[0] = offset;
+    }
+
+    /// Move the repeat offset at `index` (0, 1 or 2) to the front of the
+    /// history, keeping the relative order of the others, and return it.
+    fn useRepeatOffset(self: *DecodeState, index: usize) u32 {
+        if (index == 1)
+            std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[1])
+        else if (index == 2) {
+            std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[2]);
+            std.mem.swap(u32, &self.repeat_offsets[1], &self.repeat_offsets[2]);
+        }
+        return self.repeat_offsets[0];
+    }
+
+    const DataType = enum { offset, match, literal };
+
+    /// Advance the FSE state of `choice` by reading the number of bits its
+    /// current table entry asks for. RLE tables have no state to advance.
+    fn updateState(
+        self: *DecodeState,
+        comptime choice: DataType,
+        bit_reader: *readers.ReverseBitReader,
+    ) error{ MalformedFseBits, EndOfStream }!void {
+        switch (@field(self, @tagName(choice)).table) {
+            .rle => {},
+            .fse => |table| {
+                const data = table[@field(self, @tagName(choice)).state];
+                const T = @TypeOf(@field(self, @tagName(choice))).State;
+                const bits_summand = try bit_reader.readBitsNoEof(T, data.bits);
+                const next_state = std.math.cast(
+                    T,
+                    data.baseline + bits_summand,
+                ) orelse return error.MalformedFseBits;
+                @field(self, @tagName(choice)).state = next_state;
+            },
+        }
+    }
+
+    const FseTableError = error{
+        MalformedFseTable,
+        MalformedAccuracyLog,
+        RepeatModeFirst,
+        EndOfStream,
+    };
+
+    /// Install the table for `choice` according to `mode`: the predefined
+    /// table, a single RLE symbol read from `source`, a table decoded from
+    /// `source` into this field's FSE buffer, or (repeat) the table already
+    /// in place.
+    fn updateFseTable(
+        self: *DecodeState,
+        source: anytype,
+        comptime choice: DataType,
+        mode: SequencesSection.Header.Mode,
+    ) !void {
+        const field_name = @tagName(choice);
+        switch (mode) {
+            .predefined => {
+                @field(self, field_name).accuracy_log =
+                    @field(types.compressed_block.default_accuracy_log, field_name);
+
+                @field(self, field_name).table =
+                    @field(types.compressed_block, "predefined_" ++ field_name ++ "_fse_table");
+            },
+            .rle => {
+                @field(self, field_name).accuracy_log = 0;
+                @field(self, field_name).table = .{ .rle = try source.readByte() };
+            },
+            .fse => {
+                var bit_reader = readers.bitReader(source);
+
+                const table_size = try decodeFseTable(
+                    &bit_reader,
+                    @field(types.compressed_block.table_symbol_count_max, field_name),
+                    @field(types.compressed_block.table_accuracy_log_max, field_name),
+                    @field(self, field_name ++ "_fse_buffer"),
+                );
+                @field(self, field_name).table = .{
+                    .fse = @field(self, field_name ++ "_fse_buffer")[0..table_size],
+                };
+                @field(self, field_name).accuracy_log = std.math.log2_int_ceil(usize, table_size);
+            },
+            .repeat => if (self.fse_tables_undefined) return error.RepeatModeFirst,
+        }
+    }
+
+    const Sequence = struct {
+        literal_length: u32,
+        match_length: u32,
+        offset: u32,
+    };
+
+    /// Decode the next sequence from the current FSE states and the extra
+    /// bits in `bit_reader`, resolving repeat-offset codes and updating the
+    /// repeat-offset history. A resolved offset of 0 is rejected.
+    fn nextSequence(
+        self: *DecodeState,
+        bit_reader: *readers.ReverseBitReader,
+    ) error{ InvalidBitStream, EndOfStream }!Sequence {
+        const raw_code = self.getCode(.offset);
+        const offset_code = std.math.cast(u5, raw_code) orelse {
+            return error.InvalidBitStream;
+        };
+        const offset_value = (@as(u32, 1) << offset_code) + try bit_reader.readBitsNoEof(u32, offset_code);
+
+        const match_code = self.getCode(.match);
+        if (match_code >= types.compressed_block.match_length_code_table.len)
+            return error.InvalidBitStream;
+        const match = types.compressed_block.match_length_code_table[match_code];
+        const match_length = match[0] + try bit_reader.readBitsNoEof(u32, match[1]);
+
+        const literal_code = self.getCode(.literal);
+        if (literal_code >= types.compressed_block.literals_length_code_table.len)
+            return error.InvalidBitStream;
+        const literal = types.compressed_block.literals_length_code_table[literal_code];
+        const literal_length = literal[0] + try bit_reader.readBitsNoEof(u32, literal[1]);
+
+        const offset = if (offset_value > 3) offset: {
+            const offset = offset_value - 3;
+            self.updateRepeatOffset(offset);
+            break :offset offset;
+        } else offset: {
+            // Offset values 1..3 select a repeat offset; with no literals
+            // the selection is shifted by one and value 3 means "most recent
+            // offset minus one".
+            if (literal_length == 0) {
+                if (offset_value == 3) {
+                    const offset = self.repeat_offsets[0] - 1;
+                    self.updateRepeatOffset(offset);
+                    break :offset offset;
+                }
+                break :offset self.useRepeatOffset(offset_value);
+            }
+            break :offset self.useRepeatOffset(offset_value - 1);
+        };
+
+        if (offset == 0) return error.InvalidBitStream;
+
+        return .{
+            .literal_length = literal_length,
+            .match_length = match_length,
+            .offset = offset,
+        };
+    }
+
+    /// Write the literals of `sequence` to `dest[write_pos..]` and then copy
+    /// its match from `sequence.offset` bytes before the end of the literals.
+    fn executeSequenceSlice(
+        self: *DecodeState,
+        dest: []u8,
+        write_pos: usize,
+        sequence: Sequence,
+    ) (error{MalformedSequence} || DecodeLiteralsError)!void {
+        if (sequence.offset > write_pos + sequence.literal_length) return error.MalformedSequence;
+
+        try self.decodeLiteralsSlice(dest[write_pos..], sequence.literal_length);
+        const copy_start = write_pos + sequence.literal_length - sequence.offset;
+        const copy_end = copy_start + sequence.match_length;
+        // NOTE: we ignore the usage message for std.mem.copy and copy with dest.ptr >= src.ptr
+        //       to allow repeats
+        std.mem.copy(u8, dest[write_pos + sequence.literal_length ..], dest[copy_start..copy_end]);
+        self.written_count += sequence.match_length;
+    }
+
+    /// Ring-buffer counterpart of `executeSequenceSlice()`; the match is
+    /// copied byte by byte from `sequence.offset` bytes behind the write
+    /// index.
+    fn executeSequenceRingBuffer(
+        self: *DecodeState,
+        dest: *RingBuffer,
+        sequence: Sequence,
+    ) (error{MalformedSequence} || DecodeLiteralsError)!void {
+        if (sequence.offset > @min(dest.data.len, self.written_count + sequence.literal_length))
+            return error.MalformedSequence;
+
+        try self.decodeLiteralsRingBuffer(dest, sequence.literal_length);
+        const copy_start = dest.write_index + dest.data.len - sequence.offset;
+        const copy_slice = dest.sliceAt(copy_start, sequence.match_length);
+        // TODO: would std.mem.copy and figuring out dest slice be better/faster?
+        for (copy_slice.first) |b| dest.writeAssumeCapacity(b);
+        for (copy_slice.second) |b| dest.writeAssumeCapacity(b);
+        self.written_count += sequence.match_length;
+    }
+
+    const DecodeSequenceError = error{
+        InvalidBitStream,
+        EndOfStream,
+        MalformedSequence,
+        MalformedFseBits,
+    } || DecodeLiteralsError;
+
+    /// Decode one sequence from `bit_reader` into `dest`, written starting at
+    /// `write_pos` and update FSE states if `last_sequence` is `false`.
+    /// `prepare()` must be called for the block before attempting to decode
+    /// sequences.
+    ///
+    /// Errors returned:
+    ///   - `error.MalformedSequence` if the decompressed sequence would be
+    ///     longer than `sequence_size_limit` or the sequence's offset is too
+    ///     large
+    ///   - `error.UnexpectedEndOfLiteralStream` if the decoder state's literal
+    ///     streams do not contain enough literals for the sequence (this may
+    ///     mean the literal stream or the sequence is malformed).
+    ///   - `error.InvalidBitStream` if the FSE sequence bitstream is malformed
+    ///   - `error.EndOfStream` if `bit_reader` does not contain enough bits
+    ///   - `error.DestTooSmall` if `dest` is not large enough to hold the
+    ///     decompressed sequence
+    pub fn decodeSequenceSlice(
+        self: *DecodeState,
+        dest: []u8,
+        write_pos: usize,
+        bit_reader: *readers.ReverseBitReader,
+        sequence_size_limit: usize,
+        last_sequence: bool,
+    ) (error{DestTooSmall} || DecodeSequenceError)!usize {
+        const sequence = try self.nextSequence(bit_reader);
+        const sequence_length = @as(usize, sequence.literal_length) + sequence.match_length;
+        if (sequence_length > sequence_size_limit) return error.MalformedSequence;
+        if (sequence_length > dest[write_pos..].len) return error.DestTooSmall;
+
+        try self.executeSequenceSlice(dest, write_pos, sequence);
+        if (!last_sequence) {
+            try self.updateState(.literal, bit_reader);
+            try self.updateState(.match, bit_reader);
+            try self.updateState(.offset, bit_reader);
+        }
+        return sequence_length;
+    }
+
+    /// Decode one sequence from `bit_reader` into `dest`; see
+    /// `decodeSequenceSlice`.
+    pub fn decodeSequenceRingBuffer(
+        self: *DecodeState,
+        dest: *RingBuffer,
+        bit_reader: *readers.ReverseBitReader,
+        sequence_size_limit: usize,
+        last_sequence: bool,
+    ) DecodeSequenceError!usize {
+        const sequence = try self.nextSequence(bit_reader);
+        const sequence_length = @as(usize, sequence.literal_length) + sequence.match_length;
+        if (sequence_length > sequence_size_limit) return error.MalformedSequence;
+
+        try self.executeSequenceRingBuffer(dest, sequence);
+        if (!last_sequence) {
+            try self.updateState(.literal, bit_reader);
+            try self.updateState(.match, bit_reader);
+            try self.updateState(.offset, bit_reader);
+        }
+        return sequence_length;
+    }
+
+    /// Switch the literal reader to the next of the four literal streams.
+    fn nextLiteralMultiStream(
+        self: *DecodeState,
+    ) error{BitStreamHasNoStartBit}!void {
+        self.literal_stream_index += 1;
+        try self.initLiteralStream(self.literal_streams.four[self.literal_stream_index]);
+    }
+
+    fn initLiteralStream(self: *DecodeState, bytes: []const u8) error{BitStreamHasNoStartBit}!void {
+        try self.literal_stream_reader.init(bytes);
+    }
+
+    /// Returns `true` once the literal reader is exhausted; with four
+    /// streams this additionally requires that the last stream is the
+    /// current one.
+    fn isLiteralStreamEmpty(self: *DecodeState) bool {
+        switch (self.literal_streams) {
+            .one => return self.literal_stream_reader.isEmpty(),
+            .four => return self.literal_stream_index == 3 and self.literal_stream_reader.isEmpty(),
+        }
+    }
+
+    const LiteralBitsError = error{
+        BitStreamHasNoStartBit,
+        UnexpectedEndOfLiteralStream,
+    };
+    /// Read `bit_count_to_read` bits from the current literal stream. If the
+    /// read fails and another of the four streams remains, move on to it and
+    /// retry once.
+    fn readLiteralsBits(
+        self: *DecodeState,
+        bit_count_to_read: usize,
+    ) LiteralBitsError!u16 {
+        return self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch bits: {
+            if (self.literal_streams == .four and self.literal_stream_index < 3) {
+                try self.nextLiteralMultiStream();
+                break :bits self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch
+                    return error.UnexpectedEndOfLiteralStream;
+            } else {
+                return error.UnexpectedEndOfLiteralStream;
+            }
+        };
+    }
+
+    const DecodeLiteralsError = error{
+        MalformedLiteralsLength,
+        NotFound,
+    } || LiteralBitsError;
+
+    /// Decode `len` bytes of literals into `dest`.
+    ///
+    /// Errors returned:
+    ///   - `error.MalformedLiteralsLength` if the number of literal bytes
+    ///     decoded by `self` plus `len` is greater than the regenerated size of
+    ///     `literals`
+    ///   - `error.UnexpectedEndOfLiteralStream` and `error.NotFound` if there
+    ///     are problems decoding Huffman compressed literals
+    pub fn decodeLiteralsSlice(
+        self: *DecodeState,
+        dest: []u8,
+        len: usize,
+    ) DecodeLiteralsError!void {
+        if (self.literal_written_count + len > self.literal_header.regenerated_size)
+            return error.MalformedLiteralsLength;
+
+        switch (self.literal_header.block_type) {
+            .raw => {
+                const literals_end = self.literal_written_count + len;
+                const literal_data = self.literal_streams.one[self.literal_written_count..literals_end];
+                std.mem.copy(u8, dest, literal_data);
+                self.literal_written_count += len;
+                self.written_count += len;
+            },
+            .rle => {
+                for (0..len) |i| {
+                    dest[i] = self.literal_streams.one[0];
+                }
+                self.literal_written_count += len;
+                self.written_count += len;
+            },
+            .compressed, .treeless => {
+                // `prepare()` guarantees a tree exists for these block types.
+                const huffman_tree = self.huffman_tree.?;
+                const max_bit_count = huffman_tree.max_bit_count;
+                const starting_bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
+                    huffman_tree.nodes[huffman_tree.symbol_count_minus_one].weight,
+                    max_bit_count,
+                );
+                var bits_read: u4 = 0;
+                var huffman_tree_index: usize = huffman_tree.symbol_count_minus_one;
+                var bit_count_to_read: u4 = starting_bit_count;
+                for (0..len) |i| {
+                    var prefix: u16 = 0;
+                    // Extend the prefix until the tree resolves it to a symbol.
+                    while (true) {
+                        const new_bits = try self.readLiteralsBits(bit_count_to_read);
+                        prefix <<= bit_count_to_read;
+                        prefix |= new_bits;
+                        bits_read += bit_count_to_read;
+                        const result = try huffman_tree.query(huffman_tree_index, prefix);
+
+                        switch (result) {
+                            .symbol => |sym| {
+                                dest[i] = sym;
+                                bit_count_to_read = starting_bit_count;
+                                bits_read = 0;
+                                huffman_tree_index = huffman_tree.symbol_count_minus_one;
+                                break;
+                            },
+                            .index => |index| {
+                                huffman_tree_index = index;
+                                const bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
+                                    huffman_tree.nodes[index].weight,
+                                    max_bit_count,
+                                );
+                                bit_count_to_read = bit_count - bits_read;
+                            },
+                        }
+                    }
+                }
+                self.literal_written_count += len;
+                self.written_count += len;
+            },
+        }
+    }
+
+    /// Decode literals into `dest`; see `decodeLiteralsSlice()`.
+    pub fn decodeLiteralsRingBuffer(
+        self: *DecodeState,
+        dest: *RingBuffer,
+        len: usize,
+    ) DecodeLiteralsError!void {
+        if (self.literal_written_count + len > self.literal_header.regenerated_size)
+            return error.MalformedLiteralsLength;
+
+        switch (self.literal_header.block_type) {
+            .raw => {
+                const literals_end = self.literal_written_count + len;
+                const literal_data = self.literal_streams.one[self.literal_written_count..literals_end];
+                dest.writeSliceAssumeCapacity(literal_data);
+                self.literal_written_count += len;
+                self.written_count += len;
+            },
+            .rle => {
+                for (0..len) |_| {
+                    dest.writeAssumeCapacity(self.literal_streams.one[0]);
+                }
+                self.literal_written_count += len;
+                self.written_count += len;
+            },
+            .compressed, .treeless => {
+                // `prepare()` guarantees a tree exists for these block types.
+                const huffman_tree = self.huffman_tree.?;
+                const max_bit_count = huffman_tree.max_bit_count;
+                const starting_bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
+                    huffman_tree.nodes[huffman_tree.symbol_count_minus_one].weight,
+                    max_bit_count,
+                );
+                var bits_read: u4 = 0;
+                var huffman_tree_index: usize = huffman_tree.symbol_count_minus_one;
+                var bit_count_to_read: u4 = starting_bit_count;
+                for (0..len) |_| {
+                    var prefix: u16 = 0;
+                    // Extend the prefix until the tree resolves it to a symbol.
+                    while (true) {
+                        const new_bits = try self.readLiteralsBits(bit_count_to_read);
+                        prefix <<= bit_count_to_read;
+                        prefix |= new_bits;
+                        bits_read += bit_count_to_read;
+                        const result = try huffman_tree.query(huffman_tree_index, prefix);
+
+                        switch (result) {
+                            .symbol => |sym| {
+                                dest.writeAssumeCapacity(sym);
+                                bit_count_to_read = starting_bit_count;
+                                bits_read = 0;
+                                huffman_tree_index = huffman_tree.symbol_count_minus_one;
+                                break;
+                            },
+                            .index => |index| {
+                                huffman_tree_index = index;
+                                const bit_count = LiteralsSection.HuffmanTree.weightToBitCount(
+                                    huffman_tree.nodes[index].weight,
+                                    max_bit_count,
+                                );
+                                bit_count_to_read = bit_count - bits_read;
+                            },
+                        }
+                    }
+                }
+                self.literal_written_count += len;
+                self.written_count += len;
+            },
+        }
+    }
+
+    /// Return the symbol for `choice`: the RLE value, or the symbol of the
+    /// FSE table entry selected by the current state.
+    fn getCode(self: *DecodeState, comptime choice: DataType) u32 {
+        return switch (@field(self, @tagName(choice)).table) {
+            .rle => |value| value,
+            .fse => |table| table[@field(self, @tagName(choice)).state].symbol,
+        };
+    }
+};
+
+/// Decode a single block from `src` into `dest`. The beginning of `src` must be
+/// the start of the block content (i.e. directly after the block header).
+/// Increments `consumed_count` by the number of bytes read from `src` to decode
+/// the block and returns the decompressed size of the block.
+///
+/// Errors returned:
+///
+///   - `error.BlockSizeOverMaximum` if the block's size is larger than
+///     `block_size_max`
+///   - `error.MalformedBlockSize` if `src.len` is smaller than the block size
+///     and the block is a raw or compressed block
+///   - `error.ReservedBlock` if the block is a reserved block
+///   - `error.MalformedRleBlock` if the block is an RLE block and `src.len < 1`
+///   - `error.MalformedCompressedBlock` if there are errors decoding a
+///     compressed block
+///   - `error.DestTooSmall` if `dest[written_count..]` is not large enough to
+///     hold the decompressed block
+pub fn decodeBlock(
+    dest: []u8,
+    src: []const u8,
+    block_header: frame.Zstandard.Block.Header,
+    decode_state: *DecodeState,
+    consumed_count: *usize,
+    block_size_max: usize,
+    written_count: usize,
+) (error{DestTooSmall} || Error)!usize {
+    const block_size = block_header.block_size;
+    if (block_size_max < block_size) return error.BlockSizeOverMaximum;
+    switch (block_header.block_type) {
+        // Raw block: `block_size` bytes are copied verbatim.
+        .raw => {
+            if (src.len < block_size) return error.MalformedBlockSize;
+            if (dest[written_count..].len < block_size) return error.DestTooSmall;
+            const data = src[0..block_size];
+            std.mem.copy(u8, dest[written_count..], data);
+            consumed_count.* += block_size;
+            decode_state.written_count += block_size;
+            return block_size;
+        },
+        // RLE block: a single source byte is repeated `block_size` times.
+        .rle => {
+            if (src.len < 1) return error.MalformedRleBlock;
+            if (dest[written_count..].len < block_size) return error.DestTooSmall;
+            for (written_count..block_size + written_count) |write_pos| {
+                dest[write_pos] = src[0];
+            }
+            consumed_count.* += 1;
+            decode_state.written_count += block_size;
+            return block_size;
+        },
+        .compressed => {
+            if (src.len < block_size) return error.MalformedBlockSize;
+            var bytes_read: usize = 0;
+            const literals = decodeLiteralsSectionSlice(src[0..block_size], &bytes_read) catch
+                return error.MalformedCompressedBlock;
+            var fbs = std.io.fixedBufferStream(src[bytes_read..block_size]);
+            const fbs_reader = fbs.reader();
+            const sequences_header = decodeSequencesHeader(fbs_reader) catch
+                return error.MalformedCompressedBlock;
+
+            decode_state.prepare(fbs_reader, literals, sequences_header) catch
+                return error.MalformedCompressedBlock;
+
+            // Account for the sequences header and FSE tables read via `fbs`.
+            bytes_read += fbs.pos;
+
+            var bytes_written: usize = 0;
+            {
+                // Whatever remains of the block is the sequence bitstream.
+                const bit_stream_bytes = src[bytes_read..block_size];
+                var bit_stream: readers.ReverseBitReader = undefined;
+                bit_stream.init(bit_stream_bytes) catch return error.MalformedCompressedBlock;
+
+                if (sequences_header.sequence_count > 0) {
+                    decode_state.readInitialFseState(&bit_stream) catch
+                        return error.MalformedCompressedBlock;
+
+                    // Remaining decompressed bytes this block may still
+                    // produce; shrinks as each sequence is decoded.
+                    var sequence_size_limit = block_size_max;
+                    for (0..sequences_header.sequence_count) |i| {
+                        const write_pos = written_count + bytes_written;
+                        const decompressed_size = decode_state.decodeSequenceSlice(
+                            dest,
+                            write_pos,
+                            &bit_stream,
+                            sequence_size_limit,
+                            i == sequences_header.sequence_count - 1,
+                        ) catch |err| switch (err) {
+                            error.DestTooSmall => return error.DestTooSmall,
+                            else => return error.MalformedCompressedBlock,
+                        };
+                        bytes_written += decompressed_size;
+                        sequence_size_limit -= decompressed_size;
+                    }
+                }
+
+                // Bits left over after the last sequence mean a malformed block.
+                if (!bit_stream.isEmpty()) {
+                    return error.MalformedCompressedBlock;
+                }
+            }
+
+            // Literals not consumed by any sequence are appended after the
+            // last sequence.
+            if (decode_state.literal_written_count < literals.header.regenerated_size) {
+                const len = literals.header.regenerated_size - decode_state.literal_written_count;
+                if (len > dest[written_count + bytes_written ..].len) return error.DestTooSmall;
+                decode_state.decodeLiteralsSlice(dest[written_count + bytes_written ..], len) catch
+                    return error.MalformedCompressedBlock;
+                bytes_written += len;
+            }
+
+            // Huffman-coded literal streams must have been consumed entirely.
+            switch (decode_state.literal_header.block_type) {
+                .treeless, .compressed => {
+                    if (!decode_state.isLiteralStreamEmpty()) return error.MalformedCompressedBlock;
+                },
+                .raw, .rle => {},
+            }
+
+            consumed_count.* += block_size;
+            return bytes_written;
+        },
+        .reserved => return error.ReservedBlock,
+    }
+}
+
+/// Decode a single block from `src` into
`dest`; see `decodeBlock()`. Returns +/// the size of the decompressed block, which can be used with `dest.sliceLast()` +/// to get the decompressed bytes. `error.BlockSizeOverMaximum` is returned if +/// the block's compressed or decompressed size is larger than `block_size_max`. +pub fn decodeBlockRingBuffer( + dest: *RingBuffer, + src: []const u8, + block_header: frame.Zstandard.Block.Header, + decode_state: *DecodeState, + consumed_count: *usize, + block_size_max: usize, +) Error!usize { + const block_size = block_header.block_size; + if (block_size_max < block_size) return error.BlockSizeOverMaximum; + switch (block_header.block_type) { + .raw => { + if (src.len < block_size) return error.MalformedBlockSize; + const data = src[0..block_size]; + dest.writeSliceAssumeCapacity(data); + consumed_count.* += block_size; + decode_state.written_count += block_size; + return block_size; + }, + .rle => { + if (src.len < 1) return error.MalformedRleBlock; + for (0..block_size) |_| { + dest.writeAssumeCapacity(src[0]); + } + consumed_count.* += 1; + decode_state.written_count += block_size; + return block_size; + }, + .compressed => { + if (src.len < block_size) return error.MalformedBlockSize; + var bytes_read: usize = 0; + const literals = decodeLiteralsSectionSlice(src[0..block_size], &bytes_read) catch + return error.MalformedCompressedBlock; + var fbs = std.io.fixedBufferStream(src[bytes_read..block_size]); + const fbs_reader = fbs.reader(); + const sequences_header = decodeSequencesHeader(fbs_reader) catch + return error.MalformedCompressedBlock; + + decode_state.prepare(fbs_reader, literals, sequences_header) catch + return error.MalformedCompressedBlock; + + bytes_read += fbs.pos; + + var bytes_written: usize = 0; + { + const bit_stream_bytes = src[bytes_read..block_size]; + var bit_stream: readers.ReverseBitReader = undefined; + bit_stream.init(bit_stream_bytes) catch return error.MalformedCompressedBlock; + + if (sequences_header.sequence_count > 0) { + 
decode_state.readInitialFseState(&bit_stream) catch + return error.MalformedCompressedBlock; + + var sequence_size_limit = block_size_max; + for (0..sequences_header.sequence_count) |i| { + const decompressed_size = decode_state.decodeSequenceRingBuffer( + dest, + &bit_stream, + sequence_size_limit, + i == sequences_header.sequence_count - 1, + ) catch return error.MalformedCompressedBlock; + bytes_written += decompressed_size; + sequence_size_limit -= decompressed_size; + } + } + + if (!bit_stream.isEmpty()) { + return error.MalformedCompressedBlock; + } + } + + if (decode_state.literal_written_count < literals.header.regenerated_size) { + const len = literals.header.regenerated_size - decode_state.literal_written_count; + decode_state.decodeLiteralsRingBuffer(dest, len) catch + return error.MalformedCompressedBlock; + bytes_written += len; + } + + switch (decode_state.literal_header.block_type) { + .treeless, .compressed => { + if (!decode_state.isLiteralStreamEmpty()) return error.MalformedCompressedBlock; + }, + .raw, .rle => {}, + } + + consumed_count.* += block_size; + if (bytes_written > block_size_max) return error.BlockSizeOverMaximum; + return bytes_written; + }, + .reserved => return error.ReservedBlock, + } +} + +/// Decode a single block from `source` into `dest`. Literal and sequence data +/// from the block is copied into `literals_buffer` and `sequence_buffer`, which +/// must be large enough or `error.LiteralsBufferTooSmall` and +/// `error.SequenceBufferTooSmall` are returned (the maximum block size is an +/// upper bound for the size of both buffers). See `decodeBlock` +/// and `decodeBlockRingBuffer` for function that can decode a block without +/// these extra copies. `error.EndOfStream` is returned if `source` does not +/// contain enough bytes. 
+pub fn decodeBlockReader(
+    dest: *RingBuffer,
+    source: anytype,
+    block_header: frame.Zstandard.Block.Header,
+    decode_state: *DecodeState,
+    block_size_max: usize,
+    literals_buffer: []u8,
+    sequence_buffer: []u8,
+) !void {
+    const block_size = block_header.block_size;
+    // Compressed blocks are read through a reader capped at `block_size` so
+    // that a malformed block cannot consume bytes belonging to the next one.
+    var block_reader_limited = std.io.limitedReader(source, block_size);
+    const block_reader = block_reader_limited.reader();
+    if (block_size_max < block_size) return error.BlockSizeOverMaximum;
+    switch (block_header.block_type) {
+        .raw => {
+            if (block_size == 0) return;
+            // Read straight into the (possibly wrapping) free region of the
+            // ring buffer, then advance the write index past it.
+            const slice = dest.sliceAt(dest.write_index, block_size);
+            try source.readNoEof(slice.first);
+            try source.readNoEof(slice.second);
+            dest.write_index = dest.mask2(dest.write_index + block_size);
+            decode_state.written_count += block_size;
+        },
+        .rle => {
+            const byte = try source.readByte();
+            for (0..block_size) |_| {
+                dest.writeAssumeCapacity(byte);
+            }
+            decode_state.written_count += block_size;
+        },
+        .compressed => {
+            const literals = try decodeLiteralsSection(block_reader, literals_buffer);
+            const sequences_header = try decodeSequencesHeader(block_reader);
+
+            try decode_state.prepare(block_reader, literals, sequences_header);
+
+            var bytes_written: usize = 0;
+            {
+                // Everything left in the block is the sequence bit stream and it
+                // must fit in `sequence_buffer` in full. Check this before
+                // reading: `readAll` lowers `bytes_left` by the amount it
+                // copies, so a check made afterwards would miss any overrun
+                // smaller than the buffer and decode a truncated stream.
+                if (sequence_buffer.len < block_reader_limited.bytes_left)
+                    return error.SequenceBufferTooSmall;
+
+                const size = try block_reader.readAll(sequence_buffer);
+                var bit_stream: readers.ReverseBitReader = undefined;
+                try bit_stream.init(sequence_buffer[0..size]);
+
+                if (sequences_header.sequence_count > 0) {
+                    decode_state.readInitialFseState(&bit_stream) catch
+                        return error.MalformedCompressedBlock;
+
+                    var sequence_size_limit = block_size_max;
+                    for (0..sequences_header.sequence_count) |i| {
+                        const decompressed_size = decode_state.decodeSequenceRingBuffer(
+                            dest,
+                            &bit_stream,
+                            sequence_size_limit,
+                            i == sequences_header.sequence_count - 1,
+                        ) catch return error.MalformedCompressedBlock;
+                        sequence_size_limit -= decompressed_size;
+                        bytes_written += decompressed_size;
+                    }
+                }
+
+                if (!bit_stream.isEmpty()) {
+                    return error.MalformedCompressedBlock;
+                }
+            }
+
+            // Literals not consumed by any sequence are appended after the last match.
+            if (decode_state.literal_written_count < literals.header.regenerated_size) {
+                const len = literals.header.regenerated_size - decode_state.literal_written_count;
+                decode_state.decodeLiteralsRingBuffer(dest, len) catch
+                    return error.MalformedCompressedBlock;
+                bytes_written += len;
+            }
+
+            switch (decode_state.literal_header.block_type) {
+                .treeless, .compressed => {
+                    if (!decode_state.isLiteralStreamEmpty()) return error.MalformedCompressedBlock;
+                },
+                .raw, .rle => {},
+            }
+
+            if (bytes_written > block_size_max) return error.BlockSizeOverMaximum;
+            if (block_reader_limited.bytes_left != 0) return error.MalformedCompressedBlock;
+            decode_state.literal_written_count = 0;
+        },
+        .reserved => return error.ReservedBlock,
+    }
+}
+
+/// Decode the header of a block from its three little-endian bytes: bit 0 is
+/// the last-block flag, bits 1-2 the block type and the remaining 21 bits the
+/// block size.
+pub fn decodeBlockHeader(src: *const [3]u8) frame.Zstandard.Block.Header {
+    const last_block = src[0] & 1 == 1;
+    const block_type = @intToEnum(frame.Zstandard.Block.Type, (src[0] & 0b110) >> 1);
+    const block_size = ((src[0] & 0b11111000) >> 3) + (@as(u21, src[1]) << 5) + (@as(u21, src[2]) << 13);
+    return .{
+        .last_block = last_block,
+        .block_type = block_type,
+        .block_size = block_size,
+    };
+}
+
+/// Decode the header of a block.
+///
+/// Errors returned:
+///   - `error.EndOfStream` if `src.len < 3`
+pub fn decodeBlockHeaderSlice(src: []const u8) error{EndOfStream}!frame.Zstandard.Block.Header {
+    if (src.len < 3) return error.EndOfStream;
+    return decodeBlockHeader(src[0..3]);
+}
+
+/// Decode a `LiteralsSection` from `src`, incrementing `consumed_count` by the
+/// number of bytes the section uses.
+///
+/// Errors returned:
+///   - `error.MalformedLiteralsHeader` if the header is invalid
+///   - `error.MalformedLiteralsSection` if there are decoding errors
+///   - `error.MalformedAccuracyLog` if compressed literals have invalid
+///     accuracy
+///   - `error.MalformedFseTable` if compressed literals have invalid FSE table
+///   - `error.MalformedHuffmanTree` if there are errors decoding a Huffman tree
+///   - `error.EndOfStream` if there are not enough bytes in `src`
+pub fn decodeLiteralsSectionSlice(
+    src: []const u8,
+    consumed_count: *usize,
+) (error{ MalformedLiteralsHeader, MalformedLiteralsSection, EndOfStream } || huffman.Error)!LiteralsSection {
+    var bytes_read: usize = 0;
+    const header = header: {
+        var fbs = std.io.fixedBufferStream(src);
+        // Record how many header bytes were consumed once the block exits.
+        defer bytes_read = fbs.pos;
+        break :header decodeLiteralsHeader(fbs.reader()) catch return error.MalformedLiteralsHeader;
+    };
+    switch (header.block_type) {
+        .raw => {
+            if (src.len < bytes_read + header.regenerated_size) return error.MalformedLiteralsSection;
+            const stream = src[bytes_read .. bytes_read + header.regenerated_size];
+            consumed_count.* += header.regenerated_size + bytes_read;
+            return LiteralsSection{
+                .header = header,
+                .huffman_tree = null,
+                .streams = .{ .one = stream },
+            };
+        },
+        .rle => {
+            // An RLE literals section stores a single byte after the header.
+            if (src.len < bytes_read + 1) return error.MalformedLiteralsSection;
+            const stream = src[bytes_read .. bytes_read + 1];
+            consumed_count.* += 1 + bytes_read;
+            return LiteralsSection{
+                .header = header,
+                .huffman_tree = null,
+                .streams = .{ .one = stream },
+            };
+        },
+        .compressed, .treeless => {
+            const huffman_tree_start = bytes_read;
+            const huffman_tree = if (header.block_type == .compressed)
+                try huffman.decodeHuffmanTreeSlice(src[bytes_read..], &bytes_read)
+            else
+                null;
+            // `compressed_size` counts the tree description as well as the
+            // streams, so the tree's size is subtracted to get the stream size.
+            const huffman_tree_size = bytes_read - huffman_tree_start;
+            const total_streams_size = std.math.sub(usize, header.compressed_size.?, huffman_tree_size) catch
+                return error.MalformedLiteralsSection;
+
+            if (src.len < bytes_read + total_streams_size) return error.MalformedLiteralsSection;
+            const stream_data = src[bytes_read .. bytes_read + total_streams_size];
+
+            const streams = try decodeStreams(header.size_format, stream_data);
+            consumed_count.* += bytes_read + total_streams_size;
+            return LiteralsSection{
+                .header = header,
+                .huffman_tree = huffman_tree,
+                .streams = streams,
+            };
+        },
+    }
+}
+
+/// Decode a `LiteralsSection` from `source`, reading its stream data into
+/// `buffer`. See `decodeLiteralsSectionSlice()`.
+pub fn decodeLiteralsSection(
+    source: anytype,
+    buffer: []u8,
+) !LiteralsSection {
+    const header = try decodeLiteralsHeader(source);
+    switch (header.block_type) {
+        .raw => {
+            // Report an undersized buffer as an error, like the compressed
+            // branch below, instead of slicing past the end of `buffer`.
+            if (buffer.len < header.regenerated_size) return error.LiteralsBufferTooSmall;
+            try source.readNoEof(buffer[0..header.regenerated_size]);
+            return LiteralsSection{
+                .header = header,
+                .huffman_tree = null,
+                .streams = .{ .one = buffer },
+            };
+        },
+        .rle => {
+            // The single repeated byte is stored in `buffer[0]`.
+            if (buffer.len < 1) return error.LiteralsBufferTooSmall;
+            buffer[0] = try source.readByte();
+            return LiteralsSection{
+                .header = header,
+                .huffman_tree = null,
+                .streams = .{ .one = buffer[0..1] },
+            };
+        },
+        .compressed, .treeless => {
+            // Count the bytes the tree description takes: `compressed_size`
+            // covers both the tree and the streams that follow it.
+            var counting_reader = std.io.countingReader(source);
+            const huffman_tree = if (header.block_type == .compressed)
+                try huffman.decodeHuffmanTree(counting_reader.reader(), buffer)
+            else
+                null;
+            const huffman_tree_size = @intCast(usize, counting_reader.bytes_read);
+            const total_streams_size = std.math.sub(usize, header.compressed_size.?, huffman_tree_size) catch
+                return error.MalformedLiteralsSection;
+
+            if (total_streams_size > buffer.len) return error.LiteralsBufferTooSmall;
+            try source.readNoEof(buffer[0..total_streams_size]);
+            const stream_data = buffer[0..total_streams_size];
+
+            const streams = try decodeStreams(header.size_format, stream_data);
+            return LiteralsSection{
+                .header = header,
+                .huffman_tree = huffman_tree,
+                .streams = streams,
+            };
+        },
+    }
+}
+
+/// Split `stream_data` into the literal streams selected by `size_format`: a
+/// single stream when it is 0, otherwise four streams preceded by a six byte
+/// jump table holding the lengths of the first three.
+fn decodeStreams(size_format: u2, stream_data: []const u8) !LiteralsSection.Streams {
+    if (size_format == 0) {
+        return .{ .one = stream_data };
+    }
+
+    if (stream_data.len < 6) return error.MalformedLiteralsSection;
+
+    const stream_1_length = @as(usize, readInt(u16, stream_data[0..2]));
+    const stream_2_length = @as(usize, readInt(u16, stream_data[2..4]));
+    const stream_3_length = @as(usize, readInt(u16, stream_data[4..6]));
+
+    const stream_1_start = 6;
+    const stream_2_start = stream_1_start + stream_1_length;
+    const stream_3_start = stream_2_start + stream_2_length;
+    const stream_4_start = stream_3_start + stream_3_length;
+
+    if (stream_data.len < stream_4_start) return error.MalformedLiteralsSection;
+
+    return .{ .four = .{
+        stream_data[stream_1_start .. stream_1_start + stream_1_length],
+        stream_data[stream_2_start .. stream_2_start + stream_2_length],
+        stream_data[stream_3_start .. stream_3_start + stream_3_length],
+        stream_data[stream_4_start..],
+    } };
+}
+
+/// Decode a literals section header.
+///
+/// Errors returned:
+///   - `error.EndOfStream` if there are not enough bytes in `source`
+pub fn decodeLiteralsHeader(source: anytype) !LiteralsSection.Header {
+    const byte0 = try source.readByte();
+    const block_type = @intToEnum(LiteralsSection.BlockType, byte0 & 0b11);
+    const size_format = @intCast(u2, (byte0 & 0b1100) >> 2);
+    var regenerated_size: u20 = undefined;
+    var compressed_size: ?u18 = null;
+    switch (block_type) {
+        .raw, .rle => {
+            switch (size_format) {
+                0, 2 => {
+                    regenerated_size = byte0 >> 3;
+                },
+                1 => regenerated_size = (byte0 >> 4) + (@as(u20, try source.readByte()) << 4),
+                3 => regenerated_size = (byte0 >> 4) +
+                    (@as(u20, try source.readByte()) << 4) +
+                    (@as(u20, try source.readByte()) << 12),
+            }
+        },
+        .compressed, .treeless => {
+            const byte1 = try source.readByte();
+            const byte2 = try source.readByte();
+            switch (size_format) {
+                0, 1 => {
+                    regenerated_size = (byte0 >> 4) + ((@as(u20, byte1) & 0b00111111) << 4);
+                    compressed_size = ((byte1 & 0b11000000) >> 6) + (@as(u18, byte2) << 2);
+                },
+                2 => {
+                    const byte3 = try source.readByte();
+                    regenerated_size = (byte0 >> 4) + (@as(u20, byte1) << 4) + ((@as(u20, byte2) & 0b00000011) << 12);
+                    compressed_size = ((byte2 & 0b11111100) >> 2) + (@as(u18, byte3) << 6);
+                },
+                3 => {
+                    const byte3 = try source.readByte();
+                    const byte4 = try source.readByte();
+                    regenerated_size = (byte0 >> 4) + (@as(u20, byte1) << 4) + ((@as(u20, byte2) & 0b00111111) << 12);
+                    compressed_size = ((byte2 & 0b11000000) >> 6) + (@as(u18, byte3) << 2) + (@as(u18, byte4) << 10);
+                },
+            }
+        },
+    }
+    return LiteralsSection.Header{
+        .block_type = block_type,
+        .size_format = size_format,
+        .regenerated_size = regenerated_size,
+        .compressed_size = compressed_size,
+    };
+}
+
+/// Decode a sequences section header.
+///
+/// Errors returned:
+///   - `error.ReservedBitSet` if the reserved bit is set
+///   - `error.EndOfStream` if there are not enough bytes in `source`
+pub fn decodeSequencesHeader(
+    source: anytype,
+) !SequencesSection.Header {
+    var sequence_count: u24 = undefined;
+
+    const byte0 = try source.readByte();
+    if (byte0 == 0) {
+        return SequencesSection.Header{
+            .sequence_count = 0,
+            .offsets = undefined,
+            .match_lengths = undefined,
+            .literal_lengths = undefined,
+        };
+    } else if (byte0 < 128) {
+        sequence_count = byte0;
+    } else if (byte0 < 255) {
+        sequence_count = (@as(u24, (byte0 - 128)) << 8) + try source.readByte();
+    } else {
+        sequence_count = (try source.readByte()) + (@as(u24, try source.readByte()) << 8) + 0x7F00;
+    }
+
+    const compression_modes = try source.readByte();
+
+    const matches_mode = @intToEnum(SequencesSection.Header.Mode, (compression_modes & 0b00001100) >> 2);
+    const offsets_mode = @intToEnum(SequencesSection.Header.Mode, (compression_modes & 0b00110000) >> 4);
+    const literal_mode = @intToEnum(SequencesSection.Header.Mode, (compression_modes & 0b11000000) >> 6);
+    if (compression_modes & 0b11 != 0) return error.ReservedBitSet;
+
+    return SequencesSection.Header{
+        .sequence_count = sequence_count,
+        .offsets = offsets_mode,
+        .match_lengths = matches_mode,
+        .literal_lengths = literal_mode,
+    };
+}
diff --git a/lib/std/compress/zstandard/decode/fse.zig b/lib/std/compress/zstandard/decode/fse.zig
new file mode 100644
index 000000000000..41a34d0fc15b
--- /dev/null
+++ b/lib/std/compress/zstandard/decode/fse.zig
@@ -0,0 +1,153 @@
+const std = @import("std");
+const assert = std.debug.assert;
+
+const types = @import("../types.zig");
+const Table = types.compressed_block.Table;
+
+pub fn decodeFseTable(
+    bit_reader: anytype,
+    expected_symbol_count: usize,
+    max_accuracy_log: u4,
+    entries: []Table.Fse,
+) !usize {
+    const accuracy_log_biased = try bit_reader.readBitsNoEof(u4, 4);
+    if (accuracy_log_biased > max_accuracy_log -| 5) return error.MalformedAccuracyLog;
+    const accuracy_log = accuracy_log_biased + 5;
+
+    var values: [256]u16 = undefined;
+    var value_count: usize = 0;
+
+    const total_probability = @as(u16, 1) << accuracy_log;
+    var accumulated_probability: u16 = 0;
+
+    while (accumulated_probability < total_probability) {
+        // WARNING: The RFC is poorly worded, and would suggest std.math.log2_int_ceil is correct here,
+        //          but power of two (remaining probabilities + 1) need max bits set to 1 more.
+        const max_bits = std.math.log2_int(u16, total_probability - accumulated_probability + 1) + 1;
+        const small = try bit_reader.readBitsNoEof(u16, max_bits - 1);
+
+        const cutoff = (@as(u16, 1) << max_bits) - 1 - (total_probability - accumulated_probability + 1);
+
+        const value = if (small < cutoff)
+            small
+        else value: {
+            const value_read = small + (try bit_reader.readBitsNoEof(u16, 1) << (max_bits - 1));
+            break :value if (value_read < @as(u16, 1) << (max_bits - 1))
+                value_read
+            else
+                value_read - cutoff;
+        };
+
+        accumulated_probability += if (value != 0) value - 1 else 1;
+
+        values[value_count] = value;
+        value_count += 1;
+
+        if (value == 1) {
+            while (true) {
+                const repeat_flag = try bit_reader.readBitsNoEof(u2, 2);
+                if (repeat_flag + value_count > 256) return error.MalformedFseTable;
+                for (0..repeat_flag) |_| {
+                    values[value_count] = 1;
+                    value_count += 1;
+                }
+                if (repeat_flag < 3) break;
+            }
+        }
+        if (value_count == 256) break;
+    }
+    bit_reader.alignToByte();
+
+    if (value_count < 2) return error.MalformedFseTable;
+    if (accumulated_probability != total_probability) return error.MalformedFseTable;
+    if (value_count > expected_symbol_count) return error.MalformedFseTable;
+
+    const table_size = total_probability;
+
+    try buildFseTable(values[0..value_count], entries[0..table_size]);
+    return table_size;
+}
+
+fn buildFseTable(values: []const u16, entries: []Table.Fse) !void {
+    const total_probability = @intCast(u16, entries.len);
+    const accuracy_log = std.math.log2_int(u16, total_probability);
+    assert(total_probability <= 1 << 9);
+
+    var less_than_one_count: usize = 0;
+    for (values, 0..) |value, i| {
+        if (value == 0) {
+            entries[entries.len - 1 - less_than_one_count] = Table.Fse{
+                .symbol = @intCast(u8, i),
+                .baseline = 0,
+                .bits = accuracy_log,
+            };
+            less_than_one_count += 1;
+        }
+    }
+
+    var position: usize = 0;
+    var temp_states: [1 << 9]u16 = undefined;
+    for (values, 0..) |value, symbol| {
+        if (value == 0 or value == 1) continue;
+        const probability = value - 1;
+
+        const state_share_dividend = std.math.ceilPowerOfTwo(u16, probability) catch
+            return error.MalformedFseTable;
+        const share_size = @divExact(total_probability, state_share_dividend);
+        const double_state_count = state_share_dividend - probability;
+        const single_state_count = probability - double_state_count;
+        const share_size_log = std.math.log2_int(u16, share_size);
+
+        for (0..probability) |i| {
+            temp_states[i] = @intCast(u16, position);
+            position += (entries.len >> 1) + (entries.len >> 3) + 3;
+            position &= entries.len - 1;
+            while (position >= entries.len - less_than_one_count) {
+                position += (entries.len >> 1) + (entries.len >> 3) + 3;
+                position &= entries.len - 1;
+            }
+        }
+        std.sort.sort(u16, temp_states[0..probability], {}, std.sort.asc(u16));
+        for (0..probability) |i| {
+            entries[temp_states[i]] = if (i < double_state_count) Table.Fse{
+                .symbol = @intCast(u8, symbol),
+                .bits = share_size_log + 1,
+                .baseline = single_state_count * share_size + @intCast(u16, i) * 2 * share_size,
+            } else Table.Fse{
+                .symbol = @intCast(u8, symbol),
+                .bits = share_size_log,
+                .baseline = (@intCast(u16, i) - double_state_count) * share_size,
+            };
+        }
+    }
+}
+
+test buildFseTable {
+    const literals_length_default_values = [36]u16{
+        5, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2,
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 2, 2, 2, 2, 2,
+        0, 0, 0, 0,
+    };
+
+    const match_lengths_default_values = [53]u16{
+        2, 5, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0,
+        0, 0, 0, 0, 0,
+    };
+
+    const offset_codes_default_values = [29]u16{
+        2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0,
+    };
+
+    var entries: [64]Table.Fse = undefined;
+    try buildFseTable(&literals_length_default_values, &entries);
+    try std.testing.expectEqualSlices(Table.Fse, types.compressed_block.predefined_literal_fse_table.fse, &entries);
+
+    try buildFseTable(&match_lengths_default_values, &entries);
+    try std.testing.expectEqualSlices(Table.Fse, types.compressed_block.predefined_match_fse_table.fse, &entries);
+
+    try buildFseTable(&offset_codes_default_values, entries[0..32]);
+    try std.testing.expectEqualSlices(Table.Fse, types.compressed_block.predefined_offset_fse_table.fse, entries[0..32]);
+}
diff --git a/lib/std/compress/zstandard/decode/huffman.zig b/lib/std/compress/zstandard/decode/huffman.zig
new file mode 100644
index 000000000000..291419826831
--- /dev/null
+++ b/lib/std/compress/zstandard/decode/huffman.zig
@@ -0,0 +1,234 @@
+const std = @import("std");
+
+const types = @import("../types.zig");
+const LiteralsSection = types.compressed_block.LiteralsSection;
+const Table = types.compressed_block.Table;
+
+const readers = @import("../readers.zig");
+
+const decodeFseTable = @import("fse.zig").decodeFseTable;
+
+pub const Error = error{
+    MalformedHuffmanTree,
+    MalformedFseTable,
+    MalformedAccuracyLog,
+    EndOfStream,
+};
+
+fn decodeFseHuffmanTree(
+    source: anytype,
+    compressed_size: usize,
+    buffer: []u8,
+    weights: *[256]u4,
+) !usize {
+    var stream = std.io.limitedReader(source, compressed_size);
+    var bit_reader = readers.bitReader(stream.reader());
+
+    var entries: [1 << 6]Table.Fse = undefined;
+    const table_size = decodeFseTable(&bit_reader, 256, 6, &entries) catch |err| switch (err) {
+        error.MalformedAccuracyLog, error.MalformedFseTable => |e| return e,
+        error.EndOfStream => return error.MalformedFseTable,
+        else => |e| return e,
+    };
+    const accuracy_log = std.math.log2_int_ceil(usize, table_size);
+
+    const amount = try stream.reader().readAll(buffer);
+    var huff_bits: readers.ReverseBitReader = undefined;
+    huff_bits.init(buffer[0..amount]) catch return error.MalformedHuffmanTree;
+
+    return assignWeights(&huff_bits, accuracy_log, &entries, weights);
+}
+
+fn decodeFseHuffmanTreeSlice(src: []const u8, compressed_size: usize, weights: *[256]u4) !usize {
+    if (src.len < compressed_size) return error.MalformedHuffmanTree;
+    var stream = std.io.fixedBufferStream(src[0..compressed_size]);
+    var counting_reader = std.io.countingReader(stream.reader());
+    var bit_reader = readers.bitReader(counting_reader.reader());
+
+    var entries: [1 << 6]Table.Fse = undefined;
+    const table_size = decodeFseTable(&bit_reader, 256, 6, &entries) catch |err| switch (err) {
+        error.MalformedAccuracyLog, error.MalformedFseTable => |e| return e,
+        error.EndOfStream => return error.MalformedFseTable,
+    };
+    const accuracy_log = std.math.log2_int_ceil(usize, table_size);
+
+    const start_index = std.math.cast(usize, counting_reader.bytes_read) orelse
+        return error.MalformedHuffmanTree;
+    const huff_data = src[start_index..compressed_size];
+    var huff_bits: readers.ReverseBitReader = undefined;
+    huff_bits.init(huff_data) catch return error.MalformedHuffmanTree;
+
+    return assignWeights(&huff_bits, accuracy_log, &entries, weights);
+}
+
+fn assignWeights(
+    huff_bits: *readers.ReverseBitReader,
+    accuracy_log: usize,
+    entries: *[1 << 6]Table.Fse,
+    weights: *[256]u4,
+) !usize {
+    var i: usize = 0;
+    var even_state: u32 = huff_bits.readBitsNoEof(u32, accuracy_log) catch return error.MalformedHuffmanTree;
+    var odd_state: u32 = huff_bits.readBitsNoEof(u32, accuracy_log) catch return error.MalformedHuffmanTree;
+
+    while (i < 254) {
+        const even_data = entries[even_state];
+        var read_bits: usize = 0;
+        const even_bits = huff_bits.readBits(u32, even_data.bits, &read_bits) catch unreachable;
+        weights[i] = std.math.cast(u4, even_data.symbol) orelse return error.MalformedHuffmanTree;
+        i += 1;
+        if (read_bits < even_data.bits) {
+            weights[i] = std.math.cast(u4, entries[odd_state].symbol) orelse return error.MalformedHuffmanTree;
+            i += 1;
+            break;
+        }
+        even_state = even_data.baseline + even_bits;
+
+        read_bits = 0;
+        const odd_data = entries[odd_state];
+        const odd_bits = huff_bits.readBits(u32, odd_data.bits, &read_bits) catch unreachable;
+        weights[i] = std.math.cast(u4, odd_data.symbol) orelse return error.MalformedHuffmanTree;
+        i += 1;
+        if (read_bits < odd_data.bits) {
+            if (i == 255) return error.MalformedHuffmanTree;
+            weights[i] = std.math.cast(u4, entries[even_state].symbol) orelse return error.MalformedHuffmanTree;
+            i += 1;
+            break;
+        }
+        odd_state = odd_data.baseline + odd_bits;
+    } else return error.MalformedHuffmanTree;
+
+    if (!huff_bits.isEmpty()) {
+        return error.MalformedHuffmanTree;
+    }
+
+    return i + 1; // stream contains all but the last symbol
+}
+
+fn decodeDirectHuffmanTree(source: anytype, encoded_symbol_count: usize, weights: *[256]u4) !usize {
+    const weights_byte_count = (encoded_symbol_count + 1) / 2;
+    for (0..weights_byte_count) |i| {
+        const byte = try source.readByte();
+        weights[2 * i] = @intCast(u4, byte >> 4);
+        weights[2 * i + 1] = @intCast(u4, byte & 0xF);
+    }
+    return encoded_symbol_count + 1;
+}
+
+fn assignSymbols(weight_sorted_prefixed_symbols: []LiteralsSection.HuffmanTree.PrefixedSymbol, weights: [256]u4) usize {
+    for (0..weight_sorted_prefixed_symbols.len) |i| {
+        weight_sorted_prefixed_symbols[i] = .{
+            .symbol = @intCast(u8, i),
+            .weight = undefined,
+            .prefix = undefined,
+        };
+    }
+
+    std.sort.sort(
+        LiteralsSection.HuffmanTree.PrefixedSymbol,
+        weight_sorted_prefixed_symbols,
+        weights,
+        lessThanByWeight,
+    );
+
+    var prefix: u16 = 0;
+    var prefixed_symbol_count: usize = 0;
+    var sorted_index: usize = 0;
+    const symbol_count = weight_sorted_prefixed_symbols.len;
+    while (sorted_index < symbol_count) {
+        var symbol = weight_sorted_prefixed_symbols[sorted_index].symbol;
+        const weight = weights[symbol];
+        if (weight == 0) {
+            sorted_index += 1;
+            continue;
+        }
+
+        while (sorted_index < symbol_count) : ({
+            sorted_index += 1;
+            prefixed_symbol_count += 1;
+            prefix += 1;
+        }) {
+            symbol = weight_sorted_prefixed_symbols[sorted_index].symbol;
+            if (weights[symbol] != weight) {
+                prefix = ((prefix - 1) >> (weights[symbol] - weight)) + 1;
+                break;
+            }
+            weight_sorted_prefixed_symbols[prefixed_symbol_count].symbol = symbol;
+            weight_sorted_prefixed_symbols[prefixed_symbol_count].prefix = prefix;
+            weight_sorted_prefixed_symbols[prefixed_symbol_count].weight = weight;
+        }
+    }
+    return prefixed_symbol_count;
+}
+
+fn buildHuffmanTree(weights: *[256]u4, symbol_count: usize) error{MalformedHuffmanTree}!LiteralsSection.HuffmanTree {
+    var weight_power_sum_big: u32 = 0;
+    for (weights[0 .. symbol_count - 1]) |value| {
+        weight_power_sum_big += (@as(u16, 1) << value) >> 1;
+    }
+    if (weight_power_sum_big >= 1 << 11) return error.MalformedHuffmanTree;
+    const weight_power_sum = @intCast(u16, weight_power_sum_big);
+
+    // advance to next power of two (even if weight_power_sum is a power of 2)
+    // TODO: is it valid to have weight_power_sum == 0?
+    const max_number_of_bits = if (weight_power_sum == 0) 1 else std.math.log2_int(u16, weight_power_sum) + 1;
+    const next_power_of_two = @as(u16, 1) << max_number_of_bits;
+    weights[symbol_count - 1] = std.math.log2_int(u16, next_power_of_two - weight_power_sum) + 1;
+
+    var weight_sorted_prefixed_symbols: [256]LiteralsSection.HuffmanTree.PrefixedSymbol = undefined;
+    const prefixed_symbol_count = assignSymbols(weight_sorted_prefixed_symbols[0..symbol_count], weights.*);
+    const tree = LiteralsSection.HuffmanTree{
+        .max_bit_count = max_number_of_bits,
+        .symbol_count_minus_one = @intCast(u8, prefixed_symbol_count - 1),
+        .nodes = weight_sorted_prefixed_symbols,
+    };
+    return tree;
+}
+
+pub fn decodeHuffmanTree(
+    source: anytype,
+    buffer: []u8,
+) (@TypeOf(source).Error || Error)!LiteralsSection.HuffmanTree {
+    const header = try source.readByte();
+    var weights: [256]u4 = undefined;
+    const symbol_count = if (header < 128)
+        // FSE compressed weights
+        try decodeFseHuffmanTree(source, header, buffer, &weights)
+    else
+        try decodeDirectHuffmanTree(source, header - 127, &weights);
+
+    return buildHuffmanTree(&weights, symbol_count);
+}
+
+pub fn decodeHuffmanTreeSlice(
+    src: []const u8,
+    consumed_count: *usize,
+) Error!LiteralsSection.HuffmanTree {
+    if (src.len == 0) return error.MalformedHuffmanTree;
+    const header = src[0];
+    var bytes_read: usize = 1;
+    var weights: [256]u4 = undefined;
+    const symbol_count = if (header < 128) count: {
+        // FSE compressed weights
+        bytes_read += header;
+        break :count try decodeFseHuffmanTreeSlice(src[1..], header, &weights);
+    } else count: {
+        var fbs = std.io.fixedBufferStream(src[1..]);
+        defer bytes_read += fbs.pos;
+        break :count try decodeDirectHuffmanTree(fbs.reader(), header - 127, &weights);
+    };
+
+    consumed_count.* += bytes_read;
+    return buildHuffmanTree(&weights, symbol_count);
+}
+
+fn lessThanByWeight(
+    weights: [256]u4,
+    lhs: LiteralsSection.HuffmanTree.PrefixedSymbol,
+    rhs: LiteralsSection.HuffmanTree.PrefixedSymbol,
+) bool {
+    // NOTE: this function relies on the use of a stable sorting algorithm,
+    //       otherwise a special case of if (weights[lhs] == weights[rhs]) return lhs < rhs;
+    //       should be added
+    return weights[lhs.symbol] < weights[rhs.symbol];
+}
diff --git a/lib/std/compress/zstandard/decompress.zig b/lib/std/compress/zstandard/decompress.zig
new file mode 100644
index 000000000000..a2ba59e6887b
--- /dev/null
+++ b/lib/std/compress/zstandard/decompress.zig
@@ -0,0 +1,636 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const RingBuffer = std.RingBuffer;
+
+const types = @import("types.zig");
+const frame = types.frame;
+const LiteralsSection = types.compressed_block.LiteralsSection;
+const SequencesSection = types.compressed_block.SequencesSection;
+const SkippableHeader = types.frame.Skippable.Header;
+const ZstandardHeader = types.frame.Zstandard.Header;
+const Table = types.compressed_block.Table;
+
+pub const block = @import("decode/block.zig");
+
+const readers = @import("readers.zig");
+
+const readInt = std.mem.readIntLittle;
+const readIntSlice = std.mem.readIntSliceLittle;
+
+/// Returns `true` if `magic` is a valid magic number for a skippable frame
+pub fn isSkippableMagic(magic: u32) bool {
+    return frame.Skippable.magic_number_min <= magic and magic <= frame.Skippable.magic_number_max;
+}
+
+/// Returns the kind of frame at the beginning of `source`.
+///
+/// Errors returned:
+///   - `error.BadMagic` if `source` begins with bytes not equal to the
+///     Zstandard frame magic number, or outside the range of magic numbers for
+///     skippable frames.
+///   - `error.EndOfStream` if `source` contains fewer than 4 bytes
+pub fn decodeFrameType(source: anytype) error{ BadMagic, EndOfStream }!frame.Kind {
+    const magic = try source.readIntLittle(u32);
+    return frameType(magic);
+}
+
+/// Returns the kind of frame associated to `magic`.
///
/// Errors returned:
///   - `error.BadMagic` if `magic` is neither the Zstandard frame magic number
///     nor inside the range of skippable frame magic numbers.
pub fn frameType(magic: u32) error{BadMagic}!frame.Kind {
    if (magic == frame.Zstandard.magic_number) return frame.Kind.zstandard;
    if (isSkippableMagic(magic)) return frame.Kind.skippable;
    return error.BadMagic;
}

/// A decoded frame header: either a Zstandard frame header or the header of a
/// skippable frame.
pub const FrameHeader = union(enum) {
    zstandard: ZstandardHeader,
    skippable: SkippableHeader,
};

/// Errors `decodeFrameHeader()` can produce in addition to those of the
/// underlying reader.
pub const HeaderError = error{ BadMagic, EndOfStream, ReservedBitSet };

/// Reads the magic number at the start of `source` and then the header of the
/// frame kind it identifies.
///
/// Errors returned:
///   - `error.BadMagic` if the magic number is neither the Zstandard frame
///     magic number nor a skippable frame magic number
///   - `error.EndOfStream` if `source` ends before the header is complete
///   - `error.ReservedBitSet` if the frame is a Zstandard frame and any of the
///     reserved bits are set
pub fn decodeFrameHeader(source: anytype) (@TypeOf(source).Error || HeaderError)!FrameHeader {
    const magic = try source.readIntLittle(u32);
    return switch (try frameType(magic)) {
        .zstandard => FrameHeader{ .zstandard = try decodeZstandardHeader(source) },
        .skippable => header: {
            // A skippable header is just the magic number followed by a
            // little-endian 32-bit frame size.
            const frame_size = try source.readIntLittle(u32);
            break :header FrameHeader{
                .skippable = .{
                    .magic_number = magic,
                    .frame_size = frame_size,
                },
            };
        },
    };
}

/// Byte counts reported by the slice-based decoders: how much input was
/// consumed and how much output was produced.
pub const ReadWriteCount = struct {
    read_count: usize,
    write_count: usize,
};

/// Decodes frames from `src` into `dest`; returns the length of the result.
/// The stream should not have extra trailing bytes - either all bytes in `src`
/// will be decoded, or an error will be returned. An error will be returned if
/// a Zstandard frame in `src` does not declare its content size.
///
/// Frames are decoded back to back: the output of each frame is written
/// directly after the output of the frames that precede it in `src`.
///
/// Errors returned:
///   - `error.DictionaryIdFlagUnsupported` if `src` contains a frame that
///     uses a dictionary
///   - `error.MalformedFrame` if a frame in `src` is invalid; every other
///     failure of `decodeFrame()` is also reported as this error
///   - `error.UnknownContentSizeUnsupported` if a frame in `src` does not
///     declare its content size
pub fn decode(dest: []u8, src: []const u8, verify_checksum: bool) error{
    MalformedFrame,
    UnknownContentSizeUnsupported,
    DictionaryIdFlagUnsupported,
}!usize {
    var write_count: usize = 0;
    var read_count: usize = 0;
    while (read_count < src.len) {
        // Hand each frame only the still-unused tail of `dest`. Decoding every
        // frame into `dest` from index 0 would make later frames overwrite the
        // output of earlier ones while `write_count` kept growing.
        const counts = decodeFrame(
            dest[write_count..],
            src[read_count..],
            verify_checksum,
        ) catch |err| {
            switch (err) {
                error.UnknownContentSizeUnsupported => return error.UnknownContentSizeUnsupported,
                error.DictionaryIdFlagUnsupported => return error.DictionaryIdFlagUnsupported,
                else => return error.MalformedFrame,
            }
        };
        read_count += counts.read_count;
        write_count += counts.write_count;
    }
    return write_count;
}

/// Decodes a stream of frames from `src`; returns the decoded bytes. The stream
/// should not have extra trailing bytes - either all bytes in `src` will be
/// decoded, or an error will be returned.
///
/// Errors returned:
///   - `error.DictionaryIdFlagUnsupported` if `src` contains a frame that
///     uses a dictionary
///   - `error.MalformedFrame` if a frame in `src` is invalid
///   - `error.OutOfMemory` if `allocator` cannot allocate enough memory
pub fn decodeAlloc(
    allocator: Allocator,
    src: []const u8,
    verify_checksum: bool,
    window_size_max: usize,
) error{ DictionaryIdFlagUnsupported, MalformedFrame, OutOfMemory }![]u8 {
    var decoded = std.ArrayList(u8).init(allocator);
    // Release the partial output only when bailing out with an error; on
    // success the accumulated bytes are handed to the caller.
    errdefer decoded.deinit();

    var offset: usize = 0;
    while (offset < src.len) {
        const frame_len = decodeFrameArrayList(
            allocator,
            &decoded,
            src[offset..],
            verify_checksum,
            window_size_max,
        ) catch |err| {
            // Two errors are passed through unchanged; every other failure is
            // reported as a malformed frame.
            switch (err) {
                error.OutOfMemory, error.DictionaryIdFlagUnsupported => |e| return e,
                else => return error.MalformedFrame,
            }
        };
        offset += frame_len;
    }
    return decoded.toOwnedSlice();
}

/// Decodes the frame at the start of `src` into `dest`. Returns the number of
/// bytes read from `src` and written to `dest`. This function can only decode
/// frames that declare the decompressed content size.
///
/// Errors returned:
///   - `error.BadMagic` if the first 4 bytes of `src` is not a valid magic
///     number for a Zstandard or skippable frame
///   - `error.UnknownContentSizeUnsupported` if the frame does not declare the
///     uncompressed content size
///   - `error.WindowSizeUnknown` if the frame does not have a valid window size
///   - `error.ContentTooLarge` if `dest` is smaller than the uncompressed data
///     size declared by the frame header
///   - `error.ContentSizeTooLarge` if the frame header indicates a content size
///     that is larger than `std.math.maxInt(usize)`
///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
///   - `error.ChecksumFailure` if `verify_checksum` is true and the frame
///     contains a checksum that does not match the checksum of the decompressed
///     data
///   - `error.ReservedBitSet` if any of the reserved bits of the frame header
///     are set
///   - `error.EndOfStream` if `src` does not contain a complete frame
///   - `error.BadContentSize` if the content size declared by the frame does
///     not equal the actual size of decompressed data
///   - an error in `block.Error` if there are errors decoding a block
///   - `error.SkippableSizeTooLarge` if the frame is skippable and reports a
///     size greater than `src.len`
pub fn decodeFrame(
    dest: []u8,
    src: []const u8,
    verify_checksum: bool,
) (error{
    BadMagic,
    UnknownContentSizeUnsupported,
    ContentTooLarge,
    ContentSizeTooLarge,
    WindowSizeUnknown,
    DictionaryIdFlagUnsupported,
    SkippableSizeTooLarge,
} || FrameError)!ReadWriteCount {
    var fbs = std.io.fixedBufferStream(src);
    switch (try decodeFrameType(fbs.reader())) {
        .zstandard => return decodeZstandardFrame(dest, src, verify_checksum),
        .skippable => {
            const content_size = try fbs.reader().readIntLittle(u32);
            // The frame occupies the 4 byte magic number, the 4 byte size
            // field and then `content_size` bytes; guard the addition.
            if (content_size > std.math.maxInt(usize) - 8) return error.SkippableSizeTooLarge;
            const read_count = @as(usize, content_size) + 8;
            if (read_count > src.len) return error.SkippableSizeTooLarge;
            return ReadWriteCount{
                .read_count = read_count,
                .write_count = 0,
            };
        },
    }
}

/// Decodes the frame at the start of `src` into `dest`. Returns the number of
/// bytes read from `src`.
///
/// Errors returned:
///   - `error.BadMagic` if the first 4 bytes of `src` is not a valid magic
///     number for a Zstandard or skippable frame
///   - `error.WindowSizeUnknown` if the frame does not have a valid window size
///   - `error.WindowTooLarge` if the window size is larger than
///     `window_size_max`
///   - `error.ContentSizeTooLarge` if the frame header indicates a content size
///     that is larger than `std.math.maxInt(usize)`
///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
///   - `error.ChecksumFailure` if `verify_checksum` is true and the frame
///     contains a checksum that does not match the checksum of the decompressed
///     data
///   - `error.ReservedBitSet` if any of the reserved bits of the frame header
///     are set
///   - `error.EndOfStream` if `src` does not contain a complete frame
///   - `error.BadContentSize` if the content size declared by the frame does
///     not equal the actual size of decompressed data
///   - `error.OutOfMemory` if `allocator` cannot allocate enough memory
///   - an error in `block.Error` if there are errors decoding a block
///   - `error.SkippableSizeTooLarge` if the frame is skippable and reports a
///     size greater than `src.len`
pub fn decodeFrameArrayList(
    allocator: Allocator,
    dest: *std.ArrayList(u8),
    src: []const u8,
    verify_checksum: bool,
    window_size_max: usize,
) (error{ BadMagic, OutOfMemory, SkippableSizeTooLarge } || FrameContext.Error || FrameError)!usize {
    var fbs = std.io.fixedBufferStream(src);
    const reader = fbs.reader();
    const magic = try reader.readIntLittle(u32);
    switch (try frameType(magic)) {
        .zstandard => return decodeZstandardFrameArrayList(
            allocator,
            dest,
            src,
            verify_checksum,
            window_size_max,
        ),
        .skippable => {
            const content_size = try fbs.reader().readIntLittle(u32);
            // Magic number (4 bytes) + size field (4 bytes) + skipped content.
            if (content_size > std.math.maxInt(usize) - 8) return error.SkippableSizeTooLarge;
            const read_count = @as(usize, content_size) + 8;
            if (read_count > src.len) return error.SkippableSizeTooLarge;
            return read_count;
        },
    }
}

/// Returns the frame checksum corresponding to the data fed into `hasher`,
/// which is the low 32 bits of the 64-bit hash.
pub fn computeChecksum(hasher: *std.hash.XxHash64) u32 {
    const hash = hasher.final();
    return @intCast(u32, hash & 0xFFFFFFFF);
}

/// Errors shared by all of the Zstandard frame decoding functions in this file.
const FrameError = error{
    ChecksumFailure,
    BadContentSize,
    EndOfStream,
    ReservedBitSet,
} || block.Error;

/// Decode a Zstandard frame from `src` into `dest`, returning the number of
/// bytes read from `src` and written to `dest`. The first four bytes of `src`
/// must be the magic number for a Zstandard frame.
///
/// Errors returned:
///   - `error.UnknownContentSizeUnsupported` if the frame does not declare the
///     uncompressed content size
///   - `error.ContentTooLarge` if `dest` is smaller than the uncompressed data
///     size declared by the frame header
///   - `error.WindowSizeUnknown` if the frame does not have a valid window size
///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
///   - `error.ContentSizeTooLarge` if the frame header indicates a content size
///     that is larger than `std.math.maxInt(usize)`
///   - `error.ChecksumFailure` if `verify_checksum` is true and the frame
///     contains a checksum that does not match the checksum of the decompressed
///     data
///   - `error.ReservedBitSet` if the reserved bit of the frame header is set
///   - `error.EndOfStream` if `src` does not contain a complete frame
///   - an error in `block.Error` if there are errors decoding a block
///   - `error.BadContentSize` if the content size declared by the frame does
///     not equal the actual size of decompressed data
pub fn decodeZstandardFrame(
    dest: []u8,
    src: []const u8,
    verify_checksum: bool,
) (error{
    UnknownContentSizeUnsupported,
    ContentTooLarge,
    ContentSizeTooLarge,
    WindowSizeUnknown,
    DictionaryIdFlagUnsupported,
} || FrameError)!ReadWriteCount {
    assert(readInt(u32, src[0..4]) == frame.Zstandard.magic_number);
    var consumed_count: usize = 4;

    var frame_context = context: {
        var fbs = std.io.fixedBufferStream(src[consumed_count..]);
        var source = fbs.reader();
        const frame_header = try decodeZstandardHeader(source);
        consumed_count += fbs.pos;
        break :context FrameContext.init(
            frame_header,
            std.math.maxInt(usize),
            verify_checksum,
        ) catch |err| switch (err) {
            // NOTE(review): `FrameContext.init()` compares a `u64` window size
            // against this limit, so on targets where `usize` is narrower than
            // 64 bits this branch looks reachable for large windows - confirm
            // and map it to a real error if so.
            error.WindowTooLarge => unreachable,
            inline else => |e| return e,
        };
    };
    const counts = try decodeZStandardFrameBlocks(
        dest,
        src[consumed_count..],
        &frame_context,
    );
    return ReadWriteCount{
        .read_count = counts.read_count + consumed_count,
        .write_count = counts.write_count,
    };
}

/// Decodes the blocks of a Zstandard frame (everything after the frame header)
/// from `src` into `dest`, followed by the content checksum if
/// `frame_context.has_checksum` is set. Returns the number of bytes read from
/// `src` and written to `dest`.
///
/// Errors returned:
///   - `error.UnknownContentSizeUnsupported` if `frame_context.content_size`
///     is `null`
///   - `error.ContentTooLarge` if `dest` is smaller than the content size
///   - `error.BadContentSize` if the decoded data does not have exactly the
///     declared content size
///   - `error.EndOfStream` if `src` ends before the frame is complete
///   - `error.ChecksumFailure` if `frame_context` carries a hasher and the
///     stored checksum does not match the decoded data
///   - an error in `block.Error` if there are errors decoding a block
pub fn decodeZStandardFrameBlocks(
    dest: []u8,
    src: []const u8,
    frame_context: *FrameContext,
) (error{ ContentTooLarge, UnknownContentSizeUnsupported } || FrameError)!ReadWriteCount {
    const content_size = frame_context.content_size orelse
        return error.UnknownContentSizeUnsupported;
    if (dest.len < content_size) return error.ContentTooLarge;

    var consumed_count: usize = 0;
    // Only the first `content_size` bytes of `dest` are offered to the block
    // decoder, so running out of room means the frame holds more data than it
    // declared.
    const written_count = decodeFrameBlocksInner(
        dest[0..content_size],
        src[consumed_count..],
        &consumed_count,
        if (frame_context.hasher_opt) |*hasher| hasher else null,
        frame_context.block_size_max,
    ) catch |err| switch (err) {
        error.DestTooSmall => return error.BadContentSize,
        inline else => |e| return e,
    };

    if (written_count != content_size) return error.BadContentSize;
    if (frame_context.has_checksum) {
        if (src.len < consumed_count + 4) return error.EndOfStream;
        const checksum = readIntSlice(u32, src[consumed_count .. consumed_count + 4]);
        // The checksum bytes are consumed even when they are not verified.
        consumed_count += 4;
        if (frame_context.hasher_opt) |*hasher| {
            if (checksum != computeChecksum(hasher)) return error.ChecksumFailure;
        }
    }
    return ReadWriteCount{ .read_count = consumed_count, .write_count = written_count };
}

/// Per-frame decoding state derived from a validated Zstandard frame header.
pub const FrameContext = struct {
    /// Present only when the frame has a checksum and it should be verified.
    hasher_opt: ?std.hash.XxHash64,
    window_size: usize,
    /// Whether the frame ends with a 4 byte content checksum.
    has_checksum: bool,
    block_size_max: usize,
    /// `null` when the frame header does not declare a content size.
    content_size: ?usize,

    const Error = error{
        DictionaryIdFlagUnsupported,
        WindowSizeUnknown,
        WindowTooLarge,
        ContentSizeTooLarge,
    };

    /// Validates `frame_header` and returns the associated `FrameContext`.
    ///
    /// Errors returned:
    ///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
    ///   - `error.WindowSizeUnknown` if the frame does not have a valid window
    ///     size
    ///   - `error.WindowTooLarge` if the window size is larger than
    ///     `window_size_max`
    ///   - `error.ContentSizeTooLarge` if the frame header indicates a content
    ///     size larger than `std.math.maxInt(usize)`
    pub fn init(
        frame_header: ZstandardHeader,
        window_size_max: usize,
        verify_checksum: bool,
    ) Error!FrameContext {
        if (frame_header.descriptor.dictionary_id_flag != 0)
            return error.DictionaryIdFlagUnsupported;

        const window_size_raw = frameWindowSize(frame_header) orelse return error.WindowSizeUnknown;
        // The comparison guarantees that the cast below cannot overflow.
        const window_size = if (window_size_raw > window_size_max)
            return error.WindowTooLarge
        else
            @intCast(usize, window_size_raw);

        const should_compute_checksum =
            frame_header.descriptor.content_checksum_flag and verify_checksum;

        const content_size = if (frame_header.content_size) |size|
            std.math.cast(usize, size) orelse return error.ContentSizeTooLarge
        else
            null;

        return .{
            .hasher_opt = if (should_compute_checksum) std.hash.XxHash64.init(0) else null,
            .window_size = window_size,
            .has_checksum = frame_header.descriptor.content_checksum_flag,
            .block_size_max = @min(1 << 17, window_size),
            .content_size = content_size,
        };
    }
};

/// Decode a Zstandard frame from `src` and return number of bytes read; see
/// `decodeZstandardFrame()`. The first four bytes of `src` must be the magic
/// number for a Zstandard frame.
///
/// Errors returned:
///   - `error.WindowSizeUnknown` if the frame does not have a valid window size
///   - `error.WindowTooLarge` if the window size is larger than
///     `window_size_max`
///   - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
///   - `error.ContentSizeTooLarge` if the frame header indicates a content size
///     that is larger than `std.math.maxInt(usize)`
///   - `error.ChecksumFailure` if `verify_checksum` is true and the frame
///     contains a checksum that does not match the checksum of the decompressed
///     data
///   - `error.ReservedBitSet` if the reserved bit of the frame header is set
///   - `error.EndOfStream` if `src` does not contain a complete frame
///   - `error.OutOfMemory` if `allocator` cannot allocate enough memory
///   - an error in `block.Error` if there are errors decoding a block
///   - `error.BadContentSize` if the content size declared by the frame does
///     not equal the size of decompressed data
pub fn decodeZstandardFrameArrayList(
    allocator: Allocator,
    dest: *std.ArrayList(u8),
    src: []const u8,
    verify_checksum: bool,
    window_size_max: usize,
) (error{OutOfMemory} || FrameContext.Error || FrameError)!usize {
    assert(readInt(u32, src[0..4]) == frame.Zstandard.magic_number);
    var consumed_count: usize = 4;

    var frame_context = context: {
        var fbs = std.io.fixedBufferStream(src[consumed_count..]);
        var source = fbs.reader();
        const frame_header = try decodeZstandardHeader(source);
        consumed_count += fbs.pos;
        break :context try FrameContext.init(frame_header, window_size_max, verify_checksum);
    };

    consumed_count += try decodeZstandardFrameBlocksArrayList(
        allocator,
        dest,
        src[consumed_count..],
        &frame_context,
    );
    return consumed_count;
}

/// Decodes the blocks of a Zstandard frame (everything after the frame header)
/// from `src`, appending the decoded bytes to `dest`, followed by the content
/// checksum if `frame_context.has_checksum` is set. Returns the number of
/// bytes read from `src`.
///
/// A ring buffer of `frame_context.window_size` bytes is allocated with
/// `allocator` for the duration of the call.
///
/// Errors returned:
///   - `error.OutOfMemory` if `allocator` cannot allocate enough memory
///   - `error.BadContentSize` if the frame declares a content size and the
///     decoded data does not have exactly that size
///   - `error.EndOfStream` if `src` ends before the frame is complete
///   - `error.ChecksumFailure` if `frame_context` carries a hasher and the
///     stored checksum does not match the decoded data
///   - an error in `block.Error` if there are errors decoding a block
pub fn decodeZstandardFrameBlocksArrayList(
    allocator: Allocator,
    dest: *std.ArrayList(u8),
    src: []const u8,
    frame_context: *FrameContext,
) (error{OutOfMemory} || FrameError)!usize {
    const initial_len = dest.items.len;

    var ring_buffer = try RingBuffer.init(allocator, frame_context.window_size);
    defer ring_buffer.deinit(allocator);

    // These tables take 7680 bytes
    var literal_fse_data: [types.compressed_block.table_size_max.literal]Table.Fse = undefined;
    var match_fse_data: [types.compressed_block.table_size_max.match]Table.Fse = undefined;
    var offset_fse_data: [types.compressed_block.table_size_max.offset]Table.Fse = undefined;

    var block_header = try block.decodeBlockHeaderSlice(src);
    // Every block header accounts for 3 bytes of `src`.
    var consumed_count: usize = 3;
    var decode_state = block.DecodeState.init(&literal_fse_data, &match_fse_data, &offset_fse_data);
    while (true) : ({
        block_header = try block.decodeBlockHeaderSlice(src[consumed_count..]);
        consumed_count += 3;
    }) {
        const written_size = try block.decodeBlockRingBuffer(
            &ring_buffer,
            src[consumed_count..],
            block_header,
            &decode_state,
            &consumed_count,
            frame_context.block_size_max,
        );
        if (frame_context.content_size) |size| {
            // Include the block that was just decoded: it has not been
            // appended yet, so looking at `dest.items.len` alone would only
            // notice an oversized frame one block late, after the excess bytes
            // had already been appended to `dest`.
            if (dest.items.len - initial_len + written_size > size) {
                return error.BadContentSize;
            }
        }
        if (written_size > 0) {
            // The newest bytes may wrap around the end of the ring buffer, in
            // which case they come back as two slices.
            const written_slice = ring_buffer.sliceLast(written_size);
            try dest.appendSlice(written_slice.first);
            try dest.appendSlice(written_slice.second);
            if (frame_context.hasher_opt) |*hasher| {
                hasher.update(written_slice.first);
                hasher.update(written_slice.second);
            }
        }
        if (block_header.last_block) break;
    }
    if (frame_context.content_size) |size| {
        if (dest.items.len - initial_len != size) {
            return error.BadContentSize;
        }
    }

    if (frame_context.has_checksum) {
        if (src.len < consumed_count + 4) return error.EndOfStream;
        const checksum = readIntSlice(u32, src[consumed_count .. consumed_count + 4]);
        // The checksum bytes are consumed even when they are not verified.
        consumed_count += 4;
        if (frame_context.hasher_opt) |*hasher| {
            if (checksum != computeChecksum(hasher)) return error.ChecksumFailure;
        }
    }
    return consumed_count;
}

/// Decodes blocks from `src` into `dest` until the block marked as the last
/// block has been decoded, and returns the number of bytes written to `dest`.
/// The number of bytes read from `src` is added to `consumed_count.*`, also
/// when an error is returned. When `hash` is non-null it is updated with the
/// output of every block.
fn decodeFrameBlocksInner(
    dest: []u8,
    src: []const u8,
    consumed_count: *usize,
    hash: ?*std.hash.XxHash64,
    block_size_max: usize,
) (error{ EndOfStream, DestTooSmall } || block.Error)!usize {
    // These tables take 7680 bytes
    var literal_fse_data: [types.compressed_block.table_size_max.literal]Table.Fse = undefined;
    var match_fse_data: [types.compressed_block.table_size_max.match]Table.Fse = undefined;
    var offset_fse_data: [types.compressed_block.table_size_max.offset]Table.Fse = undefined;

    var block_header = try block.decodeBlockHeaderSlice(src);
    // Every block header accounts for 3 bytes of `src`.
    var bytes_read: usize = 3;
    // Evaluated on scope exit, so it reports the final value of `bytes_read`.
    defer consumed_count.* += bytes_read;
    var decode_state = block.DecodeState.init(&literal_fse_data, &match_fse_data, &offset_fse_data);
    var count: usize = 0;
    while (true) : ({
        block_header = try block.decodeBlockHeaderSlice(src[bytes_read..]);
        bytes_read += 3;
    }) {
        const written_size = try block.decodeBlock(
            dest,
            src[bytes_read..],
            block_header,
            &decode_state,
            &bytes_read,
            block_size_max,
            count,
        );
        if (hash) |hash_state| hash_state.update(dest[count .. count + written_size]);
        count += written_size;
        if (block_header.last_block) break;
    }
    return count;
}

/// Decode the header of a skippable frame. The first four bytes of `src` must
/// be a valid magic number for a skippable frame.
pub fn decodeSkippableHeader(src: *const [8]u8) SkippableHeader {
    const magic = readInt(u32, src[0..4]);
    assert(isSkippableMagic(magic));
    const frame_size = readInt(u32, src[4..8]);
    return .{
        .magic_number = magic,
        .frame_size = frame_size,
    };
}

/// Returns the window size required to decompress a frame, or `null` if it
/// cannot be determined (which indicates a malformed frame header).
+pub fn frameWindowSize(header: ZstandardHeader) ?u64 { + if (header.window_descriptor) |descriptor| { + const exponent = (descriptor & 0b11111000) >> 3; + const mantissa = descriptor & 0b00000111; + const window_log = 10 + exponent; + const window_base = @as(u64, 1) << @intCast(u6, window_log); + const window_add = (window_base / 8) * mantissa; + return window_base + window_add; + } else return header.content_size; +} + +/// Decode the header of a Zstandard frame. +/// +/// Errors returned: +/// - `error.ReservedBitSet` if any of the reserved bits of the header are set +/// - `error.EndOfStream` if `source` does not contain a complete header +pub fn decodeZstandardHeader( + source: anytype, +) (@TypeOf(source).Error || error{ EndOfStream, ReservedBitSet })!ZstandardHeader { + const descriptor = @bitCast(ZstandardHeader.Descriptor, try source.readByte()); + + if (descriptor.reserved) return error.ReservedBitSet; + + var window_descriptor: ?u8 = null; + if (!descriptor.single_segment_flag) { + window_descriptor = try source.readByte(); + } + + var dictionary_id: ?u32 = null; + if (descriptor.dictionary_id_flag > 0) { + // if flag is 3 then field_size = 4, else field_size = flag + const field_size = (@as(u4, 1) << descriptor.dictionary_id_flag) >> 1; + dictionary_id = try source.readVarInt(u32, .Little, field_size); + } + + var content_size: ?u64 = null; + if (descriptor.single_segment_flag or descriptor.content_size_flag > 0) { + const field_size = @as(u4, 1) << descriptor.content_size_flag; + content_size = try source.readVarInt(u64, .Little, field_size); + if (field_size == 2) content_size.? 
+= 256; + } + + const header = ZstandardHeader{ + .descriptor = descriptor, + .window_descriptor = window_descriptor, + .dictionary_id = dictionary_id, + .content_size = content_size, + }; + return header; +} + +test { + std.testing.refAllDecls(@This()); +} diff --git a/lib/std/compress/zstandard/readers.zig b/lib/std/compress/zstandard/readers.zig new file mode 100644 index 000000000000..e2f62ddc515d --- /dev/null +++ b/lib/std/compress/zstandard/readers.zig @@ -0,0 +1,82 @@ +const std = @import("std"); + +pub const ReversedByteReader = struct { + remaining_bytes: usize, + bytes: []const u8, + + const Reader = std.io.Reader(*ReversedByteReader, error{}, readFn); + + pub fn init(bytes: []const u8) ReversedByteReader { + return .{ + .bytes = bytes, + .remaining_bytes = bytes.len, + }; + } + + pub fn reader(self: *ReversedByteReader) Reader { + return .{ .context = self }; + } + + fn readFn(ctx: *ReversedByteReader, buffer: []u8) !usize { + if (ctx.remaining_bytes == 0) return 0; + const byte_index = ctx.remaining_bytes - 1; + buffer[0] = ctx.bytes[byte_index]; + // buffer[0] = @bitReverse(ctx.bytes[byte_index]); + ctx.remaining_bytes = byte_index; + return 1; + } +}; + +/// A bit reader for reading the reversed bit streams used to encode +/// FSE compressed data. 
+pub const ReverseBitReader = struct { + byte_reader: ReversedByteReader, + bit_reader: std.io.BitReader(.Big, ReversedByteReader.Reader), + + pub fn init(self: *ReverseBitReader, bytes: []const u8) error{BitStreamHasNoStartBit}!void { + self.byte_reader = ReversedByteReader.init(bytes); + self.bit_reader = std.io.bitReader(.Big, self.byte_reader.reader()); + if (bytes.len == 0) return; + var i: usize = 0; + while (i < 8 and 0 == self.readBitsNoEof(u1, 1) catch unreachable) : (i += 1) {} + if (i == 8) return error.BitStreamHasNoStartBit; + } + + pub fn readBitsNoEof(self: *@This(), comptime U: type, num_bits: usize) error{EndOfStream}!U { + return self.bit_reader.readBitsNoEof(U, num_bits); + } + + pub fn readBits(self: *@This(), comptime U: type, num_bits: usize, out_bits: *usize) error{}!U { + return try self.bit_reader.readBits(U, num_bits, out_bits); + } + + pub fn alignToByte(self: *@This()) void { + self.bit_reader.alignToByte(); + } + + pub fn isEmpty(self: ReverseBitReader) bool { + return self.byte_reader.remaining_bytes == 0 and self.bit_reader.bit_count == 0; + } +}; + +pub fn BitReader(comptime Reader: type) type { + return struct { + underlying: std.io.BitReader(.Little, Reader), + + pub fn readBitsNoEof(self: *@This(), comptime U: type, num_bits: usize) !U { + return self.underlying.readBitsNoEof(U, num_bits); + } + + pub fn readBits(self: *@This(), comptime U: type, num_bits: usize, out_bits: *usize) !U { + return self.underlying.readBits(U, num_bits, out_bits); + } + + pub fn alignToByte(self: *@This()) void { + self.underlying.alignToByte(); + } + }; +} + +pub fn bitReader(reader: anytype) BitReader(@TypeOf(reader)) { + return .{ .underlying = std.io.bitReader(.Little, reader) }; +} diff --git a/lib/std/compress/zstandard/types.zig b/lib/std/compress/zstandard/types.zig new file mode 100644 index 000000000000..db4fbdee2d92 --- /dev/null +++ b/lib/std/compress/zstandard/types.zig @@ -0,0 +1,401 @@ +pub const frame = struct { + pub const Kind = enum 
{ zstandard, skippable }; + + pub const Zstandard = struct { + pub const magic_number = 0xFD2FB528; + + header: Header, + data_blocks: []Block, + checksum: ?u32, + + pub const Header = struct { + descriptor: Descriptor, + window_descriptor: ?u8, + dictionary_id: ?u32, + content_size: ?u64, + + pub const Descriptor = packed struct { + dictionary_id_flag: u2, + content_checksum_flag: bool, + reserved: bool, + unused: bool, + single_segment_flag: bool, + content_size_flag: u2, + }; + }; + + pub const Block = struct { + pub const Header = struct { + last_block: bool, + block_type: Block.Type, + block_size: u21, + }; + + pub const Type = enum(u2) { + raw, + rle, + compressed, + reserved, + }; + }; + }; + + pub const Skippable = struct { + pub const magic_number_min = 0x184D2A50; + pub const magic_number_max = 0x184D2A5F; + + pub const Header = struct { + magic_number: u32, + frame_size: u32, + }; + }; +}; + +pub const compressed_block = struct { + pub const LiteralsSection = struct { + header: Header, + huffman_tree: ?HuffmanTree, + streams: Streams, + + pub const Streams = union(enum) { + one: []const u8, + four: [4][]const u8, + }; + + pub const Header = struct { + block_type: BlockType, + size_format: u2, + regenerated_size: u20, + compressed_size: ?u18, + }; + + pub const BlockType = enum(u2) { + raw, + rle, + compressed, + treeless, + }; + + pub const HuffmanTree = struct { + max_bit_count: u4, + symbol_count_minus_one: u8, + nodes: [256]PrefixedSymbol, + + pub const PrefixedSymbol = struct { + symbol: u8, + prefix: u16, + weight: u4, + }; + + pub const Result = union(enum) { + symbol: u8, + index: usize, + }; + + pub fn query(self: HuffmanTree, index: usize, prefix: u16) error{NotFound}!Result { + var node = self.nodes[index]; + const weight = node.weight; + var i: usize = index; + while (node.weight == weight) { + if (node.prefix == prefix) return Result{ .symbol = node.symbol }; + if (i == 0) return error.NotFound; + i -= 1; + node = self.nodes[i]; + } + return 
Result{ .index = i }; + } + + pub fn weightToBitCount(weight: u4, max_bit_count: u4) u4 { + return if (weight == 0) 0 else ((max_bit_count + 1) - weight); + } + }; + + pub const StreamCount = enum { one, four }; + pub fn streamCount(size_format: u2, block_type: BlockType) StreamCount { + return switch (block_type) { + .raw, .rle => .one, + .compressed, .treeless => if (size_format == 0) .one else .four, + }; + } + }; + + pub const SequencesSection = struct { + header: SequencesSection.Header, + literals_length_table: Table, + offset_table: Table, + match_length_table: Table, + + pub const Header = struct { + sequence_count: u24, + match_lengths: Mode, + offsets: Mode, + literal_lengths: Mode, + + pub const Mode = enum(u2) { + predefined, + rle, + fse, + repeat, + }; + }; + }; + + pub const Table = union(enum) { + fse: []const Fse, + rle: u8, + + pub const Fse = struct { + symbol: u8, + baseline: u16, + bits: u8, + }; + }; + + pub const literals_length_code_table = [36]struct { u32, u5 }{ + .{ 0, 0 }, .{ 1, 0 }, .{ 2, 0 }, .{ 3, 0 }, + .{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 }, + .{ 8, 0 }, .{ 9, 0 }, .{ 10, 0 }, .{ 11, 0 }, + .{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 }, .{ 15, 0 }, + .{ 16, 1 }, .{ 18, 1 }, .{ 20, 1 }, .{ 22, 1 }, + .{ 24, 2 }, .{ 28, 2 }, .{ 32, 3 }, .{ 40, 3 }, + .{ 48, 4 }, .{ 64, 6 }, .{ 128, 7 }, .{ 256, 8 }, + .{ 512, 9 }, .{ 1024, 10 }, .{ 2048, 11 }, .{ 4096, 12 }, + .{ 8192, 13 }, .{ 16384, 14 }, .{ 32768, 15 }, .{ 65536, 16 }, + }; + + pub const match_length_code_table = [53]struct { u32, u5 }{ + .{ 3, 0 }, .{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 }, .{ 8, 0 }, + .{ 9, 0 }, .{ 10, 0 }, .{ 11, 0 }, .{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 }, + .{ 15, 0 }, .{ 16, 0 }, .{ 17, 0 }, .{ 18, 0 }, .{ 19, 0 }, .{ 20, 0 }, + .{ 21, 0 }, .{ 22, 0 }, .{ 23, 0 }, .{ 24, 0 }, .{ 25, 0 }, .{ 26, 0 }, + .{ 27, 0 }, .{ 28, 0 }, .{ 29, 0 }, .{ 30, 0 }, .{ 31, 0 }, .{ 32, 0 }, + .{ 33, 0 }, .{ 34, 0 }, .{ 35, 1 }, .{ 37, 1 }, .{ 39, 1 }, .{ 41, 1 }, + .{ 43, 2 }, .{ 47, 2 
}, .{ 51, 3 }, .{ 59, 3 }, .{ 67, 4 }, .{ 83, 4 },
+        .{ 99, 5 }, .{ 131, 7 }, .{ 259, 8 }, .{ 515, 9 }, .{ 1027, 10 }, .{ 2051, 11 },
+        .{ 4099, 12 }, .{ 8195, 13 }, .{ 16387, 14 }, .{ 32771, 15 }, .{ 65539, 16 },
+    };
+
+    /// Default distribution for literals-length codes, one `i16` per symbol
+    /// (36 entries, the same number as `table_symbol_count_max.literal`).
+    /// NOTE(review): the meaning of the `-1` entries comes from the Zstandard
+    /// specification (RFC 8478) - confirm there.
+    pub const literals_length_default_distribution = [36]i16{
+        4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
+        -1, -1, -1, -1,
+    };
+
+    /// Default distribution for match-length codes, one `i16` per symbol
+    /// (53 entries, the same number as `table_symbol_count_max.match`).
+    pub const match_lengths_default_distribution = [53]i16{
+        1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1,
+        -1, -1, -1, -1, -1,
+    };
+
+    /// Default distribution for offset codes, one `i16` per symbol (29 entries).
+    pub const offset_codes_default_distribution = [29]i16{
+        1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1,
+    };
+
+    /// Predefined FSE table for the `literal` kind: 64 entries, i.e.
+    /// `1 << default_accuracy_log.literal`.
+    pub const predefined_literal_fse_table = Table{
+        .fse = &[64]Table.Fse{
+            .{ .symbol = 0, .bits = 4, .baseline = 0 },
+            .{ .symbol = 0, .bits = 4, .baseline = 16 },
+            .{ .symbol = 1, .bits = 5, .baseline = 32 },
+            .{ .symbol = 3, .bits = 5, .baseline = 0 },
+            .{ .symbol = 4, .bits = 5, .baseline = 0 },
+            .{ .symbol = 6, .bits = 5, .baseline = 0 },
+            .{ .symbol = 7, .bits = 5, .baseline = 0 },
+            .{ .symbol = 9, .bits = 5, .baseline = 0 },
+            .{ .symbol = 10, .bits = 5, .baseline = 0 },
+            .{ .symbol = 12, .bits = 5, .baseline = 0 },
+            .{ .symbol = 14, .bits = 6, .baseline = 0 },
+            .{ .symbol = 16, .bits = 5, .baseline = 0 },
+            .{ .symbol = 18, .bits = 5, .baseline = 0 },
+            .{ .symbol = 19, .bits = 5, .baseline = 0 },
+            .{ .symbol = 21, .bits = 5, .baseline = 0 },
+            .{ .symbol = 22, .bits = 5, .baseline = 0 },
+            .{ .symbol = 24, .bits = 5, .baseline = 0 },
+            .{ .symbol = 25, .bits = 5, .baseline = 32 },
+            .{ .symbol = 26, .bits = 5, .baseline = 0 },
+            .{ .symbol = 27, .bits = 6, .baseline = 0 },
+            .{ .symbol = 29, .bits = 6, .baseline = 0 },
+            .{ .symbol = 31, .bits = 6, .baseline = 0 },
+            .{ .symbol = 0, .bits = 4, .baseline = 32 },
+            .{ .symbol = 1, .bits = 4, .baseline = 0 },
+            .{ .symbol = 2, .bits = 5, .baseline = 0 },
+            .{ .symbol = 4, .bits = 5, .baseline = 32 },
+            .{ .symbol = 5, .bits = 5, .baseline = 0 },
+            .{ .symbol = 7, .bits = 5, .baseline = 32 },
+            .{ .symbol = 8, .bits = 5, .baseline = 0 },
+            .{ .symbol = 10, .bits = 5, .baseline = 32 },
+            .{ .symbol = 11, .bits = 5, .baseline = 0 },
+            .{ .symbol = 13, .bits = 6, .baseline = 0 },
+            .{ .symbol = 16, .bits = 5, .baseline = 32 },
+            .{ .symbol = 17, .bits = 5, .baseline = 0 },
+            .{ .symbol = 19, .bits = 5, .baseline = 32 },
+            .{ .symbol = 20, .bits = 5, .baseline = 0 },
+            .{ .symbol = 22, .bits = 5, .baseline = 32 },
+            .{ .symbol = 23, .bits = 5, .baseline = 0 },
+            .{ .symbol = 25, .bits = 4, .baseline = 0 },
+            .{ .symbol = 25, .bits = 4, .baseline = 16 },
+            .{ .symbol = 26, .bits = 5, .baseline = 32 },
+            .{ .symbol = 28, .bits = 6, .baseline = 0 },
+            .{ .symbol = 30, .bits = 6, .baseline = 0 },
+            .{ .symbol = 0, .bits = 4, .baseline = 48 },
+            .{ .symbol = 1, .bits = 4, .baseline = 16 },
+            .{ .symbol = 2, .bits = 5, .baseline = 32 },
+            .{ .symbol = 3, .bits = 5, .baseline = 32 },
+            .{ .symbol = 5, .bits = 5, .baseline = 32 },
+            .{ .symbol = 6, .bits = 5, .baseline = 32 },
+            .{ .symbol = 8, .bits = 5, .baseline = 32 },
+            .{ .symbol = 9, .bits = 5, .baseline = 32 },
+            .{ .symbol = 11, .bits = 5, .baseline = 32 },
+            .{ .symbol = 12, .bits = 5, .baseline = 32 },
+            .{ .symbol = 15, .bits = 6, .baseline = 0 },
+            .{ .symbol = 17, .bits = 5, .baseline = 32 },
+            .{ .symbol = 18, .bits = 5, .baseline = 32 },
+            .{ .symbol = 20, .bits = 5, .baseline = 32 },
+            .{ .symbol = 21, .bits = 5, .baseline = 32 },
+            .{ .symbol = 23, .bits = 5, .baseline = 32 },
+            .{ .symbol = 24, .bits = 5, .baseline = 32 },
+            .{ .symbol = 35, .bits = 6, .baseline = 0 },
+            .{ .symbol = 34, .bits = 6, .baseline = 0 },
+            .{ .symbol = 33, .bits = 6, .baseline = 0 },
+            .{ .symbol = 32, .bits = 6, .baseline = 0 },
+        },
+    };
+
+    /// Predefined FSE table for the `match` kind: 64 entries, i.e.
+    /// `1 << default_accuracy_log.match`.
+    pub const predefined_match_fse_table = Table{
+        .fse = &[64]Table.Fse{
+            .{ .symbol = 0, .bits = 6, .baseline = 0 },
+            .{ .symbol = 1, .bits = 4, .baseline = 0 },
+            .{ .symbol = 2, .bits = 5, .baseline = 32 },
+            .{ .symbol = 3, .bits = 5, .baseline = 0 },
+            .{ .symbol = 5, .bits = 5, .baseline = 0 },
+            .{ .symbol = 6, .bits = 5, .baseline = 0 },
+            .{ .symbol = 8, .bits = 5, .baseline = 0 },
+            .{ .symbol = 10, .bits = 6, .baseline = 0 },
+            .{ .symbol = 13, .bits = 6, .baseline = 0 },
+            .{ .symbol = 16, .bits = 6, .baseline = 0 },
+            .{ .symbol = 19, .bits = 6, .baseline = 0 },
+            .{ .symbol = 22, .bits = 6, .baseline = 0 },
+            .{ .symbol = 25, .bits = 6, .baseline = 0 },
+            .{ .symbol = 28, .bits = 6, .baseline = 0 },
+            .{ .symbol = 31, .bits = 6, .baseline = 0 },
+            .{ .symbol = 33, .bits = 6, .baseline = 0 },
+            .{ .symbol = 35, .bits = 6, .baseline = 0 },
+            .{ .symbol = 37, .bits = 6, .baseline = 0 },
+            .{ .symbol = 39, .bits = 6, .baseline = 0 },
+            .{ .symbol = 41, .bits = 6, .baseline = 0 },
+            .{ .symbol = 43, .bits = 6, .baseline = 0 },
+            .{ .symbol = 45, .bits = 6, .baseline = 0 },
+            .{ .symbol = 1, .bits = 4, .baseline = 16 },
+            .{ .symbol = 2, .bits = 4, .baseline = 0 },
+            .{ .symbol = 3, .bits = 5, .baseline = 32 },
+            .{ .symbol = 4, .bits = 5, .baseline = 0 },
+            .{ .symbol = 6, .bits = 5, .baseline = 32 },
+            .{ .symbol = 7, .bits = 5, .baseline = 0 },
+            .{ .symbol = 9, .bits = 6, .baseline = 0 },
+            .{ .symbol = 12, .bits = 6, .baseline = 0 },
+            .{ .symbol = 15, .bits = 6, .baseline = 0 },
+            .{ .symbol = 18, .bits = 6, .baseline = 0 },
+            .{ .symbol = 21, .bits = 6, .baseline = 0 },
+            .{ .symbol = 24, .bits = 6, .baseline = 0 },
+            .{ .symbol = 27, .bits = 6, .baseline = 0 },
+            .{ .symbol = 30, .bits = 6, .baseline = 0 },
+            .{ .symbol = 32, .bits = 6, .baseline = 0 },
+            .{ .symbol = 34, .bits = 6, .baseline = 0 },
+            .{ .symbol = 36, .bits = 6, .baseline = 0 },
+            .{ .symbol = 38, .bits = 6, .baseline = 0 },
+            .{ .symbol = 40, .bits = 6, .baseline = 0 },
+            .{ .symbol = 42, .bits = 6, .baseline = 0 },
+            .{ .symbol = 44, .bits = 6, .baseline = 0 },
+            .{ .symbol = 1, .bits = 4, .baseline = 32 },
+            .{ .symbol = 1, .bits = 4, .baseline = 48 },
+            .{ .symbol = 2, .bits = 4, .baseline = 16 },
+            .{ .symbol = 4, .bits = 5, .baseline = 32 },
+            .{ .symbol = 5, .bits = 5, .baseline = 32 },
+            .{ .symbol = 7, .bits = 5, .baseline = 32 },
+            .{ .symbol = 8, .bits = 5, .baseline = 32 },
+            .{ .symbol = 11, .bits = 6, .baseline = 0 },
+            .{ .symbol = 14, .bits = 6, .baseline = 0 },
+            .{ .symbol = 17, .bits = 6, .baseline = 0 },
+            .{ .symbol = 20, .bits = 6, .baseline = 0 },
+            .{ .symbol = 23, .bits = 6, .baseline = 0 },
+            .{ .symbol = 26, .bits = 6, .baseline = 0 },
+            .{ .symbol = 29, .bits = 6, .baseline = 0 },
+            .{ .symbol = 52, .bits = 6, .baseline = 0 },
+            .{ .symbol = 51, .bits = 6, .baseline = 0 },
+            .{ .symbol = 50, .bits = 6, .baseline = 0 },
+            .{ .symbol = 49, .bits = 6, .baseline = 0 },
+            .{ .symbol = 48, .bits = 6, .baseline = 0 },
+            .{ .symbol = 47, .bits = 6, .baseline = 0 },
+            .{ .symbol = 46, .bits = 6, .baseline = 0 },
+        },
+    };
+
+    /// Predefined FSE table for the `offset` kind: 32 entries, i.e.
+    /// `1 << default_accuracy_log.offset`.
+    pub const predefined_offset_fse_table = Table{
+        .fse = &[32]Table.Fse{
+            .{ .symbol = 0, .bits = 5, .baseline = 0 },
+            .{ .symbol = 6, .bits = 4, .baseline = 0 },
+            .{ .symbol = 9, .bits = 5, .baseline = 0 },
+            .{ .symbol = 15, .bits = 5, .baseline = 0 },
+            .{ .symbol = 21, .bits = 5, .baseline = 0 },
+            .{ .symbol = 3, .bits = 5, .baseline = 0 },
+            .{ .symbol = 7, .bits = 4, .baseline = 0 },
+            .{ .symbol = 12, .bits = 5, .baseline = 0 },
+            .{ .symbol = 18, .bits = 5, .baseline = 0 },
+            .{ .symbol = 23, .bits = 5, .baseline = 0 },
+            .{ .symbol = 5, .bits = 5, .baseline = 0 },
+            .{ .symbol = 8, .bits = 4, .baseline = 0 },
+            .{ .symbol = 14, .bits = 5, .baseline = 0 },
+            .{ .symbol = 20, .bits = 5, .baseline = 0 },
+            .{ .symbol = 2, .bits = 5, .baseline = 0 },
+            .{ .symbol = 7, .bits = 4, .baseline = 16 },
+            .{ .symbol = 11, .bits = 5, .baseline = 0 },
+            .{ .symbol = 17, .bits = 5, .baseline = 0 },
+            .{ .symbol = 22, .bits = 5, .baseline = 0 },
+            .{ .symbol = 4, .bits = 5, .baseline = 0 },
+            .{ .symbol = 8, .bits = 4, .baseline = 16 },
+            .{ .symbol = 13, .bits = 5, .baseline = 0 },
+            .{ .symbol = 19, .bits = 5, .baseline = 0 },
+            .{ .symbol = 1, .bits = 5, .baseline = 0 },
+            .{ .symbol = 6, .bits = 4, .baseline = 16 },
+            .{ .symbol = 10, .bits = 5, .baseline = 0 },
+            .{ .symbol = 16, .bits = 5, .baseline = 0 },
+            .{ .symbol = 28, .bits = 5, .baseline = 0 },
+            .{ .symbol = 27, .bits = 5, .baseline = 0 },
+            .{ .symbol = 26, .bits = 5, .baseline = 0 },
+            .{ .symbol = 25, .bits = 5, .baseline = 0 },
+            .{ .symbol = 24, .bits = 5, .baseline = 0 },
+        },
+    };
+    // Starting values of the three repeated-offset slots (going by the names;
+    // NOTE(review): confirm against the decoder that consumes them).
+    pub const start_repeated_offset_1 = 1;
+    pub const start_repeated_offset_2 = 4;
+    pub const start_repeated_offset_3 = 8;
+
+    /// Upper bound on the accuracy log of a table, per kind.
+    pub const table_accuracy_log_max = struct {
+        pub const literal = 9;
+        pub const match = 9;
+        pub const offset = 8;
+    };
+
+    /// Upper bound on the number of symbols of a table, per kind.
+    pub const table_symbol_count_max = struct {
+        pub const literal = 36;
+        pub const match = 53;
+        pub const offset = 32;
+    };
+
+    /// Accuracy log of the predefined tables, per kind; `1 << 6` and `1 << 5`
+    /// match their 64- and 32-entry lengths.
+    pub const default_accuracy_log = struct {
+        pub const literal = 6;
+        pub const match = 6;
+        pub const offset = 5;
+    };
+    /// Largest table size per kind: `1 << table_accuracy_log_max.<kind>`.
+    pub const table_size_max = struct {
+        pub const literal = 1 << table_accuracy_log_max.literal;
+        pub const match = 1 << table_accuracy_log_max.match;
+        // Derived from the offset limit (8 -> 256 entries); this previously used
+        // the match limit, which over-stated the size as 512.
+        pub const offset = 1 << table_accuracy_log_max.offset;
+    };
+};
+
+test {
+    const testing = @import("std").testing;
+    testing.refAllDeclsRecursive(@This());
+}
diff --git a/lib/std/hash.zig b/lib/std/hash.zig
index 2680a8e26332..8e92b4c9de7c 100644
--- a/lib/std/hash.zig
+++ b/lib/std/hash.zig
@@ -32,6 +32,10 @@ pub const CityHash64 = cityhash.CityHash64;
 const wyhash = @import("hash/wyhash.zig");
 pub const Wyhash = wyhash.Wyhash;
 
+const xxhash = @import("hash/xxhash.zig");
+pub const XxHash64 = xxhash.XxHash64;
+pub const XxHash32 = xxhash.XxHash32;
+
 test "hash" {
     _ = adler;
     _ = auto_hash;
@@ -40,4 +44,5 @@ test "hash" {
     _ = murmur;
     _ = cityhash;
     _ = wyhash;
+    _ = xxhash;
 }
diff --git a/lib/std/hash/xxhash.zig b/lib/std/hash/xxhash.zig
new file mode 100644
index
000000000000..bf4877e029f2
--- /dev/null
+++ b/lib/std/hash/xxhash.zig
@@ -0,0 +1,303 @@
+const std = @import("std");
+const mem = std.mem;
+const expectEqual = std.testing.expectEqual;
+
+const rotl = std.math.rotl;
+
+/// Streaming 64-bit xxHash: seed it with `init`, feed bytes with `update`,
+/// read the digest with `final`. `hash` is the one-shot, seed-0 form.
+pub const XxHash64 = struct {
+    acc1: u64,
+    acc2: u64,
+    acc3: u64,
+    acc4: u64,
+
+    seed: u64,
+    /// Holds input bytes that do not yet make up a whole 32-byte stripe.
+    buf: [32]u8,
+    buf_len: usize,
+    /// Bytes consumed so far as whole stripes (the contents of `buf` excluded).
+    byte_count: usize,
+
+    const prime_1 = 0x9E3779B185EBCA87; // 0b1001111000110111011110011011000110000101111010111100101010000111
+    const prime_2 = 0xC2B2AE3D27D4EB4F; // 0b1100001010110010101011100011110100100111110101001110101101001111
+    const prime_3 = 0x165667B19E3779F9; // 0b0001011001010110011001111011000110011110001101110111100111111001
+    const prime_4 = 0x85EBCA77C2B2AE63; // 0b1000010111101011110010100111011111000010101100101010111001100011
+    const prime_5 = 0x27D4EB2F165667C5; // 0b0010011111010100111010110010111100010110010101100110011111000101
+
+    /// Return a hasher whose four lane accumulators are derived from `seed`.
+    pub fn init(seed: u64) XxHash64 {
+        return XxHash64{
+            .seed = seed,
+            .acc1 = seed +% prime_1 +% prime_2,
+            .acc2 = seed +% prime_2,
+            .acc3 = seed,
+            .acc4 = seed -% prime_1,
+            .buf = undefined,
+            .buf_len = 0,
+            .byte_count = 0,
+        };
+    }
+
+    /// Absorb `input`: every complete 32-byte stripe is processed right away
+    /// and the leftover tail is kept in `buf` for the next call.
+    pub fn update(self: *XxHash64, input: []const u8) void {
+        // Too short to complete a stripe: only buffer it.
+        if (input.len < 32 - self.buf_len) {
+            mem.copy(u8, self.buf[self.buf_len..], input);
+            self.buf_len += input.len;
+            return;
+        }
+
+        var i: usize = 0;
+
+        // Top up a partially filled buffer and flush it as one stripe.
+        if (self.buf_len > 0) {
+            i = 32 - self.buf_len;
+            mem.copy(u8, self.buf[self.buf_len..], input[0..i]);
+            self.processStripe(&self.buf);
+            self.buf_len = 0;
+        }
+
+        while (i + 32 <= input.len) : (i += 32) {
+            self.processStripe(input[i..][0..32]);
+        }
+
+        const remaining_bytes = input[i..];
+        mem.copy(u8, &self.buf, remaining_bytes);
+        self.buf_len = remaining_bytes.len;
+    }
+
+    /// Mix one stripe, read as four little-endian `u64` lanes, into the accumulators.
+    inline fn processStripe(self: *XxHash64, buf: *const [32]u8) void {
+        self.acc1 = round(self.acc1, mem.readIntLittle(u64, buf[0..8]));
+        self.acc2 = round(self.acc2, mem.readIntLittle(u64, buf[8..16]));
+        self.acc3 = round(self.acc3, mem.readIntLittle(u64, buf[16..24]));
+        self.acc4 = round(self.acc4, mem.readIntLittle(u64, buf[24..32]));
+        self.byte_count += 32;
+    }
+
+    inline fn round(acc: u64, lane: u64) u64 {
+        const a = acc +% (lane *% prime_2);
+        const b = rotl(u64, a, 31);
+        return b *% prime_1;
+    }
+
+    /// Return the digest of all bytes absorbed so far. Only reads the hasher
+    /// state, so further `update` calls remain possible afterwards.
+    pub fn final(self: *XxHash64) u64 {
+        var acc: u64 = undefined;
+
+        if (self.byte_count < 32) {
+            // No whole stripe was processed; the lane accumulators are not used.
+            acc = self.seed +% prime_5;
+        } else {
+            acc = rotl(u64, self.acc1, 1) +% rotl(u64, self.acc2, 7) +%
+                rotl(u64, self.acc3, 12) +% rotl(u64, self.acc4, 18);
+            acc = mergeAccumulator(acc, self.acc1);
+            acc = mergeAccumulator(acc, self.acc2);
+            acc = mergeAccumulator(acc, self.acc3);
+            acc = mergeAccumulator(acc, self.acc4);
+        }
+
+        acc = acc +% @as(u64, self.byte_count) +% @as(u64, self.buf_len);
+
+        // Fold in the buffered tail: 8-byte lanes, then one 4-byte lane, then single bytes.
+        var pos: usize = 0;
+        while (pos + 8 <= self.buf_len) : (pos += 8) {
+            const lane = mem.readIntLittle(u64, self.buf[pos..][0..8]);
+            acc ^= round(0, lane);
+            acc = rotl(u64, acc, 27) *% prime_1;
+            acc +%= prime_4;
+        }
+
+        if (pos + 4 <= self.buf_len) {
+            const lane = @as(u64, mem.readIntLittle(u32, self.buf[pos..][0..4]));
+            acc ^= lane *% prime_1;
+            acc = rotl(u64, acc, 23) *% prime_2;
+            acc +%= prime_3;
+            pos += 4;
+        }
+
+        while (pos < self.buf_len) : (pos += 1) {
+            const lane = @as(u64, self.buf[pos]);
+            acc ^= lane *% prime_5;
+            acc = rotl(u64, acc, 11) *% prime_1;
+        }
+
+        // Final avalanche.
+        acc ^= acc >> 33;
+        acc *%= prime_2;
+        acc ^= acc >> 29;
+        acc *%= prime_3;
+        acc ^= acc >> 32;
+
+        return acc;
+    }
+
+    inline fn mergeAccumulator(acc: u64, other: u64) u64 {
+        const a = acc ^ round(0, other);
+        const b = a *% prime_1;
+        return b +% prime_4;
+    }
+
+    /// Hash `input` in one call with seed 0.
+    pub fn hash(input: []const u8) u64 {
+        var hasher = XxHash64.init(0);
+        hasher.update(input);
+        return hasher.final();
+    }
+};
+
+/// Streaming 32-bit xxHash: seed it with `init`, feed bytes with `update`,
+/// read the digest with `final`. `hash` is the one-shot, seed-0 form.
+pub const XxHash32 = struct {
+    acc1: u32,
+    acc2: u32,
+    acc3: u32,
+    acc4: u32,
+
+    seed: u32,
+    /// Holds input bytes that do not yet make up a whole 16-byte stripe.
+    buf: [16]u8,
+    buf_len: usize,
+    /// Bytes consumed so far as whole stripes (the contents of `buf` excluded).
+    byte_count: usize,
+
+    const prime_1 = 0x9E3779B1; // 0b10011110001101110111100110110001
+    const prime_2 = 0x85EBCA77; // 0b10000101111010111100101001110111
+    const prime_3 = 0xC2B2AE3D; // 0b11000010101100101010111000111101
+    const prime_4 = 0x27D4EB2F; // 0b00100111110101001110101100101111
+    const prime_5 = 0x165667B1; // 0b00010110010101100110011110110001
+
+    /// Return a hasher whose four lane accumulators are derived from `seed`.
+    pub fn init(seed: u32) XxHash32 {
+        return XxHash32{
+            .seed = seed,
+            .acc1 = seed +% prime_1 +% prime_2,
+            .acc2 = seed +% prime_2,
+            .acc3 = seed,
+            .acc4 = seed -% prime_1,
+            .buf = undefined,
+            .buf_len = 0,
+            .byte_count = 0,
+        };
+    }
+
+    /// Absorb `input`: every complete 16-byte stripe is processed right away
+    /// and the leftover tail is kept in `buf` for the next call.
+    pub fn update(self: *XxHash32, input: []const u8) void {
+        // Too short to complete a stripe: only buffer it.
+        if (input.len < 16 - self.buf_len) {
+            mem.copy(u8, self.buf[self.buf_len..], input);
+            self.buf_len += input.len;
+            return;
+        }
+
+        var i: usize = 0;
+
+        // Top up a partially filled buffer and flush it as one stripe.
+        if (self.buf_len > 0) {
+            i = 16 - self.buf_len;
+            mem.copy(u8, self.buf[self.buf_len..], input[0..i]);
+            self.processStripe(&self.buf);
+            self.buf_len = 0;
+        }
+
+        while (i + 16 <= input.len) : (i += 16) {
+            self.processStripe(input[i..][0..16]);
+        }
+
+        const remaining_bytes = input[i..];
+        mem.copy(u8, &self.buf, remaining_bytes);
+        self.buf_len = remaining_bytes.len;
+    }
+
+    /// Mix one stripe, read as four little-endian `u32` lanes, into the accumulators.
+    inline fn processStripe(self: *XxHash32, buf: *const [16]u8) void {
+        self.acc1 = round(self.acc1, mem.readIntLittle(u32, buf[0..4]));
+        self.acc2 = round(self.acc2, mem.readIntLittle(u32, buf[4..8]));
+        self.acc3 = round(self.acc3, mem.readIntLittle(u32, buf[8..12]));
+        self.acc4 = round(self.acc4, mem.readIntLittle(u32, buf[12..16]));
+        self.byte_count += 16;
+    }
+
+    inline fn round(acc: u32, lane: u32) u32 {
+        const a = acc +% (lane *% prime_2);
+        const b = rotl(u32, a, 13);
+        return b *% prime_1;
+    }
+
+    /// Return the digest of all bytes absorbed so far. Only reads the hasher
+    /// state, so further `update` calls remain possible afterwards.
+    pub fn final(self: *XxHash32) u32 {
+        var acc: u32 = undefined;
+
+        if (self.byte_count < 16) {
+            // No whole stripe was processed; the lane accumulators are not used.
+            acc = self.seed +% prime_5;
+        } else {
+            acc = rotl(u32, self.acc1, 1) +% rotl(u32, self.acc2, 7) +%
+                rotl(u32, self.acc3, 12) +% rotl(u32, self.acc4, 18);
+        }
+
+        // The 32-bit digest mixes in the total length modulo 2^32, so truncate
+        // instead of `@intCast`, which is illegal behavior once 4 GiB or more
+        // have been hashed. `buf_len` is always below 16, so its cast cannot fail.
+        acc = acc +% @truncate(u32, @as(u64, self.byte_count)) +% @intCast(u32, self.buf_len);
+
+        // Fold in the buffered tail: 4-byte lanes, then single bytes.
+        var pos: usize = 0;
+        while (pos + 4 <= self.buf_len) : (pos += 4) {
+            const lane = mem.readIntLittle(u32, self.buf[pos..][0..4]);
+            acc +%= lane *% prime_3;
+            acc = rotl(u32, acc, 17) *% prime_4;
+        }
+
+        while (pos < self.buf_len) : (pos += 1) {
+            const lane = @as(u32, self.buf[pos]);
+            acc +%= lane *% prime_5;
+            acc = rotl(u32, acc, 11) *% prime_1;
+        }
+
+        // Final avalanche.
+        acc ^= acc >> 15;
+        acc *%= prime_2;
+        acc ^= acc >> 13;
+        acc *%= prime_3;
+        acc ^= acc >> 16;
+
+        return acc;
+    }
+
+    /// Hash `input` in one call with seed 0.
+    pub fn hash(input: []const u8) u32 {
+        var hasher = XxHash32.init(0);
+        hasher.update(input);
+        return hasher.final();
+    }
+};
+
+test "xxhash64" {
+    const hash = XxHash64.hash;
+
+    try expectEqual(@as(u64, 0xef46db3751d8e999), hash(""));
+    try expectEqual(@as(u64, 0xd24ec4f1a98c6e5b), hash("a"));
+    try expectEqual(@as(u64, 0x44bc2cf5ad770999), hash("abc"));
+    try expectEqual(@as(u64, 0x066ed728fceeb3be), hash("message digest"));
+    try expectEqual(@as(u64, 0xcfe1f278fa89835c), hash("abcdefghijklmnopqrstuvwxyz"));
+    try expectEqual(@as(u64, 0xaaa46907d3047814), hash("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"));
+    try expectEqual(@as(u64, 0xe04a477f19ee145d), hash("12345678901234567890123456789012345678901234567890123456789012345678901234567890"));
+}
+
+test "xxhash32" {
+    const hash = XxHash32.hash;
+
+    try expectEqual(@as(u32, 0x02cc5d05), hash(""));
+    try expectEqual(@as(u32, 0x550d7456), hash("a"));
+    try expectEqual(@as(u32, 0x32d153ff), hash("abc"));
+    try expectEqual(@as(u32, 0x7c948494), hash("message digest"));
+    try expectEqual(@as(u32, 0x63a14d5f), hash("abcdefghijklmnopqrstuvwxyz"));
+    try expectEqual(@as(u32, 0x9c285e64), hash("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"));
+    try expectEqual(@as(u32, 0x9c05f475), hash("12345678901234567890123456789012345678901234567890123456789012345678901234567890"));
+}
diff --git a/lib/std/std.zig b/lib/std/std.zig
index 5b0963ba20d2..4a6d33000313 100644
--- a/lib/std/std.zig
+++ b/lib/std/std.zig
@@ -31,6 +31,7 @@ pub const PackedIntSliceEndian = @import("packed_int_array.zig").PackedIntSliceE
 pub const PriorityQueue = @import("priority_queue.zig").PriorityQueue;
 pub const
PriorityDequeue = @import("priority_dequeue.zig").PriorityDequeue; pub const Progress = @import("Progress.zig"); +pub const RingBuffer = @import("RingBuffer.zig"); pub const SegmentedList = @import("segmented_list.zig").SegmentedList; pub const SemanticVersion = @import("SemanticVersion.zig"); pub const SinglyLinkedList = @import("linked_list.zig").SinglyLinkedList;