diff --git a/README.md b/README.md index cea0d80..d195fad 100644 --- a/README.md +++ b/README.md @@ -157,6 +157,7 @@ assert_eq!(kind.extension(), "foo"); - **rar** - `application/vnd.rar` - **gz** - `application/gzip` - **bz2** - `application/x-bzip2` +- **bz3** - `application/vnd.bzip3` - **7z** - `application/x-7z-compressed` - **xz** - `application/x-xz` - **pdf** - `application/pdf` @@ -175,6 +176,7 @@ assert_eq!(kind.extension(), "foo"); - **rpm** - `application/x-rpm` - **dcm** - `application/dicom` - **zst** - `application/zstd` +- **lz4** - `application/x-lz4` - **msi** - `application/x-ole-storage` - **cpio** - `application/x-cpio` diff --git a/src/map.rs b/src/map.rs index cd0b9ad..3858f47 100644 --- a/src/map.rs +++ b/src/map.rs @@ -462,6 +462,12 @@ matcher_map!( "bz2", matchers::archive::is_bz2 ), + ( + MatcherType::Archive, + "application/vnd.bzip3", + "bz3", + matchers::archive::is_bz3 + ), ( MatcherType::Archive, "application/x-7z-compressed", @@ -570,6 +576,12 @@ matcher_map!( "zst", matchers::archive::is_zst ), + ( + MatcherType::Archive, + "application/x-lz4", + "lz4", + matchers::archive::is_lz4 + ), ( MatcherType::Archive, "application/x-ole-storage", diff --git a/src/matchers/archive.rs b/src/matchers/archive.rs index ff03ca3..751a1e9 100644 --- a/src/matchers/archive.rs +++ b/src/matchers/archive.rs @@ -52,11 +52,21 @@ pub fn is_gz(buf: &[u8]) -> bool { buf.len() > 2 && buf[0] == 0x1F && buf[1] == 0x8B && buf[2] == 0x8 } -/// Returns whether a buffer is a bzip archive. +/// Returns whether a buffer is a bzip2 archive. pub fn is_bz2(buf: &[u8]) -> bool { buf.len() > 2 && buf[0] == 0x42 && buf[1] == 0x5A && buf[2] == 0x68 } +/// Returns whether a buffer is a bzip3 archive. +pub fn is_bz3(buf: &[u8]) -> bool { + buf.len() > 4 + && buf[0] == b'B' + && buf[1] == b'Z' + && buf[2] == b'3' + && buf[3] == b'v' + && buf[4] == b'1' +} + /// Returns whether a buffer is a 7z archive. 
/// Returns whether a buffer is an LZ4 archive.
///
/// LZ4 compressed data is made of one or more frames, and two frame kinds are
/// recognised here:
///
/// * a regular LZ4 frame, which starts with the bytes `04 22 4D 18`
///   (magic number `0x184D2204` stored little-endian);
/// * a skippable frame, whose little-endian magic number lies in
///   `0x184D2A50..=0x184D2A5F` and is followed by a little-endian `u32`
///   giving the size of the payload to skip.
///
/// Any number of leading skippable frames is stepped over; the buffer is
/// reported as LZ4 only if a regular LZ4 frame follows them. A buffer that
/// contains nothing but skippable frames, or whose skippable frame declares
/// more payload than the buffer holds, is rejected.
///
/// See <https://github.com/lz4/lz4/blob/v1.9.4/doc/lz4_Frame_format.md>.
pub fn is_lz4(buf: &[u8]) -> bool {
    // Magic bytes of a regular LZ4 frame, in on-disk (little-endian) order.
    const LZ4_FRAME_MAGIC: [u8; 4] = [0x04, 0x22, 0x4D, 0x18];
    // Skippable frames use any magic number in 0x184D2A50..=0x184D2A5F.
    const LZ4_SKIP_START: u32 = 0x184D_2A50;
    const LZ4_SKIP_MASK: u32 = 0xFFFF_FFF0;

    // Walk the frames iteratively: the input is untrusted, and a long run of
    // empty skippable frames must not translate into unbounded recursion.
    let mut rest = buf;
    loop {
        if rest.starts_with(&LZ4_FRAME_MAGIC) {
            return true;
        }

        // A skippable frame needs a full 8-byte header: magic + payload size.
        let [m0, m1, m2, m3, l0, l1, l2, l3, payload @ ..] = rest else {
            return false;
        };

        let magic = u32::from_le_bytes([*m0, *m1, *m2, *m3]);
        if magic & LZ4_SKIP_MASK != LZ4_SKIP_START {
            return false;
        }

        let data_len = u32::from_le_bytes([*l0, *l1, *l2, *l3]);
        let Ok(data_len) = usize::try_from(data_len) else {
            return false;
        };

        // `get` performs the bounds check without computing `8 + data_len`,
        // which could overflow `usize` on 32-bit targets.
        let Some(next_frame) = payload.get(data_len..) else {
            return false;
        };
        rest = next_frame;
    }
}
pub fn is_msi(buf: &[u8]) -> bool { buf.len() > 7 diff --git a/testdata/sample.tar.bz3 b/testdata/sample.tar.bz3 new file mode 100644 index 0000000..ddd256a Binary files /dev/null and b/testdata/sample.tar.bz3 differ diff --git a/testdata/sample.tar.lz4 b/testdata/sample.tar.lz4 new file mode 100644 index 0000000..08e5982 Binary files /dev/null and b/testdata/sample.tar.lz4 differ diff --git a/tests/archive.rs b/tests/archive.rs index 6c34ee5..f0d1f46 100644 --- a/tests/archive.rs +++ b/tests/archive.rs @@ -1,5 +1,12 @@ mod common; +test_format!( + Archive, + "application/vnd.bzip3", + "bz3", + bz3, + "sample.tar.bz3" +); test_format!( Archive, "application/vnd.sqlite3", @@ -9,6 +16,7 @@ test_format!( ); test_format!(Archive, "application/zstd", "zst", zst, "sample.tar.zst"); +test_format!(Archive, "application/x-lz4", "lz4", lz4, "sample.tar.lz4"); test_format!(Archive, "application/x-cpio", "cpio", cpio, "sample.cpio"); test_format!( Archive,