Skip to content

Commit

Permalink
Add GNU ar Support (#7)
Browse files Browse the repository at this point in the history
  • Loading branch information
LebJe authored Jul 14, 2021
1 parent 8b87ec2 commit ec33ce8
Show file tree
Hide file tree
Showing 11 changed files with 314 additions and 37 deletions.
1 change: 1 addition & 0 deletions Brewfile
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
brew "pre-commit"
brew "swiftformat"
brew "prettier"
20 changes: 20 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,26 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.3.0](https://github.com/LebJe/ArArchiveKit/releases/tag/0.3.0) - 2021-07-14

### Added

- Added support for the [GNU `ar` format](<https://en.wikipedia.org/wiki/Ar_(Unix)#System_V_(or_GNU)_variant>).
- Added a `variant` field to `ArArchiveReader`. This field contains the `Variant` of the archive that was parsed.
- Added a `noEntries` case to `ArArchiveError`.

### Changed

- `ArArchiveWriter` now requires users to call `finalize` to access the bytes of the archive.

### Fixed

- Correctly parse archive headers whose `mode` field is 0.

### Removed

- `ArArchiveWriter.bytes`.

## [0.2.3](https://github.com/LebJe/ArArchiveKit/releases/tag/0.2.3) - 2021-06-22

### Added
Expand Down
2 changes: 2 additions & 0 deletions Examples/Foundationless/Sources/Foundationless/main.swift
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ func main() throws {

let reader = try ArArchiveReader(archive: bytes)

print("Archive Variant: " + reader.variant.rawValue)

for (header, file) in reader {
print("---------------------------")

Expand Down
11 changes: 6 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
- [Windows](#windows)
- [Contributing](#contributing)

<!-- Added by: lebje, at: Fri Apr 9 18:48:17 EDT 2021 -->
<!-- Added by: lebje, at: Wed Jul 14 12:30:02 EDT 2021 -->

<!--te-->

Expand All @@ -38,9 +38,9 @@ Documentation is available [here](https://lebje.github.io/ArArchiveKit).

## `ar` Variations

ArArchiveKit supports the BSD variation of `ar` as described in [FreeBSD manpages](https://www.freebsd.org/cgi/man.cgi?query=ar&sektion=5).
ArArchiveKit supports the BSD and GNU variations of `ar` as described in [FreeBSD manpages](https://www.freebsd.org/cgi/man.cgi?query=ar&sektion=5).

Support for the GNU variant may come soon.
Support for symbol tables may come soon.

## Installation

Expand Down Expand Up @@ -114,8 +114,9 @@ writer.addFile(header: header, contents: "Hello")
Once you have added your files, you can get the archive like this:

```swift
// The binary representation (Array<UInt8>) of the archive.
let bytes = writer.bytes
// Call finalize to get the binary representation (Array<UInt8>) of the archive.
let bytes = writer.finalize()

// You can convert it to `Data` like this:
let data = Data(bytes)

Expand Down
142 changes: 130 additions & 12 deletions Sources/ArArchiveKit/ArArchiveReader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@
// The full text of the license can be found in the file named LICENSE.

/// `ArArchiveReader` reads `ar` files.
///
/// ```swift
/// let archiveData: Data = ...
/// let reader = try ArArchiveReader(archive: Array(archiveData))
///
/// print("Name: \(reader.headers[0].name)")
/// print("Contents:\n \(String(reader[0]))")
/// ```
public struct ArArchiveReader {
private var data: [UInt8]
private var currentIndex: Int = 0
Expand All @@ -25,35 +33,45 @@ public struct ArArchiveReader {
/// The number of files in this archive.
public var count: Int { self.headers.count }

/// The `Variant` of this archive.
public private(set) var variant: Variant

/// The initializer reads all the `ar` headers in preparation for random access to the header's file contents later.
///
/// - Parameters:
/// - archive: The bytes of the archive you want to read.
/// - Throws: `ArArchiveError`.
public init(archive: [UInt8]) throws {
// Validate archive.
if archive.isEmpty {
throw ArArchiveError.emptyArchive
} else if archive.count < 8 {
// The global header is missing.
throw ArArchiveError.missingMagicBytes
} else if Array(archive[0...7]) != globalHeader.asciiArray {
} else if Array(archive[0...7]) != Constants.globalHeader.asciiArray {
// The global header is invalid.
throw ArArchiveError.invalidMagicBytes
}

// Drop the global header from the byte array.
// Remove the global header from the byte array.
self.data = Array(archive[8...])

if self.data.isEmpty {
throw ArArchiveError.noEntries
}

var index = 0

self.variant = .common

// Read all the headers so we can provide random access to the data later.
while index < (self.data.count - 1), (index + (headerSize - 1)) < self.data.count - 1 {
var h = try self.parseHeader(bytes: Array(self.data[index...(index + headerSize - 1)]))
while index < (self.data.count - 1), (index + (Constants.headerSize - 1)) < self.data.count - 1 {
var h = try self.parseHeader(bytes: Array(self.data[index...(index + Constants.headerSize - 1)]))

h.contentLocation = (index + headerSize) + (h.nameSize != nil ? h.nameSize! : 0)
h.contentLocation = (index + Constants.headerSize) + (h.nameSize != nil ? h.nameSize! : 0)

// Jump past the header.
index += headerSize
index += Constants.headerSize

h.name = h.nameSize != nil ? String(Array(self.data[h.contentLocation - h.nameSize!..<h.contentLocation])) : h.name

Expand All @@ -62,6 +80,26 @@ public struct ArArchiveReader {

self.headers.append(h)
}

let nameTableHeaderIndex: Int? = self.headers[0].name == "//" ? 0 : self.headers.count >= 2 ? self.headers[1].name == "//" ? 1 : nil : nil

if let nameTableHeaderIndex = nameTableHeaderIndex {
let offsets = self.getNamesFromGNUNameTable(table: String(self[nameTableHeaderIndex]))

self.variant = .gnu

for i in 0..<self.headers.count {
if self.headers[i].name.first == "/", let offset = Int(String(self.headers[i].name.dropFirst())) {
self.headers[i].name = offsets[offset] ?? self.headers[i].name
}
}

self.headers.remove(at: nameTableHeaderIndex)
}

if self.headers[0].name == "/" {
self.headers.remove(at: 0)
}
}

/// Retrieves the bytes of the item at `index`, where index is the index of the `header` stored in the `headers` property of the reader.
Expand All @@ -80,7 +118,7 @@ public struct ArArchiveReader {
Array(self.data[header.contentLocation..<header.contentLocation + header.size])
}

private func parseHeader(bytes: [UInt8]) throws -> Header {
private mutating func parseHeader(bytes: [UInt8]) throws -> Header {
var start = 0
var name = self.readString(from: Array(bytes[start...15]))

Expand All @@ -98,7 +136,16 @@ public struct ArArchiveReader {

start += 6

let mode = UInt32(String(readString(from: Array(bytes[start...(start + 5)])).dropFirst(3)), radix: 8)
let modeBytes = Array(bytes[start...(start + 5)]).filter({ $0 != 32 })
let mode: UInt32?

if modeBytes.isEmpty {
mode = 0
} else if modeBytes.count > 3, modeBytes[0..<3] == [49, 48, 48] /* 100 */ {
mode = UInt32(String(self.readString(from: Array(modeBytes.dropFirst(3)))), radix: 8)
} else {
mode = UInt32(String(self.readString(from: modeBytes)), radix: 8)
}

start += 8

Expand All @@ -114,20 +161,33 @@ public struct ArArchiveReader {

var h = Header(name: name, userID: u, groupID: g, mode: m, modificationTime: mT)

// BSD archive
if name.hasPrefix("#1/") {
self.variant = .bsd
name.removeSubrange(name.startIndex..<name.index(name.startIndex, offsetBy: 3))

guard let nameSize = Int(name) else { throw ArArchiveError.invalidHeader }

h.size = s - nameSize
h.nameSize = nameSize
} else { h.size = s }
// GNU archive
} else if name.hasSuffix("/"), h.name != "//", h.name != "/" {
h.name = String(h.name.dropLast())
h.size = s
// Common archive
} else {
h.size = s
}

return h
}

/// From [blakesmith/ar/reader.go: line 62](https://github.com/blakesmith/ar/blob/809d4375e1fb5bb262c159fc3ec2e7a86a8bfd28/reader.go#L62) .
/// From [blakesmith/ar/reader.go: line 62](https://github.com/blakesmith/ar/blob/809d4375e1fb5bb262c159fc3ec2e7a86a8bfd28/reader.go#L62).
private func readString(from bytes: [UInt8]) -> String {
if bytes.count == 1 {
return String(Character(Unicode.Scalar(bytes[0])))
}

var i = bytes.count - 1

while i > 0, bytes[i] == 32 /* ASCII space character */ {
Expand All @@ -138,12 +198,70 @@ public struct ArArchiveReader {
}

private func readInt(from bytes: [UInt8], radix: Int? = nil) -> Int? {
var s = self.readString(from: bytes).filter({ $0 != " " })
if s == "" { s = "0" }

if let r = radix {
return Int(self.readString(from: bytes), radix: r)
return Int(s, radix: r)
} else {
return Int(self.readString(from: bytes))
return Int(s)
}
}

/// Extracts the filenames from a GNU archive name table and computes the offset of each filename.
///
/// Every entry in the table is a filename terminated by the two-character sequence `"/\n"`.
/// An entry's offset is the position of its first character within the table, which is the value
/// a header refers to when its name has the form `/<offset>`.
///
/// A `/` that is not immediately followed by `\n` is treated as part of the filename, so names
/// containing path separators are preserved. Any text after the last terminator (for example
/// trailing padding, or a final name that is missing its terminator) is discarded.
///
/// - Parameter table: The table to extract the filenames from.
/// - Returns: A `Dictionary<Int, String>`, whose keys are the filename offsets, and whose values are the filenames.
///
/// Before:
///
/// ```
/// Very Long Filename With Spaces.txt/
/// Very Long Filename With Spaces 2.txt/
/// ```
///
/// After:
///
/// ```swift
/// [
///     0: "Very Long Filename With Spaces.txt",
///     36: "Very Long Filename With Spaces 2.txt"
/// ]
/// ```
private func getNamesFromGNUNameTable(table: String) -> [Int: String] {
    var offsetsAndNames: [Int: String] = [:]
    // Offset at which the name currently being collected starts.
    var nameOffset = 0
    var currentName = ""
    var position = table.startIndex

    // Walk the table once with a `String.Index` instead of recomputing
    // `index(startIndex, offsetBy:)` on every iteration.
    while position < table.endIndex {
        let character = table[position]
        let next = table.index(after: position)

        // `next` may equal `endIndex` when the table ends with "/"; it must be
        // bounds-checked before it is subscripted.
        if character == "/", next < table.endIndex, table[next] == "\n" {
            offsetsAndNames[nameOffset] = currentName

            // Skip the name and its two-character "/\n" terminator.
            // NOTE(review): this counts `Character`s, as the original did; it equals the
            // byte offset for ASCII names — confirm behaviour for non-ASCII filenames.
            nameOffset += currentName.count + 2
            currentName = ""
            position = table.index(after: next)
        } else {
            currentName.append(character)
            position = next
        }
    }

    return offsetsAndNames
}
}

extension ArArchiveReader: Sequence {
Expand Down
Loading

0 comments on commit ec33ce8

Please sign in to comment.