feat: Support chunk upload in Swift (box/box-codegen#513) (#142)
box-sdk-build authored Jun 21, 2024
1 parent 8673a24 commit 9e0b4e2
Showing 12 changed files with 475 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .codegen.json
@@ -1 +1 @@
{ "engineHash": "1dc8a5d", "specHash": "ee83bc7", "version": "0.1.0" }
{ "engineHash": "06b32a5", "specHash": "ee83bc7", "version": "0.1.0" }
48 changes: 48 additions & 0 deletions BoxSdkGen.xcodeproj/project.pbxproj

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions Sources/Internal/ArrayInputStream.swift
@@ -19,6 +19,8 @@
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import Foundation

extension InputStream: @unchecked Sendable {}

class ArrayInputStream: InputStream {
private let inputStreams: [InputStream]

62 changes: 62 additions & 0 deletions Sources/Internal/Data+Extensions.swift
@@ -0,0 +1,62 @@
import Foundation

/// Extension providing convenience methods for handling data.
public extension Data {

/// Initializes `Data` from a value of generic type `T`.
///
/// - Parameter value: The value to convert into `Data`.
internal init<T>(value: T) {
self = withUnsafePointer(to: value) { (ptr: UnsafePointer<T>) -> Data in
return Data(buffer: UnsafeBufferPointer(start: ptr, count: 1))
}
}

/// Appends a value of generic type `T` to the end of `Data`.
///
/// - Parameter value: The value to append to `Data`.
internal mutating func append<T>(value: T) {
withUnsafePointer(to: value) { (ptr: UnsafePointer<T>) in
append(UnsafeBufferPointer(start: ptr, count: 1))
}
}

/// Initializes `Data` from a hexadecimal string.
///
/// - Parameter hex: The hexadecimal string representation of the data.
/// - Returns: An optional `Data` object initialized from the hexadecimal string, or `nil` if the string is invalid.
init?(fromHexString hex: String) {
guard hex.count % 2 == 0 else {
return nil
}

var data = Data()
var index = hex.startIndex
while index < hex.endIndex {
let nextIndex = hex.index(index, offsetBy: 2)
let byteString = hex[index..<nextIndex]
guard let byte = UInt8(byteString, radix: 16) else {
return nil
}
data.append(byte)
index = nextIndex
}

self = data
}

/// Returns the `Data` object as a base64-encoded string.
///
/// - Returns: A base64-encoded string representation of the `Data`.
func base64EncodedString() -> String {
return self.base64EncodedString(options: [])
}

/// Returns the `Data` object as a hexadecimal string.
///
/// - Returns: A hexadecimal string representation of the `Data`.
func hexString() -> String {
let hexBytes = self.map { String(format: "%02hhx", $0) }
return hexBytes.joined()
}
}
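
For reference, a minimal usage sketch of the new Data helpers above (not part of this diff; the literal values are illustrative only):

let bytes = Data(fromHexString: "48656c6c6f")       // bytes of the ASCII string "Hello"
print(bytes?.hexString() ?? "")                     // prints "48656c6c6f"
print(bytes?.base64EncodedString() ?? "")           // prints "SGVsbG8="
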
49 changes: 49 additions & 0 deletions Sources/Internal/Hash.swift
@@ -0,0 +1,49 @@
import Foundation

/// Enum defining available hashing algorithms.
public enum HashName {
/// SHA-1 hashing algorithm.
case sha1
}

/// Class for hashing data using a specified algorithm.
public class Hash {
private static let Base64Encoding = "base64"

private let algorithm: HashName
private var data: Data

/// Initializes a `Hash` instance with the specified algorithm.
///
/// - Parameter algorithm: The hashing algorithm to use.
public init(algorithm: HashName) {
self.algorithm = algorithm
self.data = Data()
}

/// Updates the hash with additional data.
///
/// - Parameter data: The data to append to the hash.
public func updateHash(data: Data) {
self.data.append(data)
}

/// Calculates the digest of the accumulated data using the specified encoding.
///
/// - Parameter encoding: The string encoding to use for the digest result.
/// - Returns: The base64-encoded or hexadecimal string representation of the hash digest.
public func digestHash(encoding: String) async -> String {
var digest = Data()

switch algorithm {
case .sha1:
digest = SHA1.sha1(data: data)
}

if encoding == Self.Base64Encoding {
return digest.base64EncodedString()
} else {
return digest.hexString()
}
}
}
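
A short sketch of how the Hash wrapper above could be driven (not part of this diff; it must run in an async context because digestHash is async, and any encoding other than "base64" yields hex):

let hash = Hash(algorithm: .sha1)
hash.updateHash(data: Data("abc".utf8))
let hex = await hash.digestHash(encoding: "hex")
// hex == "a9993e364706816aba3e25717850c26c9cd0d89d"  (well-known SHA-1 test vector for "abc")
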
107 changes: 107 additions & 0 deletions Sources/Internal/SHA1.swift
@@ -0,0 +1,107 @@
import Foundation

struct SHA1 {
// Hash initial values
private static let h0: UInt32 = 0x67452301
private static let h1: UInt32 = 0xEFCDAB89
private static let h2: UInt32 = 0x98BADCFE
private static let h3: UInt32 = 0x10325476
private static let h4: UInt32 = 0xC3D2E1F0

static func sha1(data: Data) -> Data {
var message = data
let messageLength = UInt64(message.count * 8)

// Padding the message
message.append(0x80)
while (message.count % 64) != 56 {
message.append(0x00)
}

// Append the length of the original message as a 64-bit big-endian integer
let lengthBytes = messageLength.bigEndian
message.append(value: lengthBytes)

// Initialize hash values
var h: [UInt32] = [SHA1.h0, SHA1.h1, SHA1.h2, SHA1.h3, SHA1.h4]

// Process the message in successive 512-bit chunks
for chunkOffset in stride(from: 0, to: message.count, by: 64) {
let chunk = message[chunkOffset..<chunkOffset + 64]
var words = [UInt32](repeating: 0, count: 80)

// Break chunk into sixteen 32-bit big-endian words
for i in 0..<16 {
let start = chunk.index(chunk.startIndex, offsetBy: i * 4)
let end = chunk.index(start, offsetBy: 4)
words[i] = UInt32(bigEndian: chunk[start..<end].withUnsafeBytes { $0.load(as: UInt32.self) })
}

// Extend the sixteen 32-bit words into eighty 32-bit words
for i in 16..<80 {
words[i] = leftRotate(words[i - 3] ^ words[i - 8] ^ words[i - 14] ^ words[i - 16], by: 1)
}

// Initialize hash value for this chunk
var a = h[0]
var b = h[1]
var c = h[2]
var d = h[3]
var e = h[4]

// Main loop
for i in 0..<80 {
var f: UInt32 = 0
var k: UInt32 = 0

switch i {
case 0...19:
f = (b & c) | ((bitwiseNot(b)) & d)
k = 0x5A827999
case 20...39:
f = b ^ c ^ d
k = 0x6ED9EBA1
case 40...59:
f = (b & c) | (b & d) | (c & d)
k = 0x8F1BBCDC
case 60...79:
f = b ^ c ^ d
k = 0xCA62C1D6
default:
break
}

let temp = leftRotate(a, by: 5) &+ f &+ e &+ k &+ words[i]
e = d
d = c
c = leftRotate(b, by: 30)
b = a
a = temp
}

// Add this chunk's hash to result so far
h[0] = h[0] &+ a
h[1] = h[1] &+ b
h[2] = h[2] &+ c
h[3] = h[3] &+ d
h[4] = h[4] &+ e
}

// Produce the final hash value (big-endian)
var hash = Data()
[h[0], h[1], h[2], h[3], h[4]].forEach {
let bigEndianValue = $0.bigEndian
hash.append(value: bigEndianValue)
}

return hash
}

private static func bitwiseNot<T: FixedWidthInteger>(_ value: T) -> T {
return value ^ T.max
}

private static func leftRotate(_ value: UInt32, by bits: UInt32) -> UInt32 {
return (value << bits) | (value >> (32 - bits))
}
}
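
From within the module (SHA1 is internal), the struct can be sanity-checked against the FIPS 180-1 test vector for "abc"; this check is illustrative only and not part of this diff:

let digest = SHA1.sha1(data: Data("abc".utf8))
assert(digest.hexString() == "a9993e364706816aba3e25717850c26c9cd0d89d")
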
73 changes: 73 additions & 0 deletions Sources/Internal/Utils.swift
@@ -104,6 +104,16 @@ public enum Utils {
public static func from(data: Data) -> String {
return String(decoding: data, as: UTF8.self)
}

/// Converts a hex string to a base64 string.
///
/// - Parameters:
/// - value: The hex string.
/// - Returns: The base64 string.
public static func hextToBase64(value: String) -> String {
let data = Data(fromHexString: value) ?? Data()
return data.base64EncodedString()
}
}

/// Helper methods for Date
@@ -262,10 +272,73 @@ public enum Utils {
return buffer1 == buffer2
}

/// Gets the length of a buffer.
///
/// - Parameters:
/// - buffer: The instance of `Data`.
/// - Returns: The length of the buffer.
public static func bufferLength(buffer: Data) -> Int {
return buffer.count
}

/// Returns the path of the temporary directory for the current user.
///
/// - Returns: The path of the temporary directory for the current user.
public static func temporaryDirectoryPath() -> String {
FileManager.default.temporaryDirectory.absoluteString
}


/// Iterates over a stream and yields chunks of it.
///
/// - Parameters:
/// - stream: The `InputStream` to iterate over.
/// - chunkSize: The size of each chunk in bytes.
/// - Returns: An `AsyncStream` that yields each chunk as an `InputStream`.
public static func iterateChunks(stream: InputStream, chunkSize: Int64) -> AsyncStream<InputStream> {
return AsyncStream<InputStream> { continuation in
_Concurrency.Task {
stream.open()

let bufferSize = Int(chunkSize)
var buffer = [UInt8](repeating: 0, count: bufferSize)

defer {
stream.close()
continuation.finish()
}

while stream.hasBytesAvailable {
let read = stream.read(&buffer, maxLength: buffer.count)
if read < 0, let error = stream.streamError {
throw error
} else if read == 0 {
return
}

continuation.yield(InputStream(data: Data(buffer.prefix(read))))
}
}
}
}

/// Asynchronously reduces the elements of an `AsyncStream` using a specified reducer function and initial value.
///
/// - Parameters:
/// - iterator: The `AsyncStream` providing elements to be reduced.
/// - reducer: A closure that combines an accumulated value (`U`) with each element of the stream (`T`) asynchronously.
/// - initialValue: The initial value to start the reduction.
/// - Returns: The result of combining all elements of the stream using the provided reducer function.
/// - Throws: Any error thrown by the `reducer` closure during the reduction process.
public static func reduceIterator<T,U>(iterator: AsyncStream<T>, reducer: (U, T) async throws -> U, initialValue: U) async throws -> U
{
var result = initialValue

for await item in iterator {
result = try await reducer(result, item)
}

return result
}

}
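
A sketch of how the new chunk helpers compose (not part of this diff; the file path and chunk size are hypothetical, the code assumes an async throwing context, and it assumes Utils.readByteStream, used elsewhere in this commit, is callable from the caller's scope):

let stream = InputStream(url: URL(fileURLWithPath: "/tmp/large.bin"))!   // hypothetical path
let chunks = Utils.iterateChunks(stream: stream, chunkSize: 8 * 1024 * 1024)
let totalBytes: Int = try await Utils.reduceIterator(
    iterator: chunks,
    // Sum the byte count of every chunk yielded by the stream.
    reducer: { acc, chunk in acc + Utils.bufferLength(buffer: Utils.readByteStream(byteStream: chunk)) },
    initialValue: 0
)
// totalBytes now holds the number of bytes read across all chunks.
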
52 changes: 52 additions & 0 deletions Sources/Managers/ChunkedUploads/ChunkedUploadsManager.swift
@@ -120,4 +120,56 @@ public class ChunkedUploadsManager {
return try Files.deserialize(from: response.data)
}

public func reducer(acc: PartAccumulator, chunk: InputStream) async throws -> PartAccumulator {
let lastIndex: Int = acc.lastIndex
let parts: [UploadPart] = acc.parts
let chunkBuffer: Data = Utils.readByteStream(byteStream: chunk)
let hash: Hash = Hash(algorithm: HashName.sha1)
hash.updateHash(data: chunkBuffer)
let sha1: String = await hash.digestHash(encoding: "base64")
let digest: String = "\("sha=")\(sha1)"
let chunkSize: Int = Utils.bufferLength(buffer: chunkBuffer)
let bytesStart: Int = lastIndex + 1
let bytesEnd: Int = lastIndex + chunkSize
let contentRange: String = "\("bytes ")\(Utils.Strings.toString(value: bytesStart)!)\("-")\(Utils.Strings.toString(value: bytesEnd)!)\("/")\(Utils.Strings.toString(value: acc.fileSize)!)"
let uploadedPart: UploadedPart = try await self.uploadFilePart(uploadSessionId: acc.uploadSessionId, requestBody: Utils.generateByteStreamFromBuffer(buffer: chunkBuffer), headers: UploadFilePartHeaders(digest: digest, contentRange: contentRange))
let part: UploadPart = uploadedPart.part!
let partSha1: String = Utils.Strings.hextToBase64(value: part.sha1!)
assert(partSha1 == sha1)
assert(part.size! == chunkSize)
assert(part.offset! == bytesStart)
acc.fileHash.updateHash(data: chunkBuffer)
return PartAccumulator(lastIndex: bytesEnd, parts: parts + [part], fileSize: acc.fileSize, uploadSessionId: acc.uploadSessionId, fileHash: acc.fileHash)
}

/// Starts the chunked upload of a big file and returns a `FileFull` object representing the uploaded file.
///
/// - Parameters:
/// - file: The stream of the file to upload.
/// - fileName: The name of the file, which will be used for storage in Box.
/// - fileSize: The total size of the file for the chunked upload in bytes.
/// - parentFolderId: The ID of the folder where the file should be uploaded.
/// - Returns: The `FileFull`.
/// - Throws: The `GeneralError`.
public func uploadBigFile(file: InputStream, fileName: String, fileSize: Int64, parentFolderId: String) async throws -> FileFull {
let uploadSession: UploadSession = try await self.createFileUploadSession(requestBody: CreateFileUploadSessionRequestBody(folderId: parentFolderId, fileSize: fileSize, fileName: fileName))
let uploadSessionId: String = uploadSession.id!
let partSize: Int64 = uploadSession.partSize!
let totalParts: Int = uploadSession.totalParts!
assert(partSize * Int64(totalParts) >= fileSize)
assert(uploadSession.numPartsProcessed == 0)
let fileHash: Hash = Hash(algorithm: HashName.sha1)
let chunksIterator: AsyncStream<InputStream> = Utils.iterateChunks(stream: file, chunkSize: partSize)
let results: PartAccumulator = try await Utils.reduceIterator(iterator: chunksIterator, reducer: self.reducer, initialValue: PartAccumulator(lastIndex: -1, parts: [], fileSize: fileSize, uploadSessionId: uploadSessionId, fileHash: fileHash))
let parts: [UploadPart] = results.parts
let processedSessionParts: UploadParts = try await self.getFileUploadSessionParts(uploadSessionId: uploadSessionId)
assert(processedSessionParts.totalCount! == totalParts)
let processedSession: UploadSession = try await self.getFileUploadSessionById(uploadSessionId: uploadSessionId)
assert(processedSession.numPartsProcessed == totalParts)
let sha1: String = await fileHash.digestHash(encoding: "base64")
let digest: String = "\("sha=")\(sha1)"
let committedSession: Files = try await self.createFileUploadSessionCommit(uploadSessionId: uploadSessionId, requestBody: CreateFileUploadSessionCommitRequestBody(parts: parts), headers: CreateFileUploadSessionCommitHeaders(digest: digest))
return committedSession.entries![0]
}

}
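
End to end, the new uploadBigFile entry point could be called roughly like this (a hedged sketch, not part of this diff; it assumes an authenticated client that exposes this manager as client.chunkedUploads, and the file path, name, and folder ID are hypothetical):

let fileURL = URL(fileURLWithPath: "/path/to/large-file.mp4")            // hypothetical path
let attrs = try FileManager.default.attributesOfItem(atPath: fileURL.path)
let fileSize = (attrs[.size] as? NSNumber)?.int64Value ?? 0              // total size in bytes
let stream = InputStream(url: fileURL)!
let uploadedFile = try await client.chunkedUploads.uploadBigFile(
    file: stream,
    fileName: "large-file.mp4",
    fileSize: fileSize,
    parentFolderId: "0"
)
print("Uploaded file id: \(uploadedFile.id)")
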