-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsnappy.nim
297 lines (246 loc) · 10.2 KB
/
snappy.nim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
{.push raises: [].}
import
stew/[arrayops, endians2, leb128],
results,
./snappy/[codec, decoder, encoder]
export codec, results
## Compression and decompression utilities for the snappy compression algorithm:
##
## * [Landing page](http://google.github.io/snappy/)
## * [Format description](https://github.com/google/snappy/blob/main/format_description.txt)
##
## This file contains the in-memory API - see
## `snappy/faststreams` and `snappy/streams` for `faststreams` and `std/streams`
## support.
##
## * `compress`/`uncompress` work with caller-allocated buffers
## * `encode`/`decode` are convenience wrappers for the above that take care of
## memory allocation
##
## Framed encodings are also supported via functions carrying the `Framed` suffix
##
## * [Framing format](https://github.com/google/snappy/blob/main/framing_format.txt)
func compress*(
    input: openArray[byte],
    output: var openArray[byte]): Result[int, CodecError] =
  ## Encode `input` in the plain (non-framed) snappy format into the
  ## caller-supplied `output` buffer, returning how many bytes of `output`
  ## were filled.
  ##
  ## Errors:
  ## * `CodecError.invalidInput` - `checkInputLen` rejected `input.len`
  ##   (inputs larger than 2^32-1 bytes are not supported)
  ## * `CodecError.bufferTooSmall` - `output` holds fewer than
  ##   `maxCompressedLen(input.len)` bytes
  ##
  ## Use `compressFramed` for the framed format that supports arbitrary
  ## inputs, or `snappy/faststreams` / `snappy/streams` for stream-based
  ## versions.
  let inputLen = checkInputLen(input.len).valueOr:
    return err(CodecError.invalidInput)

  if maxCompressedLen(inputLen) > output.len.uint64:
    return err(CodecError.bufferTooSmall)

  # The output opens with the uncompressed length, varint (LEB128) encoded
  let prefix = inputLen.toBytes(Leb128)
  output[0..<prefix.len] = prefix.toOpenArray()

  var
    srcPos = 0
    dstPos = int(prefix.len)

  # Feed the encoder at most `maxBlockLen` input bytes at a time, appending
  # each encoded block right after the previous one
  while srcPos < input.len:
    let chunkLen = min(input.len - srcPos, maxBlockLen.int)
    dstPos += encodeBlock(
      input.toOpenArray(srcPos, srcPos + chunkLen - 1),
      output.toOpenArray(dstPos, output.high))
    srcPos += chunkLen

  ok(dstPos)
func encode*(input: openArray[byte]): seq[byte] =
  ## Allocating convenience wrapper around `compress`: returns `input`
  ## encoded in the plain (non-framed) snappy format.
  ##
  ## An empty sequence is returned when `maxCompressedLen(input.len)` cannot
  ## be computed - i.e. `input` is larger than 2^32-1 bytes or too large for
  ## a worst-case output buffer to be constructed.
  ##
  ## Use `encodeFramed` for the framed format that supports arbitrary
  ## lengths, or `snappy/faststreams` / `snappy/streams` for stream-based
  ## versions.
  let bound = maxCompressedLen(input.len)
  if bound.isErr():
    return

  # Allocate for the worst case, then trim to what was actually produced
  # TODO https://github.com/nim-lang/Nim/issues/19357
  result = newSeqUninitialized[byte](bound.get())

  let used = compress(input, result).expect("we've checked lengths already")
  result.setLen(used)
func uncompress*(input: openArray[byte], output: var openArray[byte]):
    Result[int, CodecError] =
  ## Decode the plain (non-framed) snappy data in `input` into the
  ## caller-supplied `output` buffer and return the number of bytes written.
  ##
  ## Errors:
  ## * `CodecError.invalidInput` - the varint length header cannot be read,
  ##   an empty payload is followed by trailing bytes, or the decoded size
  ##   disagrees with the header
  ## * `CodecError.bufferTooSmall` - `output` is shorter than the length
  ##   announced in the header (see `uncompressedLen`)
  ## * any error reported by `decodeAllTags`
  ##
  ## When an error is returned, `output` may have been partially written to.
  let (expectedLen, headerLen) = uint32.fromBytes(input, Leb128)

  if headerLen <= 0:
    return err(CodecError.invalidInput)

  if output.len.uint64 < expectedLen.uint64:
    return err(CodecError.bufferTooSmall)

  if expectedLen == 0:
    # Nothing to decode: the header must be the entire input
    return
      if headerLen == input.len():
        ok(0)
      else:
        err(CodecError.invalidInput)

  let produced =
    ? decodeAllTags(input.toOpenArray(headerLen, input.high), output)

  if produced.uint64 == expectedLen:
    ok(produced)
  else:
    err(CodecError.invalidInput) # Header does not match content
func decode*(input: openArray[byte], maxSize = maxUncompressedLen): seq[byte] =
  ## Allocating convenience wrapper around `uncompress`: returns the
  ## uncompressed content of `input`.
  ##
  ## An empty sequence is returned on any error, which includes the case
  ## where the length announced by `input` exceeds `maxSize`.
  ##
  ## The output buffer is sized from a length read out of `input` itself, so
  ## pass a sensible `maxSize` when decoding untrusted data to bound the
  ## allocation.
  let announced = uncompressedLen(input)
  if announced.isErr():
    return

  let expected = announced.get()
  if expected <= maxSize and expected <= int.high.uint64:
    # TODO https://github.com/nim-lang/Nim/issues/19357
    result = newSeqUninitialized[byte](int expected)

    if uncompress(input, result).isErr():
      result = @[] # Empty return on error
func compressFramed*(input: openArray[byte], output: var openArray[byte]):
    Result[int, FrameError] =
  ## Encode `input` in the snappy framed format into the caller-supplied
  ## `output` buffer, returning how many bytes of `output` were filled.
  ##
  ## `FrameError.bufferTooSmall` is returned when `output` holds fewer than
  ## `maxCompressedLenFramed(input.len)` bytes.
  ##
  ## Use `compress` for the simple non-framed snappy format, or
  ## `snappy/faststreams` / `snappy/streams` for stream-based versions.
  if maxCompressedLenFramed(input.len) > output.len.uint64:
    return err(FrameError.bufferTooSmall)

  # The output starts with the fixed `framingHeader`
  output[0..<framingHeader.len] = framingHeader

  var
    consumed = 0
    produced = framingHeader.len

  # Emit one frame per `maxUncompressedFrameDataLen`-sized slice of the input
  while consumed < input.len:
    let chunkLen = min(input.len - consumed, int maxUncompressedFrameDataLen)
    produced += encodeFrame(
      input.toOpenArray(consumed, consumed + chunkLen - 1),
      output.toOpenArray(produced, output.high))
    consumed += chunkLen

  ok(produced)
func encodeFramed*(input: openArray[byte]): seq[byte] =
  ## Allocating convenience wrapper around `compressFramed`: returns `input`
  ## encoded in the snappy framed format.
  ##
  ## An empty sequence is returned when `maxCompressedLenFramed(input.len)`
  ## does not fit in an `int`, i.e. no output buffer of that size can be
  ## constructed.
  let bound = maxCompressedLenFramed(input.len)

  if bound <= int.high.uint64:
    # Allocate for the worst case, then trim to what was actually produced
    # TODO https://github.com/nim-lang/Nim/issues/19357
    result = newSeqUninitialized[byte](int bound)

    let used = compressFramed(input, result).expect("lengths checked")
    result.setLen(used)
func uncompressFramed*(
    input: openArray[byte], output: var openArray[byte], checkHeader = true,
    checkIntegrity = true):
    Result[tuple[read: int, written: int], FrameError] =
  ## Uncompress as many frames as possible from `input` and write them to
  ## `output`, returning the number of bytes read and written.
  ##
  ## * `checkHeader` - when true, `input` must start with `framingHeader`
  ## * `checkIntegrity` - when true, the masked CRC stored in each data chunk
  ##   is verified against the uncompressed data
  ##
  ## When the `output` buffer is too small to hold the uncompressed data,
  ## the function will return the number of bytes consumed from the input and
  ## the number of correctly written bytes in the output (which may be smaller
  ## than the length of the output buffer). The returned read offset then
  ## points at the header of the first chunk that did not fit.
  ##
  ## Decompression can be resumed by calling `uncompressFramed` again with
  ## `checkHeader = false` and the input positioned at the returned read offset
  ## and a new output buffer.
  ##
  ## Errors:
  ## * `FrameError.invalidInput` - missing/incorrect stream header, truncated
  ##   chunk header or chunk data, data chunk too short to hold its CRC,
  ##   chunk that uncompresses to more than `maxUncompressedFrameDataLen`
  ##   bytes or compressed data that fails to decode
  ## * `FrameError.crcMismatch` - CRC verification failed
  ## * `FrameError.unknownChunk` - reserved unskippable chunk type
  ##
  ## In case of errors, `output` may be partially overwritten with invalid data.
  var
    read =
      if checkHeader:
        if input.len < framingHeader.len:
          return err(FrameError.invalidInput)

        if input.toOpenArray(0, framingHeader.len - 1) != framingHeader:
          return err(FrameError.invalidInput)
        framingHeader.len
      else:
        0
    written = 0

  while (let remaining = input.len - read; remaining > 0):
    # Each chunk starts with a 4-byte header carrying its type and length
    if remaining < 4:
      return err(FrameError.invalidInput)

    let
      (id, dataLen) = decodeFrameHeader(input.toOpenArray(read, read + 3))
    read += 4

    if remaining - 4 < dataLen:
      return err(FrameError.invalidInput)

    if id == chunkCompressed:
      if dataLen < 4:
        return err(FrameError.invalidInput)

      let
        crc = uint32.fromBytesLE input.toOpenArray(read, read + 3)
        maxOutput = min(maxUncompressedFrameDataLen.int, output.len - written)
        uncompressed = uncompress(
          input.toOpenArray(read + 4, read + dataLen - 1),
          output.toOpenArray(written, written + maxOutput - 1)).valueOr:
            let res = case error
              of CodecError.bufferTooSmall:
                # Either the chunk is oversized (invalid) or `output` is
                # simply full - in the latter case, report progress so far
                # with `read` rewound to the start of this chunk's header
                let uncompressed =
                  uncompressedLen(input.toOpenArray(read + 4, read + dataLen - 1))
                if uncompressed.isErr() or
                    uncompressed.get() > maxUncompressedFrameDataLen:
                  err(FrameError.invalidInput)
                else:
                  ok((read - 4, written))
              of CodecError.invalidInput: err(FrameError.invalidInput)
            return res

      if checkIntegrity and maskedCrc(
          output.toOpenArray(written, written + (uncompressed - 1))) != crc:
        return err(FrameError.crcMismatch)

      written += uncompressed

    elif id == chunkUncompressed:
      if dataLen < 4:
        return err(FrameError.invalidInput)

      let
        crc = uint32.fromBytesLE input.toOpenArray(read, read + 3)

      if checkIntegrity and
          maskedCrc(input.toOpenArray(read + 4, read + (dataLen - 1))) != crc:
        return err(FrameError.crcMismatch)

      let uncompressed = dataLen - 4 # dataLen includes CRC length

      if uncompressed > maxUncompressedFrameDataLen.int:
        return err(FrameError.invalidInput)

      if uncompressed > output.len - written:
        # Out of space - rewind `read` to the start of this chunk's header
        return ok((read - 4, written))

      if uncompressed > 0:
        # A chunk without payload has nothing to copy - taking the address of
        # `output[written]` / `input[read + 4]` would index one past the end
        # when `output` is full or the chunk is last in `input`, raising an
        # `IndexDefect` on otherwise well-formed data
        copyMem(addr output[written], unsafeAddr input[read + 4], uncompressed)
      written += uncompressed

    elif id < 0x80:
      return err(FrameError.unknownChunk) # Reserved unskippable chunk

    else:
      discard # Reserved skippable chunk (for example framing format header)

    read += dataLen

  ok((read, written))
func decodeFramed*(
    input: openArray[byte], maxSize = int.high,
    checkIntegrity = true): seq[byte] =
  ## Allocating convenience wrapper around `uncompressFramed`: returns the
  ## uncompressed content of the framed stream in `input`.
  ##
  ## The output buffer is sized up-front from `uncompressedLenFramed(input)`;
  ## when that length cannot be determined or is larger than `maxSize`,
  ## nothing is allocated and an empty buffer is returned.
  ##
  ## `checkIntegrity` is forwarded to `uncompressFramed`.
  ##
  ## In case of errors, an empty buffer is returned.
  let announced = uncompressedLenFramed(input)
  if announced.isErr():
    return

  let total = announced.get()
  if total <= maxSize.uint64:
    # TODO https://github.com/nim-lang/Nim/issues/19357
    result = newSeqUninitialized[byte](int total)

    if uncompressFramed(input, result, checkIntegrity = checkIntegrity).isErr():
      result = @[] # Empty return on error
template compress*(input: openArray[byte]): seq[byte] {.
    deprecated: "use `encode` - compress is for user-supplied buffers".} =
  ## Deprecated single-argument form: expands to `encode(input)`.
  encode(input)
template uncompress*(input: openArray[byte]): seq[byte] {.
    deprecated: "use `decode` - uncompress is for user-supplied buffers".} =
  ## Deprecated single-argument form: expands to `decode(input)`, using the
  ## default `maxSize` of `decode`.
  decode(input)