Skip to content

Commit

Permalink
hex2bytes for Vector{UInt8} (#23267)
Browse files Browse the repository at this point in the history
* hex2bytes for Vector{UInt8}

* Incorporated review comments.

* Added example code.

* Modified to support AbstractArray

* Fixed comments and changed to AbstractVector

* Example output fixed.

* updated the description.

* Add method to stdlib index

* length should not be called on an AbstractArray

inside exception handling, as it can itself raise an exception.

* removed resize as it may not work for subarrays.

* Added subarray test cases.

* typo fixed

* Incorporated review comments related to code comments
and test cases.

* Checking in code provided by stevengj

There are some test case failures reported related to views.

* Update util.jl

further condense implementation and documentation, merging with `hex2bytes(s::AbstractString)`

* test updates

use ===, not ==, to test that operation occurs in-place, and `hex2bytes!` now throws an error for incorrect output length

* fix hex2bytes for views (where start is not an index)

* rm redundant length check
  • Loading branch information
sambitdash authored and fredrikekre committed Aug 22, 2017
1 parent 3a24cbb commit eaca8f2
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 25 deletions.
1 change: 1 addition & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -755,6 +755,7 @@ export
graphemes,
hex,
hex2bytes,
hex2bytes!,
ind2chr,
info,
is_assigned_char,
Expand Down
76 changes: 51 additions & 25 deletions base/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -437,50 +437,76 @@ replace(s::AbstractString, pat, f) = replace_new(String(s), pat, f, typemax(Int)
# hex <-> bytes conversion

"""
hex2bytes(s::AbstractString)
hex2bytes(s::Union{AbstractString,AbstractVector{UInt8}})
Convert an arbitrarily long hexadecimal string to its binary representation. Returns an
`Array{UInt8,1}`, i.e. an array of bytes.
Given a string or array `s` of ASCII codes for a sequence of hexadecimal digits, returns a
`Vector{UInt8}` of bytes corresponding to the binary representation: each successive pair
of hexadecimal digits in `s` gives the value of one byte in the return vector.
The length of `s` must be even, and the returned array has half of the length of `s`.
See also [`hex2bytes!`](@ref) for an in-place version, and [`bytes2hex`](@ref) for the inverse.
# Examples
```jldoctest
julia> a = hex(12345)
julia> s = hex(12345)
"3039"
julia> hex2bytes(a)
julia> hex2bytes(s)
2-element Array{UInt8,1}:
0x30
0x39
julia> a = b"01abEF"
6-element Array{UInt8,1}:
0x30
0x31
0x61
0x62
0x45
0x46
julia> hex2bytes(a)
3-element Array{UInt8,1}:
0x01
0xab
0xef
```
"""
function hex2bytes(s::AbstractString)
a = zeros(UInt8, div(endof(s), 2))
i, j = start(s), 0
while !done(s, i)
c, i = next(s, i)
n = '0' <= c <= '9' ? c - '0' :
'a' <= c <= 'f' ? c - 'a' + 10 :
'A' <= c <= 'F' ? c - 'A' + 10 :
throw(ArgumentError("not a hexadecimal string: $(repr(s))"))
done(s, i) &&
throw(ArgumentError("string length must be even: length($(repr(s))) == $(length(s))"))
c, i = next(s, i)
n = '0' <= c <= '9' ? n << 4 + c - '0' :
'a' <= c <= 'f' ? n << 4 + c - 'a' + 10 :
'A' <= c <= 'F' ? n << 4 + c - 'A' + 10 :
throw(ArgumentError("not a hexadecimal string: $(repr(s))"))
a[j += 1] = n
function hex2bytes end

# String input: convert to a `String`, take its bytes as a `Vector{UInt8}`, and decode
# that vector with the byte-vector method below.
function hex2bytes(s::AbstractString)
    codes = Vector{UInt8}(String(s))
    return hex2bytes(codes)
end

# Byte-vector input: allocate an output of `length(s) >> 1` bytes and decode into it.
# Length validation (including odd-length input) is left to `hex2bytes!`.
function hex2bytes(s::AbstractVector{UInt8})
    out = Vector{UInt8}(length(s) >> 1)
    return hex2bytes!(out, s)
end

"""
hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8})
Convert an array `s` of bytes representing a hexadecimal string to its binary
representation, similar to [`hex2bytes`](@ref) except that the output is written in-place
in `d`. The length of `s` must be exactly twice the length of `d`.
"""
function hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8})
if 2length(d) != length(s)
isodd(length(s)) && throw(ArgumentError("input hex array must have even length"))
throw(ArgumentError("output array must be half length of input array"))
end
resize!(a, j)
return a
j = first(eachindex(d)) - 1
for i = first(eachindex(s)):2:endof(s)
@inbounds d[j += 1] = number_from_hex(s[i]) << 4 + number_from_hex(s[i+1])
end
return d
end

# Decode a single ASCII hexadecimal digit code (`0`-`9`, `A`-`F` or `a`-`f`) into its
# numeric value. Any other input raises an `ArgumentError`.
@inline function number_from_hex(c)
    if UInt8('0') <= c <= UInt8('9')
        return c - UInt8('0')
    elseif UInt8('A') <= c <= UInt8('F')
        # Maps 'A'..'F' onto 10..15.
        return c - (UInt8('A') - 0x0a)
    elseif UInt8('a') <= c <= UInt8('f')
        # Maps 'a'..'f' onto 10..15.
        return c - (UInt8('a') - 0x0a)
    end
    throw(ArgumentError("byte is not an ASCII hexadecimal digit"))
end

"""
bytes2hex(bin_arr::Array{UInt8, 1}) -> String
Convert an array of bytes to its hexadecimal representation.
All characters are in lower-case.
# Examples
```jldoctest
julia> a = hex(12345)
Expand Down
1 change: 1 addition & 0 deletions doc/src/stdlib/numbers.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ Base.Math.exponent
Base.complex(::Complex)
Base.bswap
Base.hex2bytes
Base.hex2bytes!
Base.bytes2hex
```

Expand Down
21 changes: 21 additions & 0 deletions test/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -247,3 +247,24 @@ bin_val = hex2bytes("07bf")

#non-hex characters
@test_throws ArgumentError hex2bytes("0123456789abcdefABCDEFGH")

@testset "Issue 23161" begin
    hexdigits = b"0123456789abcdefABCDEF"
    decoded = Vector{UInt8}(length(hexdigits) >> 1)

    # `===` rather than `==`: the very same array must come back (in-place operation)
    @test hex2bytes!(decoded, hexdigits) === decoded
    @test bytes2hex(decoded) == "0123456789abcdefabcdef"
    @test hex2bytes(hexdigits) == hex2bytes("0123456789abcdefABCDEF")

    # output buffer length does not match the (empty) input
    @test_throws ArgumentError hex2bytes!(decoded, b"")
    @test UInt8[] == hex2bytes(b"")

    # views as input and as output
    @test UInt8[0x01,0x23,0x45] == hex2bytes(view(b"012345",1:6))
    src = view(b"012345ab",1:6)
    dst = view(zeros(UInt8, 10),1:3)
    @test hex2bytes!(dst,src) == UInt8[0x01,0x23,0x45]

    # odd number of hex digits
    @test_throws ArgumentError hex2bytes(b"0123456789abcdefABCDEF0")

    # non-hex characters
    @test_throws ArgumentError hex2bytes(b"0123456789abcdefABCDEFGH")
end

0 comments on commit eaca8f2

Please sign in to comment.