diff --git a/base/exports.jl b/base/exports.jl
index 49d3ceb5d3568..96d860e7d6bb7 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -755,6 +755,7 @@ export
     graphemes,
     hex,
     hex2bytes,
+    hex2bytes!,
     ind2chr,
     info,
     is_assigned_char,
diff --git a/base/strings/util.jl b/base/strings/util.jl
index 783ac0a6b20cb..665fae4c6398d 100644
--- a/base/strings/util.jl
+++ b/base/strings/util.jl
@@ -437,50 +437,76 @@ replace(s::AbstractString, pat, f) = replace_new(String(s), pat, f, typemax(Int)
 # hex <-> bytes conversion
 
 """
-    hex2bytes(s::AbstractString)
+    hex2bytes(s::Union{AbstractString,AbstractVector{UInt8}})
 
-Convert an arbitrarily long hexadecimal string to its binary representation. Returns an
-`Array{UInt8,1}`, i.e. an array of bytes.
+Given a string or array `s` of ASCII codes for a sequence of hexadecimal digits, returns a
+`Vector{UInt8}` of bytes corresponding to the binary representation: each successive pair
+of hexadecimal digits in `s` gives the value of one byte in the return vector.
+
+The length of `s` must be even, and the returned array has half of the length of `s`.
+See also [`hex2bytes!`](@ref) for an in-place version, and [`bytes2hex`](@ref) for the inverse.
 
 # Examples
 ```jldoctest
-julia> a = hex(12345)
+julia> s = hex(12345)
 "3039"
 
-julia> hex2bytes(a)
+julia> hex2bytes(s)
 2-element Array{UInt8,1}:
  0x30
  0x39
+
+julia> a = b"01abEF"
+6-element Array{UInt8,1}:
+ 0x30
+ 0x31
+ 0x61
+ 0x62
+ 0x45
+ 0x46
+
+julia> hex2bytes(a)
+3-element Array{UInt8,1}:
+ 0x01
+ 0xab
+ 0xef
 ```
 """
-function hex2bytes(s::AbstractString)
-    a = zeros(UInt8, div(endof(s), 2))
-    i, j = start(s), 0
-    while !done(s, i)
-        c, i = next(s, i)
-        n = '0' <= c <= '9' ? c - '0' :
-            'a' <= c <= 'f' ? c - 'a' + 10 :
-            'A' <= c <= 'F' ? c - 'A' + 10 :
-                throw(ArgumentError("not a hexadecimal string: $(repr(s))"))
-        done(s, i) &&
-            throw(ArgumentError("string length must be even: length($(repr(s))) == $(length(s))"))
-        c, i = next(s, i)
-        n = '0' <= c <= '9' ? n << 4 + c - '0' :
-            'a' <= c <= 'f' ? n << 4 + c - 'a' + 10 :
-            'A' <= c <= 'F' ? n << 4 + c - 'A' + 10 :
-                throw(ArgumentError("not a hexadecimal string: $(repr(s))"))
-        a[j += 1] = n
+function hex2bytes end
+
+hex2bytes(s::AbstractString) = hex2bytes(Vector{UInt8}(String(s)))
+hex2bytes(s::AbstractVector{UInt8}) = hex2bytes!(Vector{UInt8}(length(s) >> 1), s)
+
+"""
+    hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8})
+
+Convert an array `s` of bytes representing a hexadecimal string to its binary
+representation, similar to [`hex2bytes`](@ref) except that the output is written in-place
+in `d`. The length of `s` must be exactly twice the length of `d`.
+"""
+function hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8})
+    if 2length(d) != length(s)
+        isodd(length(s)) && throw(ArgumentError("input hex array must have even length"))
+        throw(ArgumentError("output array must be half length of input array"))
     end
-    resize!(a, j)
-    return a
+    j = first(eachindex(d)) - 1
+    for i = first(eachindex(s)):2:endof(s)
+        @inbounds d[j += 1] = number_from_hex(s[i]) << 4 + number_from_hex(s[i+1])
+    end
+    return d
 end
 
+@inline number_from_hex(c) =
+    (UInt8('0') <= c <= UInt8('9')) ? c - UInt8('0') :
+    (UInt8('A') <= c <= UInt8('F')) ? c - (UInt8('A') - 0x0a) :
+    (UInt8('a') <= c <= UInt8('f')) ? c - (UInt8('a') - 0x0a) :
+    throw(ArgumentError("byte is not an ASCII hexadecimal digit"))
+
 """
     bytes2hex(bin_arr::Array{UInt8, 1}) -> String
 
 Convert an array of bytes to its hexadecimal representation.
 All characters are in lower-case.
-
 # Examples
 ```jldoctest
 julia> a = hex(12345)
diff --git a/doc/src/stdlib/numbers.md b/doc/src/stdlib/numbers.md
index 91941c4a338fa..bd4aea8f52f8d 100644
--- a/doc/src/stdlib/numbers.md
+++ b/doc/src/stdlib/numbers.md
@@ -59,6 +59,7 @@ Base.Math.exponent
 Base.complex(::Complex)
 Base.bswap
 Base.hex2bytes
+Base.hex2bytes!
 Base.bytes2hex
 ```
 
diff --git a/test/strings/util.jl b/test/strings/util.jl
index 7185964ae7aec..18f2d5002a6e8 100644
--- a/test/strings/util.jl
+++ b/test/strings/util.jl
@@ -247,3 +247,24 @@ bin_val = hex2bytes("07bf")
 
 #non-hex characters
 @test_throws ArgumentError hex2bytes("0123456789abcdefABCDEFGH")
+
+@testset "Issue 23161" begin
+    arr = b"0123456789abcdefABCDEF"
+    arr1 = Vector{UInt8}(length(arr) >> 1)
+    @test hex2bytes!(arr1, arr) === arr1 # check in-place
+    @test "0123456789abcdefabcdef" == bytes2hex(arr1)
+    @test hex2bytes("0123456789abcdefABCDEF") == hex2bytes(arr)
+    @test_throws ArgumentError hex2bytes!(arr1, b"") # incorrect arr1 length
+    @test hex2bytes(b"") == UInt8[]
+    @test hex2bytes(view(b"012345",1:6)) == UInt8[0x01,0x23,0x45]
+    @test begin
+        s = view(b"012345ab",1:6)
+        d = view(zeros(UInt8, 10),1:3)
+        hex2bytes!(d,s) == UInt8[0x01,0x23,0x45]
+    end
+    # odd size
+    @test_throws ArgumentError hex2bytes(b"0123456789abcdefABCDEF0")
+
+    #non-hex characters
+    @test_throws ArgumentError hex2bytes(b"0123456789abcdefABCDEFGH")
+end