From 82e337dbecbff54eaba4f1bd1cb83a2530508dd3 Mon Sep 17 00:00:00 2001 From: Sambit Kumar Dash Date: Tue, 15 Aug 2017 18:55:06 +0530 Subject: [PATCH 01/18] hex2bytes for Vector{UInt8} --- base/strings/util.jl | 49 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/base/strings/util.jl b/base/strings/util.jl index f7297ae6810fc..762bba6440612 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -463,6 +463,55 @@ function hex2bytes(s::AbstractString) return a end +""" + hex2bytes(d::Vector{UInt8}, s::Vector{UInt8}, nInBytes::Int=length(s)) + +Convert the first `nInBytes` hexadecimal bytes array to its binary representation. The +results are populated into a destination array. The function returns the number of bytes +copied into the destination array. The size of destination array must be at least half of +the `nInBytes` parameter. +""" +function hex2bytes(d::Vector{UInt8}, s::Vector{UInt8}, nInBytes::Int=length(s)) + if isodd(nInBytes) + throw(ArgumentError("Input data length should be even")) + end + + len2 = div(nInBytes, 2) + if size(d)[1] < len2 + throw(ArgumentError("Destination data buffer should be sufficiently large")) + end + + i = 0 + j = 1 + # This line is important as this ensures computation happens in word boundary and not + # byte boundary. Boundary computation can be almost 10 times slower + n = c1 = c2 = UInt(0) + for j = 1:len2 + n = 0 + @inbounds c1 = UInt(s[i+=1]) + @inbounds c2 = UInt(s[i+=1]) + n = get_number_from_hex(c1) + n <<= 4 + n += get_number_from_hex(c2) + @inbounds d[j] = (n & 0xFF) + end + return j +end + +@inline get_number_from_hex(c::UInt) = begin + const DIGIT_ZERO = UInt('0') + const DIGIT_NINE = UInt('9') + const LATIN_UPPER_A = UInt('A') + const LATIN_UPPER_F = UInt('F') + const LATIN_A = UInt('a') + const LATIN_F = UInt('f') + + return (DIGIT_ZERO <= c <= DIGIT_NINE) ? c - DIGIT_ZERO : + (LATIN_UPPER_A <= c <= LATIN_UPPER_F) ? c - LATIN_UPPER_A + 10 : + (LATIN_A <= c <= LATIN_F) ? c - LATIN_A + 10 : + throw(ArgumentError("Not a hexadecimal number")) +end + """ bytes2hex(bin_arr::Array{UInt8, 1}) -> String From 1d1c0f2e4a6dec72d0f6aa9a7c7514e5b3e32554 Mon Sep 17 00:00:00 2001 From: Sambit Kumar Dash Date: Wed, 16 Aug 2017 00:14:23 +0530 Subject: [PATCH 02/18] Incorporated review comments. --- base/exports.jl | 1 + base/strings/util.jl | 66 +++++++++++++++++++++++++++----------------- test/strings/util.jl | 21 ++++++++++++++ 3 files changed, 63 insertions(+), 25 deletions(-) diff --git a/base/exports.jl b/base/exports.jl index eecd75f36d46e..9d1e02468685c 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -762,6 +762,7 @@ export graphemes, hex, hex2bytes, + hex2bytes!, ind2chr, info, is_assigned_char, diff --git a/base/strings/util.jl b/base/strings/util.jl index 762bba6440612..6e98644aa227e 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -464,47 +464,63 @@ function hex2bytes(s::AbstractString) end """ - hex2bytes(d::Vector{UInt8}, s::Vector{UInt8}, nInBytes::Int=length(s)) + hex2bytes(s::Vector{UInt8}) -Convert the first `nInBytes` hexadecimal bytes array to its binary representation. The +Convert the hexadecimal bytes array to its binary representation. Returns an +`Array{UInt8,1}`, i.e. an array of bytes. +""" +@inline function hex2bytes(s::Vector{UInt8}) + d = Vector{UInt8}(div(length(s), 2)) + hex2bytes!(d, s) + return d +end + +""" + hex2bytes!(d::Vector{UInt8}, s::Vector{UInt8}, n::Int=length(s)) + +Convert the first `n` hexadecimal bytes array to its binary representation. The results are populated into a destination array. The function returns the number of bytes -copied into the destination array. The size of destination array must be at least half of -the `nInBytes` parameter. +copied into the destination array. The size of destination array must be at least half +of the `n` parameter. """ -function hex2bytes(d::Vector{UInt8}, s::Vector{UInt8}, nInBytes::Int=length(s)) - if isodd(nInBytes) +function hex2bytes!(d::Vector{UInt8}, s::Vector{UInt8}, n::Int=length(s)) + if isodd(n) throw(ArgumentError("Input data length should be even")) end - len2 = div(nInBytes, 2) - if size(d)[1] < len2 + if n > length(s) + throw(ArgumentError("Input data length should not exceed array length")) + end + + len2 = div(n, 2) + + if length(d) < len2 throw(ArgumentError("Destination data buffer should be sufficiently large")) end - i = 0 - j = 1 - # This line is important as this ensures computation happens in word boundary and not - # byte boundary. Boundary computation can be almost 10 times slower + i = j = 0 + # This line is important as this ensures computation happens on word boundary and + # not byte boundary. Byte boundary computation can be almost 10 times slower. n = c1 = c2 = UInt(0) for j = 1:len2 - n = 0 + num = 0 @inbounds c1 = UInt(s[i+=1]) @inbounds c2 = UInt(s[i+=1]) - n = get_number_from_hex(c1) - n <<= 4 - n += get_number_from_hex(c2) - @inbounds d[j] = (n & 0xFF) + num = number_from_hex(c1) + num <<= 4 + num += number_from_hex(c2) + @inbounds d[j] = (num & 0xFF) end return j end -@inline get_number_from_hex(c::UInt) = begin - const DIGIT_ZERO = UInt('0') - const DIGIT_NINE = UInt('9') - const LATIN_UPPER_A = UInt('A') - const LATIN_UPPER_F = UInt('F') - const LATIN_A = UInt('a') - const LATIN_F = UInt('f') +@inline function number_from_hex(c::UInt) + DIGIT_ZERO = UInt('0') + DIGIT_NINE = UInt('9') + LATIN_UPPER_A = UInt('A') + LATIN_UPPER_F = UInt('F') + LATIN_A = UInt('a') + LATIN_F = UInt('f') return (DIGIT_ZERO <= c <= DIGIT_NINE) ? c - DIGIT_ZERO : (LATIN_UPPER_A <= c <= LATIN_UPPER_F) ? c - LATIN_UPPER_A + 10 : @@ -517,7 +533,7 @@ end Convert an array of bytes to its hexadecimal representation. All characters are in lower-case. - +it # Examples ```jldoctest julia> a = hex(12345) diff --git a/test/strings/util.jl b/test/strings/util.jl index 7185964ae7aec..4e25315b157b0 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -247,3 +247,24 @@ bin_val = hex2bytes("07bf") #non-hex characters @test_throws ArgumentError hex2bytes("0123456789abcdefABCDEFGH") + +function test_23161() + arr = UInt8["0123456789abcdefABCDEF"...] + arr1 = Vector{UInt8}(11) + @test hex2bytes!(arr1, arr) == 11 + @test "0123456789abcdefabcdef" == bytes2hex(arr1) + @test hex2bytes("0123456789abcdefABCDEF") == hex2bytes(arr) + @test hex2bytes!(arr1, UInt8[""...]) == 0 + @test hex2bytes(UInt8[""...]) == UInt8[] + + # odd size + @test_throws ArgumentError hex2bytes(UInt8["0123456789abcdefABCDEF0"...]) + + # Input array size smaller than the number of bytes to be converted. + @test_throws ArgumentError hex2bytes!(arr1, arr, 24) + + #non-hex characters + @test_throws ArgumentError hex2bytes(UInt8["0123456789abcdefABCDEFGH"...]) +end + +test_23161() From 216580783c5626ec429bd765d057328741b3525d Mon Sep 17 00:00:00 2001 From: Sambit Kumar Dash Date: Wed, 16 Aug 2017 00:41:08 +0530 Subject: [PATCH 03/18] Added example code. --- base/strings/util.jl | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/base/strings/util.jl b/base/strings/util.jl index 6e98644aa227e..1b702da8dd7de 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -482,6 +482,33 @@ Convert the first `n` hexadecimal bytes array to its binary representation. The results are populated into a destination array. The function returns the number of bytes copied into the destination array. The size of destination array must be at least half of the `n` parameter. + +# Examples +``` + julia> s = UInt8["01abEF"...] + 6-element Array{UInt8,1}: + 0x30 + 0x31 + 0x61 + 0x62 + 0x45 + 0x46 + + julia> d =zeros(UInt8, 3) + 3-element Array{UInt8,1}: + 0x00 + 0x00 + 0x00 + + julia> hex2bytes!(d, s, 6) + 3 + + julia> d + 3-element Array{UInt8,1}: + 0x01 + 0xab + 0xef +``` """ function hex2bytes!(d::Vector{UInt8}, s::Vector{UInt8}, n::Int=length(s)) if isodd(n) From 3db1d8479cc45a56f5ae82c456b38b2c60eb6806 Mon Sep 17 00:00:00 2001 From: Sambit Kumar Dash Date: Wed, 16 Aug 2017 03:06:02 +0530 Subject: [PATCH 04/18] Modified to support AbstractArray --- base/strings/util.jl | 68 +++++++++++++++++++------------------------- test/strings/util.jl | 7 ++--- 2 files changed, 31 insertions(+), 44 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 1b702da8dd7de..ab9b925dad26a 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -464,24 +464,23 @@ function hex2bytes(s::AbstractString) end """ - hex2bytes(s::Vector{UInt8}) + hex2bytes(s::AbstractVector{UInt8}) Convert the hexadecimal bytes array to its binary representation. Returns an -`Array{UInt8,1}`, i.e. an array of bytes. +`Vector{UInt8}`, i.e. a vector of bytes. """ -@inline function hex2bytes(s::Vector{UInt8}) +@inline function hex2bytes(s::AbstractVector{UInt8}) d = Vector{UInt8}(div(length(s), 2)) - hex2bytes!(d, s) - return d + return hex2bytes!(d, s) end """ - hex2bytes!(d::Vector{UInt8}, s::Vector{UInt8}, n::Int=length(s)) + hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8}) -Convert the first `n` hexadecimal bytes array to its binary representation. The -results are populated into a destination array. The function returns the number of bytes -copied into the destination array. The size of destination array must be at least half -of the `n` parameter. +Convert the hexadecimal bytes vector to its binary representation. The results are +populated into a destination vector. The function returns the number of bytes copied +into the destination array. The size of destination array must be half of the source +vector. `@view` macro can be used to pass `SubArray`s as arguments. # Examples ``` @@ -500,7 +499,7 @@ of the `n` parameter. 0x00 0x00 - julia> hex2bytes!(d, s, 6) + julia> hex2bytes!(d, s) 3 julia> d @@ -510,35 +509,26 @@ of the `n` parameter. 0xef ``` """ -function hex2bytes!(d::Vector{UInt8}, s::Vector{UInt8}, n::Int=length(s)) - if isodd(n) - throw(ArgumentError("Input data length should be even")) - end - - if n > length(s) - throw(ArgumentError("Input data length should not exceed array length")) - end - - len2 = div(n, 2) - - if length(d) < len2 - throw(ArgumentError("Destination data buffer should be sufficiently large")) - end - - i = j = 0 - # This line is important as this ensures computation happens on word boundary and - # not byte boundary. Byte boundary computation can be almost 10 times slower. - n = c1 = c2 = UInt(0) - for j = 1:len2 - num = 0 - @inbounds c1 = UInt(s[i+=1]) - @inbounds c2 = UInt(s[i+=1]) - num = number_from_hex(c1) - num <<= 4 - num += number_from_hex(c2) - @inbounds d[j] = (num & 0xFF) +function hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8}) + i, j = start(s), 0 + # This line is important as this ensures computation happens in word boundary and not + # byte boundary. Boundary computation can be almost 10 times slower + n::UInt = 0 + c1::UInt = 0 + c2::UInt = 0 + while !done(s, i) + n = 0 + c1, i = next(s, i) + done(s, i) && throw(ArgumentError( + "string length must be even: length($(repr(s))) == $(length(s))")) + c2, i = next(s, i) + n = number_from_hex(c1) + n <<= 4 + n += number_from_hex(c2) + d[j+=1] = (n & 0xFF) end - return j + resize!(d, j) + return d end @inline function number_from_hex(c::UInt) diff --git a/test/strings/util.jl b/test/strings/util.jl index 4e25315b157b0..3c28e9166474f 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -251,18 +251,15 @@ bin_val = hex2bytes("07bf") function test_23161() arr = UInt8["0123456789abcdefABCDEF"...] arr1 = Vector{UInt8}(11) - @test hex2bytes!(arr1, arr) == 11 + @test hex2bytes!(arr1, arr) == arr1 @test "0123456789abcdefabcdef" == bytes2hex(arr1) @test hex2bytes("0123456789abcdefABCDEF") == hex2bytes(arr) - @test hex2bytes!(arr1, UInt8[""...]) == 0 + @test hex2bytes!(arr1, UInt8[""...]) == arr1 @test hex2bytes(UInt8[""...]) == UInt8[] # odd size @test_throws ArgumentError hex2bytes(UInt8["0123456789abcdefABCDEF0"...]) - # Input array size smaller than the number of bytes to be converted. - @test_throws ArgumentError hex2bytes!(arr1, arr, 24) - #non-hex characters @test_throws ArgumentError hex2bytes(UInt8["0123456789abcdefABCDEFGH"...]) end From 3312ac01ea5efc08b55ea26503a1d38d357bf360 Mon Sep 17 00:00:00 2001 From: Sambit Kumar Dash Date: Wed, 16 Aug 2017 03:29:48 +0530 Subject: [PATCH 05/18] Fixed comments and changed to AbstractVector --- base/strings/util.jl | 56 +++++++++++++++++++++----------------------- test/strings/util.jl | 4 +--- 2 files changed, 28 insertions(+), 32 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index ab9b925dad26a..4ebfcedea6aa5 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -466,11 +466,11 @@ end """ hex2bytes(s::AbstractVector{UInt8}) -Convert the hexadecimal bytes array to its binary representation. Returns an +Convert the hexadecimal bytes array to its binary representation. Returns `Vector{UInt8}`, i.e. a vector of bytes. """ @inline function hex2bytes(s::AbstractVector{UInt8}) - d = Vector{UInt8}(div(length(s), 2)) + d = zeros(UInt8, div(endof(s), 2)) return hex2bytes!(d, s) end @@ -479,34 +479,33 @@ end Convert the hexadecimal bytes vector to its binary representation. The results are populated into a destination vector. The function returns the number of bytes copied -into the destination array. The size of destination array must be half of the source -vector. `@view` macro can be used to pass `SubArray`s as arguments. +into the destination vector. # Examples -``` - julia> s = UInt8["01abEF"...] - 6-element Array{UInt8,1}: - 0x30 - 0x31 - 0x61 - 0x62 - 0x45 - 0x46 - - julia> d =zeros(UInt8, 3) - 3-element Array{UInt8,1}: - 0x00 - 0x00 - 0x00 - - julia> hex2bytes!(d, s) - 3 - - julia> d - 3-element Array{UInt8,1}: - 0x01 - 0xab - 0xef +```jldoctest +julia> s = UInt8["01abEF"...] +6-element Array{UInt8,1}: + 0x30 + 0x31 + 0x61 + 0x62 + 0x45 + 0x46 + +julia> d =zeros(UInt8, 3) +3-element Array{UInt8,1}: + 0x00 + 0x00 + 0x00 + +julia> hex2bytes!(d, s) +3 + +julia> d +3-element Array{UInt8,1}: + 0x01 + 0xab + 0xef ``` """ function hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8}) @@ -550,7 +549,6 @@ end Convert an array of bytes to its hexadecimal representation. All characters are in lower-case. -it # Examples ```jldoctest julia> a = hex(12345) diff --git a/test/strings/util.jl b/test/strings/util.jl index 3c28e9166474f..c9a4785959035 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -248,7 +248,7 @@ bin_val = hex2bytes("07bf") #non-hex characters @test_throws ArgumentError hex2bytes("0123456789abcdefABCDEFGH") -function test_23161() +@testset "Issue 23161" begin arr = UInt8["0123456789abcdefABCDEF"...] arr1 = Vector{UInt8}(11) @test hex2bytes!(arr1, arr) == arr1 @@ -263,5 +263,3 @@ function test_23161() #non-hex characters @test_throws ArgumentError hex2bytes(UInt8["0123456789abcdefABCDEFGH"...]) end - -test_23161() From cae96cd52c3021d7d7bdc7ab7b1d1163fdf56a4f Mon Sep 17 00:00:00 2001 From: Sambit Kumar Dash Date: Thu, 17 Aug 2017 15:51:59 +0530 Subject: [PATCH 06/18] Example output fixed. --- base/strings/util.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 4ebfcedea6aa5..6cfa7bacd247b 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -499,9 +499,6 @@ julia> d =zeros(UInt8, 3) 0x00 julia> hex2bytes!(d, s) -3 - -julia> d 3-element Array{UInt8,1}: 0x01 0xab From b8a48af69a2272ecd0fa9f64af9cf3e21919761d Mon Sep 17 00:00:00 2001 From: Sambit Kumar Dash Date: Thu, 17 Aug 2017 17:05:22 +0530 Subject: [PATCH 07/18] updated the description. --- base/strings/util.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 6cfa7bacd247b..eecf0d667b330 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -478,8 +478,7 @@ end hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8}) Convert the hexadecimal bytes vector to its binary representation. The results are -populated into a destination vector. The function returns the number of bytes copied -into the destination vector. +populated into a destination vector. The function returns the destination vector. # Examples ```jldoctest From 7d6dd615e226c4a7d21699d3f68168be04bf9e02 Mon Sep 17 00:00:00 2001 From: Sambit Kumar Dash Date: Thu, 17 Aug 2017 23:56:56 +0530 Subject: [PATCH 08/18] Add method to stdlib index --- doc/src/stdlib/numbers.md | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/src/stdlib/numbers.md b/doc/src/stdlib/numbers.md index 91941c4a338fa..bd4aea8f52f8d 100644 --- a/doc/src/stdlib/numbers.md +++ b/doc/src/stdlib/numbers.md @@ -59,6 +59,7 @@ Base.Math.exponent Base.complex(::Complex) Base.bswap Base.hex2bytes +Base.hex2bytes! Base.bytes2hex ``` From ce6abc9215f776b41863b8299b5a52d437c2f38e Mon Sep 17 00:00:00 2001 From: Sambit Kumar Dash Date: Fri, 18 Aug 2017 08:46:39 +0530 Subject: [PATCH 09/18] length should not be used with AbstractArray() in exception handling as it can raise exception. --- base/strings/util.jl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index eecf0d667b330..adb31d4601179 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -477,7 +477,7 @@ end """ hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8}) -Convert the hexadecimal bytes vector to its binary representation. The results are +Converts the hexadecimal bytes vector to its binary representation. The results are populated into a destination vector. The function returns the destination vector. # Examples @@ -514,8 +514,7 @@ function hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8}) while !done(s, i) n = 0 c1, i = next(s, i) - done(s, i) && throw(ArgumentError( - "string length must be even: length($(repr(s))) == $(length(s))")) + done(s, i) && throw(ArgumentError("source vector length must be even")) c2, i = next(s, i) n = number_from_hex(c1) n <<= 4 @@ -537,7 +536,7 @@ end return (DIGIT_ZERO <= c <= DIGIT_NINE) ? c - DIGIT_ZERO : (LATIN_UPPER_A <= c <= LATIN_UPPER_F) ? c - LATIN_UPPER_A + 10 : (LATIN_A <= c <= LATIN_F) ? c - LATIN_A + 10 : - throw(ArgumentError("Not a hexadecimal number")) + throw(ArgumentError("not a hexadecimal number: '$(Char(c))'")) end """ From 1fc8eb4b52afe9c3fffceda81a433e36a5af617e Mon Sep 17 00:00:00 2001 From: Sambit Kumar Dash Date: Fri, 18 Aug 2017 16:51:02 +0530 Subject: [PATCH 10/18] removed resize as it may not work for subarrays. --- base/strings/util.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index adb31d4601179..99a2bc404e5e6 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -470,7 +470,11 @@ Convert the hexadecimal bytes array to its binary representation. Returns `Vector{UInt8}`, i.e. a vector of bytes. """ @inline function hex2bytes(s::AbstractVector{UInt8}) - d = zeros(UInt8, div(endof(s), 2)) + len = length(s) + if isodd(len) + throw(ArgumentError("source vector length must be even")) + end + d = zeros(UInt8, div(len, 2)) return hex2bytes!(d, s) end @@ -521,7 +525,6 @@ function hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8}) n += number_from_hex(c2) d[j+=1] = (n & 0xFF) end - resize!(d, j) return d end From c60e85db5a0ca847dca875eefddeff1fceddf017 Mon Sep 17 00:00:00 2001 From: Sambit Kumar Dash Date: Fri, 18 Aug 2017 17:55:14 +0530 Subject: [PATCH 11/18] Added subarray test cases. --- test/strings/util.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/strings/util.jl b/test/strings/util.jl index c9a4785959035..6027cc1b2ea97 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -256,7 +256,12 @@ bin_val = hex2bytes("07bf") @test hex2bytes("0123456789abcdefABCDEF") == hex2bytes(arr) @test hex2bytes!(arr1, UInt8[""...]) == arr1 @test hex2bytes(UInt8[""...]) == UInt8[] - + @test hex2bytes(view(UInt8["012345"...],1:6)) == UInt8[0x01,0x23,0x45] + @test begin + s = view(UInt8["012345ab"...],1:6) + d = view(zeros(UInt8, 10),1:3) + hex2bytes!(d,s) == UInt8[0x01,0x23,0x45] + end # odd size @test_throws ArgumentError hex2bytes(UInt8["0123456789abcdefABCDEF0"...]) From 4c2f17a9a7963493627bc955c0e63650b35fcbac Mon Sep 17 00:00:00 2001 From: Sambit Kumar Dash Date: Fri, 18 Aug 2017 18:06:19 +0530 Subject: [PATCH 12/18] typo fixed --- base/strings/util.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 99a2bc404e5e6..8b4a29865192b 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -495,7 +495,7 @@ julia> s = UInt8["01abEF"...] 0x45 0x46 -julia> d =zeros(UInt8, 3) +julia> d = zeros(UInt8, 3) 3-element Array{UInt8,1}: 0x00 0x00 From d6f25825ea6bbb8656537d89540e0e1a126441d8 Mon Sep 17 00:00:00 2001 From: Sambit Kumar Dash Date: Sat, 19 Aug 2017 10:11:19 +0530 Subject: [PATCH 13/18] Review comments incorporated related to comments and test cases. --- base/strings/util.jl | 14 +++++++++----- test/strings/util.jl | 14 +++++++------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 8b4a29865192b..783bcdb2d9220 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -466,8 +466,11 @@ end """ hex2bytes(s::AbstractVector{UInt8}) -Convert the hexadecimal bytes array to its binary representation. Returns -`Vector{UInt8}`, i.e. a vector of bytes. +Given an array `s` of ASCII codes for a sequence of hexadecimal digits, returns a +`Vector{UInt8}` of bytes corresponding to the binary representation: each successive pair +of hexadecimal digits in `s` gives the value of one byte in the return vector. + +The length of `s` must be even, and the returned array has half of the length of `s`. """ @inline function hex2bytes(s::AbstractVector{UInt8}) len = length(s) @@ -481,12 +484,13 @@ end """ hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8}) -Converts the hexadecimal bytes vector to its binary representation. The results are -populated into a destination vector. The function returns the destination vector. +Convert an array `s` of bytes representing a hexadecimal string to its binary +representation, similar to [`hex2bytes`](@ref) except that the output is written in-place +in `d`. The length of `s` must be exactly twice the length of `d`. # Examples ```jldoctest -julia> s = UInt8["01abEF"...] +julia> s = b"01abEF" 6-element Array{UInt8,1}: 0x30 0x31 diff --git a/test/strings/util.jl b/test/strings/util.jl index 6027cc1b2ea97..0cee42b535b69 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -249,22 +249,22 @@ bin_val = hex2bytes("07bf") @test_throws ArgumentError hex2bytes("0123456789abcdefABCDEFGH") @testset "Issue 23161" begin - arr = UInt8["0123456789abcdefABCDEF"...] + arr = b"0123456789abcdefABCDEF" arr1 = Vector{UInt8}(11) @test hex2bytes!(arr1, arr) == arr1 @test "0123456789abcdefabcdef" == bytes2hex(arr1) @test hex2bytes("0123456789abcdefABCDEF") == hex2bytes(arr) - @test hex2bytes!(arr1, UInt8[""...]) == arr1 - @test hex2bytes(UInt8[""...]) == UInt8[] - @test hex2bytes(view(UInt8["012345"...],1:6)) == UInt8[0x01,0x23,0x45] + @test hex2bytes!(arr1, b"") == arr1 + @test hex2bytes(b"") == UInt8[] + @test hex2bytes(view(b"012345",1:6)) == UInt8[0x01,0x23,0x45] @test begin - s = view(UInt8["012345ab"...],1:6) + s = view(b"012345ab",1:6) d = view(zeros(UInt8, 10),1:3) hex2bytes!(d,s) == UInt8[0x01,0x23,0x45] end # odd size - @test_throws ArgumentError hex2bytes(UInt8["0123456789abcdefABCDEF0"...]) + @test_throws ArgumentError hex2bytes(b"0123456789abcdefABCDEF0") #non-hex characters - @test_throws ArgumentError hex2bytes(UInt8["0123456789abcdefABCDEFGH"...]) + @test_throws ArgumentError hex2bytes(b"0123456789abcdefABCDEFGH") end From 5c9fbd54e943940445962071f90884493e2c2efc Mon Sep 17 00:00:00 2001 From: Sambit Kumar Dash Date: Sat, 19 Aug 2017 20:14:12 +0530 Subject: [PATCH 14/18] Checking in code provided by stevengj There are some test case failures reported related to views. --- base/strings/util.jl | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 783bcdb2d9220..9e8cc89998e3d 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -513,38 +513,24 @@ julia> hex2bytes!(d, s) ``` """ function hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8}) - i, j = start(s), 0 - # This line is important as this ensures computation happens in word boundary and not - # byte boundary. Boundary computation can be almost 10 times slower - n::UInt = 0 - c1::UInt = 0 - c2::UInt = 0 + if 2length(d) != length(s) + isodd(length(s)) && throw(ArgumentError("input hex array must have even length")) + throw(ArgumentError("output array must be half length of input array")) + end + i, j = start(s), start(d) while !done(s, i) - n = 0 - c1, i = next(s, i) - done(s, i) && throw(ArgumentError("source vector length must be even")) - c2, i = next(s, i) - n = number_from_hex(c1) - n <<= 4 - n += number_from_hex(c2) - d[j+=1] = (n & 0xFF) + @inbounds d[j] = number_from_hex(s[i]) << 4 + number_from_hex(s[i+1]) + i += 2 + j += 1 end return d end -@inline function number_from_hex(c::UInt) - DIGIT_ZERO = UInt('0') - DIGIT_NINE = UInt('9') - LATIN_UPPER_A = UInt('A') - LATIN_UPPER_F = UInt('F') - LATIN_A = UInt('a') - LATIN_F = UInt('f') - - return (DIGIT_ZERO <= c <= DIGIT_NINE) ? c - DIGIT_ZERO : - (LATIN_UPPER_A <= c <= LATIN_UPPER_F) ? c - LATIN_UPPER_A + 10 : - (LATIN_A <= c <= LATIN_F) ? c - LATIN_A + 10 : - throw(ArgumentError("not a hexadecimal number: '$(Char(c))'")) -end +@inline number_from_hex(c) = + (UInt8('0') <= c <= UInt8('9')) ? c - UInt8('0') : + (UInt8('A') <= c <= UInt8('F')) ? c - (UInt8('A') - 0x0a) : + (UInt8('a') <= c <= UInt8('f')) ? c - (UInt8('a') - 0x0a) : + throw(ArgumentError("byte is not a ASCII hexadecimal digit")) """ bytes2hex(bin_arr::Array{UInt8, 1}) -> String From 6f5fe836240c631351a9b7c53d40ac3d2d92c533 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 19 Aug 2017 10:56:46 -0400 Subject: [PATCH 15/18] Update util.jl further condense implementation and documentation, merging with `hex2bytes(s::AbstractString)` --- base/strings/util.jl | 101 ++++++++++++++----------------------------- 1 file changed, 32 insertions(+), 69 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 9e8cc89998e3d..eab4b3ff99546 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -425,72 +425,26 @@ replace(s::AbstractString, pat, f) = replace_new(String(s), pat, f, typemax(Int) # hex <-> bytes conversion """ - hex2bytes(s::AbstractString) + hex2bytes(s::Union{AbstractString,AbstractVector{UInt8}}) -Convert an arbitrarily long hexadecimal string to its binary representation. Returns an -`Array{UInt8,1}`, i.e. an array of bytes. +Given a string or array `s` of ASCII codes for a sequence of hexadecimal digits, returns a +`Vector{UInt8}` of bytes corresponding to the binary representation: each successive pair +of hexadecimal digits in `s` gives the value of one byte in the return vector. + +The length of `s` must be even, and the returned array has half of the length of `s`. +See also [`hex2bytes!`](@ref) for an in-place version, and [`bytes2hex`](@ref) for the inverse. # Examples ```jldoctest -julia> a = hex(12345) +julia> s = hex(12345) "3039" -julia> hex2bytes(a) +julia> hex2bytes(s) 2-element Array{UInt8,1}: 0x30 0x39 -``` -""" -function hex2bytes(s::AbstractString) - a = zeros(UInt8, div(endof(s), 2)) - i, j = start(s), 0 - while !done(s, i) - c, i = next(s, i) - n = '0' <= c <= '9' ? c - '0' : - 'a' <= c <= 'f' ? c - 'a' + 10 : - 'A' <= c <= 'F' ? c - 'A' + 10 : - throw(ArgumentError("not a hexadecimal string: $(repr(s))")) - done(s, i) && - throw(ArgumentError("string length must be even: length($(repr(s))) == $(length(s))")) - c, i = next(s, i) - n = '0' <= c <= '9' ? n << 4 + c - '0' : - 'a' <= c <= 'f' ? n << 4 + c - 'a' + 10 : - 'A' <= c <= 'F' ? n << 4 + c - 'A' + 10 : - throw(ArgumentError("not a hexadecimal string: $(repr(s))")) - a[j += 1] = n - end - resize!(a, j) - return a -end - -""" - hex2bytes(s::AbstractVector{UInt8}) -Given an array `s` of ASCII codes for a sequence of hexadecimal digits, returns a -`Vector{UInt8}` of bytes corresponding to the binary representation: each successive pair -of hexadecimal digits in `s` gives the value of one byte in the return vector. - -The length of `s` must be even, and the returned array has half of the length of `s`. -""" -@inline function hex2bytes(s::AbstractVector{UInt8}) - len = length(s) - if isodd(len) - throw(ArgumentError("source vector length must be even")) - end - d = zeros(UInt8, div(len, 2)) - return hex2bytes!(d, s) -end - -""" - hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8}) - -Convert an array `s` of bytes representing a hexadecimal string to its binary -representation, similar to [`hex2bytes`](@ref) except that the output is written in-place -in `d`. The length of `s` must be exactly twice the length of `d`. - -# Examples -```jldoctest -julia> s = b"01abEF" +julia> a = b"01abEF" 6-element Array{UInt8,1}: 0x30 0x31 @@ -499,29 +453,38 @@ julia> s = b"01abEF" 0x45 0x46 -julia> d = zeros(UInt8, 3) -3-element Array{UInt8,1}: - 0x00 - 0x00 - 0x00 - -julia> hex2bytes!(d, s) +julia> hex2bytes(a) 3-element Array{UInt8,1}: 0x01 0xab 0xef ``` """ +function hex2bytes end + +hex2bytes(s::AbstractString) = hex2bytes(Vector{UInt8}(String(s))) + +function hex2bytes(s::AbstractVector{UInt8}) + len = length(s) + isodd(len) && throw(ArgumentError("source vector length must be even")) + return hex2bytes!(Vector{UInt8}(len >> 1), s) +end + +""" + hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8}) + +Convert an array `s` of bytes representing a hexadecimal string to its binary +representation, similar to [`hex2bytes`](@ref) except that the output is written in-place +in `d`. The length of `s` must be exactly twice the length of `d`. +""" function hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8}) if 2length(d) != length(s) isodd(length(s)) && throw(ArgumentError("input hex array must have even length")) throw(ArgumentError("output array must be half length of input array")) end - i, j = start(s), start(d) - while !done(s, i) - @inbounds d[j] = number_from_hex(s[i]) << 4 + number_from_hex(s[i+1]) - i += 2 - j += 1 + j = start(d) - 1 + for i = start(s):2:endof(s) + @inbounds d[j += 1] = number_from_hex(s[i]) << 4 + number_from_hex(s[i+1]) end return d end @@ -530,7 +493,7 @@ end (UInt8('0') <= c <= UInt8('9')) ? c - UInt8('0') : (UInt8('A') <= c <= UInt8('F')) ? c - (UInt8('A') - 0x0a) : (UInt8('a') <= c <= UInt8('f')) ? c - (UInt8('a') - 0x0a) : - throw(ArgumentError("byte is not a ASCII hexadecimal digit")) + throw(ArgumentError("byte is not an ASCII hexadecimal digit")) """ bytes2hex(bin_arr::Array{UInt8, 1}) -> String From 8cd51b581feef0ed5ab5f7f15756452261e6a517 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 19 Aug 2017 11:10:51 -0400 Subject: [PATCH 16/18] test updates use ===, not ==, to test that operation occurs in-place, and `hex2bytes!` now throws an error for incorrect output length --- test/strings/util.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/strings/util.jl b/test/strings/util.jl index 0cee42b535b69..18f2d5002a6e8 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -250,11 +250,11 @@ bin_val = hex2bytes("07bf") @testset "Issue 23161" begin arr = b"0123456789abcdefABCDEF" - arr1 = Vector{UInt8}(11) - @test hex2bytes!(arr1, arr) == arr1 + arr1 = Vector{UInt8}(length(arr) >> 1) + @test hex2bytes!(arr1, arr) === arr1 # check in-place @test "0123456789abcdefabcdef" == bytes2hex(arr1) @test hex2bytes("0123456789abcdefABCDEF") == hex2bytes(arr) - @test hex2bytes!(arr1, b"") == arr1 + @test_throws ArgumentError hex2bytes!(arr1, b"") # incorrect arr1 length @test hex2bytes(b"") == UInt8[] @test hex2bytes(view(b"012345",1:6)) == UInt8[0x01,0x23,0x45] @test begin From 71c7d607f60822fcad3ffee9f2e90d9874eae9dd Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 19 Aug 2017 12:31:18 -0400 Subject: [PATCH 17/18] fix hex2bytes for views (where start is not an index) --- base/strings/util.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index eab4b3ff99546..8e413e1afa1d2 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -482,8 +482,8 @@ function hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8}) isodd(length(s)) && throw(ArgumentError("input hex array must have even length")) throw(ArgumentError("output array must be half length of input array")) end - j = start(d) - 1 - for i = start(s):2:endof(s) + j = first(eachindex(d)) - 1 + for i = first(eachindex(s)):2:endof(s) @inbounds d[j += 1] = number_from_hex(s[i]) << 4 + number_from_hex(s[i+1]) end return d From f09607a410e0c4212849fea8cc26bb03d82b5e96 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Sat, 19 Aug 2017 14:58:27 -0400 Subject: [PATCH 18/18] rm redundant length check --- base/strings/util.jl | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 8e413e1afa1d2..d6ffe2e94fe71 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -463,12 +463,7 @@ julia> hex2bytes(a) function hex2bytes end hex2bytes(s::AbstractString) = hex2bytes(Vector{UInt8}(String(s))) - -function hex2bytes(s::AbstractVector{UInt8}) - len = length(s) - isodd(len) && throw(ArgumentError("source vector length must be even")) - return hex2bytes!(Vector{UInt8}(len >> 1), s) -end +hex2bytes(s::AbstractVector{UInt8}) = hex2bytes!(Vector{UInt8}(length(s) >> 1), s) """ hex2bytes!(d::AbstractVector{UInt8}, s::AbstractVector{UInt8})