diff --git a/velox/common/encode/Base32.cpp b/velox/common/encode/Base32.cpp index dea8946323e7b..f034b001407af 100644 --- a/velox/common/encode/Base32.cpp +++ b/velox/common/encode/Base32.cpp @@ -54,15 +54,6 @@ constexpr ReverseIndex kBase32ReverseIndexTable = { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; -/// Verify that for each 32 entries in kBase32Charset, the corresponding entry -/// in kBase32ReverseIndexTable is correct. -static_assert( - checkForwardIndex( - sizeof(kBase32Charset) / 2 - 1, - kBase32Charset, - kBase32ReverseIndexTable), - "kBase32Charset has incorrect entries"); - /// Verify that for every entry in kBase32ReverseIndexTable, the corresponding /// entry in kBase32Charset is correct. static_assert( @@ -83,13 +74,13 @@ size_t Base32::calculateDecodedSize(const char* data, size_t& size) { /// If padded, ensure that the string length is a multiple of the encoded /// block size. if (size % kEncodedBlockByteSize != 0) { - throw EncoderException( + VELOX_USER_FAIL( "Base32::decode() - invalid input string: " "string length is not a multiple of 8."); } auto needed = (size * kBinaryBlockByteSize) / kEncodedBlockByteSize; - auto padding = countPadding(data, size); + auto padding = numPadding(data, size); size -= padding; // Adjust the needed size by deducting the bytes corresponding to the @@ -105,7 +96,7 @@ size_t Base32::calculateDecodedSize(const char* data, size_t& size) { // Adjust the needed size for extra bytes, if present. if (extra) { if ((extra == 6) || (extra == 3) || (extra == 1)) { - throw EncoderException( + VELOX_USER_FAIL( "Base32::decode() - invalid input string: " "string length cannot be 6, 3 or 1 more than a multiple of 8."); } @@ -133,7 +124,7 @@ size_t Base32::decodeImpl( auto needed = calculateDecodedSize(src, src_len); if (dst_len < needed) { - throw EncoderException( + VELOX_USER_FAIL( "Base32::decode() - invalid output string: " "output string is too small."); } diff --git a/velox/common/encode/Base32.h b/velox/common/encode/Base32.h index bf978a5367017..ebce9f01da19a 100644 --- a/velox/common/encode/Base32.h +++ b/velox/common/encode/Base32.h @@ -20,7 +20,8 @@ #include #include -#include "velox/common/encode/EncoderUtils.h" + +#include "EncoderUtils.h" namespace facebook::velox::encoding { diff --git a/velox/common/encode/Base64.cpp b/velox/common/encode/Base64.cpp index 4135935189baf..1832706a8108d 100644 --- a/velox/common/encode/Base64.cpp +++ b/velox/common/encode/Base64.cpp @@ -20,6 +20,7 @@ #include #include +#include "velox/common/encode/EncoderUtils.h" namespace facebook::velox::encoding { // Constants defining the size in bytes of binary and encoded blocks for Base64 @@ -81,13 +82,6 @@ constexpr const Base64::ReverseIndex kBase64UrlReverseIndexTable = { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; -constexpr bool checkForwardIndex( - uint8_t idx, - const Base64::Charset& charset, - const Base64::ReverseIndex& table) { - return (table[static_cast(charset[idx])] == idx) && - (idx > 0 ? checkForwardIndex(idx - 1, charset, table) : true); -} // Verify that for every entry in kBase64Charset, the corresponding entry // in kBase64ReverseIndexTable is correct. static_assert( @@ -321,7 +315,7 @@ uint8_t Base64::Base64ReverseLookup( const Base64::ReverseIndex& reverse_lookup) { auto curr = reverse_lookup[(uint8_t)p]; if (curr >= 0x40) { - throw Base64Exception( + VELOX_USER_FAIL( "Base64::decode() - invalid input string: invalid characters"); } @@ -344,13 +338,13 @@ size_t Base64::calculateDecodedSize(const char* data, size_t& size) { // If padded, ensure that the string length is a multiple of the encoded // block size if (size % kEncodedBlockByteSize != 0) { - throw Base64Exception( + VELOX_USER_FAIL( "Base64::decode() - invalid input string: " "string length is not a multiple of 4."); } auto needed = (size * kBinaryBlockByteSize) / kEncodedBlockByteSize; - auto padding = countPadding(data, size); + auto padding = numPadding(data, size); size -= padding; // Adjust the needed size by deducting the bytes corresponding to the @@ -366,7 +360,7 @@ size_t Base64::calculateDecodedSize(const char* data, size_t& size) { // Adjust the needed size for extra bytes, if present if (extra) { if (extra == 1) { - throw Base64Exception( + VELOX_USER_FAIL( "Base64::decode() - invalid input string: " "string length cannot be 1 more than a multiple of 4."); } @@ -388,7 +382,7 @@ size_t Base64::decodeImpl( auto needed = calculateDecodedSize(src, src_len); if (dst_len < needed) { - throw Base64Exception( + VELOX_USER_FAIL( "Base64::decode() - invalid output string: " "output string is too small."); } diff --git a/velox/common/encode/Base64.h b/velox/common/encode/Base64.h index 2c7de463ea6fa..a723edfdb9508 100644 --- a/velox/common/encode/Base64.h +++ b/velox/common/encode/Base64.h @@ -22,18 +22,9 @@ #include #include -namespace facebook::velox::encoding { - -class Base64Exception : public std::exception { - public: - explicit Base64Exception(const char* msg) : msg_(msg) {} - const char* what() const noexcept override { - return msg_; - } +#include "EncoderUtils.h" - protected: - const char* msg_; -}; +namespace facebook::velox::encoding { class Base64 { public: @@ -94,19 +85,6 @@ class Base64 { constexpr static char kBase64Pad = '='; private: - static inline bool isPadded(const char* data, size_t len) { - return (len > 0 && data[len - 1] == kBase64Pad); - } - - static inline size_t countPadding(const char* src, size_t len) { - size_t numPadding{0}; - while (len > 0 && src[len - 1] == kBase64Pad) { - numPadding++; - len--; - } - - return numPadding; - } static uint8_t Base64ReverseLookup(char p, const ReverseIndex& table); diff --git a/velox/common/encode/EncoderUtils.h b/velox/common/encode/EncoderUtils.h index b863669547503..c29836dba7c48 100644 --- a/velox/common/encode/EncoderUtils.h +++ b/velox/common/encode/EncoderUtils.h @@ -20,25 +20,25 @@ #include #include - +#include "velox/common/base/Exceptions.h" namespace facebook::velox::encoding { -class EncoderException : public std::exception { - public: - explicit EncoderException(const char* msg) : msg_(msg) {} - const char* what() const noexcept override { - return msg_; - } - - protected: - const char* msg_; -}; +constexpr size_t kCharsetSize = 64; +constexpr size_t kReverseIndexSize = 256; -constexpr size_t CHARSET_SIZE = 64; -constexpr size_t REVERSE_INDEX_SIZE = 256; +// A character set used for encoding purposes. This array contains the specific +// characters that form the encoding scheme, with each position representing a +// distinct character according to the encoding base. For instance, in base64 +// encoding, this array will contain 64 different characters to represent all +// possible base64 values. +using Charset = std::array; -using Charset = std::array; -using ReverseIndex = std::array; +// A reverse lookup table for decoding purposes. It maps each possible encoded +// character (represented as an unsigned 8-bit integer index) to its +// corresponding numeric value within the encoding base. Characters that are not +// part of the encoding set are mapped to 255, which indicates an invalid or +// unrecognized character. +using ReverseIndex = std::array; /// Padding character used in encoding, placed at the end of the encoded string /// when padding is necessary. @@ -50,7 +50,7 @@ inline bool isPadded(const char* data, size_t len) { } // Counts the number of padding characters in encoded data. -inline size_t countPadding(const char* src, size_t len) { +inline size_t numPadding(const char* src, size_t len) { size_t padding_count = 0; for (; len > 0 && src[len - 1] == kPadding; --len) { padding_count++; @@ -58,14 +58,27 @@ inline size_t countPadding(const char* src, size_t len) { return padding_count; } -// Gets value corresponding to an encoded character -inline uint8_t -baseReverseLookup(int base, char p, const ReverseIndex& reverse_lookup) { - auto curr = reverse_lookup[(uint8_t)p]; - // Value of encoded character shall be less than base. +/// Retrieves the decoded numeric value corresponding to an encoded character. +/// +/// Parameters: +/// - base: The numerical base used for decoding (e.g., base 64). +/// - p: The encoded character to decode, provided as an integer or character. +/// - index: A lookup table that maps encoded characters to their numeric +/// values. +/// +/// Returns: +/// - The numeric value associated with the encoded character `p` according to +/// the lookup table. +/// +/// Throws: +/// - VELOX_USER_FAIL: If the numeric value of the encoded character is greater +/// than or equal to the specified base, indicating an invalid encoded +/// character. +inline uint8_t baseReverseLookup(int base, char p, const ReverseIndex& reverseIndex) { + auto curr = reverseIndex[(uint8_t)p]; + if (curr >= base) { - throw EncoderException( - "decode() - invalid input string: invalid characters"); + VELOX_USER_FAIL("decode() - invalid input string: invalid characters"); } return curr; @@ -75,9 +88,9 @@ baseReverseLookup(int base, char p, const ReverseIndex& reverse_lookup) { constexpr bool checkForwardIndex( uint8_t idx, const Charset& charset, - const ReverseIndex& table) { + const ReverseIndex& reverseIndex) { for (uint8_t i = 0; i <= idx; ++i) { - if (!(table[static_cast(charset[i])] == i)) { + if (!(reverseIndex[static_cast(charset[i])] == i)) { return false; } } @@ -107,15 +120,15 @@ constexpr bool checkReverseIndex( uint8_t idx, const Charset& charset, int base, - const ReverseIndex& table) { + const ReverseIndex& reverseIndex) { for (uint8_t currentIdx = idx;; --currentIdx) { - if (table[currentIdx] == 255) { + if (reverseIndex[currentIdx] == 255) { if (findCharacterInCharSet( charset, base, 0, static_cast(currentIdx))) { return false; // Character should not be found } } else { - if (!(charset[table[currentIdx]] == currentIdx)) { + if (!(charset[reverseIndex[currentIdx]] == currentIdx)) { return false; // Character at table index does not match currentIdx } } diff --git a/velox/common/encode/tests/Base32Test.cpp b/velox/common/encode/tests/Base32Test.cpp index 7e26878a492ad..f235ec273d656 100644 --- a/velox/common/encode/tests/Base32Test.cpp +++ b/velox/common/encode/tests/Base32Test.cpp @@ -50,8 +50,7 @@ TEST_F(Base32Test, calculateDecodedSizeProperSize) { TEST_F(Base32Test, errorWhenDecodedStringPartiallyPadded) { size_t encoded_size = 9; EXPECT_THROW( - Base32::calculateDecodedSize("MFRA====", encoded_size), - facebook::velox::encoding::EncoderException); + Base32::calculateDecodedSize("MFRA====", encoded_size), std::exception); } } // namespace facebook::velox::encoding \ No newline at end of file diff --git a/velox/common/encode/tests/EncoderUtilsTests.cpp b/velox/common/encode/tests/EncoderUtilsTests.cpp index 841095c1d6430..70b526d332fbf 100644 --- a/velox/common/encode/tests/EncoderUtilsTests.cpp +++ b/velox/common/encode/tests/EncoderUtilsTests.cpp @@ -21,15 +21,15 @@ namespace facebook::velox::encoding { class EncoderUtilsTest : public ::testing::Test {}; -TEST_F(EncoderUtilsTest, ChecksPadding) { +TEST_F(EncoderUtilsTest, isPadded) { EXPECT_TRUE(isPadded("ABC=", 4)); EXPECT_FALSE(isPadded("ABC", 3)); } -TEST_F(EncoderUtilsTest, CountsPaddingCorrectly) { - EXPECT_EQ(0, countPadding("ABC", 3)); - EXPECT_EQ(1, countPadding("ABC=", 4)); - EXPECT_EQ(2, countPadding("AB==", 4)); +TEST_F(EncoderUtilsTest, numPadding) { + EXPECT_EQ(0, numPadding("ABC", 3)); + EXPECT_EQ(1, numPadding("ABC=", 4)); + EXPECT_EQ(2, numPadding("AB==", 4)); } constexpr Charset testCharset = { @@ -59,18 +59,17 @@ constexpr ReverseIndex testReverseIndex = { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}; -TEST_F(EncoderUtilsTest, HandlesLookupAndExceptions) { +TEST_F(EncoderUtilsTest, baseReverseLookup) { int base = 64; EXPECT_NO_THROW(baseReverseLookup(base, 'A', testReverseIndex)); - EXPECT_THROW( - baseReverseLookup(base, '=', testReverseIndex), EncoderException); + EXPECT_THROW(baseReverseLookup(base, '=', testReverseIndex), std::exception); } -TEST_F(EncoderUtilsTest, ValidatesCharsetWithReverseIndex) { +TEST_F(EncoderUtilsTest, checkForwardIndex) { EXPECT_TRUE(checkForwardIndex(63, testCharset, testReverseIndex)); } -TEST_F(EncoderUtilsTest, ValidatesReverseIndexWithCharset) { +TEST_F(EncoderUtilsTest, checkReverseIndex) { EXPECT_TRUE(checkReverseIndex(255, testCharset, 64, testReverseIndex)); } diff --git a/velox/functions/prestosql/BinaryFunctions.h b/velox/functions/prestosql/BinaryFunctions.h index a597a499de279..2b384accce0d7 100644 --- a/velox/functions/prestosql/BinaryFunctions.h +++ b/velox/functions/prestosql/BinaryFunctions.h @@ -294,7 +294,7 @@ struct FromBase64Function { encoding::Base64::calculateDecodedSize(input.data(), inputSize)); encoding::Base64::decode( input.data(), inputSize, result.data(), result.size()); - } catch (const encoding::Base64Exception& e) { + } catch (const std::exception& e) { VELOX_USER_FAIL(e.what()); } } @@ -329,7 +329,6 @@ struct ToBase64UrlFunction { template struct FromBase32Function { VELOX_DEFINE_FUNCTION_TYPES(T); - FOLLY_ALWAYS_INLINE void call( out_type& result, const arg_type& input) { @@ -339,7 +338,7 @@ struct FromBase32Function { encoding::Base32::calculateDecodedSize(input.data(), inputSize)); encoding::Base32::decode( input.data(), inputSize, result.data(), result.size()); - } catch (const encoding::EncoderException& e) { + } catch (const std::exception& e) { VELOX_USER_FAIL(e.what()); } }