Refactored for better maintenance

franz1981 · Jan 20, 2025 · 0115772 · 0115772
1 parent 7ec85cc
commit 0115772
Showing 1 changed file with 99 additions and 46 deletions.
diff --git a/independent-projects/qute/core/src/main/java/io/quarkus/qute/JsonEscaper.java b/independent-projects/qute/core/src/main/java/io/quarkus/qute/JsonEscaper.java
@@ -7,47 +7,110 @@
 
 public final class JsonEscaper implements ResultMapper {
 
-    private static final int NO_NEED_REPLACEMENT_DATA = 0x0100_0000;
-    // this int is packing:
-    // length of replacement (1 byte), padding byte,
-    // additional char (1 byte), replacement char (1 byte) = 4 bytes
-    private static final int[] REPLACEMENTS_DATA = new int[256];
-    private static final String[] CTRL_REPLACEMENTS = new String[32];
-
-    static {
-        // All Unicode characters may be placed within the quotation marks,
-        // except for the characters that MUST be escaped: quotation mark,
-        // reverse solidus, and the control characters (U+0000 through U+001F).
-        // See also https://datatracker.ietf.org/doc/html/rfc8259#autoid-10
+    private static final int LENGTH_BITS_OFFSET = 24;
+    private static final int SECOND_CHAR_OFFSET = 8;
+    private static final int MAX_LATIN_CHAR = 255;
+    private static final int NO_REPLACEMENT_DATA = packReplacementData(0, 0, 1);
+    private static final int[] REPLACEMENTS_DATA = createReplacementData();
 
+    /**
+     * Packs the replacement data into a single int.<br>
+     * The replacement data is packed as follows:<br>
+     * The diagram below shows the layout of the int:<br>
+     * The visual order chosen reflects what Integer::toHexString would print: most significant byte on the left.<br>
+     *
+     * <pre>
+     *         |----------|-----------|-------------|------------|
+     *  bits   |   24-31  |   16-23   |    8-15     |    0-7     |
+     *  field  |  length  |  padding  |   2nd char  |  1st char  |
+     *  values |  {1,2,6} |    [0]    |   [0-255]   |   [0-255]  |
+     *         |----------|-----------|-------------|------------|
+     * </pre>
+     *
+     */
+    private static int packReplacementData(int first, int second, int length) {
+        if (length != 1 && length != 2 && length != 6) {
+            throw new IllegalArgumentException("Length must be 1, 2 or 6 but was: " + length);
+        }
+        if (first < 0 || first > 255) {
+            throw new IllegalArgumentException("First char must be in range [0, 255] but was: " + first);
+        }
+        if (second < 0 || second > 255) {
+            throw new IllegalArgumentException("Second char must be in range [0, 255] but was: " + second);
+        }
+        return (first | (second << SECOND_CHAR_OFFSET)) | (length << LENGTH_BITS_OFFSET);
+    }
+
+    private static int replacementLength(int replacementData) {
+        // length isn't bigger than 127, which means preserving sign (which is faster) won't affect the shift
+        return replacementData >> LENGTH_BITS_OFFSET;
+    }
+
+    private static char secondChar(int replacementData) {
+        // the byte above the second char is padding (always 0), so casting to char (low 16 bits) yields just the second char
+        return (char) (replacementData >> SECOND_CHAR_OFFSET);
+    }
+
+    private static char firstChar(int replacementData) {
+        // mask out everything but the lowest byte, which holds the first char
+        return (char) (replacementData & 0xFF);
+    }
+
+    private static int toLatinChar(int c) {
+        return c & 0xFF;
+    }
+
+    private static int replacementDataOf(char c) {
+        // NOTE: char type cannot be negative
+        // Both non-Latin chars and Latin chars with length 1 don't need replacement
+        if (c > MAX_LATIN_CHAR) {
+            return NO_REPLACEMENT_DATA;
+        }
+        return REPLACEMENTS_DATA[toLatinChar(c)];
+    }
+
+    private static void writeReplacementData(char[] out, int pos, int replacementData) {
+        out[pos] = firstChar(replacementData);
+        out[pos + 1] = secondChar(replacementData);
+    }
+
+    /**
+     * All Unicode characters may be placed within the quotation marks,
+     * except for the characters that MUST be escaped: quotation mark,
+     * reverse solidus, and the control characters (U+0000 through U+001F).
+     * See also https://datatracker.ietf.org/doc/html/rfc8259#autoid-10
+     */
+    private static int[] createReplacementData() {
+        int[] table = new int[256];
         // by default ctrl ASCII chars replace 6 chars
-        Arrays.fill(REPLACEMENTS_DATA, 0, 32, 0x0600_0000);
+        Arrays.fill(table, 0, 32, packReplacementData(0, 0, 6));
         // default Latin chars just replace themselves
         for (int i = 32; i < 256; i++) {
-            REPLACEMENTS_DATA[i] = i | (1 << 24);
+            table[i] = packReplacementData(i, 0, 1);
         }
         // special ASCII chars - which include some control chars: replace 2 chars
-        REPLACEMENTS_DATA['"'] = (0x005c | (('"' & 0xFF) << 8)) | (2 << 24);
-        REPLACEMENTS_DATA['\\'] = (0x005c | (('\\') << 8)) | (2 << 24);
-        REPLACEMENTS_DATA['\r'] = (0x005c | (('r') << 8)) | (2 << 24);
-        REPLACEMENTS_DATA['\b'] = (0x005c | (('b') << 8)) | (2 << 24);
-        REPLACEMENTS_DATA['\n'] = (0x005c | (('n') << 8)) | (2 << 24);
-        REPLACEMENTS_DATA['\t'] = (0x005c | (('t') << 8)) | (2 << 24);
-        REPLACEMENTS_DATA['\f'] = (0x005c | (('f') << 8)) | (2 << 24);
-        REPLACEMENTS_DATA['/'] = (0x005c | (('/') << 8)) | (2 << 24);
+        table['"'] = packReplacementData('\\', '"', 2);
+        table['\\'] = packReplacementData('\\', '\\', 2);
+        table['\r'] = packReplacementData('\\', 'r', 2);
+        table['\b'] = packReplacementData('\\', 'b', 2);
+        table['\n'] = packReplacementData('\\', 'n', 2);
+        table['\t'] = packReplacementData('\\', 't', 2);
+        table['\f'] = packReplacementData('\\', 'f', 2);
+        table['/'] = packReplacementData('\\', '/', 2);
+        return table;
     }
 
-    private static int replacementDataOf(char c) {
-        // no need to escape if the length is 1
-        if (c >= 256) {
-            return NO_NEED_REPLACEMENT_DATA;
-        }
-        return REPLACEMENTS_DATA[c];
-    }
+    // Lazily filled cache of the control chars replacements, indexed by char code [0-31]
+    private static final char[][] CTRL_REPLACEMENTS = new char[32][];
 
-    private static int replacementLength(int replacementData) {
-        // length isn't bigger than 127, which means preserving sign (which is faster) won't affect the shift
-        return replacementData >> 24;
+    private static char[] doEscapeCtrl(int c) {
+        var replacement = CTRL_REPLACEMENTS[c];
+        if (replacement == null) {
+            replacement = String.format("\\u%04x", c).toCharArray();
+            assert replacement.length == 6;
+            CTRL_REPLACEMENTS[c] = replacement;
+        }
+        return replacement;
     }
 
     static String escape(String toEscape) {
@@ -71,20 +134,19 @@ private static String doEscape(String value, int firstToReplace, int firstReplac
         int outputLength = firstToReplace;
         for (int i = 0; i < remainingChars; i++) {
             char c = value.charAt(firstToReplace + i);
-            if (c < 256) {
-                int latinChar = c & 0x00FF;
+            if (c <= MAX_LATIN_CHAR) {
+                int latinChar = toLatinChar(c);
                 int replacementData = REPLACEMENTS_DATA[latinChar];
                 int replacementLength = replacementLength(replacementData);
                 if (replacementLength == 6) {
                     var ctrlEscape = doEscapeCtrl(c);
                     buffer = ensureCapacity(buffer, outputLength, 6, (remainingChars - i) - 1);
-                    ctrlEscape.getChars(0, 6, buffer, outputLength);
+                    System.arraycopy(ctrlEscape, 0, buffer, outputLength, ctrlEscape.length);
                     outputLength += 6;
                 } else {
                     assert replacementLength == 1 || replacementLength == 2;
                     buffer = ensureCapacity(buffer, outputLength, 2, (remainingChars - i) - 1);
-                    buffer[outputLength] = (char) (replacementData & 0xFF);
-                    buffer[outputLength + 1] = (char) (replacementData >> 8);
+                    writeReplacementData(buffer, outputLength, replacementData);
                     outputLength += replacementLength;
                 }
             } else {
@@ -129,13 +191,4 @@ public boolean appliesTo(Origin origin, Object result) {
     public String map(Object result, Expression expression) {
         return escape(result.toString());
     }
-
-    private static String doEscapeCtrl(int c) {
-        var replacement = CTRL_REPLACEMENTS[c];
-        if (replacement == null) {
-            replacement = String.format("\\u%04x", c);
-            CTRL_REPLACEMENTS[c] = replacement;
-        }
-        return replacement;
-    }
 }