Skip to content

Commit

Permalink
Refactored for better maintenance
Browse files Browse the repository at this point in the history
  • Loading branch information
franz1981 committed Jan 20, 2025
1 parent 7ec85cc commit 0115772
Showing 1 changed file with 99 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,47 +7,110 @@

public final class JsonEscaper implements ResultMapper {

// Packed entry whose length byte (bits 24-31) is 1 and whose char bytes are 0;
// replacementDataOf returns it for chars >= 256, which never need escaping.
private static final int NO_NEED_REPLACEMENT_DATA = 0x0100_0000;
// Each int of the table below packs 4 bytes, from most to least significant:
// length of replacement (1 byte), padding byte,
// additional char (1 byte), replacement char (1 byte).
// The table is indexed by the char code, hence 256 entries for [0, 255].
private static final int[] REPLACEMENTS_DATA = new int[256];
// Lazily filled cache of the escape strings of the control chars [0, 31]; see doEscapeCtrl.
private static final String[] CTRL_REPLACEMENTS = new String[32];

static {
// All Unicode characters may be placed within the quotation marks,
// except for the characters that MUST be escaped: quotation mark,
// reverse solidus, and the control characters (U+0000 through U+001F).
// See also https://datatracker.ietf.org/doc/html/rfc8259#autoid-10
// Shift that moves the replacement length into (or out of) bits 24-31 of a packed entry.
private static final int LENGTH_BITS_OFFSET = 24;
// Shift that moves the second replacement char into (or out of) bits 8-15 of a packed entry.
private static final int SECOND_CHAR_OFFSET = 8;
// Highest char code that has its own entry in REPLACEMENTS_DATA.
private static final int MAX_LATIN_CHAR = 255;
// Packed entry with length 1 and both chars set to 0; replacementDataOf returns it
// for chars above MAX_LATIN_CHAR.
private static final int NO_REPLACEMENT_DATA = packReplacementData(0, 0, 1);
// Packed replacement entries indexed by char code, built once by createReplacementData().
private static final int[] REPLACEMENTS_DATA = createReplacementData();

/**
 * Packs one replacement entry into a single {@code int}.
 *
 * <p>The layout below is shown most-significant byte first, i.e. in the order
 * {@link Integer#toHexString(int)} prints the value:
 *
 * <pre>
 *        |----------|-----------|-------------|------------|
 * bits   |  24-31   |   16-23   |    8-15     |    0-7     |
 * field  |  length  |  padding  |  2nd char   |  1st char  |
 * values | {1,2,6}  |    [0]    |   [0-255]   |  [0-255]   |
 *        |----------|-----------|-------------|------------|
 * </pre>
 *
 * @param first  first replacement char, in the range [0, 255]
 * @param second second replacement char, in the range [0, 255]
 * @param length number of chars the replacement occupies: 1, 2 or 6
 * @return the packed entry
 * @throws IllegalArgumentException if {@code length} is not 1, 2 or 6, or if
 *         {@code first} or {@code second} is outside [0, 255]
 */
private static int packReplacementData(int first, int second, int length) {
    final boolean supportedLength = length == 1 || length == 2 || length == 6;
    if (!supportedLength) {
        throw new IllegalArgumentException("Length must be 1, 2 or 6 but was: " + length);
    }
    final boolean firstFitsInByte = first >= 0 && first <= 255;
    if (!firstFitsInByte) {
        throw new IllegalArgumentException("First char must be in range [0, 255] but was: " + first);
    }
    final boolean secondFitsInByte = second >= 0 && second <= 255;
    if (!secondFitsInByte) {
        throw new IllegalArgumentException("Second char must be in range [0, 255] but was: " + second);
    }
    // bits 16-23 are never written, which keeps the padding byte at 0
    final int packedChars = (second << SECOND_CHAR_OFFSET) | first;
    return (length << LENGTH_BITS_OFFSET) | packedChars;
}

/**
 * Extracts the replacement length stored in the top byte of a packed entry.
 *
 * @param replacementData a packed replacement entry
 * @return the value of bits 24-31
 */
private static int replacementLength(int replacementData) {
    // A stored length never exceeds 127, so the sign bit is clear and the
    // sign-preserving shift gives the same result as an unsigned one.
    final int length = replacementData >> LENGTH_BITS_OFFSET;
    return length;
}

/**
 * Extracts the second replacement char of a packed entry.
 *
 * @param replacementData a packed replacement entry
 * @return the char stored right above the first char
 */
private static char secondChar(int replacementData) {
    final int shifted = replacementData >> SECOND_CHAR_OFFSET;
    // The cast keeps the low 16 bits of the shifted value: the second char
    // followed by the padding byte, which is expected to be 0.
    return (char) shifted;
}

/**
 * Extracts the first replacement char of a packed entry.
 *
 * @param replacementData a packed replacement entry
 * @return the char stored in bits 0-7
 */
private static char firstChar(int replacementData) {
    // mask away everything above the lowest byte before narrowing
    final int lowestByte = 0xFF & replacementData;
    return (char) lowestByte;
}

/**
 * Reduces a char code to its lowest byte.
 *
 * @param c the char code
 * @return {@code c} with every bit above bit 7 cleared, i.e. a value in [0, 255]
 */
private static int toLatinChar(int c) {
    final int lowestByte = 0xFF & c;
    return lowestByte;
}

/**
 * Looks up the packed replacement entry of a char.
 *
 * @param c the char to look up
 * @return the table entry for chars up to {@code MAX_LATIN_CHAR}, otherwise
 *         {@code NO_REPLACEMENT_DATA}
 */
private static int replacementDataOf(char c) {
    // char is unsigned, so only the upper bound has to be checked
    final boolean hasTableEntry = c <= MAX_LATIN_CHAR;
    // chars beyond the table share the "length 1" entry: they are never replaced
    return hasTableEntry ? REPLACEMENTS_DATA[toLatinChar(c)] : NO_REPLACEMENT_DATA;
}

/**
 * Writes both chars of a packed entry into {@code out}, always two slots.
 *
 * @param out             destination buffer
 * @param pos             index that receives the first char; {@code pos + 1} receives the second
 * @param replacementData a packed replacement entry
 */
private static void writeReplacementData(char[] out, int pos, int replacementData) {
    final char first = firstChar(replacementData);
    out[pos] = first;
    final char second = secondChar(replacementData);
    out[pos + 1] = second;
}

/**
 * Builds the table of packed replacement entries for the char codes [0, 255].
 *
 * <p>All Unicode characters may be placed within the quotation marks,
 * except for the characters that MUST be escaped: quotation mark,
 * reverse solidus, and the control characters (U+0000 through U+001F).
 * See also https://datatracker.ietf.org/doc/html/rfc8259#autoid-10
 *
 * <p>Only the local table is written; the method never touches the static
 * field that receives its result.
 *
 * @return a new 256-entry table indexed by char code
 */
private static int[] createReplacementData() {
    int[] table = new int[256];
    // By default a control char is replaced by 6 chars; the entry stores only
    // that length, both char bytes stay 0.
    Arrays.fill(table, 0, 32, packReplacementData(0, 0, 6));
    // Every other Latin char replaces itself (length 1).
    for (int i = 32; i < 256; i++) {
        table[i] = packReplacementData(i, 0, 1);
    }
    // Chars with a two-char escape, some of them control chars: these entries
    // override the defaults set above.
    table['"'] = packReplacementData('\\', '"', 2);
    table['\\'] = packReplacementData('\\', '\\', 2);
    table['\r'] = packReplacementData('\\', 'r', 2);
    table['\b'] = packReplacementData('\\', 'b', 2);
    table['\n'] = packReplacementData('\\', 'n', 2);
    table['\t'] = packReplacementData('\\', 't', 2);
    table['\f'] = packReplacementData('\\', 'f', 2);
    // The solidus is not in the MUST-escape list, but it is escaped here as well.
    table['/'] = packReplacementData('\\', '/', 2);
    return table;
}

/**
 * Looks up the packed replacement entry of a char.
 *
 * @param c the char to look up
 * @return the table entry for chars below 256, otherwise
 *         {@code NO_NEED_REPLACEMENT_DATA} (length 1, nothing to escape)
 */
private static int replacementDataOf(char c) {
    final boolean hasTableEntry = c < 256;
    return hasTableEntry ? REPLACEMENTS_DATA[c] : NO_NEED_REPLACEMENT_DATA;
}
// This is a cache for the control chars replacements, which are [0-31]
private static final char[][] CTRL_REPLACEMENTS = new char[32][];

private static int replacementLength(int replacementData) {
// length isn't bigger than 127, which means preserving sign (which is faster) won't affect the shift
return replacementData >> 24;
/**
 * Returns the six-char {@code \}{@code uXXXX} escape of a control char, building
 * and caching it on first use.
 *
 * <p>The cache is read and written without synchronization. The returned
 * array is the cached instance itself, so callers must not modify it.
 *
 * @param c the control char code; it is used as an index into the cache
 * @return the escape sequence as a char array of length 6
 */
private static char[] doEscapeCtrl(int c) {
    final char[] cached = CTRL_REPLACEMENTS[c];
    if (cached != null) {
        return cached;
    }
    final char[] escaped = String.format("\\u%04x", c).toCharArray();
    assert escaped.length == 6;
    CTRL_REPLACEMENTS[c] = escaped;
    return escaped;
}

static String escape(String toEscape) {
Expand All @@ -71,20 +134,19 @@ private static String doEscape(String value, int firstToReplace, int firstReplac
int outputLength = firstToReplace;
for (int i = 0; i < remainingChars; i++) {
char c = value.charAt(firstToReplace + i);
if (c < 256) {
int latinChar = c & 0x00FF;
if (c <= MAX_LATIN_CHAR) {
int latinChar = toLatinChar(c);
int replacementData = REPLACEMENTS_DATA[latinChar];
int replacementLength = replacementLength(replacementData);
if (replacementLength == 6) {
var ctrlEscape = doEscapeCtrl(c);
buffer = ensureCapacity(buffer, outputLength, 6, (remainingChars - i) - 1);
ctrlEscape.getChars(0, 6, buffer, outputLength);
System.arraycopy(ctrlEscape, 0, buffer, outputLength, ctrlEscape.length);
outputLength += 6;
} else {
assert replacementLength == 1 || replacementLength == 2;
buffer = ensureCapacity(buffer, outputLength, 2, (remainingChars - i) - 1);
buffer[outputLength] = (char) (replacementData & 0xFF);
buffer[outputLength + 1] = (char) (replacementData >> 8);
writeReplacementData(buffer, outputLength, replacementData);
outputLength += replacementLength;
}
} else {
Expand Down Expand Up @@ -129,13 +191,4 @@ public boolean appliesTo(Origin origin, Object result) {
/**
 * Maps a result to its escaped string form.
 *
 * @param result     the value to map; its {@code toString()} output is escaped
 * @param expression not used by this mapper
 * @return the outcome of {@code escape} applied to {@code result.toString()}
 */
public String map(Object result, Expression expression) {
    final String text = result.toString();
    return escape(text);
}

/**
 * Returns the {@code \}{@code uXXXX} escape of a control char as a String,
 * building and caching it on first use.
 *
 * @param c the control char code; it is used as an index into the cache
 * @return the escape sequence
 */
private static String doEscapeCtrl(int c) {
    final String cached = CTRL_REPLACEMENTS[c];
    if (cached != null) {
        return cached;
    }
    final String escaped = String.format("\\u%04x", c);
    CTRL_REPLACEMENTS[c] = escaped;
    return escaped;
}
}

0 comments on commit 0115772

Please sign in to comment.