From 46e8eb6a9a45b11f9e4c97474ed6c02b1faa43af Mon Sep 17 00:00:00 2001 From: Jacob Zimmerman Date: Sat, 7 Sep 2024 19:02:36 -0400 Subject: [PATCH] fix(client): partial parsing update to handle strings small testing additions lint --- package.json | 1 + src/_vendor/partial-json-parser/README.md | 2 +- src/_vendor/partial-json-parser/parser.ts | 469 +++++++++--------- .../partial-json-parsing.test.ts | 58 +++ yarn.lock | 12 + 5 files changed, 298 insertions(+), 244 deletions(-) create mode 100644 tests/_vendor/partial-json-parser/partial-json-parsing.test.ts diff --git a/package.json b/package.json index 2f684d212..934e1e722 100644 --- a/package.json +++ b/package.json @@ -43,6 +43,7 @@ "eslint": "^8.49.0", "eslint-plugin-prettier": "^5.0.1", "eslint-plugin-unused-imports": "^3.0.0", + "fast-check": "^3.22.0", "jest": "^29.4.0", "prettier": "^3.0.0", "prettier-2": "npm:prettier@^2", diff --git a/src/_vendor/partial-json-parser/README.md b/src/_vendor/partial-json-parser/README.md index bc6ea4e3d..d4e1c85d6 100644 --- a/src/_vendor/partial-json-parser/README.md +++ b/src/_vendor/partial-json-parser/README.md @@ -1,3 +1,3 @@ # Partial JSON Parser -Vendored from https://www.npmjs.com/package/partial-json-parser and updated to use TypeScript. +Vendored from https://www.npmjs.com/package/partial-json with some modifications diff --git a/src/_vendor/partial-json-parser/parser.ts b/src/_vendor/partial-json-parser/parser.ts index 9470c462f..5ee62b76b 100644 --- a/src/_vendor/partial-json-parser/parser.ts +++ b/src/_vendor/partial-json-parser/parser.ts @@ -1,264 +1,247 @@ -type Token = { - type: string; - value: string; +const STR = 0b000000001; +const NUM = 0b000000010; +const ARR = 0b000000100; +const OBJ = 0b000001000; +const NULL = 0b000010000; +const BOOL = 0b000100000; +const NAN = 0b001000000; +const INFINITY = 0b010000000; +const MINUS_INFINITY = 0b100000000; + +const INF = INFINITY | MINUS_INFINITY; +const SPECIAL = NULL | BOOL | INF | NAN; +const ATOM = STR | NUM | SPECIAL; +const COLLECTION = ARR | OBJ; +const ALL = ATOM | COLLECTION; + +const Allow = { + STR, + NUM, + ARR, + OBJ, + NULL, + BOOL, + NAN, + INFINITY, + MINUS_INFINITY, + INF, + SPECIAL, + ATOM, + COLLECTION, + ALL, }; -const tokenize = (input: string): Token[] => { - let current = 0; - let tokens: Token[] = []; - - while (current < input.length) { - let char = input[current]; - - if (char === '\\') { - current++; - continue; - } - - if (char === '{') { - tokens.push({ - type: 'brace', - value: '{', - }); - - current++; - continue; - } - - if (char === '}') { - tokens.push({ - type: 'brace', - value: '}', - }); - - current++; - continue; - } - - if (char === '[') { - tokens.push({ - type: 'paren', - value: '[', - }); - - current++; - continue; - } - - if (char === ']') { - tokens.push({ - type: 'paren', - value: ']', - }); - - current++; - continue; - } - - if (char === ':') { - tokens.push({ - type: 'separator', - value: ':', - }); - - current++; - continue; +// The JSON string segment was unable to be parsed completely +class PartialJSON extends Error {} + +class MalformedJSON extends Error {} + +/** + * Parse incomplete JSON + * @param {string} jsonString Partial JSON to be parsed + * @param {number} allowPartial Specify what types are allowed to be partial, see {@link Allow} for details + * @returns The parsed JSON + * @throws {PartialJSON} If the JSON is incomplete (related to the `allow` parameter) + * @throws {MalformedJSON} If the JSON is malformed + */ +function parseJSON(jsonString: string, allowPartial: number = Allow.ALL): any { + if (typeof jsonString !== 'string') { + throw new TypeError(`expecting str, got ${typeof jsonString}`); + } + if (!jsonString.trim()) { + throw new Error(`${jsonString} is empty`); + } + return _parseJSON(jsonString.trim(), allowPartial); +} + +const _parseJSON = (jsonString: string, allow: number) => { + const length = jsonString.length; + let index = 0; + + const markPartialJSON = (msg: string) => { + throw new PartialJSON(`${msg} at position ${index}`); + }; + + const throwMalformedError = (msg: string) => { + throw new MalformedJSON(`${msg} at position ${index}`); + }; + + const parseAny: () => any = () => { + skipBlank(); + if (index >= length) markPartialJSON('Unexpected end of input'); + if (jsonString[index] === '"') return parseStr(); + if (jsonString[index] === '{') return parseObj(); + if (jsonString[index] === '[') return parseArr(); + if ( + jsonString.substring(index, index + 4) === 'null' || + (Allow.NULL & allow && length - index < 4 && 'null'.startsWith(jsonString.substring(index))) + ) { + index += 4; + return null; + } + if ( + jsonString.substring(index, index + 4) === 'true' || + (Allow.BOOL & allow && length - index < 4 && 'true'.startsWith(jsonString.substring(index))) + ) { + index += 4; + return true; + } + if ( + jsonString.substring(index, index + 5) === 'false' || + (Allow.BOOL & allow && length - index < 5 && 'false'.startsWith(jsonString.substring(index))) + ) { + index += 5; + return false; + } + if ( + jsonString.substring(index, index + 8) === 'Infinity' || + (Allow.INFINITY & allow && length - index < 8 && 'Infinity'.startsWith(jsonString.substring(index))) + ) { + index += 8; + return Infinity; + } + if ( + jsonString.substring(index, index + 9) === '-Infinity' || + (Allow.MINUS_INFINITY & allow && + 1 < length - index && + length - index < 9 && + '-Infinity'.startsWith(jsonString.substring(index))) + ) { + index += 9; + return -Infinity; + } + if ( + jsonString.substring(index, index + 3) === 'NaN' || + (Allow.NAN & allow && length - index < 3 && 'NaN'.startsWith(jsonString.substring(index))) + ) { + index += 3; + return NaN; + } + return parseNum(); + }; + + const parseStr: () => string = () => { + const start = index; + let escape = false; + index++; // skip initial quote + while (index < length && (jsonString[index] !== '"' || (escape && jsonString[index - 1] === '\\'))) { + escape = jsonString[index] === '\\' ? !escape : false; + index++; + } + if (jsonString.charAt(index) == '"') { + try { + return JSON.parse(jsonString.substring(start, ++index - Number(escape))); + } catch (e) { + throwMalformedError(String(e)); } - - if (char === ',') { - tokens.push({ - type: 'delimiter', - value: ',', - }); - - current++; - continue; + } else if (Allow.STR & allow) { + try { + return JSON.parse(jsonString.substring(start, index - Number(escape)) + '"'); + } catch (e) { + // SyntaxError: Invalid escape sequence + return JSON.parse(jsonString.substring(start, jsonString.lastIndexOf('\\')) + '"'); } - - if (char === '"') { - let value = ''; - let danglingQuote = false; - - char = input[++current]; - - while (char !== '"') { - if (current === input.length) { - danglingQuote = true; - break; - } - - if (char === '\\') { - current++; - if (current === input.length) { - danglingQuote = true; - break; - } - value += char + input[current]; - char = input[++current]; - } else { - value += char; - char = input[++current]; - } - } - - char = input[++current]; - - if (!danglingQuote) { - tokens.push({ - type: 'string', - value, - }); + } + markPartialJSON('Unterminated string literal'); + }; + + const parseObj = () => { + index++; // skip initial brace + skipBlank(); + const obj: Record = {}; + try { + while (jsonString[index] !== '}') { + skipBlank(); + if (index >= length && Allow.OBJ & allow) return obj; + const key = parseStr(); + skipBlank(); + index++; // skip colon + try { + const value = parseAny(); + Object.defineProperty(obj, key, { value, writable: true, enumerable: true, configurable: true }); + } catch (e) { + if (Allow.OBJ & allow) return obj; + else throw e; } - continue; - } - - let WHITESPACE = /\s/; - if (char && WHITESPACE.test(char)) { - current++; - continue; + skipBlank(); + if (jsonString[index] === ',') index++; // skip comma } - - let NUMBERS = /[0-9]/; - if ((char && NUMBERS.test(char)) || char === '-' || char === '.') { - let value = ''; - - if (char === '-') { - value += char; - char = input[++current]; - } - - while ((char && NUMBERS.test(char)) || char === '.') { - value += char; - char = input[++current]; + } catch (e) { + if (Allow.OBJ & allow) return obj; + else markPartialJSON("Expected '}' at end of object"); + } + index++; // skip final brace + return obj; + }; + + const parseArr = () => { + index++; // skip initial bracket + const arr = []; + try { + while (jsonString[index] !== ']') { + arr.push(parseAny()); + skipBlank(); + if (jsonString[index] === ',') { + index++; // skip comma } - - tokens.push({ - type: 'number', - value, - }); - continue; } - - let LETTERS = /[a-z]/i; - if (char && LETTERS.test(char)) { - let value = ''; - - while (char && LETTERS.test(char)) { - if (current === input.length) { - break; - } - value += char; - char = input[++current]; - } - - if (value == 'true' || value == 'false' || value === 'null') { - tokens.push({ - type: 'name', - value, - }); - } else { - // unknown token, e.g. `nul` which isn't quite `null` - current++; - continue; - } - continue; + } catch (e) { + if (Allow.ARR & allow) { + return arr; } - - current++; + markPartialJSON("Expected ']' at end of array"); } - - return tokens; - }, - strip = (tokens: Token[]): Token[] => { - if (tokens.length === 0) { - return tokens; + index++; // skip final bracket + return arr; + }; + + const parseNum = () => { + if (index === 0) { + if (jsonString === '-' && Allow.NUM & allow) markPartialJSON("Not sure what '-' is"); + try { + return JSON.parse(jsonString); + } catch (e) { + if (Allow.NUM & allow) { + try { + if ('.' === jsonString[jsonString.length - 1]) + return JSON.parse(jsonString.substring(0, jsonString.lastIndexOf('.'))); + return JSON.parse(jsonString.substring(0, jsonString.lastIndexOf('e'))); + } catch (e) {} + } + throwMalformedError(String(e)); + } } - let lastToken = tokens[tokens.length - 1]!; + const start = index; - switch (lastToken.type) { - case 'separator': - tokens = tokens.slice(0, tokens.length - 1); - return strip(tokens); - break; - case 'number': - let lastCharacterOfLastToken = lastToken.value[lastToken.value.length - 1]; - if (lastCharacterOfLastToken === '.' || lastCharacterOfLastToken === '-') { - tokens = tokens.slice(0, tokens.length - 1); - return strip(tokens); - } - case 'string': - let tokenBeforeTheLastToken = tokens[tokens.length - 2]; - if (tokenBeforeTheLastToken?.type === 'delimiter') { - tokens = tokens.slice(0, tokens.length - 1); - return strip(tokens); - } else if (tokenBeforeTheLastToken?.type === 'brace' && tokenBeforeTheLastToken.value === '{') { - tokens = tokens.slice(0, tokens.length - 1); - return strip(tokens); - } - break; - case 'delimiter': - tokens = tokens.slice(0, tokens.length - 1); - return strip(tokens); - break; - } + if (jsonString[index] === '-') index++; + while (jsonString[index] && !',]}'.includes(jsonString[index]!)) index++; - return tokens; - }, - unstrip = (tokens: Token[]): Token[] => { - let tail: string[] = []; + if (index == length && !(Allow.NUM & allow)) markPartialJSON('Unterminated number literal'); - tokens.map((token) => { - if (token.type === 'brace') { - if (token.value === '{') { - tail.push('}'); - } else { - tail.splice(tail.lastIndexOf('}'), 1); - } + try { + return JSON.parse(jsonString.substring(start, index)); + } catch (e) { + if (jsonString.substring(start, index) === '-' && Allow.NUM & allow) + markPartialJSON("Not sure what '-' is"); + try { + return JSON.parse(jsonString.substring(start, jsonString.lastIndexOf('e'))); + } catch (e) { + throwMalformedError(String(e)); } - if (token.type === 'paren') { - if (token.value === '[') { - tail.push(']'); - } else { - tail.splice(tail.lastIndexOf(']'), 1); - } - } - }); - - if (tail.length > 0) { - tail.reverse().map((item) => { - if (item === '}') { - tokens.push({ - type: 'brace', - value: '}', - }); - } else if (item === ']') { - tokens.push({ - type: 'paren', - value: ']', - }); - } - }); } + }; - return tokens; - }, - generate = (tokens: Token[]): string => { - let output = ''; + const skipBlank = () => { + while (index < length && ' \n\r\t'.includes(jsonString[index]!)) { + index++; + } + }; - tokens.map((token) => { - switch (token.type) { - case 'string': - output += '"' + token.value + '"'; - break; - default: - output += token.value; - break; - } - }); + return parseAny(); +}; - return output; - }, - partialParse = (input: string): unknown => JSON.parse(generate(unstrip(strip(tokenize(input))))); +// using this function with malformed JSON is undefined behavior +const partialParse = (input: string) => parseJSON(input, Allow.ALL ^ Allow.NUM); -export { partialParse }; +export { partialParse, PartialJSON, MalformedJSON }; diff --git a/tests/_vendor/partial-json-parser/partial-json-parsing.test.ts b/tests/_vendor/partial-json-parser/partial-json-parsing.test.ts new file mode 100644 index 000000000..6fad8f1a9 --- /dev/null +++ b/tests/_vendor/partial-json-parser/partial-json-parsing.test.ts @@ -0,0 +1,58 @@ +import fc from 'fast-check'; +import { MalformedJSON, partialParse } from 'openai/_vendor/partial-json-parser/parser'; + +describe('partial parsing', () => { + test('should parse complete json', () => { + expect(partialParse('{"__proto__": 0}')).toEqual(JSON.parse('{"__proto__": 0}')); + + fc.assert( + fc.property(fc.json({ depthSize: 'large', noUnicodeString: false }), (jsonString) => { + const parsedNormal = JSON.parse(jsonString); + const parsedPartial = partialParse(jsonString); + expect(parsedPartial).toEqual(parsedNormal); + }), + ); + }); + + test('should parse partial json', () => { + expect(partialParse('{"field')).toEqual({}); + expect(partialParse('"')).toEqual(''); + expect(partialParse('[2, 3, 4')).toEqual([2, 3]); + expect(partialParse('{"field": true, "field2')).toEqual({ field: true }); + expect(partialParse('{"field": true, "field2":')).toEqual({ field: true }); + expect(partialParse('{"field": true, "field2":{')).toEqual({ field: true, field2: {} }); + expect(partialParse('{"field": true, "field2": { "obj": "somestr')).toEqual({ + field: true, + field2: { obj: 'somestr' }, + }); + expect(partialParse('{"field": true, "field2": { "obj": "somestr",')).toEqual({ + field: true, + field2: { obj: 'somestr' }, + }); + expect(partialParse('{"field": "va')).toEqual({ field: 'va' }); + expect(partialParse('[ "v1", 2, "v2", 3')).toEqual(['v1', 2, 'v2']); + expect(partialParse('[ "v1", 2, "v2", -')).toEqual(['v1', 2, 'v2']); + expect(partialParse('[1, 2e')).toEqual([1]); + }); + + test('should only throw errors parsing numbers', () => + fc.assert( + fc.property(fc.json({ depthSize: 'large', noUnicodeString: false }), (jsonString) => { + for (let i = 1; i < jsonString.length; i++) { + // speedup + i += Math.floor(Math.random() * 3); + const substring = jsonString.substring(0, i); + + // since we don't allow partial parsing for numbers + if ( + typeof JSON.parse(jsonString) === 'number' && + 'e-+.'.includes(substring[substring.length - 1]!) + ) { + expect(() => partialParse(substring)).toThrow(MalformedJSON); + } else { + partialParse(substring); + } + } + }), + )); +}); diff --git a/yarn.lock b/yarn.lock index 3c7bdb93e..68486892b 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1725,6 +1725,13 @@ expect@^29.0.0, expect@^29.7.0: jest-message-util "^29.7.0" jest-util "^29.7.0" +fast-check@^3.22.0: + version "3.22.0" + resolved "https://registry.yarnpkg.com/fast-check/-/fast-check-3.22.0.tgz#1a8153e9d6fbdcc60b818f447cbb9cac1fdd8fb6" + integrity sha512-8HKz3qXqnHYp/VCNn2qfjHdAdcI8zcSqOyX64GOMukp7SL2bfzfeDKjSd+UyECtejccaZv3LcvZTm9YDD22iCQ== + dependencies: + pure-rand "^6.1.0" + fast-deep-equal@^3.1.1, fast-deep-equal@^3.1.3: version "3.1.3" resolved "https://registry.yarnpkg.com/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz#3a7d56b559d6cbc3eb512325244e619a65c6c525" @@ -3037,6 +3044,11 @@ pure-rand@^6.0.0: resolved "https://registry.yarnpkg.com/pure-rand/-/pure-rand-6.0.4.tgz#50b737f6a925468679bff00ad20eade53f37d5c7" integrity sha512-LA0Y9kxMYv47GIPJy6MI84fqTd2HmYZI83W/kM/SkKfDlajnZYfmXFTxkbY+xSBPkLJxltMa9hIkmdc29eguMA== +pure-rand@^6.1.0: + version "6.1.0" + resolved "https://registry.yarnpkg.com/pure-rand/-/pure-rand-6.1.0.tgz#d173cf23258231976ccbdb05247c9787957604f2" + integrity sha512-bVWawvoZoBYpp6yIoQtQXHZjmz35RSVHnUOTefl8Vcjr8snTPY1wnpSPMWekcFwbxI6gtmT7rSYPFvz71ldiOA== + qs@^6.10.3: version "6.13.0" resolved "https://registry.yarnpkg.com/qs/-/qs-6.13.0.tgz#6ca3bd58439f7e245655798997787b0d88a51906"