diff --git a/README.md b/README.md index 252246f..ec25379 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ npm run mtsc ./tests/singleVar.ts - [ ] Make semicolon a statement ender, not statement separator. - Hint: You'll need a predicate to peek at the next token and decide if it's the start of an element. - Bonus: Switch from semicolon to newline as statement ender. -- [ ] Add string literals. +- [x] Add string literals. - [ ] Add let. - Then add use-before-declaration errors in the checker. - Finally, add an ES2015 -> ES5 transform that transforms `let` to `var`. @@ -50,3 +50,6 @@ npm run mtsc ./tests/singleVar.ts - [ ] Add an ES5 transformer that converts let -> var. - [ ] Add function declarations and function calls. - [ ] Add arrow functions with an appropriate transform in ES5. +- [ ] Add support for the lexer to report errors + - report unterminated string literal error +- [ ] Refactor: rename `Literal` to `NumericLiteral` diff --git a/baselines/reference/singleTypedVar.errors.baseline b/baselines/reference/singleTypedVar.errors.baseline index e36e99b..85e71ae 100644 --- a/baselines/reference/singleTypedVar.errors.baseline +++ b/baselines/reference/singleTypedVar.errors.baseline @@ -2,5 +2,9 @@ { "pos": 17, "message": "Cannot assign initialiser of type 'number' to variable with declared type 'string'." + }, + { + "pos": 41, + "message": "Cannot assign initialiser of type 'string' to variable with declared type 'number'." 
} ] \ No newline at end of file diff --git a/baselines/reference/singleTypedVar.js.baseline b/baselines/reference/singleTypedVar.js.baseline index aefd623..1143102 100644 --- a/baselines/reference/singleTypedVar.js.baseline +++ b/baselines/reference/singleTypedVar.js.baseline @@ -1 +1 @@ -"var s = 1" \ No newline at end of file +"var s = 1;\nvar n = 'test'" \ No newline at end of file diff --git a/baselines/reference/singleTypedVar.tree.baseline b/baselines/reference/singleTypedVar.tree.baseline index a4a6855..dd05510 100644 --- a/baselines/reference/singleTypedVar.tree.baseline +++ b/baselines/reference/singleTypedVar.tree.baseline @@ -5,6 +5,12 @@ "kind": "Var", "pos": 3 } + ], + "n": [ + { + "kind": "Var", + "pos": 22 + } ] }, "statements": [ @@ -23,6 +29,22 @@ "value": 1 } }, + { + "kind": "Var", + "name": { + "kind": "Identifier", + "text": "n" + }, + "typename": { + "kind": "Identifier", + "text": "number" + }, + "init": { + "kind": "StringLiteral", + "value": "test", + "isSingleQuote": true + } + }, { "kind": "EmptyStatement" } diff --git a/baselines/reference/stringLiteral.errors.baseline b/baselines/reference/stringLiteral.errors.baseline new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/baselines/reference/stringLiteral.errors.baseline @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/baselines/reference/stringLiteral.js.baseline b/baselines/reference/stringLiteral.js.baseline new file mode 100644 index 0000000..e2891fd --- /dev/null +++ b/baselines/reference/stringLiteral.js.baseline @@ -0,0 +1 @@ +"var singleQuote = 'singleQuote';\nvar doubleQuote = \"doubleQuote\";\nvar escapedSingleQuote = 'escapedSingle\\'Quote';\nvar escapedDoubleQuote = \"escapedDouble\\\"Quote\";\nvar escapedB = 'escaped\\nB';\nvar escapedT = 'escaped\\nT';\nvar escapedN = 'escaped\\nN';\nvar escapedR = 'escaped\\nR'" \ No newline at end of file diff --git a/baselines/reference/stringLiteral.tree.baseline b/baselines/reference/stringLiteral.tree.baseline new 
file mode 100644 index 0000000..2fdc76b --- /dev/null +++ b/baselines/reference/stringLiteral.tree.baseline @@ -0,0 +1,153 @@ +{ + "locals": { + "singleQuote": [ + { + "kind": "Var", + "pos": 3 + } + ], + "doubleQuote": [ + { + "kind": "Var", + "pos": 36 + } + ], + "escapedSingleQuote": [ + { + "kind": "Var", + "pos": 69 + } + ], + "escapedDoubleQuote": [ + { + "kind": "Var", + "pos": 118 + } + ], + "escapedB": [ + { + "kind": "Var", + "pos": 167 + } + ], + "escapedT": [ + { + "kind": "Var", + "pos": 196 + } + ], + "escapedN": [ + { + "kind": "Var", + "pos": 225 + } + ], + "escapedR": [ + { + "kind": "Var", + "pos": 254 + } + ] + }, + "statements": [ + { + "kind": "Var", + "name": { + "kind": "Identifier", + "text": "singleQuote" + }, + "init": { + "kind": "StringLiteral", + "value": "singleQuote", + "isSingleQuote": true + } + }, + { + "kind": "Var", + "name": { + "kind": "Identifier", + "text": "doubleQuote" + }, + "init": { + "kind": "StringLiteral", + "value": "doubleQuote", + "isSingleQuote": false + } + }, + { + "kind": "Var", + "name": { + "kind": "Identifier", + "text": "escapedSingleQuote" + }, + "init": { + "kind": "StringLiteral", + "value": "escapedSingle'Quote", + "isSingleQuote": true + } + }, + { + "kind": "Var", + "name": { + "kind": "Identifier", + "text": "escapedDoubleQuote" + }, + "init": { + "kind": "StringLiteral", + "value": "escapedDouble\"Quote", + "isSingleQuote": false + } + }, + { + "kind": "Var", + "name": { + "kind": "Identifier", + "text": "escapedB" + }, + "init": { + "kind": "StringLiteral", + "value": "escaped\nB", + "isSingleQuote": true + } + }, + { + "kind": "Var", + "name": { + "kind": "Identifier", + "text": "escapedT" + }, + "init": { + "kind": "StringLiteral", + "value": "escaped\nT", + "isSingleQuote": true + } + }, + { + "kind": "Var", + "name": { + "kind": "Identifier", + "text": "escapedN" + }, + "init": { + "kind": "StringLiteral", + "value": "escaped\nN", + "isSingleQuote": true + } + }, + { + "kind": "Var", + "name": 
{ + "kind": "Identifier", + "text": "escapedR" + }, + "init": { + "kind": "StringLiteral", + "value": "escaped\nR", + "isSingleQuote": true + } + }, + { + "kind": "EmptyStatement" + } + ] +} \ No newline at end of file diff --git a/src/check.ts b/src/check.ts index e1d5744..1a6ee62 100644 --- a/src/check.ts +++ b/src/check.ts @@ -58,6 +58,8 @@ export function check(module: Module) { return errorType; case Node.Literal: return numberType; + case Node.StringLiteral: + return stringType; case Node.Assignment: const v = checkExpression(expression.value); const t = checkExpression(expression.name); diff --git a/src/emit.ts b/src/emit.ts index 90a5309..c69e177 100644 --- a/src/emit.ts +++ b/src/emit.ts @@ -1,5 +1,22 @@ import { Statement, Node, Expression } from './types'; +const singleQuoteRegex = /[\\\'\t\v\f\b\r\n]/g; +const doubleQuoteRegex = /[\\\"\t\v\f\b\r\n]/g; + +const escapedCharsMap = new Map( + Object.entries({ + '\t': '\\t', + '\v': '\\v', + '\f': '\\f', + '\b': '\\b', + '\r': '\\r', + '\n': '\\n', + '\\': '\\\\', + '"': '\\"', + "'": "\\'", + }), +); + export function emit(statements: Statement[]) { return statements.map(emitStatement).join(';\n'); } @@ -26,7 +43,22 @@ function emitExpression(expression: Expression): string { return expression.text; case Node.Literal: return '' + expression.value; + case Node.StringLiteral: + return expression.isSingleQuote + ? `'${escapeString(expression.value, true)}'` + : `"${escapeString(expression.value, false)}"`; case Node.Assignment: return `${expression.name.text} = ${emitExpression(expression.value)}`; } } + +function escapeString(string: string, isSingleQuote: boolean) { + return string.replace( + isSingleQuote ? 
singleQuoteRegex : doubleQuoteRegex, + replacement, + ); +} + +function replacement(char: string) { + return escapedCharsMap.get(char) || char; +} diff --git a/src/lex.ts b/src/lex.ts index c332f00..df5a54e 100644 --- a/src/lex.ts +++ b/src/lex.ts @@ -1,4 +1,4 @@ -import { Token, Lexer } from './types'; +import { Token, Lexer, CharCodes } from './types'; const keywords = { function: Token.Function, @@ -11,12 +11,14 @@ export function lex(s: string): Lexer { let pos = 0; let text = ''; let token = Token.BOF; + let firstChar: string; return { scan, token: () => token, pos: () => pos, text: () => text, + isSingleQuote: () => firstChar === "'", }; function scan() { @@ -40,6 +42,10 @@ export function lex(s: string): Lexer { text in keywords ? keywords[text as keyof typeof keywords] : Token.Identifier; + } else if (['"', "'"].includes(s.charAt(pos))) { + firstChar = s.charAt(pos); + text = scanString(); + token = Token.String; } else { pos++; switch (s.charAt(pos - 1)) { @@ -62,6 +68,70 @@ export function lex(s: string): Lexer { function scanForward(pred: (x: string) => boolean) { while (pos < s.length && pred(s.charAt(pos))) pos++; } + + // Scans the string literal whose opening quote is at pos and returns its value with escape sequences resolved. + function scanString() { + const quote = s.charCodeAt(pos); + pos++; + + let stringValue = ''; + let start = pos; + + while (true) { + if (pos >= s.length) { + // Unterminated string literal: charCodeAt() is NaN past the end, so without this exit the loop never terminates. + // TODO: report unterminated string literal error + // scanEscapeSequence advances pos twice unconditionally, so a trailing backslash can leave pos past the end; clamp it. + pos = s.length; + stringValue += s.slice(start, pos); + break; + } + + const char = s.charCodeAt(pos); + + if (char === quote) { + stringValue += s.slice(start, pos); + pos++; + break; + } + + if (char === CharCodes.backslash) { + stringValue += s.slice(start, pos); + stringValue += scanEscapeSequence(); + start = pos; + continue; + } + + pos++; + } + + return stringValue; + } + + function scanEscapeSequence() { + pos++; + const char = s.charCodeAt(pos); + pos++; + + switch (char) { + case CharCodes.b: + return '\b'; + case CharCodes.t: + return '\t'; + case CharCodes.n: + return '\n'; + case CharCodes.r: + return '\r'; + case CharCodes.singleQuote: + // prettier-ignore + return "\'"; + case 
CharCodes.doubleQuote: + // prettier-ignore + return '\"'; + default: + return String.fromCharCode(char); + } + } } export function lexAll(s: string) { diff --git a/src/parse.ts b/src/parse.ts index 45ab4d2..b16c618 100644 --- a/src/parse.ts +++ b/src/parse.ts @@ -38,6 +38,13 @@ export function parse(lexer: Lexer): Module { return { kind: Node.Identifier, text: lexer.text(), pos }; } else if (tryParseToken(Token.Literal)) { return { kind: Node.Literal, value: +lexer.text(), pos }; + } else if (tryParseToken(Token.String)) { + return { + kind: Node.StringLiteral, + value: lexer.text(), + pos, + isSingleQuote: lexer.isSingleQuote(), + }; } error( pos, diff --git a/src/types.ts b/src/types.ts index c318ebb..0465979 100644 --- a/src/types.ts +++ b/src/types.ts @@ -10,6 +10,7 @@ export enum Token { Semicolon = 'Semicolon', Colon = 'Colon', Whitespace = 'Whitespace', + String = 'String', Unknown = 'Unknown', BOF = 'BOF', EOF = 'EOF', @@ -20,6 +21,7 @@ export type Lexer = { token(): Token; pos(): number; text(): string; + isSingleQuote(): boolean; }; export enum Node { @@ -29,6 +31,7 @@ export enum Node { ExpressionStatement, Var, TypeAlias, + StringLiteral, EmptyStatement, } @@ -41,7 +44,7 @@ export interface Location { pos: number; } -export type Expression = Identifier | Literal | Assignment; +export type Expression = Identifier | Literal | Assignment | StringLiteral; export type Identifier = Location & { kind: Node.Identifier; @@ -53,6 +56,12 @@ export type Literal = Location & { value: number; }; +export type StringLiteral = Location & { + kind: Node.StringLiteral; + value: string; + isSingleQuote: boolean; +}; + export type Assignment = Location & { kind: Node.Assignment; name: Identifier; @@ -98,3 +107,13 @@ export type Module = { }; export type Type = { id: string }; + +export enum CharCodes { + b = 98, + t = 116, + n = 110, + r = 114, + singleQuote = 39, + doubleQuote = 34, + backslash = 92, +} diff --git a/tests/singleTypedVar.ts b/tests/singleTypedVar.ts index 
dab2958..7acf0bd 100644 --- a/tests/singleTypedVar.ts +++ b/tests/singleTypedVar.ts @@ -1 +1,2 @@ var s: string = 1; +var n: number = 'test'; diff --git a/tests/stringLiteral.ts b/tests/stringLiteral.ts new file mode 100644 index 0000000..4fdc4ac --- /dev/null +++ b/tests/stringLiteral.ts @@ -0,0 +1,8 @@ +var singleQuote = 'singleQuote'; +var doubleQuote = "doubleQuote"; +var escapedSingleQuote = 'escapedSingle\'Quote'; +var escapedDoubleQuote = "escapedDouble\"Quote"; +var escapedB = 'escaped\nB'; +var escapedT = 'escaped\nT'; +var escapedN = 'escaped\nN'; +var escapedR = 'escaped\nR';