From dc8a40b2f263940aa91f78c3412e50de1ed8608a Mon Sep 17 00:00:00 2001 From: Ingvar Stepanyan Date: Wed, 14 Oct 2015 17:52:56 +0100 Subject: [PATCH 1/4] Add JavaScript frontend. --- src/Makefile.am | 4 +- src/common.cc | 22 +++ src/common.h | 4 +- src/inputdata.cc | 11 ++ src/inputdata.h | 1 + src/main.cc | 7 +- src/rlhc-js.lm | 501 +++++++++++++++++++++++++++++++++++++++++++++++ src/rlhc.lm | 3 + src/xml.cc | 1 + 9 files changed, 551 insertions(+), 3 deletions(-) create mode 100644 src/rlhc-js.lm diff --git a/src/Makefile.am b/src/Makefile.am index 3ffeeb6..6f36638 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -63,6 +63,7 @@ EXTRA_DIST = \ rlhc-ocaml.lm \ rlhc-crack.lm \ rlhc-julia.lm \ + rlhc-js.lm \ ragel-c.lm \ ragel-crack.lm \ ragel-ruby.lm \ @@ -102,7 +103,8 @@ RLHC = \ rlhc-rust.lm \ rlhc-ocaml.lm \ rlhc-crack.lm \ - rlhc-julia.lm + rlhc-julia.lm \ + rlhc-js.lm rlhc.c: rlhc.lm $(RLHC) $(COLM_BINDEP) $(COLM) -c -b rlhc_object -o $@ $< diff --git a/src/common.cc b/src/common.cc index e45e178..010628f 100644 --- a/src/common.cc +++ b/src/common.cc @@ -117,6 +117,16 @@ HostType hostTypesJulia[] = { "u8", 0, "byte", true, true, false, 0, UCHAR_MAX, 4 }, }; +HostType hostTypesJS[] = +{ + { "Int8", 0, "int8", true, true, false, CHAR_MIN, CHAR_MAX, 1 }, + { "Uint8", 0, "uint8", false, true, false, 0, UCHAR_MAX, 1 }, + { "Int16", 0, "int16", true, true, false, SHRT_MIN, SHRT_MAX, 2 }, + { "Uint16", 0, "ushort", false, true, false, 0, USHRT_MAX, 2 }, + { "Int32", 0, "int", true, true, false, INT_MIN, INT_MAX, 4 }, + { "Uint32", 0, "uint", false, true, false, 0, UINT_MAX, 4 }, + { "Float64", 0, "number", true, true, false, LONG_MIN, LONG_MAX, 8 }, +}; const HostLang hostLangC = { "C", @@ -239,6 +249,17 @@ const HostLang hostLangJulia = { "julia" }; +const HostLang hostLangJS = { + "JavaScript", + "-P", + HostLang::JS, + hostTypesJS, 7, + hostTypesJS+1, + false, + true, + "js" +}; + const HostLang *hostLangs[] = { &hostLangC, &hostLangAsm, @@ -251,6 +272,7 @@ 
const HostLang *hostLangs[] = { &hostLangRust, &hostLangCrack, &hostLangJulia, + &hostLangJS, }; diff --git a/src/common.h b/src/common.h index a54e76f..2933a93 100644 --- a/src/common.h +++ b/src/common.h @@ -211,7 +211,8 @@ struct HostLang Crack, Asm, Rust, - Julia + Julia, + JS }; const char *name; @@ -236,6 +237,7 @@ extern const HostLang hostLangCrack; extern const HostLang hostLangAsm; extern const HostLang hostLangRust; extern const HostLang hostLangJulia; +extern const HostLang hostLangJS; extern const HostLang *hostLangs[]; extern const int numHostLangs; diff --git a/src/inputdata.cc b/src/inputdata.cc index 0c1b955..42603a6 100644 --- a/src/inputdata.cc +++ b/src/inputdata.cc @@ -138,6 +138,14 @@ void InputData::juliaDefaultFileName( const char *inputFile ) outputFileName = fileNameFromStem( inputFile, ".jl" ); } +void InputData::jsDefaultFileName( const char *inputFile ) +{ + /* If the output format is code and no output file name is given, then + * make a default. */ + if ( outputFileName == 0 ) + outputFileName = fileNameFromStem( inputFile, ".js" ); +} + void InputData::makeDefaultFileName() { switch ( hostLang->lang ) { @@ -172,6 +180,9 @@ void InputData::makeDefaultFileName() case HostLang::Julia: juliaDefaultFileName( inputFileName ); break; + case HostLang::JS: + jsDefaultFileName( inputFileName ); + break; } } diff --git a/src/inputdata.h b/src/inputdata.h index b0c01e4..993b000 100644 --- a/src/inputdata.h +++ b/src/inputdata.h @@ -311,6 +311,7 @@ struct InputData void asmDefaultFileName( const char *inputFile ); void rustDefaultFileName( const char *inputFile ); void juliaDefaultFileName( const char *inputFile ); + void jsDefaultFileName( const char *inputFile ); void writeOutput( InputItem *ii ); void writeLanguage( std::ostream &out ); diff --git a/src/main.cc b/src/main.cc index 5516555..8f1f3e3 100644 --- a/src/main.cc +++ b/src/main.cc @@ -115,6 +115,8 @@ void usage() " -T0 code style supported\n" " -K The host language is Crack\n" " -T0 
code style supported\n" +" -P The host language is JavaScript\n" +" -T0 code style supported\n" "line directives:\n" " -L Inhibit writing of #line directives\n" "code style:\n" @@ -244,7 +246,7 @@ void escapeLineDirectivePath( std::ostream &out, char *path ) void InputData::parseArgs( int argc, const char **argv ) { - ParamCheck pc( "xo:dnmleabjkS:M:I:CDEJZRAOKUYvHh?-:sT:F:G:LpV", argc, argv ); + ParamCheck pc( "xo:dnmleabjkS:M:I:CDEJZRAOKUYPvHh?-:sT:F:G:LpV", argc, argv ); /* Decide if we were invoked using a path variable, or with an explicit path. */ const char *lastSlash = strrchr( argv[0], '/' ); @@ -385,6 +387,9 @@ void InputData::parseArgs( int argc, const char **argv ) case 'Y': hostLang = &hostLangJulia; break; + case 'P': + hostLang = &hostLangJS; + break; /* Version and help. */ case 'v': diff --git a/src/rlhc-js.lm b/src/rlhc-js.lm new file mode 100644 index 0000000..c4bea0b --- /dev/null +++ b/src/rlhc-js.lm @@ -0,0 +1,501 @@ + +namespace js_out + token _IN_ /''/ + token _EX_ /''/ + + lex + token comment / + '//' any* :> '\n' | + '/*' any* :>> '*/' + / + + token id + /[a-zA-Z_][a-zA-Z_0-9]*/ + + token number / + [0-9]+ + / + + token symbol / + '!' | '#' | '$' | '%' | '&' | '(' | ')' | '*' | + '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | + '=' | '>' | '?' 
| '@' | '[' | ']' | '^' | '|' | + '~' / + + literal `{ `} + + token string / + '"' ( [^"\\] | '\\' any ) * '"' | + "'" ( [^'\\] | '\\' any ) * "'" + / + + ignore + /[ \t\v\r\n]+/ + end + + def item + [comment] + | [id] + | [number] + | [symbol] + | [string] + | [`{ _IN_ item* _EX_ `} ] + + def js_out + [_IN_ _EX_ item*] +end + +namespace js_gen + + global Parser: parser + + void tok_list( TL: host::tok* ) + { + for Tok: host::tok in repeat(TL) { + switch Tok + case [host::`${ StmtList: stmt* host::`}$] { + send Parser + "{ + " [stmt_list( StmtList )] + "} + } + case [host::`={ Expr: expr host::`}=] { + send Parser + "([expr( Expr )])" + } + case [E: escape] { + Str: str = $E + send Parser + "[Str.suffix( 1 )]" + } + default { + send Parser + [Tok] + } + } + } + + void embedded_host( EmbeddedHost: embedded_host ) + { + switch EmbeddedHost + case [`host `( string `, uint `) `={ TL: host::tok* host::`}=] + { + send Parser + "([tok_list( TL )])" + } + case [`host `( string `, uint `) `${ TL: host::tok* host::`}$] + { + send Parser + "{ + " [tok_list( TL )] + "} + } + case [`host `( string `, uint `) `@{ TL: host::tok* host::`}@] + { + send Parser + [tok_list( TL )] + } + } + + void expr_factor( ExprFactor: expr_factor ) + { + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [O:`( TL: expr C: `)] + { + send Parser + [O expr(TL) C] + } + case [ident O: `[ TL: expr C: `]] + { + send Parser + [ExprFactor.ident O expr( TL ) C] + } + case ['offset' '(' ident ',' expr ')'] + { + send Parser + [expr( ExprFactor.expr )] + } + case ['deref' '(' ident ',' expr ')'] + { + send Parser [ExprFactor.ident] + if $ExprFactor.ident == 'data' + send Parser ['.charCodeAt(' expr( ExprFactor.expr ) ')'] + else + send Parser ['[' expr( ExprFactor.expr ) ']'] + } + case [T: `TRUE] + { + T.data = 'true' + send Parser [T] + } + case [F: `FALSE] + { + F.data = 'false' + send Parser [F] + } + case [N: `nil] + { + N.data = '0' + send Parser [N] + } + 
case [Number: number] + { + number( Number ) + } + case [E1: embedded_host `-> E2: expr_factor] + { + embedded_host( E1 ) + expr_factor( E2 ) + } + case [`cast Open: `( type Close: `) expr_factor] + { + expr_factor( ExprFactor._expr_factor ) + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void lvalue( ExprFactor: lvalue ) + { + switch ExprFactor + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [ident O: `[ TL: expr C: `]] + { + send Parser + [ExprFactor.ident O expr( TL ) C] + } + case [I: ident `[ E: expr `] `. F: ident] + { + send Parser + "[I]\[[ expr( E )]\].[F] + } + case [E1: embedded_host `-> E2: lvalue] + { + # The accessor operator is contained within the lhs. + embedded_host( E1 ) + lvalue( E2 ) + } + default { + # Catches cases not specified + send Parser [ExprFactor] + } + } + + void expr_factor_op( ExprFactorOp: expr_factor_op ) + { + switch ExprFactorOp + case [B: `! expr_factor_op] + { + send Parser [B] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [T: `~ expr_factor_op] + { + send Parser [T] + expr_factor_op( ExprFactorOp._expr_factor_op ) + } + case [expr_factor] + { + expr_factor( ExprFactorOp.expr_factor ) + } + } + + void expr_bitwise( ExprBitwise: expr_bitwise ) + { + switch ExprBitwise + case [expr_bitwise A: `& expr_factor_op] + { + expr_bitwise( ExprBitwise._expr_bitwise ) + send Parser [A] + expr_factor_op( ExprBitwise.expr_factor_op ) + } + case [expr_factor_op] + { + expr_factor_op( ExprBitwise.expr_factor_op ) + } + } + + void expr_mult( ExprMult: expr_mult ) + { + switch ExprMult + case [expr_mult T: `* expr_bitwise] + { + expr_mult( ExprMult._expr_mult ) + send Parser [T] + expr_bitwise( ExprMult.expr_bitwise ) + } + case [expr_bitwise] + { + expr_bitwise( ExprMult.expr_bitwise ) + } + } + + void expr_add( ExprAdd: expr_add ) + { + switch ExprAdd + case [expr_add Op: add_op expr_mult] + { + expr_add( ExprAdd._expr_add ) + send Parser [Op] + expr_mult( 
ExprAdd.expr_mult ) + } + case [expr_mult] + { + expr_mult( ExprAdd.expr_mult ) + } + } + + void expr_shift( ExprShift: expr_shift ) + { + switch ExprShift + case [expr_shift Op: shift_op expr_add] + { + expr_shift( ExprShift._expr_shift ) + send Parser [Op] + expr_add( ExprShift.expr_add ) + } + case [expr_add] + { + expr_add( ExprShift.expr_add ) + } + } + + void expr_test( ExprTest: expr_test ) + { + switch ExprTest + case [expr_test Op: test_op expr_shift] + { + expr_test( ExprTest._expr_test ) + send Parser [Op] + expr_shift( ExprTest.expr_shift ) + } + case [expr_shift] + { + expr_shift( ExprTest.expr_shift ) + } + } + + void expr( Expr: expr ) + { + expr_test( Expr.expr_test ) + } + + void number( Number: number ) + { + switch Number + case [`u `( uint `) ] + send Parser [Number.uint] + default + send Parser [Number] + } + + void type( Type: type ) + { + switch Type + case 'u8' + send Parser 'Uint8' + case 'u16' + send Parser 'Uint16' + case 'u32' + send Parser 'Uint32' + case 's8' + send Parser 'Int8' + case 's16' + send Parser 'Int16' + case 's32' + send Parser 'Int32' + case 'Uint8' + send Parser 'Uint8' + default + send Parser 'Float64' + } + + void num_list( NumList: num_list ) + { + number( NumList.number ) + for CommaNum: comma_num in NumList { + send Parser [', '] + number( CommaNum.number ) + } + } + + void stmt( Stmt: stmt ) + { + switch Stmt + case [EH: embedded_host] + { + send Parser + [embedded_host( EH )] + } + case [A: static_array] { + send Parser + "var [A.ident] = new [type(A.type)]Array(\[ [num_list(A.num_list)]\]); + } + case [V: static_value] { + send Parser + "var [V.ident] = [V.number]; + } + case [ + 'if' O: `( IfExpr: expr C: `) IfStmt: stmt + ElseIfClauseList: else_if_clause* ElseClauseOpt: else_clause? 
+ ] { + send Parser + "if ( [expr(IfExpr)] ) + " [stmt(IfStmt)] + + for ElseIfClause: else_if_clause in repeat( ElseIfClauseList ) { + match ElseIfClause + ['else if (' ElseIfExpr: expr ')' ElseIfStmt: stmt] + + send Parser + "else if ( [expr(ElseIfExpr)] ) + " [stmt(ElseIfStmt)] + } + + if ( match ElseClauseOpt ['else' ElseStmt: stmt] ) { + send Parser + "else + " [stmt(ElseStmt)] + } + } + case ['while' '(' WhileExpr: expr ')' WhileStmt: stmt] { + send Parser + "while ( [expr(WhileExpr)] ) + " [stmt(WhileStmt)] + } + case ['switch' '(' SwitchExpr: expr ')' '{' StmtList: stmt* '}'] { + send Parser + "switch ( [expr(SwitchExpr)] ) { + " [stmt_list(StmtList)] + "} + } + case [ExprExpr: expr Semi: `;] { + send Parser + [expr(ExprExpr) Semi] + } + case [L: `{ TL: stmt* R: `}] { + send Parser + "{ + " [stmt_list(TL)] + "} + } + case [ + TypeList: opt_const Type: type + Ident: ident OptInit: opt_init Semi: `; + ] + { + send Parser + "var [Ident] + + if match OptInit [E: `= expr] { + send Parser + [E expr(OptInit.expr)] + } + + send Parser + [Semi] + } + case [Export: export_stmt] + { + send Parser + "var [Export.ident] = [number(Export.number)]; + } + case ['fallthrough' ';'] + { + # Nothing needed here. 
+ } + case [Index: index_stmt] + { + send Parser + "var [Index.ident]; + } + case [case_block] + { + send Parser + "case [expr( Stmt.case_block.expr )]: + "[stmt_list( Stmt.case_block._repeat_stmt )] + "break; + } + case [default_block] + { + send Parser + "default: + "[stmt_list( Stmt.default_block._repeat_stmt )] + "break; + } + case [goto_label] + { + send Parser "{}" + } + case [G: goto_stmt] + { + send Parser + "{ + " _goto_targ = [$G.Id]; + " continue _goto; + "} + } + case [entry_loop] + { + send Parser + "var _goto_targ = 0; + "_goto: do { + " switch ( _goto_targ ) { + " case 0: + " [stmt_list( Stmt.entry_loop._repeat_stmt )] + " } + " break; + "} while ( true ) + } + case [label_stmt] + { + send Parser + "case [Stmt.label_stmt.Id]: + "[stmt_list( Stmt.label_stmt._repeat_stmt )] + } + case [AS: assign_stmt] + { + send Parser + "[lvalue(AS.LValue) AS.assign_op expr(AS.expr)]; + } + default { + # catches unspecified cases + send Parser [Stmt] + } + } + + void stmt_list( StmtList: stmt* ) + { + for Stmt: stmt in repeat( StmtList ) + stmt( Stmt ) + } + + void trans( Output: stream, Start: start ) + { + Parser = new parser() + + stmt_list( Start._repeat_stmt ) + + CO: js_out::js_out = Parser->finish() + + if CO { + send Output + [CO] + } + else { + send stderr + "failed to parse output: [Parser->error] + } + } +end diff --git a/src/rlhc.lm b/src/rlhc.lm index 1fd1f74..48fdecd 100644 --- a/src/rlhc.lm +++ b/src/rlhc.lm @@ -9,6 +9,7 @@ include 'rlhc-ocaml.lm' include 'rlhc-rust.lm' include 'rlhc-crack.lm' include 'rlhc-julia.lm' +include 'rlhc-js.lm' str argvPop() { @@ -70,6 +71,8 @@ elsif ( Lang == 'crack' ) crack_gen::trans( Output, Start ) elsif ( Lang == 'julia' ) julia_gen::trans( Output, Start ) +elsif ( Lang == 'js' ) + js_gen::trans( Output, Start ) else { print( 'rlhc: unrecognized language: ', Lang, '\n' ) } diff --git a/src/xml.cc b/src/xml.cc index 0ff8b9b..53f8ba7 100644 --- a/src/xml.cc +++ b/src/xml.cc @@ -713,6 +713,7 @@ void 
InputData::writeLanguage( std::ostream &out ) case HostLang::Asm: out << "ASM"; break; case HostLang::Rust: out << "Rust"; break; case HostLang::Julia: out << "Julia"; break; + case HostLang::JS: out << "JavaScript"; break; } out << "\""; } From c9c532f3dc88e02a2affc238d0df7a5de6751582 Mon Sep 17 00:00:00 2001 From: Ingvar Stepanyan Date: Fri, 16 Oct 2015 16:25:20 +0100 Subject: [PATCH 2/4] Change type names for easier conversion. --- src/common.cc | 14 +++++++------- src/rlhc-js.lm | 6 ++---- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/common.cc b/src/common.cc index 010628f..cb56d0d 100644 --- a/src/common.cc +++ b/src/common.cc @@ -119,13 +119,13 @@ HostType hostTypesJulia[] = HostType hostTypesJS[] = { - { "Int8", 0, "int8", true, true, false, CHAR_MIN, CHAR_MAX, 1 }, - { "Uint8", 0, "uint8", false, true, false, 0, UCHAR_MAX, 1 }, - { "Int16", 0, "int16", true, true, false, SHRT_MIN, SHRT_MAX, 2 }, - { "Uint16", 0, "ushort", false, true, false, 0, USHRT_MAX, 2 }, - { "Int32", 0, "int", true, true, false, INT_MIN, INT_MAX, 4 }, - { "Uint32", 0, "uint", false, true, false, 0, UINT_MAX, 4 }, - { "Float64", 0, "number", true, true, false, LONG_MIN, LONG_MAX, 8 }, + { "s8", 0, "int8", true, true, false, CHAR_MIN, CHAR_MAX, 1 }, + { "u8", 0, "uint8", false, true, false, 0, UCHAR_MAX, 1 }, + { "s16", 0, "int16", true, true, false, SHRT_MIN, SHRT_MAX, 2 }, + { "u16", 0, "uint16", false, true, false, 0, USHRT_MAX, 2 }, + { "s32", 0, "int32", true, true, false, INT_MIN, INT_MAX, 4 }, + { "u32", 0, "uint32", false, true, false, 0, UINT_MAX, 4 }, + { "number", 0, "number", true, true, false, LONG_MIN, LONG_MAX, 8 }, }; const HostLang hostLangC = { diff --git a/src/rlhc-js.lm b/src/rlhc-js.lm index c4bea0b..c6f0b06 100644 --- a/src/rlhc-js.lm +++ b/src/rlhc-js.lm @@ -315,8 +315,6 @@ namespace js_gen send Parser 'Int16' case 's32' send Parser 'Int32' - case 'Uint8' - send Parser 'Uint8' default send Parser 'Float64' } @@ -450,13 +448,13 @@ namespace 
js_gen { send Parser "var _goto_targ = 0; - "_goto: do { + "_goto: while ( true ) { " switch ( _goto_targ ) { " case 0: " [stmt_list( Stmt.entry_loop._repeat_stmt )] " } " break; - "} while ( true ) + "} } case [label_stmt] { From eeda2dece468b1b8509f292738560ee1b7586d76 Mon Sep 17 00:00:00 2001 From: Ingvar Stepanyan Date: Fri, 16 Oct 2015 20:21:17 +0100 Subject: [PATCH 3/4] Enforce strict mode & comparisons in JavaScript target. --- src/rlhc-js.lm | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/rlhc-js.lm b/src/rlhc-js.lm index c6f0b06..29d446d 100644 --- a/src/rlhc-js.lm +++ b/src/rlhc-js.lm @@ -271,13 +271,24 @@ namespace js_gen } } + void expr_test_op( Op: test_op ) + { + switch Op + case [ `== ] + send Parser '===' + case [ `!= ] + send Parser '!==' + default + send Parser [Op] + } + void expr_test( ExprTest: expr_test ) { switch ExprTest - case [expr_test Op: test_op expr_shift] + case [expr_test test_op expr_shift] { expr_test( ExprTest._expr_test ) - send Parser [Op] + expr_test_op( ExprTest.test_op ) expr_shift( ExprTest.expr_shift ) } case [expr_shift] @@ -483,6 +494,9 @@ namespace js_gen { Parser = new parser() + send Parser + "'use strict'; + stmt_list( Start._repeat_stmt ) CO: js_out::js_out = Parser->finish() From 0b72bf4794944e736c899b32c89877b5c0374710 Mon Sep 17 00:00:00 2001 From: Ingvar Stepanyan Date: Mon, 19 Oct 2015 11:25:46 +0100 Subject: [PATCH 4/4] Emit -1 as replacement for `nil`. --- src/rlhc-js.lm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rlhc-js.lm b/src/rlhc-js.lm index 29d446d..79e4f11 100644 --- a/src/rlhc-js.lm +++ b/src/rlhc-js.lm @@ -140,7 +140,7 @@ namespace js_gen } case [N: `nil] { - N.data = '0' + N.data = '-1' send Parser [N] } case [Number: number]