Skip to content

Commit

Permalink
refactor lexer generation so that in memory and to source are the same
Browse files Browse the repository at this point in the history
  • Loading branch information
zaach committed Mar 30, 2014
1 parent 5885133 commit 0830aa0
Show file tree
Hide file tree
Showing 5 changed files with 290 additions and 122 deletions.
29 changes: 28 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# jison-lex
A lexical analyzer generator used by [jison](http://jison.org).
A lexical analyzer generator used by [jison](http://jison.org). It takes a lexical grammar definition (either in JSON or Bison's lexical grammar format) and outputs a JavaScript lexer.

## install
npm install jison-lex -g
Expand All @@ -16,5 +16,32 @@ Options:
--version print version and exit
```

## programmatic usage

```
var JisonLex = require('jison-lex');
var grammar = {
rules: [
["x", "return 'X';" ],
["y", "return 'Y';" ],
["$", "return 'EOF';" ]
]
};
// or load from a file
// var grammar = fs.readFileSync('mylexer.l', 'utf8');
// generate source
var lexerSource = JisonLex.generate(grammar);
// or create a lexer in memory
var lexer = new JisonLex(grammar);
lexer.setInput('xyxxy');
lexer.lex();
// => 'X'
lexer.lex();
// => 'Y'
```

## license
MIT
10 changes: 4 additions & 6 deletions cli.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,9 @@ var opts = require("nomnom")
callback: function() {
return version;
}
})
.parse();
});

exports.main = function () {
exports.main = function (opts) {
if (opts.file) {
var raw = fs.readFileSync(path.normalize(opts.file), 'utf8'),
name = path.basename((opts.outfile||opts.file)).replace(/\..*$/g,'');
Expand Down Expand Up @@ -67,8 +66,7 @@ function processGrammar (file, name) {

grammar.options = settings;

var lexer = new RegExpLexer(grammar);
return lexer.generate(settings);
return RegExpLexer.generate(grammar);
}

function readin (cb) {
Expand All @@ -85,4 +83,4 @@ function readin (cb) {
}

if (require.main === module)
exports.main();
exports.main(opts.parse());
90 changes: 90 additions & 0 deletions examples/lex.l
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@

// Definitions section of the lexical grammar.
// NAME matches an identifier: a letter or underscore followed by any number of
// letters, digits, underscores or hyphens.
// BR matches one line break in CRLF, LF or CR form.
// The %s and %x lines declare the start conditions used by the rules section;
// the grammar itself tokenises %s as START_INC and %x as START_EXC.
NAME [a-zA-Z_][a-zA-Z0-9_-]*
BR \r\n|\n|\r

%s indented trail rules
%x code start_condition options conditions action

%%

// State `action`: inside a brace-delimited action body.
// Comments, quoted strings, regex-looking literals and other runs of text are
// all returned as ACTION_BODY. yy.depth is incremented on "{"; on "}" it is
// decremented, or, when it is already 0, the lexer switches to `trail`.
<action>"/*"(.|\n|\r)*?"*/" return 'ACTION_BODY';
<action>"//".* return 'ACTION_BODY';
<action>"/"[^ /]*?['"{}'][^ ]*?"/" return 'ACTION_BODY'; // regexp with braces or quotes (and no spaces)
<action>\"("\\\\"|'\"'|[^"])*\" return 'ACTION_BODY';
<action>"'"("\\\\"|"\'"|[^'])*"'" return 'ACTION_BODY';
<action>[/"'][^{}/"']+ return 'ACTION_BODY';
<action>[^{}/"']+ return 'ACTION_BODY';
<action>"{" yy.depth++; return '{'
<action>"}" yy.depth == 0 ? this.begin('trail') : yy.depth--; return '}'

// State `conditions`: the start-condition list that follows "<".
// Names, "," and "*" are returned as tokens; ">" pops the state.
<conditions>{NAME} return 'NAME';
<conditions>">" this.popState(); return '>';
<conditions>"," return ',';
<conditions>"*" return '*';

// State `rules`: line breaks (optionally preceded by whitespace) are skipped,
// other whitespace switches to `indented`, and "%%" switches to `code`.
<rules>{BR}+ /* */
<rules>\s+{BR}+ /* */
<rules>\s+ this.begin('indented')
<rules>"%%" this.begin('code'); return '%%'
<rules>[a-zA-Z0-9_]+ return 'CHARACTER_LIT'

// State `options`: every NAME is recorded as yy.options[name] = true;
// a line break returns to INITIAL.
<options>{NAME} yy.options[yytext] = true
<options>{BR}+ this.begin('INITIAL')
<options>\s+{BR}+ this.begin('INITIAL')
<options>\s+ /* empty */

// State `start_condition`: every NAME is returned as START_COND;
// a line break returns to INITIAL.
<start_condition>{NAME} return 'START_COND'
<start_condition>{BR}+ this.begin('INITIAL')
<start_condition>\s+{BR}+ this.begin('INITIAL')
<start_condition>\s+ /* empty */

// State `trail`: discard the rest of the line (through its line breaks) and
// go back to `rules`.
<trail>.*{BR}+ this.begin('rules')

// State `indented`: "{" resets yy.depth to 0 and enters `action`; a "%{ ... %}"
// block is returned as ACTION with the two-character delimiters stripped;
// anything else on the line is returned as ACTION and the state returns to `rules`.
<indented>"{" yy.depth = 0; this.begin('action'); return '{'
<indented>"%{"(.|{BR})*?"%}" this.begin('trail'); yytext = yytext.substr(2, yytext.length-4);return 'ACTION'
"%{"(.|{BR})*?"%}" yytext = yytext.substr(2, yytext.length-4); return 'ACTION'
<indented>.+ this.begin('rules'); return 'ACTION'
// Rules without a start-condition prefix: comments and whitespace are skipped,
// string literals have their escaped quotes unescaped, and the remaining rules
// return the tokens of the lexical-grammar syntax. "<", "%options", "%s", "%x"
// and "%%" also switch start conditions.
"/*"(.|\n|\r)*?"*/" /* ignore */
"//".* /* ignore */
{BR}+ /* */
\s+ /* */
{NAME} return 'NAME';
\"("\\\\"|'\"'|[^"])*\" yytext = yytext.replace(/\\"/g,'"'); return 'STRING_LIT';
"'"("\\\\"|"\'"|[^'])*"'" yytext = yytext.replace(/\\'/g,"'"); return 'STRING_LIT';
"|" return '|';
"["("\\\\"|"\]"|[^\]])*"]" return 'ANY_GROUP_REGEX';
"(?:" return 'SPECIAL_GROUP';
"(?=" return 'SPECIAL_GROUP';
"(?!" return 'SPECIAL_GROUP';
"(" return '(';
")" return ')';
"+" return '+';
"*" return '*';
"?" return '?';
"^" return '^';
"," return ',';
"<<EOF>>" return '$';
"<" this.begin('conditions'); return '<';
"/!" return '/!';
"/" return '/';
"\\"([0-7]{1,3}|[rfntvsSbBwWdD\\*+()${}|[\]\/.^?]|"c"[A-Z]|"x"[0-9A-F]{2}|"u"[a-fA-F0-9]{4}) return 'ESCAPE_CHAR';
"\\". yytext = yytext.replace(/^\\/g,''); return 'ESCAPE_CHAR';
"$" return '$';
"." return '.';
"%options" yy.options = {}; this.begin('options');
"%s" this.begin('start_condition'); return 'START_INC';
"%x" this.begin('start_condition'); return 'START_EXC';
"%%" this.begin('rules'); return '%%';
"{"\d+(","\s?\d+|",")?"}" return 'RANGE_REGEX';
"{"{NAME}"}" return 'NAME_BRACE';
"{" return '{';
"}" return '}';
. /* ignore bad characters */
<*><<EOF>> return 'EOF';
<code>(.|{BR})+ return 'CODE';
%%
Loading

0 comments on commit 0830aa0

Please sign in to comment.