diff --git a/README.md b/README.md index b7f6206..92c270c 100644 --- a/README.md +++ b/README.md @@ -25,10 +25,12 @@ $ go install github.com/nihei9/vartan/cmd/vartan-go@latest vartan uses BNF-like DSL to define your grammar. As an example, let's write a grammar that represents a simple expression. ``` -%name expr +#name expr; -%left mul div -%left add sub +#prec ( + #left mul div + #left add sub +); expr : expr add expr @@ -234,7 +236,7 @@ exit status 1 ### Grammar name -A grammar name `%name ` is an identifier that represents a grammar name. For now, this identifier is used as a file name generated like _\_parser.go_. +A grammar name `#name ` is an identifier that represents a grammar name. For now, this identifier is used as a file name generated like _\_parser.go_. ### Production rules @@ -325,7 +327,7 @@ example 1: Consider a grammar that accepts comma-separated list of integers. You can avoid including brackets and commas in an AST by specifying only the necessary symbols int the `#ast` directive parameters. Also, you can flatten an AST using `...` operator. `...` operator expands child nodes of a specified symbol. ``` -%name example +#name example; list : '[' elems ']' #ast elems... @@ -356,7 +358,7 @@ example 2: Consider a grammar that accepts ternary-if expression (` ? : `). As this grammar contains two `int` symbols, you need to add labels to each symbol to distinguish them. A label consists of `@` + an identifier. ``` -%name example +#name example; if_expr : id '?' int@true ':' int@false #ast id true false @@ -405,7 +407,7 @@ An `#alias` directive aliases for a terminal symbol. You can use the alias in er example: ``` -%name example +#name example; s : id @@ -424,7 +426,7 @@ When the parser shifts a terminal symbol having the `#push` directive, the curre example: ``` -%name example +#name example; tag_pairs : tag_pairs tag_pair @@ -471,7 +473,7 @@ The parser doesn't shift a terminal symbol having a `#skip` directive. 
In other example: ``` -%name example +#name example; s : foo bar @@ -497,7 +499,7 @@ foobar ### Operator precedence and associativity -`%left` and `%right` allow you to define precedence and associativiry of symbols. `%left`/`%right` each assign the left/right associativity to symbols. +`#left` and `#right` directives allow you to define precedence and associativity of symbols. `#left`/`#right` each assign the left/right associativity to symbols. When the right-most terminal symbol of an alternative has precedence or associativity defined explicitly, the alternative inherits its precedence and associativity. @@ -506,11 +508,13 @@ When the right-most terminal symbol of an alternative has precedence or associat The grammar for simple four arithmetic operations and assignment expression can be defined as follows: ``` -%name example +#name example; -%left mul div -%left add sub -%right assign +#prec ( + #left mul div + #left add sub + #right assign +); expr : expr add expr @@ -541,11 +545,11 @@ assign : '='; ``` -`%left` and `%right` can appear multiple times, and the first symbols applied to will have the highest precedence. That is, `mul` and `div` have the highest precedence, and `assign` has the lowest precedence. +`#left` and `#right` can appear multiple times, and the first symbols applied to will have the highest precedence. That is, `mul` and `div` have the highest precedence, and `assign` has the lowest precedence. ⚠️ In many Yacc-like tools, the last symbols defined have the highest precedence. Not that in vartan, it is the opposite. -When you compile the above grammar, some conflicts occur. However, vartan can resolve the conflicts following `%left`, `%right`, and `#prec`. +When you compile the above grammar, some conflicts occur. However, vartan can resolve the conflicts following `#left`, `#right`, and `#prec`. 
``` $ echo -n 'foo = bar = x * -1 / -2' | vartan parse example.json @@ -572,7 +576,7 @@ expr Incidentally, using no directives, you can define the above example as the following grammar: ``` -%name example +#name example; expr : id assign expr @@ -651,7 +655,7 @@ When a syntax error occurs, the parser pops states from a state stack. If a stat Consider grammar of simple assignment statements. ``` -%name example +#name example; statements : statements statement #ast statements... statement diff --git a/driver/conflict_test.go b/driver/conflict_test.go index 7672c00..042d932 100644 --- a/driver/conflict_test.go +++ b/driver/conflict_test.go @@ -18,7 +18,7 @@ func TestParserWithConflicts(t *testing.T) { { caption: "when a shift/reduce conflict occurred, we prioritize the shift action", specSrc: ` -%name test +#name test; expr : expr assign expr @@ -48,7 +48,7 @@ assign: '='; { caption: "when a reduce/reduce conflict occurred, we prioritize the production defined earlier in the grammar", specSrc: ` -%name test +#name test; s : a @@ -73,10 +73,12 @@ id: "[A-Za-z0-9_]+"; { caption: "left associativities defined earlier in the grammar have higher precedence", specSrc: ` -%name test +#name test; -%left mul -%left add +#prec ( + #left mul + #left add +); expr : expr add expr @@ -120,9 +122,11 @@ mul: '*'; { caption: "left associativities defined in the same line have the same precedence", specSrc: ` -%name test +#name test; -%left add sub +#prec ( + #left add sub +); expr : expr add expr @@ -166,10 +170,12 @@ sub: '-'; { caption: "right associativities defined earlier in the grammar have higher precedence", specSrc: ` -%name test +#name test; -%right r1 -%right r2 +#prec ( + #right r1 + #right r2 +); expr : expr r2 expr @@ -218,9 +224,11 @@ id { caption: "right associativities defined in the same line have the same precedence", specSrc: ` -%name test +#name test; -%right r1 r2 +#prec ( + #right r1 r2 +); expr : expr r2 expr @@ -269,11 +277,13 @@ id { caption: "left and right 
associativities can be mixed", specSrc: ` -%name test +#name test; -%left mul div -%left add sub -%right assign +#prec ( + #left mul div + #left add sub + #right assign +); expr : expr add expr diff --git a/driver/lac_test.go b/driver/lac_test.go index c127c0b..54001d5 100644 --- a/driver/lac_test.go +++ b/driver/lac_test.go @@ -10,7 +10,7 @@ import ( func TestParserWithLAC(t *testing.T) { specSrc := ` -%name test +#name test; S : C C diff --git a/driver/parser_test.go b/driver/parser_test.go index c717205..c37d268 100644 --- a/driver/parser_test.go +++ b/driver/parser_test.go @@ -43,7 +43,7 @@ func TestParser_Parse(t *testing.T) { }{ { specSrc: ` -%name test +#name test; expr : expr "\+" term @@ -117,7 +117,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; // The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node. { specSrc: ` -%name test +#name test; s : foo bar @@ -140,7 +140,7 @@ bar_text: "bar"; // The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node. { specSrc: ` -%name test +#name test; s : foo bar @@ -165,10 +165,12 @@ bar_text: "bar"; // A production can have multiple alternative productions. { specSrc: ` -%name test +#name test; -%left mul div -%left add sub +#prec ( + #left mul div + #left add sub +); expr : expr add expr @@ -224,7 +226,7 @@ div // A lexical production can have multiple production directives. { specSrc: ` -%name test +#name test; s : push_a push_b pop pop @@ -247,7 +249,7 @@ pop #mode a b #pop }, { specSrc: ` -%name test +#name test; mode_tran_seq : mode_tran_seq mode_tran @@ -275,7 +277,7 @@ whitespace #mode default m1 m2 #skip }, { specSrc: ` -%name test +#name test; s : foo bar @@ -291,7 +293,7 @@ bar #mode default // When #push and #pop are applied to the same symbol, #pop will run first, then #push. { specSrc: ` -%name test +#name test; s : foo bar baz @@ -316,7 +318,7 @@ baz #mode m2 // they are executed. 
{ specSrc: ` -%name test +#name test; s : foo bar baz @@ -339,7 +341,7 @@ baz #mode m2 // The parser can skips specified tokens. { specSrc: ` -%name test +#name test; s : foo bar @@ -357,7 +359,7 @@ white_space #skip // A grammar can contain fragments. { specSrc: ` -%name test +#name test; s : tagline @@ -370,7 +372,7 @@ fragment words: "[A-Za-z\u{0020}]+"; // A grammar can contain ast actions. { specSrc: ` -%name test +#name test; list : "\[" elems "]" #ast elems... @@ -410,7 +412,7 @@ id // The '...' operator can expand child nodes. { specSrc: ` -%name test +#name test; s : a #ast a... @@ -433,7 +435,7 @@ foo // The '...' operator also can applied to an element having no children. { specSrc: ` -%name test +#name test; s : a ';' #ast a... @@ -449,9 +451,11 @@ a // A label can be a parameter of #ast directive. { specSrc: ` -%name test +#name test; -%left add sub +#prec ( + #left add sub +); expr : expr@lhs add expr@rhs #ast add lhs rhs @@ -482,7 +486,7 @@ num: "0|[1-9][0-9]*"; // An AST can contain a symbol name, even if the symbol has a label. That is, unused labels are allowed. { specSrc: ` -%name test +#name test; s : foo@x ';' #ast foo @@ -499,9 +503,11 @@ foo // A production has the same precedence and associativity as the right-most terminal symbol. { specSrc: ` -%name test +#name test; -%left add +#prec ( + #left add +); expr : expr add expr // This alternative has the same precedence and associativiry as 'add'. @@ -544,10 +550,12 @@ add // The 'prec' directive can set precedence of a production. { specSrc: ` -%name test +#name test; -%left mul div -%left add sub +#prec ( + #left mul div + #left add sub +); expr : expr add expr @@ -603,7 +611,7 @@ div // The grammar can contain the 'error' symbol. { specSrc: ` -%name test +#name test; s : id id id ';' @@ -620,7 +628,7 @@ id // The 'error' symbol can appear in an #ast directive. 
{ specSrc: ` -%name test +#name test; s : foo ';' @@ -639,7 +647,7 @@ foo // The 'error' symbol can have a label, and an #ast can reference it. { specSrc: ` -%name test +#name test; s : foo ';' @@ -658,7 +666,7 @@ foo // The grammar can contain the 'recover' directive. { specSrc: ` -%name test +#name test; seq : seq elem @@ -679,7 +687,7 @@ id // The same label can be used between different alternatives. { specSrc: ` -%name test +#name test; s : foo@x bar diff --git a/driver/semantic_action_test.go b/driver/semantic_action_test.go index f9708b7..ad58780 100644 --- a/driver/semantic_action_test.go +++ b/driver/semantic_action_test.go @@ -47,7 +47,7 @@ func (a *testSemAct) MissError(cause VToken) { func TestParserWithSemanticAction(t *testing.T) { specSrcWithErrorProd := ` -%name test +#name test; seq : seq elem semicolon @@ -70,7 +70,7 @@ char ` specSrcWithoutErrorProd := ` -%name test +#name test; seq : seq elem semicolon diff --git a/driver/syntax_error_test.go b/driver/syntax_error_test.go index 329ccef..93cf637 100644 --- a/driver/syntax_error_test.go +++ b/driver/syntax_error_test.go @@ -19,7 +19,7 @@ func TestParserWithSyntaxErrors(t *testing.T) { { caption: "the parser can report a syntax error", specSrc: ` -%name test +#name test; s : foo @@ -33,7 +33,7 @@ foo: 'foo'; { caption: "when the parser reduced a production having the reduce directive, the parser will recover from an error state", specSrc: ` -%name test +#name test; seq : seq elem ';' @@ -59,7 +59,7 @@ c { caption: "After the parser shifts the error symbol, symbols are ignored until a symbol the parser can perform shift appears", specSrc: ` -%name test +#name test; seq : seq elem ';' @@ -87,7 +87,7 @@ c { caption: "when the parser performs shift three times, the parser recovers from the error state", specSrc: ` -%name test +#name test; seq : seq elem ';' diff --git a/grammar/first_test.go b/grammar/first_test.go index 578825e..134f4bd 100644 --- a/grammar/first_test.go +++ b/grammar/first_test.go @@ 
-24,7 +24,7 @@ func TestGenFirst(t *testing.T) { { caption: "productions contain only non-empty productions", src: ` -%name test +#name test; expr : expr add term @@ -63,7 +63,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; { caption: "productions contain the empty start production", src: ` -%name test +#name test; s : @@ -77,7 +77,7 @@ s { caption: "productions contain an empty production", src: ` -%name test +#name test; s : foo bar @@ -96,7 +96,7 @@ bar: "bar"; { caption: "a start production contains a non-empty alternative and empty alternative", src: ` -%name test +#name test; s : foo @@ -113,7 +113,7 @@ foo: "foo"; { caption: "a production contains non-empty alternative and empty alternative", src: ` -%name test +#name test; s : foo diff --git a/grammar/follow_test.go b/grammar/follow_test.go index af3f064..719582f 100644 --- a/grammar/follow_test.go +++ b/grammar/follow_test.go @@ -22,7 +22,7 @@ func TestFollowSet(t *testing.T) { { caption: "productions contain only non-empty productions", src: ` -%name test +#name test; expr : expr add term @@ -52,7 +52,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; { caption: "productions contain an empty start production", src: ` -%name test +#name test; s : @@ -66,7 +66,7 @@ s { caption: "productions contain an empty production", src: ` -%name test +#name test; s : foo @@ -84,7 +84,7 @@ foo { caption: "a start production contains a non-empty alternative and empty alternative", src: ` -%name test +#name test; s : foo @@ -100,7 +100,7 @@ foo: "foo"; { caption: "a production contains non-empty alternative and empty alternative", src: ` -%name test +#name test; s : foo diff --git a/grammar/grammar.go b/grammar/grammar.go index d46d70e..da0460b 100644 --- a/grammar/grammar.go +++ b/grammar/grammar.go @@ -109,30 +109,30 @@ func (b *GrammarBuilder) Build() (*Grammar, error) { var specName string { errOccurred := false - for _, md := range b.AST.MetaData { - if md.Name != "name" { + for _, dir := range b.AST.Directives { + if dir.Name != "name" { 
continue } - if len(md.Parameters) != 1 || md.Parameters[0].ID == "" { + if len(dir.Parameters) != 1 || dir.Parameters[0].ID == "" { b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrMDInvalidParam, + Cause: semErrDirInvalidParam, Detail: "'name' takes just one ID parameter", - Row: md.Pos.Row, - Col: md.Pos.Col, + Row: dir.Pos.Row, + Col: dir.Pos.Col, }) errOccurred = true break } - specName = md.Parameters[0].ID + specName = dir.Parameters[0].ID break } if specName == "" && !errOccurred { b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrMDMissingName, + Cause: semErrNoGrammarName, }) } } @@ -986,40 +986,63 @@ func (b *GrammarBuilder) genPrecAndAssoc(symTab *symbolTable, prods *productionS termPrec := map[symbolNum]int{} termAssoc := map[symbolNum]assocType{} { + var precGroup []*spec.DirectiveNode + for _, dir := range b.AST.Directives { + if dir.Name == "prec" { + if dir.Parameters == nil || len(dir.Parameters) != 1 || dir.Parameters[0].Group == nil { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: "'prec' needs just one directive group", + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }) + continue + } + precGroup = dir.Parameters[0].Group + continue + } + + if dir.Name != "name" && dir.Name != "prec" { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidName, + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }) + continue + } + } + precN := precMin - for _, md := range b.AST.MetaData { + for _, dir := range precGroup { var assocTy assocType - switch md.Name { + switch dir.Name { case "left": assocTy = assocTypeLeft case "right": assocTy = assocTypeRight - case "name": - // Since `name` is used for a purpose other than priority, we will ignore it here. 
- continue default: b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrMDInvalidName, - Row: md.Pos.Row, - Col: md.Pos.Col, + Cause: semErrDirInvalidName, + Row: dir.Pos.Row, + Col: dir.Pos.Col, }) return nil, nil } - if len(md.Parameters) == 0 { + if len(dir.Parameters) == 0 { b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrMDInvalidParam, + Cause: semErrDirInvalidParam, Detail: "associativity needs at least one symbol", - Row: md.Pos.Row, - Col: md.Pos.Col, + Row: dir.Pos.Row, + Col: dir.Pos.Col, }) return nil, nil } ASSOC_PARAM_LOOP: - for _, p := range md.Parameters { + for _, p := range dir.Parameters { if p.ID == "" { b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrMDInvalidParam, + Cause: semErrDirInvalidParam, Detail: "a parameter must be an ID", Row: p.Pos.Row, Col: p.Pos.Col, @@ -1030,7 +1053,7 @@ func (b *GrammarBuilder) genPrecAndAssoc(symTab *symbolTable, prods *productionS sym, ok := symTab.toSymbol(p.ID) if !ok { b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrMDInvalidParam, + Cause: semErrDirInvalidParam, Detail: fmt.Sprintf("'%v' is undefined", p.ID), Row: p.Pos.Row, Col: p.Pos.Col, @@ -1039,7 +1062,7 @@ func (b *GrammarBuilder) genPrecAndAssoc(symTab *symbolTable, prods *productionS } if !sym.isTerminal() { b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrMDInvalidParam, + Cause: semErrDirInvalidParam, Detail: fmt.Sprintf("associativity can take only terminal symbol ('%v' is a non-terminal)", p.ID), Row: p.Pos.Row, Col: p.Pos.Col, diff --git a/grammar/grammar_test.go b/grammar/grammar_test.go index 46458b0..07d8b58 100644 --- a/grammar/grammar_test.go +++ b/grammar/grammar_test.go @@ -17,9 +17,10 @@ func TestGrammarBuilderOK(t *testing.T) { nameTests := []*okTest{ { - caption: "the `%name` can be the same identifier as a non-terminal symbol", + caption: "the `#name` can be the same identifier as a non-terminal symbol", specSrc: ` -%name s +#name s; + s : foo ; @@ -35,9 +36,10 @@ foo }, }, { - caption: "the 
`%name` can be the same identifier as a terminal symbol", + caption: "the `#name` can be the same identifier as a terminal symbol", specSrc: ` -%name foo +#name foo; + s : foo ; @@ -53,9 +55,10 @@ foo }, }, { - caption: "the `%name` can be the same identifier as the error symbol", + caption: "the `#name` can be the same identifier as the error symbol", specSrc: ` -%name error +#name error; + s : foo | error @@ -72,9 +75,10 @@ foo }, }, { - caption: "the `%name` can be the same identifier as a fragment", + caption: "the `#name` can be the same identifier as a fragment", specSrc: ` -%name f +#name f; + s : foo ; @@ -97,7 +101,8 @@ fragment f { caption: "a `#mode` can be the same identifier as a non-terminal symbol", specSrc: ` -%name test +#name test; + s : foo bar ; @@ -121,7 +126,8 @@ bar #mode s { caption: "a `#mode` can be the same identifier as a terminal symbol", specSrc: ` -%name test +#name test; + s : foo bar ; @@ -145,7 +151,8 @@ bar #mode bar { caption: "a `#mode` can be the same identifier as the error symbol", specSrc: ` -%name test +#name test; + s : foo bar | error @@ -170,7 +177,8 @@ bar #mode error { caption: "a `#mode` can be the same identifier as a fragment", specSrc: ` -%name test +#name test; + s : foo bar ; @@ -193,12 +201,29 @@ fragment f t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) }, }, + { + caption: "a `#prec` allows the empty directive group", + specSrc: ` +#name test; + +#prec (); + +s + : foo + ; + +foo + : 'foo'; +`, + }, { caption: "a production has the same precedence and associativity as the right-most terminal symbol", specSrc: ` -%name test +#name test; -%left foo +#prec ( + #left foo +); s : foo bar // This alternative has the same precedence and associativity as the right-most terminal symbol 'bar', not 'foo'. 
@@ -236,10 +261,12 @@ bar { caption: "a production has the same precedence and associativity as the right-most terminal symbol", specSrc: ` -%name test +#name test; -%left foo -%right bar +#prec ( + #left foo + #right bar +); s : foo bar // This alternative has the same precedence and associativity as the right-most terminal symbol 'bar'. @@ -275,11 +302,13 @@ bar }, }, { - caption: "the `#prec` directive changes only precedence, not associativity", + caption: "the `#prec` directive applied to an alternative changes only precedence, not associativity", specSrc: ` -%name test +#name test; -%left foo +#prec ( + #left foo +); s : foo bar #prec foo @@ -315,12 +344,14 @@ bar }, }, { - caption: "the `#prec` directive changes only precedence, not associativity", + caption: "the `#prec` directive applied to an alternative changes only precedence, not associativity", specSrc: ` -%name test +#name test; -%left foo -%right bar +#prec ( + #left foo + #right bar +); s : foo bar #prec foo @@ -385,7 +416,9 @@ bar if err != nil { t.Fatalf("unexpected error: %v", err) } - test.validate(t, g) + if test.validate != nil { + test.validate(t, g) + } }) } } @@ -401,7 +434,7 @@ func TestGrammarBuilderSpecError(t *testing.T) { { caption: "a production `b` is unused", specSrc: ` -%name test +#name test; a : foo @@ -418,7 +451,7 @@ foo { caption: "a terminal symbol `bar` is unused", specSrc: ` -%name test +#name test; s : foo @@ -434,7 +467,7 @@ bar { caption: "a production `b` and terminal symbol `bar` is unused", specSrc: ` -%name test +#name test; a : foo @@ -456,7 +489,7 @@ bar { caption: "a production cannot have production directives", specSrc: ` -%name test +#name test; s #prec foo : foo @@ -470,7 +503,7 @@ foo { caption: "a lexical production cannot have alternative directives", specSrc: ` -%name test +#name test; s : foo @@ -484,7 +517,7 @@ foo { caption: "a production directive must not be duplicated", specSrc: ` -%name test +#name test; s : foo @@ -498,7 +531,7 @@ foo #skip #skip 
{ caption: "an alternative directive must not be duplicated", specSrc: ` -%name test +#name test; s : foo bar #ast foo bar #ast foo bar @@ -514,7 +547,7 @@ bar { caption: "a production must not have a duplicate alternative (non-empty alternatives)", specSrc: ` -%name test +#name test; s : foo @@ -529,7 +562,7 @@ foo { caption: "a production must not have a duplicate alternative (non-empty and split alternatives)", specSrc: ` -%name test +#name test; s : foo @@ -552,7 +585,7 @@ bar { caption: "a production must not have a duplicate alternative (empty alternatives)", specSrc: ` -%name test +#name test; s : foo @@ -571,7 +604,7 @@ foo { caption: "a production must not have a duplicate alternative (empty and split alternatives)", specSrc: ` -%name test +#name test; s : foo @@ -593,7 +626,7 @@ foo { caption: "a terminal symbol and a non-terminal symbol (start symbol) are duplicates", specSrc: ` -%name test +#name test; s : foo @@ -609,7 +642,7 @@ s { caption: "a terminal symbol and a non-terminal symbol (not start symbol) are duplicates", specSrc: ` -%name test +#name test; s : foo @@ -629,11 +662,11 @@ a errs: []*SemanticError{semErrDuplicateName}, }, { - caption: "an invalid associativity type", + caption: "an invalid top-level directive", specSrc: ` -%name test +#name test; -%foo +#foo; s : a @@ -642,12 +675,12 @@ s a : 'a'; `, - errs: []*SemanticError{semErrMDInvalidName}, + errs: []*SemanticError{semErrDirInvalidName}, }, { caption: "a label must be unique in an alternative", specSrc: ` -%name test +#name test; s : foo@x bar@x @@ -663,7 +696,7 @@ bar { caption: "a label cannot be the same name as terminal symbols", specSrc: ` -%name test +#name test; s : foo bar@foo @@ -679,7 +712,7 @@ bar { caption: "a label cannot be the same name as non-terminal symbols", specSrc: ` -%name test +#name test; s : foo@a @@ -700,10 +733,85 @@ bar }, } - nameTests := []*specErrTest{ + nameDirTests := []*specErrTest{ + { + caption: "the `#name` directive is required", + specSrc: ` +s 
+ : foo + ; + +foo + : 'foo'; +`, + errs: []*SemanticError{semErrNoGrammarName}, + }, + { + caption: "the `#name` directive needs an ID parameter", + specSrc: ` +#name; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []*SemanticError{semErrDirInvalidParam}, + }, + { + caption: "the `#name` directive cannot take a pattern parameter", + specSrc: ` +#name "test"; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []*SemanticError{semErrDirInvalidParam}, + }, + { + caption: "the `#name` directive cannot take a string parameter", + specSrc: ` +#name 'test'; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []*SemanticError{semErrDirInvalidParam}, + }, + { + caption: "the `#name` directive takes just one parameter", + specSrc: ` +#name test1 test2; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []*SemanticError{semErrDirInvalidParam}, + }, + } + + precDirTests := []*specErrTest{ { - caption: "the `%name` is required", + caption: "the `#prec` directive needs a directive group parameter", specSrc: ` +#name test; + +#prec; + s : foo ; @@ -711,12 +819,14 @@ s foo : 'foo'; `, - errs: []*SemanticError{semErrMDMissingName}, + errs: []*SemanticError{semErrDirInvalidParam}, }, { - caption: "the `%name` needs an ID parameter", + caption: "the `#prec` directive cannot take an ID parameter", specSrc: ` -%name +#name test; + +#prec foo; s : foo @@ -725,12 +835,14 @@ s foo : 'foo'; `, - errs: []*SemanticError{semErrMDInvalidParam}, + errs: []*SemanticError{semErrDirInvalidParam}, }, { - caption: "the `%name` takes just one parameter", + caption: "the `#prec` directive cannot take a pattern parameter", specSrc: ` -%name test1 test2 +#name test; + +#prec "foo"; s : foo @@ -739,17 +851,51 @@ s foo : 'foo'; `, - errs: []*SemanticError{semErrMDInvalidParam}, + errs: []*SemanticError{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take a string parameter", + specSrc: ` +#name test; + +#prec 'foo'; + +s + : foo + ; + +foo + : 'foo'; +`, + errs: 
[]*SemanticError{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive takes just one directive group parameter", + specSrc: ` +#name test; + +#prec () (); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []*SemanticError{semErrDirInvalidParam}, }, } - leftTests := []*specErrTest{ + leftDirTests := []*specErrTest{ { - caption: "the `%left` needs ID parameters", + caption: "the `#left` directive needs ID parameters", specSrc: ` -%name test +#name test; -%left +#prec ( + #left +); s : foo @@ -758,14 +904,16 @@ s foo : 'foo'; `, - errs: []*SemanticError{semErrMDInvalidParam}, + errs: []*SemanticError{semErrDirInvalidParam}, }, { - caption: "the `%left` cannot take an undefined symbol", + caption: "the `#left` directive cannot take an undefined symbol", specSrc: ` -%name test +#name test; -%left x +#prec ( + #left x +); s : foo @@ -774,14 +922,16 @@ s foo : 'foo'; `, - errs: []*SemanticError{semErrMDInvalidParam}, + errs: []*SemanticError{semErrDirInvalidParam}, }, { - caption: "the `%left` cannot take a non-terminal symbol", + caption: "the `#left` directive cannot take a non-terminal symbol", specSrc: ` -%name test +#name test; -%left s +#prec ( + #left s +); s : foo @@ -790,14 +940,16 @@ s foo : 'foo'; `, - errs: []*SemanticError{semErrMDInvalidParam}, + errs: []*SemanticError{semErrDirInvalidParam}, }, { - caption: "the `%left` cannot take a pattern parameter", + caption: "the `#left` directive cannot take a pattern parameter", specSrc: ` -%name test +#name test; -%left "foo" +#prec ( + #left "foo" +); s : foo @@ -806,14 +958,16 @@ s foo : 'foo'; `, - errs: []*SemanticError{semErrMDInvalidParam}, + errs: []*SemanticError{semErrDirInvalidParam}, }, { - caption: "the `%left` cannot take a string parameter", + caption: "the `#left` directive cannot take a string parameter", specSrc: ` -%name test +#name test; -%left 'foo' +#prec ( + #left 'foo' +); s : foo @@ -822,14 +976,34 @@ s foo : 'foo'; `, - errs: []*SemanticError{semErrMDInvalidParam}, + errs: 
[]*SemanticError{semErrDirInvalidParam}, }, { - caption: "the `%left` cannot be specified multiple times for a symbol", + caption: "the `#left` directive cannot take a directive parameter", specSrc: ` -%name test +#name test; -%left foo foo +#prec ( + #left () +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []*SemanticError{semErrDirInvalidParam}, + }, + { + caption: "the `#left` dirctive cannot be specified multiple times for a symbol", + specSrc: ` +#name test; + +#prec ( + #left foo foo +); s : foo @@ -843,10 +1017,12 @@ foo { caption: "a symbol cannot have different precedence", specSrc: ` -%name test +#name test; -%left foo -%left foo +#prec ( + #left foo + #left foo +); s : foo @@ -860,10 +1036,12 @@ foo { caption: "a symbol cannot have different associativity", specSrc: ` -%name test +#name test; -%right foo -%left foo +#prec ( + #right foo + #left foo +); s : foo @@ -876,13 +1054,15 @@ foo }, } - rightTests := []*specErrTest{ + rightDirTests := []*specErrTest{ { - caption: "the `%right` needs ID parameters", + caption: "the `#right` directive needs ID parameters", specSrc: ` -%name test +#name test; -%right +#prec ( + #right +); s : foo @@ -891,14 +1071,16 @@ s foo : 'foo'; `, - errs: []*SemanticError{semErrMDInvalidParam}, + errs: []*SemanticError{semErrDirInvalidParam}, }, { - caption: "the `%right` cannot take an undefined symbol", + caption: "the `#right` directive cannot take an undefined symbol", specSrc: ` -%name test +#name test; -%right x +#prec ( + #right x +); s : foo @@ -907,14 +1089,16 @@ s foo : 'foo'; `, - errs: []*SemanticError{semErrMDInvalidParam}, + errs: []*SemanticError{semErrDirInvalidParam}, }, { - caption: "the `%right` cannot take a non-terminal symbol", + caption: "the `#right` directive cannot take a non-terminal symbol", specSrc: ` -%name test +#name test; -%right s +#prec ( + #right s +); s : foo @@ -923,14 +1107,16 @@ s foo : 'foo'; `, - errs: []*SemanticError{semErrMDInvalidParam}, + errs: 
[]*SemanticError{semErrDirInvalidParam}, }, { - caption: "the `%right` cannot take a pattern parameter", + caption: "the `#right` directive cannot take a pattern parameter", specSrc: ` -%name test +#name test; -%right "foo" +#prec ( + #right "foo" +); s : foo @@ -939,14 +1125,16 @@ s foo : 'foo'; `, - errs: []*SemanticError{semErrMDInvalidParam}, + errs: []*SemanticError{semErrDirInvalidParam}, }, { - caption: "the `%right` cannot take a string parameter", + caption: "the `#right` directive cannot take a string parameter", specSrc: ` -%name test +#name test; -%right 'foo' +#prec ( + #right 'foo' +); s : foo @@ -955,14 +1143,34 @@ s foo : 'foo'; `, - errs: []*SemanticError{semErrMDInvalidParam}, + errs: []*SemanticError{semErrDirInvalidParam}, }, { - caption: "the `%right` cannot be specified multiple times for a symbol", + caption: "the `#right` directive cannot take a directive group parameter", specSrc: ` -%name test +#name test; -%right foo foo +#prec ( + #right () +); + +s + : foo + ; + +foo + : 'foo'; +`, + errs: []*SemanticError{semErrDirInvalidParam}, + }, + { + caption: "the `#right` directive cannot be specified multiple times for a symbol", + specSrc: ` +#name test; + +#prec ( + #right foo foo +); s : foo @@ -976,10 +1184,12 @@ foo { caption: "a symbol cannot have different precedence", specSrc: ` -%name test +#name test; -%right foo -%right foo +#prec ( + #right foo + #right foo +); s : foo @@ -993,10 +1203,12 @@ foo { caption: "a symbol cannot have different associativity", specSrc: ` -%name test +#name test; -%left foo -%right foo +#prec ( + #left foo + #right foo +); s : foo @@ -1013,7 +1225,7 @@ foo { caption: "cannot use the error symbol as a non-terminal symbol", specSrc: ` -%name test +#name test; s : error @@ -1032,7 +1244,7 @@ foo: 'foo'; { caption: "cannot use the error symbol as a terminal symbol", specSrc: ` -%name test +#name test; s : error @@ -1045,7 +1257,7 @@ error: 'error'; { caption: "cannot use the error symbol as a terminal symbol, 
even if given the skip directive", specSrc: ` -%name test +#name test; s : foo @@ -1064,7 +1276,7 @@ error #skip { caption: "the `#ast` directive needs ID or label prameters", specSrc: ` -%name test +#name test; s : foo #ast @@ -1078,7 +1290,7 @@ foo { caption: "the `#ast` directive cannot take a pattern parameter", specSrc: ` -%name test +#name test; s : foo #ast "foo" @@ -1092,12 +1304,26 @@ foo { caption: "the `#ast` directive cannot take a string parameter", specSrc: ` -%name test +#name test; s : foo #ast 'foo' ; +foo + : "foo"; +`, + errs: []*SemanticError{semErrDirInvalidParam}, + }, + { + caption: "the `#ast` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +s + : foo #ast () + ; + foo : "foo"; `, @@ -1106,7 +1332,7 @@ foo { caption: "a parameter of the `#ast` directive must be either a symbol or a label in an alternative", specSrc: ` -%name test +#name test; s : foo bar #ast foo x @@ -1122,7 +1348,7 @@ bar { caption: "a symbol in a different alternative cannot be a parameter of the `#ast` directive", specSrc: ` -%name test +#name test; s : foo #ast bar @@ -1139,7 +1365,7 @@ bar { caption: "a label in a different alternative cannot be a parameter of the `#ast` directive", specSrc: ` -%name test +#name test; s : foo #ast b @@ -1156,7 +1382,7 @@ bar { caption: "a symbol can appear in the `#ast` directive only once", specSrc: ` -%name test +#name test; s : foo #ast foo foo @@ -1170,7 +1396,7 @@ foo { caption: "a label can appear in the `#ast` directive only once", specSrc: ` -%name test +#name test; s : foo@x #ast x x @@ -1184,7 +1410,7 @@ foo { caption: "a symbol can appear in the `#ast` directive only once, even if the symbol has a label", specSrc: ` -%name test +#name test; s : foo@x #ast foo x @@ -1198,7 +1424,7 @@ foo { caption: "symbol `foo` is ambiguous because it appears in an alternative twice", specSrc: ` -%name test +#name test; s : foo foo #ast foo @@ -1212,7 +1438,7 @@ foo { caption: "symbol `foo` is ambiguous 
because it appears in an alternative twice, even if one of them has a label", specSrc: ` -%name test +#name test; s : foo@x foo #ast foo @@ -1226,7 +1452,7 @@ foo { caption: "the expansion operator cannot be applied to a terminal symbol", specSrc: ` -%name test +#name test; s : foo #ast foo... @@ -1240,7 +1466,7 @@ foo { caption: "the expansion operator cannot be applied to a pattern", specSrc: ` -%name test +#name test; s : foo "bar"@b #ast foo b... @@ -1254,7 +1480,7 @@ foo { caption: "the expansion operator cannot be applied to a string", specSrc: ` -%name test +#name test; s : foo 'bar'@b #ast foo b... @@ -1267,11 +1493,11 @@ foo }, } - precDirTests := []*specErrTest{ + altPrecDirTests := []*specErrTest{ { caption: "the `#prec` directive needs an ID parameter", specSrc: ` -%name test +#name test; s : foo #prec @@ -1285,7 +1511,7 @@ foo { caption: "the `#prec` directive cannot take an undefined symbol", specSrc: ` -%name test +#name test; s : foo #prec x @@ -1299,7 +1525,7 @@ foo { caption: "the `#prec` directive cannot take a non-terminal symbol", specSrc: ` -%name test +#name test; s : a #prec b @@ -1322,7 +1548,7 @@ bar { caption: "the `#prec` directive cannot take a pattern parameter", specSrc: ` -%name test +#name test; s : foo #prec "foo" @@ -1336,12 +1562,26 @@ foo { caption: "the `#prec` directive cannot take a string parameter", specSrc: ` -%name test +#name test; s : foo #prec 'foo' ; +foo + : 'foo'; +`, + errs: []*SemanticError{semErrDirInvalidParam}, + }, + { + caption: "the `#prec` directive cannot take a directive parameter", + specSrc: ` +#name test; + +s + : foo #prec () + ; + foo : 'foo'; `, @@ -1350,7 +1590,7 @@ foo { caption: "a symbol the `#prec` directive takes must be given precedence explicitly", specSrc: ` -%name test +#name test; s : foo bar #prec foo @@ -1369,9 +1609,7 @@ bar { caption: "the `#recover` directive cannot take an ID parameter", specSrc: ` -%name test - -%name test +#name test; s : foo #recover foo @@ -1385,9 +1623,7 @@ foo 
{ caption: "the `#recover` directive cannot take a pattern parameter", specSrc: ` -%name test - -%name test +#name test; s : foo #recover "foo" @@ -1401,14 +1637,26 @@ foo { caption: "the `#recover` directive cannot take a string parameter", specSrc: ` -%name test - -%name test +#name test; s : foo #recover 'foo' ; +foo + : 'foo'; +`, + errs: []*SemanticError{semErrDirInvalidParam}, + }, + { + caption: "the `#recover` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +s + : foo #recover () + ; + foo : 'foo'; `, @@ -1420,7 +1668,7 @@ foo { caption: "a production cannot contain a fragment", specSrc: ` -%name test +#name test; s : f @@ -1434,7 +1682,7 @@ fragment f { caption: "fragments cannot be duplicated", specSrc: ` -%name test +#name test; s : foo @@ -1455,7 +1703,7 @@ fragment f { caption: "the `#alias` directive needs a string parameter", specSrc: ` -%name test +#name test; s : foo @@ -1469,7 +1717,7 @@ foo #alias { caption: "the `#alias` directive takes just one string parameter", specSrc: ` -%name test +#name test; s : foo @@ -1483,7 +1731,7 @@ foo #alias 'Foo' 'FOO' { caption: "the `#alias` directive cannot take an ID parameter", specSrc: ` -%name test +#name test; s : foo @@ -1497,7 +1745,7 @@ foo #alias Foo { caption: "the `#alias` directive cannot take a pattern parameter", specSrc: ` -%name test +#name test; s : foo @@ -1505,16 +1753,30 @@ s foo #alias "Foo" : 'foo'; +`, + errs: []*SemanticError{semErrDirInvalidParam}, + }, + { + caption: "the `#alias` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +s + : foo + ; + +foo #alias () + : 'foo'; `, errs: []*SemanticError{semErrDirInvalidParam}, }, } - modeTests := []*specErrTest{ + modeDirTests := []*specErrTest{ { caption: "the `#mode` directive needs an ID parameter", specSrc: ` -%name test +#name test; s : foo bar @@ -1530,7 +1792,7 @@ bar #mode { caption: "the `#mode` directive cannot take a pattern parameter", specSrc: ` -%name test +#name 
test; s : foo bar @@ -1546,7 +1808,7 @@ bar #mode "mode_1" { caption: "the `#mode` directive cannot take a string parameter", specSrc: ` -%name test +#name test; s : foo bar @@ -1556,16 +1818,32 @@ foo #push mode_1 : 'foo'; bar #mode 'mode_1' : 'bar'; +`, + errs: []*SemanticError{semErrDirInvalidParam}, + }, + { + caption: "the `#mode` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push mode_1 + : 'foo'; +bar #mode () + : 'bar'; `, errs: []*SemanticError{semErrDirInvalidParam}, }, } - pushTests := []*specErrTest{ + pushDirTests := []*specErrTest{ { caption: "the `#push` directive needs an ID parameter", specSrc: ` -%name test +#name test; s : foo bar @@ -1581,7 +1859,7 @@ bar #mode mode_1 { caption: "the `#push` directive takes just one ID parameter", specSrc: ` -%name test +#name test; s : foo bar @@ -1597,7 +1875,7 @@ bar #mode mode_1 { caption: "the `#push` directive cannot take a pattern parameter", specSrc: ` -%name test +#name test; s : foo bar @@ -1613,7 +1891,7 @@ bar #mode mode_1 { caption: "the `#push` directive cannot take a string parameter", specSrc: ` -%name test +#name test; s : foo bar @@ -1623,16 +1901,32 @@ foo #push 'mode_1' : 'foo'; bar #mode mode_1 : 'bar'; +`, + errs: []*SemanticError{semErrDirInvalidParam}, + }, + { + caption: "the `#push` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #push () + : 'foo'; +bar #mode mode_1 + : 'bar'; `, errs: []*SemanticError{semErrDirInvalidParam}, }, } - popTests := []*specErrTest{ + popDirTests := []*specErrTest{ { caption: "the `#pop` directive cannot take an ID parameter", specSrc: ` -%name test +#name test; s : foo bar baz @@ -1650,7 +1944,7 @@ baz #pop mode_1 { caption: "the `#pop` directive cannot take a pattern parameter", specSrc: ` -%name test +#name test; s : foo bar baz @@ -1668,7 +1962,7 @@ baz #pop "mode_1" { caption: "the `#pop` directive cannot take a string parameter", 
specSrc: ` -%name test +#name test; s : foo bar baz @@ -1680,6 +1974,24 @@ bar #mode mode_1 : 'bar'; baz #pop 'mode_1' : 'baz'; +`, + errs: []*SemanticError{semErrDirInvalidParam}, + }, + { + caption: "the `#pop` directive cannot take a directive parameter", + specSrc: ` +#name test; + +s + : foo bar baz + ; + +foo #push mode_1 + : 'foo'; +bar #mode mode_1 + : 'bar'; +baz #pop () + : 'baz'; `, errs: []*SemanticError{semErrDirInvalidParam}, }, @@ -1689,7 +2001,7 @@ baz #pop 'mode_1' { caption: "the `#skip` directive cannot take an ID parameter", specSrc: ` -%name test +#name test; s : foo bar @@ -1705,7 +2017,7 @@ bar { caption: "the `#skip` directive cannot take a pattern parameter", specSrc: ` -%name test +#name test; s : foo bar @@ -1721,7 +2033,7 @@ bar { caption: "the `#skip` directive cannot take a string parameter", specSrc: ` -%name test +#name test; s : foo bar @@ -1731,13 +2043,29 @@ foo #skip 'bar' : 'foo'; bar : 'bar'; +`, + errs: []*SemanticError{semErrDirInvalidParam}, + }, + { + caption: "the `#skip` directive cannot take a directive group parameter", + specSrc: ` +#name test; + +s + : foo bar + ; + +foo #skip () + : 'foo'; +bar + : 'bar'; `, errs: []*SemanticError{semErrDirInvalidParam}, }, { caption: "a terminal symbol used in productions cannot have the skip directive", specSrc: ` -%name test +#name test; s : foo bar @@ -1754,18 +2082,19 @@ bar var tests []*specErrTest tests = append(tests, prodTests...) - tests = append(tests, nameTests...) - tests = append(tests, leftTests...) - tests = append(tests, rightTests...) + tests = append(tests, nameDirTests...) + tests = append(tests, precDirTests...) + tests = append(tests, leftDirTests...) + tests = append(tests, rightDirTests...) tests = append(tests, errorSymTests...) tests = append(tests, astDirTests...) - tests = append(tests, precDirTests...) + tests = append(tests, altPrecDirTests...) tests = append(tests, recoverDirTests...) tests = append(tests, fragmentTests...) 
tests = append(tests, aliasDirTests...) - tests = append(tests, modeTests...) - tests = append(tests, pushTests...) - tests = append(tests, popTests...) + tests = append(tests, modeDirTests...) + tests = append(tests, pushDirTests...) + tests = append(tests, popDirTests...) tests = append(tests, skipDirTests...) for _, test := range tests { t.Run(test.caption, func(t *testing.T) { diff --git a/grammar/lalr1_test.go b/grammar/lalr1_test.go index beb2707..94dfd65 100644 --- a/grammar/lalr1_test.go +++ b/grammar/lalr1_test.go @@ -10,7 +10,7 @@ import ( func TestGenLALR1Automaton(t *testing.T) { // This grammar belongs to LALR(1) class, not SLR(1). src := ` -%name test +#name test; S: L eq R | R; L: ref R | id; diff --git a/grammar/lr0_test.go b/grammar/lr0_test.go index cde3f0a..1b20d78 100644 --- a/grammar/lr0_test.go +++ b/grammar/lr0_test.go @@ -17,7 +17,7 @@ type expectedLRState struct { func TestGenLR0Automaton(t *testing.T) { src := ` -%name test +#name test; expr : expr add term @@ -227,7 +227,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; func TestLR0AutomatonContainingEmptyProduction(t *testing.T) { src := ` -%name test +#name test; s : foo bar diff --git a/grammar/parsing_table_test.go b/grammar/parsing_table_test.go index 833a4d4..522ec1c 100644 --- a/grammar/parsing_table_test.go +++ b/grammar/parsing_table_test.go @@ -16,7 +16,7 @@ type expectedState struct { func TestGenLALRParsingTable(t *testing.T) { src := ` -%name test +#name test; S: L eq R | R; L: ref R | id; @@ -288,7 +288,7 @@ id: "[A-Za-z0-9_]+"; func TestGenSLRParsingTable(t *testing.T) { src := ` -%name test +#name test; expr : expr add term diff --git a/grammar/semantic_error.go b/grammar/semantic_error.go index c81cb5f..a843719 100644 --- a/grammar/semantic_error.go +++ b/grammar/semantic_error.go @@ -15,9 +15,7 @@ func (e *SemanticError) Error() string { } var ( - semErrMDInvalidName = newSemanticError("invalid meta data name") - semErrMDInvalidParam = newSemanticError("invalid parameter") - 
semErrMDMissingName = newSemanticError("name is missing") + semErrNoGrammarName = newSemanticError("name is missing") semErrDuplicateAssoc = newSemanticError("associativity and precedence cannot be specified multiple times for a symbol") semErrUndefinedPrec = newSemanticError("symbol must has precedence") semErrUnusedProduction = newSemanticError("unused production") diff --git a/grammar/slr1_test.go b/grammar/slr1_test.go index 954446f..6748802 100644 --- a/grammar/slr1_test.go +++ b/grammar/slr1_test.go @@ -9,7 +9,7 @@ import ( func TestGenSLR1Automaton(t *testing.T) { src := ` -%name test +#name test; expr : expr add term diff --git a/spec/lexer.go b/spec/lexer.go index f9ad871..2459c40 100644 --- a/spec/lexer.go +++ b/spec/lexer.go @@ -25,7 +25,8 @@ const ( tokenKindLabelMarker = tokenKind("@") tokenKindDirectiveMarker = tokenKind("#") tokenKindExpantion = tokenKind("...") - tokenKindMetaDataMarker = tokenKind("%") + tokenKindLParen = tokenKind("(") + tokenKindRParen = tokenKind(")") tokenKindNewline = tokenKind("newline") tokenKindEOF = tokenKind("eof") tokenKindInvalid = tokenKind("invalid") @@ -265,8 +266,10 @@ func (l *lexer) lexAndSkipWSs() (*token, error) { return newSymbolToken(tokenKindDirectiveMarker, newPosition(tok.Row+1, tok.Col+1)), nil case KindIDExpansion: return newSymbolToken(tokenKindExpantion, newPosition(tok.Row+1, tok.Col+1)), nil - case KindIDMetadataMarker: - return newSymbolToken(tokenKindMetaDataMarker, newPosition(tok.Row+1, tok.Col+1)), nil + case KindIDLParen: + return newSymbolToken(tokenKindLParen, newPosition(tok.Row+1, tok.Col+1)), nil + case KindIDRParen: + return newSymbolToken(tokenKindRParen, newPosition(tok.Row+1, tok.Col+1)), nil default: return newInvalidToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil } diff --git a/spec/lexer_test.go b/spec/lexer_test.go index 4fab8db..621eff5 100644 --- a/spec/lexer_test.go +++ b/spec/lexer_test.go @@ -36,7 +36,7 @@ func TestLexer_Run(t *testing.T) { }{ { caption: "the 
lexer can recognize all kinds of tokens", - src: `id"terminal"'string':|;@...#%`, + src: `id"terminal"'string':|;@...#()`, tokens: []*token{ idTok("id"), termPatTok("terminal"), @@ -47,7 +47,8 @@ func TestLexer_Run(t *testing.T) { symTok(tokenKindLabelMarker), symTok(tokenKindExpantion), symTok(tokenKindDirectiveMarker), - symTok(tokenKindMetaDataMarker), + symTok(tokenKindLParen), + symTok(tokenKindRParen), newEOFToken(), }, }, diff --git a/spec/lexspec.json b/spec/lexspec.json index ff8ff0d..6a11a4a 100644 --- a/spec/lexspec.json +++ b/spec/lexspec.json @@ -118,8 +118,12 @@ "pattern": "#" }, { - "kind": "metadata_marker", - "pattern": "%" + "kind": "l_paren", + "pattern": "\\(" + }, + { + "kind": "r_paren", + "pattern": "\\)" } ] } diff --git a/spec/parser.go b/spec/parser.go index a1d23f0..3b5907e 100644 --- a/spec/parser.go +++ b/spec/parser.go @@ -9,7 +9,7 @@ import ( ) type RootNode struct { - MetaData []*DirectiveNode + Directives []*DirectiveNode Productions []*ProductionNode LexProductions []*ProductionNode Fragments []*FragmentNode @@ -58,6 +58,7 @@ type ParameterNode struct { ID string Pattern string String string + Group []*DirectiveNode Expansion bool Pos Position } @@ -134,14 +135,14 @@ func (p *parser) parseRoot() *RootNode { } }() - var metadata []*DirectiveNode + var dirs []*DirectiveNode var prods []*ProductionNode var lexProds []*ProductionNode var fragments []*FragmentNode for { - md := p.parseMetaData() - if md != nil { - metadata = append(metadata, md) + dir := p.parseTopLevelDirective() + if dir != nil { + dirs = append(dirs, dir) continue } @@ -167,14 +168,14 @@ func (p *parser) parseRoot() *RootNode { } return &RootNode{ - MetaData: metadata, + Directives: dirs, Productions: prods, LexProductions: lexProds, Fragments: fragments, } } -func (p *parser) parseMetaData() *DirectiveNode { +func (p *parser) parseTopLevelDirective() *DirectiveNode { defer func() { err := recover() if err == nil { @@ -187,35 +188,21 @@ func (p *parser) 
parseMetaData() *DirectiveNode { } p.errs = append(p.errs, specErr) - p.skipOverTo(tokenKindNewline) + p.skipOverTo(tokenKindSemicolon) }() - p.consume(tokenKindNewline) - - if !p.consume(tokenKindMetaDataMarker) { + dir := p.parseDirective() + if dir == nil { return nil } - mdPos := p.lastTok.pos - if !p.consume(tokenKindID) { - raiseSyntaxError(p.pos.Row, synErrNoMDName) - } - name := p.lastTok.text + p.consume(tokenKindNewline) - var params []*ParameterNode - for { - param := p.parseParameter() - if param == nil { - break - } - params = append(params, param) + if !p.consume(tokenKindSemicolon) { + raiseSyntaxError(p.pos.Row, synErrTopLevelDirNoSemicolon) } - return &DirectiveNode{ - Name: name, - Parameters: params, - Pos: mdPos, - } + return dir } func (p *parser) parseFragment() *FragmentNode { @@ -428,6 +415,8 @@ func (p *parser) parseElement() *ElementNode { } func (p *parser) parseDirective() *DirectiveNode { + p.consume(tokenKindNewline) + if !p.consume(tokenKindDirectiveMarker) { return nil } @@ -472,6 +461,30 @@ func (p *parser) parseParameter() *ParameterNode { String: p.lastTok.text, Pos: p.lastTok.pos, } + case p.consume(tokenKindLParen): + pos := p.lastTok.pos + var g []*DirectiveNode + for { + dir := p.parseDirective() + if dir == nil { + break + } + g = append(g, dir) + } + if !p.consume(tokenKindRParen) { + raiseSyntaxError(p.pos.Row, synErrUnclosedDirGroup) + } + if len(g) == 0 { + // Set an empty slice representing an empty directive group to distinguish between the following two cases. + // + // - #prec (); // vartan allows this case. + // - #prec; // This case will raise an error. 
+ g = []*DirectiveNode{} + } + param = &ParameterNode{ + Group: g, + Pos: pos, + } default: return nil } diff --git a/spec/parser_test.go b/spec/parser_test.go index 2a44acd..de2c6f7 100644 --- a/spec/parser_test.go +++ b/spec/parser_test.go @@ -8,6 +8,20 @@ import ( ) func TestParse(t *testing.T) { + name := func(param *ParameterNode) *DirectiveNode { + return &DirectiveNode{ + Name: "name", + Parameters: []*ParameterNode{param}, + } + } + + prec := func(param *ParameterNode) *DirectiveNode { + return &DirectiveNode{ + Name: "prec", + Parameters: []*ParameterNode{param}, + } + } + leftAssoc := func(params ...*ParameterNode) *DirectiveNode { return &DirectiveNode{ Name: "left", @@ -66,6 +80,11 @@ func TestParse(t *testing.T) { param.Expansion = true return param } + group := func(dirs ...*DirectiveNode) *ParameterNode { + return &ParameterNode{ + Group: dirs, + } + } withParamPos := func(param *ParameterNode, pos Position) *ParameterNode { param.Pos = pos return param @@ -121,6 +140,82 @@ func TestParse(t *testing.T) { ast *RootNode synErr *SyntaxError }{ + { + caption: "a grammar can contain top-level directives", + src: ` +#name test; + +#prec ( + #left a b + #right c d +); +`, + ast: &RootNode{ + Directives: []*DirectiveNode{ + withDirPos( + name( + withParamPos( + idParam("test"), + newPos(2), + ), + ), + newPos(2), + ), + withDirPos( + prec( + withParamPos( + group( + withDirPos( + leftAssoc( + withParamPos( + idParam("a"), + newPos(5), + ), + withParamPos( + idParam("b"), + newPos(5), + ), + ), + newPos(5), + ), + withDirPos( + rightAssoc( + withParamPos( + idParam("c"), + newPos(6), + ), + withParamPos( + idParam("d"), + newPos(6), + ), + ), + newPos(6), + ), + ), + newPos(4), + ), + ), + newPos(4), + ), + }, + }, + }, + { + caption: "a top-level directive must be followed by ';'", + src: ` +#name test +`, + synErr: synErrTopLevelDirNoSemicolon, + }, + { + caption: "a directive group must be closed by ')'", + src: ` +#prec ( + #left a b +; +`, + synErr: 
synErrUnclosedDirGroup, + }, { caption: "single production is a valid grammar", src: `a: "a";`, @@ -643,10 +738,12 @@ foo { caption: "a grammar can contain left and right associativities", src: ` -%left l1 l2 -%left l3 -%right r1 r2 -%right r3 +#prec ( + #left l1 l2 + #left l3 + #right r1 r2 + #right r3 +); s : id l1 id l2 id l3 id @@ -671,33 +768,43 @@ id : "[A-Za-z0-9_]+"; `, ast: &RootNode{ - MetaData: []*DirectiveNode{ + Directives: []*DirectiveNode{ withDirPos( - leftAssoc( - withParamPos(idParam("l1"), newPos(2)), - withParamPos(idParam("l2"), newPos(2)), + prec( + withParamPos( + group( + withDirPos( + leftAssoc( + withParamPos(idParam("l1"), newPos(3)), + withParamPos(idParam("l2"), newPos(3)), + ), + newPos(3), + ), + withDirPos( + leftAssoc( + withParamPos(idParam("l3"), newPos(4)), + ), + newPos(4), + ), + withDirPos( + rightAssoc( + withParamPos(idParam("r1"), newPos(5)), + withParamPos(idParam("r2"), newPos(5)), + ), + newPos(5), + ), + withDirPos( + rightAssoc( + withParamPos(idParam("r3"), newPos(6)), + ), + newPos(6), + ), + ), + newPos(2), + ), ), newPos(2), ), - withDirPos( - leftAssoc( - withParamPos(idParam("l3"), newPos(3)), - ), - newPos(3), - ), - withDirPos( - rightAssoc( - withParamPos(idParam("r1"), newPos(4)), - withParamPos(idParam("r2"), newPos(4)), - ), - newPos(4), - ), - withDirPos( - rightAssoc( - withParamPos(idParam("r3"), newPos(5)), - ), - newPos(5), - ), }, Productions: []*ProductionNode{ prod("s", @@ -756,11 +863,11 @@ func testRootNode(t *testing.T, root, expected *RootNode, checkPosition bool) { if len(root.Productions) != len(expected.Productions) { t.Fatalf("unexpected length of productions; want: %v, got: %v", len(expected.Productions), len(root.Productions)) } - if len(root.MetaData) != len(expected.MetaData) { - t.Fatalf("unexpected length of meta data; want: %v, got: %v", len(expected.MetaData), len(root.MetaData)) + if len(root.Directives) != len(expected.Directives) { + t.Fatalf("unexpected length of top-level 
directives; want: %v, got: %v", len(expected.Directives), len(root.Directives)) } - for i, md := range root.MetaData { - testDirectives(t, []*DirectiveNode{md}, []*DirectiveNode{expected.MetaData[i]}, true) + for i, dir := range root.Directives { + testDirectives(t, []*DirectiveNode{dir}, []*DirectiveNode{expected.Directives[i]}, true) } for i, prod := range root.Productions { testProductionNode(t, prod, expected.Productions[i], checkPosition) diff --git a/spec/syntax_error.go b/spec/syntax_error.go index fdf9c40..3b44d2d 100644 --- a/spec/syntax_error.go +++ b/spec/syntax_error.go @@ -24,14 +24,15 @@ var ( synErrEmptyString = newSyntaxError("a string must include at least one character") // syntax errors - synErrInvalidToken = newSyntaxError("invalid token") - synErrNoMDName = newSyntaxError("a metadata name is missing") - synErrNoProductionName = newSyntaxError("a production name is missing") - synErrNoColon = newSyntaxError("the colon must precede alternatives") - synErrNoSemicolon = newSyntaxError("the semicolon is missing at the last of an alternative") - synErrLabelWithNoSymbol = newSyntaxError("a label must follow a symbol") - synErrNoLabel = newSyntaxError("an identifier that represents a label is missing after the label marker @") - synErrNoDirectiveName = newSyntaxError("a directive needs a name") - synErrSemicolonNoNewline = newSyntaxError("a semicolon must be followed by a newline") - synErrFragmentNoPattern = newSyntaxError("a fragment needs one pattern element") + synErrInvalidToken = newSyntaxError("invalid token") + synErrTopLevelDirNoSemicolon = newSyntaxError("a top-level directive must be followed by ;") + synErrNoProductionName = newSyntaxError("a production name is missing") + synErrNoColon = newSyntaxError("the colon must precede alternatives") + synErrNoSemicolon = newSyntaxError("the semicolon is missing at the last of an alternative") + synErrLabelWithNoSymbol = newSyntaxError("a label must follow a symbol") + synErrNoLabel = 
newSyntaxError("an identifier that represents a label is missing after the label marker @") + synErrNoDirectiveName = newSyntaxError("a directive needs a name") + synErrUnclosedDirGroup = newSyntaxError("a directive group must be closed by )") + synErrSemicolonNoNewline = newSyntaxError("a semicolon must be followed by a newline") + synErrFragmentNoPattern = newSyntaxError("a fragment needs one pattern element") ) diff --git a/spec/vartan_lexer.go b/spec/vartan_lexer.go index 8c9e7ad..146748a 100644 --- a/spec/vartan_lexer.go +++ b/spec/vartan_lexer.go @@ -356,14 +356,15 @@ const ( KindIDLabelMarker KindID = 11 KindIDExpansion KindID = 12 KindIDDirectiveMarker KindID = 13 - KindIDMetadataMarker KindID = 14 - KindIDPattern KindID = 15 - KindIDTerminalClose KindID = 16 - KindIDEscapeSymbol KindID = 17 - KindIDCharSeq KindID = 18 - KindIDEscapedQuot KindID = 19 - KindIDEscapedBackSlash KindID = 20 - KindIDStringLiteralClose KindID = 21 + KindIDLParen KindID = 14 + KindIDRParen KindID = 15 + KindIDPattern KindID = 16 + KindIDTerminalClose KindID = 17 + KindIDEscapeSymbol KindID = 18 + KindIDCharSeq KindID = 19 + KindIDEscapedQuot KindID = 20 + KindIDEscapedBackSlash KindID = 21 + KindIDStringLiteralClose KindID = 22 ) const ( @@ -381,7 +382,8 @@ const ( KindNameLabelMarker = "label_marker" KindNameExpansion = "expansion" KindNameDirectiveMarker = "directive_marker" - KindNameMetadataMarker = "metadata_marker" + KindNameLParen = "l_paren" + KindNameRParen = "r_paren" KindNamePattern = "pattern" KindNameTerminalClose = "terminal_close" KindNameEscapeSymbol = "escape_symbol" @@ -422,8 +424,10 @@ func KindIDToName(id KindID) string { return KindNameExpansion case KindIDDirectiveMarker: return KindNameDirectiveMarker - case KindIDMetadataMarker: - return KindNameMetadataMarker + case KindIDLParen: + return KindNameLParen + case KindIDRParen: + return KindNameRParen case KindIDPattern: return KindNamePattern case KindIDTerminalClose: @@ -467,7 +471,7 @@ func NewLexSpec() 
*lexSpec { pop: [][]bool{ nil, { - false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, }, { false, false, true, false, @@ -479,7 +483,7 @@ func NewLexSpec() *lexSpec { push: [][]ModeID{ nil, { - 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, }, { 0, 0, 0, 0, @@ -505,7 +509,7 @@ func NewLexSpec() *lexSpec { { 0, 0, 1, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 4, 5, 0, 0, 2, 6, 7, 8, 9, - 10, 11, 12, 13, 14, + 10, 11, 12, 13, 14, 15, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -536,7 +540,8 @@ func NewLexSpec() *lexSpec { KindIDLabelMarker, KindIDExpansion, KindIDDirectiveMarker, - KindIDMetadataMarker, + KindIDLParen, + KindIDRParen, }, { KindIDNil, @@ -568,7 +573,8 @@ func NewLexSpec() *lexSpec { KindNameLabelMarker, KindNameExpansion, KindNameDirectiveMarker, - KindNameMetadataMarker, + KindNameLParen, + KindNameRParen, KindNamePattern, KindNameTerminalClose, KindNameEscapeSymbol, @@ -587,7 +593,7 @@ func NewLexSpec() *lexSpec { { 0, 1, 2, 3, 4, 5, 6, 7, 6, 8, 6, 9, 6, 10, 6, 11, 12, 6, 13, 14, 6, 15, 16, 6, 17, 18, 19, 20, 21, 22, 23, 24, 24, 25, 26, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, }, { 0, 1, 2, 3, 2, 4, 2, 5, 2, 6, 2, 7, 8, 2, 9, 10, 2, 11, 12, 2, @@ -633,7 +639,7 @@ func NewLexSpec() *lexSpec { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, 1, 1, -1, 1, -1, 1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, 1, 1, -1, -1, -1, 1, 1, 1, -1, -1, -1, -1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, 1, -1, 1, 1, 1, 1, 1, 1, 1, @@ -978,7 +984,7 @@ func NewLexSpec() *lexSpec { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 11, 13, 13, 15, 18, 18, 18, 21, 2, 35, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 36, 43, 0, 44, 0, 37, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 36, 43, 0, 0, 0, 37, 44, 45, 0, 0, 0, 0, 33, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 40, 0, 0, 0, 0, 41, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 32, 0, 32, 32, 32, 32, 32, 24, 32,