From ceb6649d3fb8b85ac8629a65dcfb9533763f4af0 Mon Sep 17 00:00:00 2001 From: Ryo Nihei Date: Thu, 19 May 2022 00:20:05 +0900 Subject: [PATCH] Add vartan-test command --- README.md | 31 +- cmd/vartan/test.go | 65 +++ spec/test/parser.go | 226 +++++++++++ spec/test/parser_test.go | 300 ++++++++++++++ spec/test/tree-report.json | 1 + spec/test/tree.json | 1 + spec/test/tree.vartan | 25 ++ spec/test/tree_lexer.go | 549 +++++++++++++++++++++++++ spec/test/tree_parser.go | 638 ++++++++++++++++++++++++++++++ spec/test/tree_semantic_action.go | 353 +++++++++++++++++ tester/tester.go | 177 +++++++++ tester/tester_test.go | 170 ++++++++ 12 files changed, 2535 insertions(+), 1 deletion(-) create mode 100644 cmd/vartan/test.go create mode 100644 spec/test/parser.go create mode 100644 spec/test/parser_test.go create mode 100644 spec/test/tree-report.json create mode 100644 spec/test/tree.json create mode 100644 spec/test/tree.vartan create mode 100644 spec/test/tree_lexer.go create mode 100644 spec/test/tree_parser.go create mode 100644 spec/test/tree_semantic_action.go create mode 100644 tester/tester.go create mode 100644 tester/tester_test.go diff --git a/README.md b/README.md index 900fa28..3d9e5f5 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,36 @@ When `vartan parse` command successfully parses the input data, it prints a CST $ vartan show expr-report.json ``` -### 4. Generate a parser +### 4. Test + +`vartan-test` command allows you to test whether your grammar recognizes an input text as a syntax tree with an expected structure. To do so, you need to define a test case as follows. + +``` +This is an example. +--- +a / b * 100 +--- +(expr + (expr + (expr (id)) + (div) + (expr (id))) + (mul) + (expr (int))) +``` + +The test case consists of a description, an input text, and a syntax tree you expect. Each part is separated by the delimiter `---`. The syntax tree is represented by the syntax like an [S-expression](https://en.wikipedia.org/wiki/S-expression). 
+ +Save the above test case to `test.txt` file and run the following command. + +```sh +$ vartan-test expr.vartan test.txt +Passed test.txt +``` + +When you specify a directory as the 2nd argument of `vartan-test` command, it will run all test cases in the directory. + +### 5. Generate a parser Using `vartan-go` command, you can generate a source code of a parser to recognize your grammar. diff --git a/cmd/vartan/test.go b/cmd/vartan/test.go new file mode 100644 index 0000000..50ba8ca --- /dev/null +++ b/cmd/vartan/test.go @@ -0,0 +1,65 @@ +package main + +import ( + "errors" + "fmt" + "os" + + "github.com/nihei9/vartan/grammar" + "github.com/nihei9/vartan/tester" + "github.com/spf13/cobra" +) + +func init() { + cmd := &cobra.Command{ + Use: "test |", + Short: "Test a grammar", + Example: ` vartan test grammar.vartan test`, + Args: cobra.ExactArgs(2), + RunE: runTest, + } + rootCmd.AddCommand(cmd) +} + +func runTest(cmd *cobra.Command, args []string) error { + g, err := readGrammar(args[0]) + if err != nil { + return fmt.Errorf("Cannot read a grammar: %w", err) + } + cg, _, err := grammar.Compile(g) + if err != nil { + return fmt.Errorf("Cannot read a compiled grammar: %w", err) + } + + var cs []*tester.TestCaseWithMetadata + { + cs = tester.ListTestCases(args[1]) + errOccurred := false + for _, c := range cs { + if c.Error != nil { + fmt.Fprintf(os.Stderr, "Failed to read a test case or a directory: %v\n%v\n", c.FilePath, c.Error) + errOccurred = true + } + } + if errOccurred { + return errors.New("Cannot run test") + } + } + + t := &tester.Tester{ + Grammar: cg, + Cases: cs, + } + rs := t.Run() + testFailed := false + for _, r := range rs { + fmt.Fprintln(os.Stdout, r) + if r.Error != nil { + testFailed = true + } + } + if testFailed { + return errors.New("Test failed") + } + return nil +} diff --git a/spec/test/parser.go b/spec/test/parser.go new file mode 100644 index 0000000..0513ee3 --- /dev/null +++ b/spec/test/parser.go @@ -0,0 +1,226 @@ +//go:generate 
vartan compile tree.vartan -o tree.json +//go:generate vartan-go tree.json --package test + +package test + +import ( + "bufio" + "bytes" + "errors" + "fmt" + "io" + "regexp" + "strings" +) + +type TreeDiff struct { + ExpectedPath string + ActualPath string + Message string +} + +func newTreeDiff(expected, actual *Tree, message string) *TreeDiff { + return &TreeDiff{ + ExpectedPath: expected.path(), + ActualPath: actual.path(), + Message: message, + } +} + +type Tree struct { + Parent *Tree + Offset int + Kind string + Children []*Tree +} + +func NewTree(kind string, children ...*Tree) *Tree { + return &Tree{ + Kind: kind, + Children: children, + } +} + +func (t *Tree) Fill() *Tree { + for i, c := range t.Children { + c.Parent = t + c.Offset = i + c.Fill() + } + return t +} + +func (t *Tree) path() string { + if t.Parent == nil { + return t.Kind + } + return fmt.Sprintf("%v.[%v]%v", t.Parent.path(), t.Offset, t.Kind) +} + +func DiffTree(expected, actual *Tree) []*TreeDiff { + if expected == nil && actual == nil { + return nil + } + if actual.Kind != expected.Kind { + msg := fmt.Sprintf("unexpected kind: expected '%v' but got '%v'", expected.Kind, actual.Kind) + return []*TreeDiff{ + newTreeDiff(expected, actual, msg), + } + } + if len(actual.Children) != len(expected.Children) { + msg := fmt.Sprintf("unexpected node count: expected %v but got %v", len(expected.Children), len(actual.Children)) + return []*TreeDiff{ + newTreeDiff(expected, actual, msg), + } + } + var diffs []*TreeDiff + for i, exp := range expected.Children { + if ds := DiffTree(actual.Children[i], exp); len(ds) > 0 { + diffs = append(diffs, ds...) 
+ } + } + return diffs +} + +type TestCase struct { + Description string + Source []byte + Output *Tree +} + +func ParseTestCase(r io.Reader) (*TestCase, error) { + bufs, err := splitIntoParts(r) + if err != nil { + return nil, err + } + if len(bufs) != 3 { + return nil, fmt.Errorf("too many or too few part delimiters: a test case consists of just tree parts: %v parts found", len(bufs)) + } + + tree, err := parseTree(bytes.NewReader(bufs[2])) + if err != nil { + return nil, err + } + + return &TestCase{ + Description: string(bufs[0]), + Source: bufs[1], + Output: tree, + }, nil +} + +func splitIntoParts(r io.Reader) ([][]byte, error) { + var bufs [][]byte + s := bufio.NewScanner(r) + for { + buf, err := readPart(s) + if err != nil { + return nil, err + } + if buf == nil { + break + } + bufs = append(bufs, buf) + } + if err := s.Err(); err != nil { + return nil, err + } + return bufs, nil +} + +var reDelim = regexp.MustCompile(`^\s*---+\s*$`) + +func readPart(s *bufio.Scanner) ([]byte, error) { + if !s.Scan() { + return nil, s.Err() + } + buf := &bytes.Buffer{} + line := s.Bytes() + if reDelim.Match(line) { + // Return an empty slice because (*bytes.Buffer).Bytes() returns nil if we have never written data. 
+ return []byte{}, nil + } + _, err := buf.Write(line) + if err != nil { + return nil, err + } + for s.Scan() { + line := s.Bytes() + if reDelim.Match(line) { + return buf.Bytes(), nil + } + _, err := buf.Write([]byte("\n")) + if err != nil { + return nil, err + } + _, err = buf.Write(line) + if err != nil { + return nil, err + } + } + if err := s.Err(); err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +func parseTree(src io.Reader) (*Tree, error) { + toks, err := NewTokenStream(src) + if err != nil { + return nil, err + } + gram := NewGrammar() + tb := NewDefaultSyntaxTreeBuilder() + p, err := NewParser(toks, gram, SemanticAction(NewASTActionSet(gram, tb))) + if err != nil { + return nil, err + } + err = p.Parse() + if err != nil { + return nil, err + } + synErrs := p.SyntaxErrors() + if len(synErrs) > 0 { + var b strings.Builder + b.WriteString("syntax error:") + for _, synErr := range synErrs { + b.WriteRune('\n') + b.Write(formatSyntaxError(synErr, gram)) + } + return nil, errors.New(b.String()) + } + return genTree(tb.Tree()).Fill(), nil +} + +func formatSyntaxError(synErr *SyntaxError, gram Grammar) []byte { + var b bytes.Buffer + + b.WriteString(fmt.Sprintf("%v:%v: %v: ", synErr.Row+1, synErr.Col+1, synErr.Message)) + + tok := synErr.Token + switch { + case tok.EOF(): + b.WriteString("") + case tok.Invalid(): + b.WriteString(fmt.Sprintf("'%v' ()", string(tok.Lexeme()))) + default: + b.WriteString(fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), gram.Terminal(tok.TerminalID()))) + } + b.WriteString(fmt.Sprintf("; expected: %v", synErr.ExpectedTerminals[0])) + for _, t := range synErr.ExpectedTerminals[1:] { + b.WriteString(fmt.Sprintf(", %v", t)) + } + + return b.Bytes() +} + +func genTree(node *Node) *Tree { + var children []*Tree + if len(node.Children) > 1 { + children = make([]*Tree, len(node.Children)-1) + for i, c := range node.Children[1:] { + children[i] = genTree(c) + } + } + return NewTree(node.Children[0].Text, children...) 
+} diff --git a/spec/test/parser_test.go b/spec/test/parser_test.go new file mode 100644 index 0000000..6e77f6d --- /dev/null +++ b/spec/test/parser_test.go @@ -0,0 +1,300 @@ +package test + +import ( + "fmt" + "reflect" + "strings" + "testing" +) + +func TestDiffTree(t *testing.T) { + tests := []struct { + t1 *Tree + t2 *Tree + different bool + }{ + { + t1: NewTree("a"), + t2: NewTree("a"), + }, + { + t1: NewTree("a", + NewTree("b"), + ), + t2: NewTree("a", + NewTree("b"), + ), + }, + { + t1: NewTree("a", + NewTree("b"), + NewTree("c"), + NewTree("d"), + ), + t2: NewTree("a", + NewTree("b"), + NewTree("c"), + NewTree("d"), + ), + }, + { + t1: NewTree("a", + NewTree("b", + NewTree("c"), + ), + NewTree("d", + NewTree("d"), + ), + ), + t2: NewTree("a", + NewTree("b", + NewTree("c"), + ), + NewTree("d", + NewTree("d"), + ), + ), + }, + { + t1: NewTree("a"), + t2: NewTree("b"), + different: true, + }, + { + t1: NewTree("a", + NewTree("b"), + ), + t2: NewTree("a"), + different: true, + }, + { + t1: NewTree("a"), + t2: NewTree("a", + NewTree("b"), + ), + different: true, + }, + { + t1: NewTree("a", + NewTree("b"), + ), + t2: NewTree("a", + NewTree("c"), + ), + different: true, + }, + { + t1: NewTree("a", + NewTree("b"), + NewTree("c"), + NewTree("d"), + ), + t2: NewTree("a", + NewTree("b"), + NewTree("c"), + ), + different: true, + }, + { + t1: NewTree("a", + NewTree("b"), + NewTree("c"), + ), + t2: NewTree("a", + NewTree("b"), + NewTree("c"), + NewTree("d"), + ), + different: true, + }, + { + t1: NewTree("a", + NewTree("b", + NewTree("c"), + ), + ), + t2: NewTree("a", + NewTree("b", + NewTree("d"), + ), + ), + different: true, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + diffs := DiffTree(tt.t1, tt.t2) + if tt.different && len(diffs) == 0 { + t.Fatalf("unexpected result") + } else if !tt.different && len(diffs) > 0 { + t.Fatalf("unexpected result") + } + }) + } +} + +func TestParseTestCase(t *testing.T) { + tests := 
[]struct { + src string + tc *TestCase + parseErr bool + }{ + { + src: `test +--- +foo +--- +(foo) +`, + tc: &TestCase{ + Description: "test", + Source: []byte("foo"), + Output: NewTree("foo").Fill(), + }, + }, + { + src: ` +test + +--- + +foo + +--- + +(foo) + +`, + tc: &TestCase{ + Description: "\ntest\n", + Source: []byte("\nfoo\n"), + Output: NewTree("foo").Fill(), + }, + }, + // The length of a part delimiter may be greater than 3. + { + src: ` +test +---- +foo +---- +(foo) +`, + tc: &TestCase{ + Description: "\ntest", + Source: []byte("foo"), + Output: NewTree("foo").Fill(), + }, + }, + // The description part may be empty. + { + src: `---- +foo +---- +(foo) +`, + tc: &TestCase{ + Description: "", + Source: []byte("foo"), + Output: NewTree("foo").Fill(), + }, + }, + // The source part may be empty. + { + src: `test +--- +--- +(foo) +`, + tc: &TestCase{ + Description: "test", + Source: []byte{}, + Output: NewTree("foo").Fill(), + }, + }, + // NOTE: If there is a delimiter at the end of a test case, we really want to make it a syntax error, + // but we allow it to simplify the implementation of the parser. + { + src: `test +---- +foo +---- +(foo) +--- +`, + tc: &TestCase{ + Description: "test", + Source: []byte("foo"), + Output: NewTree("foo").Fill(), + }, + }, + { + src: ``, + parseErr: true, + }, + { + src: `test +--- +`, + parseErr: true, + }, + { + src: `test +--- +foo +`, + parseErr: true, + }, + { + src: `test +--- +foo +--- +`, + parseErr: true, + }, + { + src: `test +-- +foo +-- +(foo) +`, + parseErr: true, + }, + { + src: `test +--- +foo +--- +? 
+`, + parseErr: true, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + tc, err := ParseTestCase(strings.NewReader(tt.src)) + if tt.parseErr { + if err == nil { + t.Fatalf("an expected error didn't occur") + } + } else { + if err != nil { + t.Fatal(err) + } + testTestCase(t, tt.tc, tc) + } + }) + } +} + +func testTestCase(t *testing.T, expected, actual *TestCase) { + t.Helper() + + if expected.Description != actual.Description || + !reflect.DeepEqual(expected.Source, actual.Source) || + len(DiffTree(expected.Output, actual.Output)) > 0 { + t.Fatalf("unexpected test case: want: %#v, got: %#v", expected, actual) + } +} diff --git a/spec/test/tree-report.json b/spec/test/tree-report.json new file mode 100644 index 0000000..71cdf0e --- /dev/null +++ b/spec/test/tree-report.json @@ -0,0 +1 @@ +{"class":"LALR(1)","terminals":[null,{"number":1,"name":"\u003ceof\u003e","anonymous":false,"alias":"","pattern":"","prec":0,"assoc":""},{"number":2,"name":"error","anonymous":false,"alias":"","pattern":"","prec":0,"assoc":""},{"number":3,"name":"ws","anonymous":false,"alias":"","pattern":"","prec":0,"assoc":""},{"number":4,"name":"l_paren","anonymous":false,"alias":"(","pattern":"","prec":1,"assoc":""},{"number":5,"name":"r_paren","anonymous":false,"alias":")","pattern":"","prec":0,"assoc":""},{"number":6,"name":"identifier","anonymous":false,"alias":"","pattern":"","prec":0,"assoc":""}],"non_terminals":[null,{"number":1,"name":"tree'"},{"number":2,"name":"tree"},{"number":3,"name":"tree_list"}],"productions":[null,{"number":1,"lhs":1,"rhs":[-2],"prec":0,"assoc":""},{"number":2,"lhs":2,"rhs":[4,6,-3,5],"prec":0,"assoc":""},{"number":3,"lhs":2,"rhs":[4,2,5],"prec":0,"assoc":""},{"number":4,"lhs":3,"rhs":[-3,-2],"prec":0,"assoc":""},{"number":5,"lhs":3,"rhs":[-2],"prec":0,"assoc":""},{"number":6,"lhs":3,"rhs":[],"prec":2,"assoc":""}],"states":[{"number":0,"kernel":[{"production":1,"dot":0}],"shift":[{"symbol":4,"state":2}],"reduce":null,"got
o":[{"symbol":2,"state":1}],"sr_conflict":[],"rr_conflict":[]},{"number":1,"kernel":[{"production":1,"dot":1}],"shift":null,"reduce":[{"look_ahead":[1],"production":1}],"goto":null,"sr_conflict":[],"rr_conflict":[]},{"number":2,"kernel":[{"production":2,"dot":1},{"production":3,"dot":1}],"shift":[{"symbol":2,"state":3},{"symbol":6,"state":4}],"reduce":null,"goto":null,"sr_conflict":[],"rr_conflict":[]},{"number":3,"kernel":[{"production":3,"dot":2}],"shift":[{"symbol":5,"state":5}],"reduce":null,"goto":null,"sr_conflict":[],"rr_conflict":[]},{"number":4,"kernel":[{"production":2,"dot":2}],"shift":[{"symbol":4,"state":2}],"reduce":[{"look_ahead":[5,1],"production":6}],"goto":[{"symbol":2,"state":6},{"symbol":3,"state":7}],"sr_conflict":[{"symbol":4,"state":2,"production":6,"adopted_state":2,"adopted_production":null,"resolved_by":1}],"rr_conflict":[]},{"number":5,"kernel":[{"production":3,"dot":3}],"shift":null,"reduce":[{"look_ahead":[4,5,1],"production":3}],"goto":null,"sr_conflict":[],"rr_conflict":[]},{"number":6,"kernel":[{"production":5,"dot":1}],"shift":null,"reduce":[{"look_ahead":[4,5],"production":5}],"goto":null,"sr_conflict":[],"rr_conflict":[]},{"number":7,"kernel":[{"production":2,"dot":3},{"production":4,"dot":1}],"shift":[{"symbol":4,"state":2},{"symbol":5,"state":9}],"reduce":null,"goto":[{"symbol":2,"state":8}],"sr_conflict":[],"rr_conflict":[]},{"number":8,"kernel":[{"production":4,"dot":2}],"shift":null,"reduce":[{"look_ahead":[4,5],"production":4}],"goto":null,"sr_conflict":[],"rr_conflict":[]},{"number":9,"kernel":[{"production":2,"dot":4}],"shift":null,"reduce":[{"look_ahead":[4,5,1],"production":2}],"goto":null,"sr_conflict":[],"rr_conflict":[]}]} \ No newline at end of file diff --git a/spec/test/tree.json b/spec/test/tree.json new file mode 100644 index 0000000..8387bec --- /dev/null +++ b/spec/test/tree.json @@ -0,0 +1 @@ 
+{"name":"tree","lexical_specification":{"lexer":"maleeni","maleeni":{"spec":{"name":"tree","initial_mode_id":1,"mode_names":["","default"],"kind_names":["","ws","l_paren","r_paren","identifier"],"kind_ids":[null,[0,1,2,3,4]],"compression_level":2,"specs":[null,{"kind_names":["","ws","l_paren","r_paren","identifier"],"push":[0,0,0,0,0],"pop":[0,0,0,0,0],"dfa":{"initial_state_id":1,"accepting_states":[0,0,1,4,2,3],"row_count":6,"col_count":256,"transition":{"unique_entries":{"original_row_count":4,"original_col_count":256,"empty_value":0,"entries":[0,0,0,0,0,0,0,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,4,5,0,0,0,0,0,0,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3,0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3,0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"bounds":[-1,-1,-1,-1,-1,-1,-1,-1,-1,1,1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,1,1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1,1,1,-1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,-1,-1,-1,-1,1,-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,3,3,3,3,-1,-1,-1,-1,-1,-1,-1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,-1,-1,-1,-1,3,-1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,-1,-1,2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,2,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1],"row_displacement":[0,0,189,75]},"row_nums":[0,1,2,3,0,0],"original_row_count":6,"original_col_count":256,"empty_value":0}}}]},"kind_to_terminal":[0,3,4,5,6],"terminal_to_kind":[0,0,0,1,2,3,4],"skip":[0,1,0,0,0],"kind_aliases":["","","","","(",")",""]}},"parsing_table":{"class":"lalr","action":[0,0,0,0,-2,0,0,0,1,0,0,0,0,0,0,0,-3,0,0,0,-4,0,0,0,0,0,-5,0,0,6,0,0,-2,6,0,0,3,0,0,3,3,0,0,0,0,0,5,5,0,0,0,0,0,-2,-9,0,0,0,0,0,4,4,0,0,2,0,0,2,2,0],"goto":[0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,7,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0],"state_count":10,"initial_state":0,"start_production":1,"lhs_symbols":[0,1,2,2,3,3,3],"alternative_symbol_counts":[0,1,4,3,2,1,0],"terminals":["","\u003ceof\u003e","error","ws","l_paren","r_paren","identifier"],"terminal_count":7,"non_terminals":["","tree'","tree","tree_list"],"non_terminal_count":4,"eof_symbol":1,"error_symbol":2,"error_trapper_states":[0,0,1,0,0,0,0,0,0,0],"recover_productions":[0,0,0,1,0,0,0]},"ast_action":{"entries":[null,null,[2,-3],[2],[-1,2],null,null]}} diff --git a/spec/test/tree.vartan b/spec/test/tree.vartan new file mode 100644 index 0000000..139014d --- /dev/null +++ b/spec/test/tree.vartan @@ -0,0 +1,25 @@ +#name tree; + +#prec ( + #assign l_paren + #assign $empty_tree +); + +tree + : l_paren identifier tree_list r_paren #ast identifier tree_list... 
+ | l_paren error r_paren #recover #ast error + ; +tree_list + : tree_list tree #ast tree_list... tree + | tree + | #prec $empty_tree + ; + +ws #skip + : "[\u{0009}\u{000A}\u{000D}\u{0020}]+"; +l_paren + : '('; +r_paren + : ')'; +identifier + : "[0-9A-Za-z_]+"; diff --git a/spec/test/tree_lexer.go b/spec/test/tree_lexer.go new file mode 100644 index 0000000..931626f --- /dev/null +++ b/spec/test/tree_lexer.go @@ -0,0 +1,549 @@ +// Code generated by maleeni-go. DO NOT EDIT. +package test + +import ( + "fmt" + "io" + "io/ioutil" +) + +type ModeID int + +func (id ModeID) Int() int { + return int(id) +} + +type StateID int + +func (id StateID) Int() int { + return int(id) +} + +type KindID int + +func (id KindID) Int() int { + return int(id) +} + +type ModeKindID int + +func (id ModeKindID) Int() int { + return int(id) +} + +type LexSpec interface { + InitialMode() ModeID + Pop(mode ModeID, modeKind ModeKindID) bool + Push(mode ModeID, modeKind ModeKindID) (ModeID, bool) + ModeName(mode ModeID) string + InitialState(mode ModeID) StateID + NextState(mode ModeID, state StateID, v int) (StateID, bool) + Accept(mode ModeID, state StateID) (ModeKindID, bool) + KindIDAndName(mode ModeID, modeKind ModeKindID) (KindID, string) +} + +// Token representes a token. +type Token struct { + // ModeID is an ID of a lex mode. + ModeID ModeID + + // KindID is an ID of a kind. This is unique among all modes. + KindID KindID + + // ModeKindID is an ID of a lexical kind. This is unique only within a mode. + // Note that you need to use KindID field if you want to identify a kind across all modes. + ModeKindID ModeKindID + + // Row is a row number where a lexeme appears. + Row int + + // Col is a column number where a lexeme appears. + // Note that Col is counted in code points, not bytes. + Col int + + // Lexeme is a byte sequence matched a pattern of a lexical specification. + Lexeme []byte + + // When this field is true, it means the token is the EOF token. 
+ EOF bool + + // When this field is true, it means the token is an error token. + Invalid bool +} + +type LexerOption func(l *Lexer) error + +// DisableModeTransition disables the active mode transition. Thus, even if the lexical specification has the push and pop +// operations, the lexer doesn't perform these operations. When the lexical specification has multiple modes, and this option is +// enabled, you need to call the Lexer.Push and Lexer.Pop methods to perform the mode transition. You can use the Lexer.Mode method +// to know the current lex mode. +func DisableModeTransition() LexerOption { + return func(l *Lexer) error { + l.passiveModeTran = true + return nil + } +} + +type Lexer struct { + spec LexSpec + src []byte + srcPtr int + row int + col int + prevRow int + prevCol int + tokBuf []*Token + modeStack []ModeID + passiveModeTran bool +} + +// NewLexer returns a new lexer. +func NewLexer(spec LexSpec, src io.Reader, opts ...LexerOption) (*Lexer, error) { + b, err := ioutil.ReadAll(src) + if err != nil { + return nil, err + } + l := &Lexer{ + spec: spec, + src: b, + srcPtr: 0, + row: 0, + col: 0, + modeStack: []ModeID{ + spec.InitialMode(), + }, + passiveModeTran: false, + } + for _, opt := range opts { + err := opt(l) + if err != nil { + return nil, err + } + } + + return l, nil +} + +// Next returns a next token. +func (l *Lexer) Next() (*Token, error) { + if len(l.tokBuf) > 0 { + tok := l.tokBuf[0] + l.tokBuf = l.tokBuf[1:] + return tok, nil + } + + tok, err := l.nextAndTransition() + if err != nil { + return nil, err + } + if !tok.Invalid { + return tok, nil + } + errTok := tok + for { + tok, err = l.nextAndTransition() + if err != nil { + return nil, err + } + if !tok.Invalid { + break + } + errTok.Lexeme = append(errTok.Lexeme, tok.Lexeme...) 
+ } + l.tokBuf = append(l.tokBuf, tok) + + return errTok, nil +} + +func (l *Lexer) nextAndTransition() (*Token, error) { + tok, err := l.next() + if err != nil { + return nil, err + } + if tok.EOF || tok.Invalid { + return tok, nil + } + if l.passiveModeTran { + return tok, nil + } + mode := l.Mode() + if l.spec.Pop(mode, tok.ModeKindID) { + err := l.PopMode() + if err != nil { + return nil, err + } + } + if mode, ok := l.spec.Push(mode, tok.ModeKindID); ok { + l.PushMode(mode) + } + // The checking length of the mode stack must be at after pop and push operations because those operations can be performed + // at the same time. When the mode stack has just one element and popped it, the mode stack will be temporarily emptied. + // However, since a push operation may be performed immediately after it, the lexer allows the stack to be temporarily empty. + if len(l.modeStack) == 0 { + return nil, fmt.Errorf("a mode stack must have at least one element") + } + return tok, nil +} + +func (l *Lexer) next() (*Token, error) { + mode := l.Mode() + state := l.spec.InitialState(mode) + buf := []byte{} + unfixedBufLen := 0 + row := l.row + col := l.col + var tok *Token + for { + v, eof := l.read() + if eof { + if tok != nil { + l.unread(unfixedBufLen) + return tok, nil + } + // When `buf` has unaccepted data and reads the EOF, the lexer treats the buffered data as an invalid token. 
+ if len(buf) > 0 { + return &Token{ + ModeID: mode, + ModeKindID: 0, + Lexeme: buf, + Row: row, + Col: col, + Invalid: true, + }, nil + } + return &Token{ + ModeID: mode, + ModeKindID: 0, + Row: 0, + Col: 0, + EOF: true, + }, nil + } + buf = append(buf, v) + unfixedBufLen++ + nextState, ok := l.spec.NextState(mode, state, int(v)) + if !ok { + if tok != nil { + l.unread(unfixedBufLen) + return tok, nil + } + return &Token{ + ModeID: mode, + ModeKindID: 0, + Lexeme: buf, + Row: row, + Col: col, + Invalid: true, + }, nil + } + state = nextState + if modeKindID, ok := l.spec.Accept(mode, state); ok { + kindID, _ := l.spec.KindIDAndName(mode, modeKindID) + tok = &Token{ + ModeID: mode, + KindID: kindID, + ModeKindID: modeKindID, + Lexeme: buf, + Row: row, + Col: col, + } + unfixedBufLen = 0 + } + } +} + +// Mode returns the current lex mode. +func (l *Lexer) Mode() ModeID { + return l.modeStack[len(l.modeStack)-1] +} + +// PushMode adds a lex mode onto the mode stack. +func (l *Lexer) PushMode(mode ModeID) { + l.modeStack = append(l.modeStack, mode) +} + +// PopMode removes a lex mode from the top of the mode stack. +func (l *Lexer) PopMode() error { + sLen := len(l.modeStack) + if sLen == 0 { + return fmt.Errorf("cannot pop a lex mode from a lex mode stack any more") + } + l.modeStack = l.modeStack[:sLen-1] + return nil +} + +func (l *Lexer) read() (byte, bool) { + if l.srcPtr >= len(l.src) { + return 0, true + } + + b := l.src[l.srcPtr] + l.srcPtr++ + + l.prevRow = l.row + l.prevCol = l.col + + // Count the token positions. + // The driver treats LF as the end of lines and counts columns in code points, not bytes. + // To count in code points, we refer to the First Byte column in the Table 3-6. + // + // Reference: + // - [Table 3-6] https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf > Table 3-6. UTF-8 Bit Distribution + if b < 128 { + // 0x0A is LF. 
+ if b == 0x0A { + l.row++ + l.col = 0 + } else { + l.col++ + } + } else if b>>5 == 6 || b>>4 == 14 || b>>3 == 30 { + l.col++ + } + + return b, false +} + +// We must not call this function consecutively to record the token position correctly. +func (l *Lexer) unread(n int) { + l.srcPtr -= n + + l.row = l.prevRow + l.col = l.prevCol +} + +const ( + ModeIDNil ModeID = 0 + ModeIDDefault ModeID = 1 +) + +const ( + ModeNameNil = "" + ModeNameDefault = "default" +) + +// ModeIDToName converts a mode ID to a name. +func ModeIDToName(id ModeID) string { + switch id { + case ModeIDNil: + return ModeNameNil + case ModeIDDefault: + return ModeNameDefault + } + return "" +} + +const ( + KindIDNil KindID = 0 + KindIDWs KindID = 1 + KindIDLParen KindID = 2 + KindIDRParen KindID = 3 + KindIDIdentifier KindID = 4 +) + +const ( + KindNameNil = "" + KindNameWs = "ws" + KindNameLParen = "l_paren" + KindNameRParen = "r_paren" + KindNameIdentifier = "identifier" +) + +// KindIDToName converts a kind ID to a name. 
+func KindIDToName(id KindID) string { + switch id { + case KindIDNil: + return KindNameNil + case KindIDWs: + return KindNameWs + case KindIDLParen: + return KindNameLParen + case KindIDRParen: + return KindNameRParen + case KindIDIdentifier: + return KindNameIdentifier + } + return "" +} + +type lexSpec struct { + pop [][]bool + push [][]ModeID + modeNames []string + initialStates []StateID + acceptances [][]ModeKindID + kindIDs [][]KindID + kindNames []string + initialModeID ModeID + modeIDNil ModeID + modeKindIDNil ModeKindID + stateIDNil StateID + + rowNums [][]int + rowDisplacements [][]int + bounds [][]int + entries [][]StateID + originalColCounts []int +} + +func NewLexSpec() *lexSpec { + return &lexSpec{ + pop: [][]bool{ + nil, + { + false, false, false, false, false, + }, + }, + push: [][]ModeID{ + nil, + { + 0, 0, 0, 0, 0, + }, + }, + modeNames: []string{ + ModeNameNil, + ModeNameDefault, + }, + initialStates: []StateID{ + 0, + 1, + }, + acceptances: [][]ModeKindID{ + nil, + { + 0, 0, 1, 4, 2, 3, + }, + }, + kindIDs: [][]KindID{ + nil, + { + KindIDNil, + KindIDWs, + KindIDLParen, + KindIDRParen, + KindIDIdentifier, + }, + }, + kindNames: []string{ + KindNameNil, + KindNameWs, + KindNameLParen, + KindNameRParen, + KindNameIdentifier, + }, + initialModeID: ModeIDDefault, + modeIDNil: ModeIDNil, + modeKindIDNil: 0, + stateIDNil: 0, + + rowNums: [][]int{ + nil, + { + 0, 1, 2, 3, 0, 0, + }, + }, + rowDisplacements: [][]int{ + nil, + { + 0, 0, 189, 75, + }, + }, + bounds: [][]int{ + nil, + { + -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1, -1, 1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, + 1, 1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, + -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, 1, -1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -1, -1, 
-1, -1, -1, -1, -1, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, -1, -1, -1, -1, 3, -1, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, + -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, + }, + }, + entries: [][]StateID{ + nil, + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, + 4, 5, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, + 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 3, 0, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, + 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + }, + }, + originalColCounts: nil, + } +} + +func (s *lexSpec) InitialMode() ModeID { + return s.initialModeID +} + +func (s *lexSpec) Pop(mode ModeID, modeKind ModeKindID) bool { + return s.pop[mode][modeKind] +} + +func (s *lexSpec) Push(mode ModeID, modeKind ModeKindID) (ModeID, bool) { + id := s.push[mode][modeKind] + return id, id != s.modeIDNil +} + +func (s *lexSpec) ModeName(mode ModeID) string { + return s.modeNames[mode] +} + +func (s *lexSpec) InitialState(mode ModeID) StateID { + return s.initialStates[mode] +} + +func (s *lexSpec) NextState(mode ModeID, state StateID, v int) (StateID, bool) { + rowNum := s.rowNums[mode][state] + d := s.rowDisplacements[mode][rowNum] + if s.bounds[mode][d+v] != rowNum { + return s.stateIDNil, false + } + return s.entries[mode][d+v], true +} + +func (s *lexSpec) Accept(mode ModeID, state StateID) (ModeKindID, bool) { + id := s.acceptances[mode][state] + return id, id != s.modeKindIDNil +} + +func (s *lexSpec) KindIDAndName(mode ModeID, modeKind ModeKindID) (KindID, string) { + id := s.kindIDs[mode][modeKind] + return id, s.kindNames[id] +} diff --git a/spec/test/tree_parser.go b/spec/test/tree_parser.go new file mode 100644 index 0000000..567e3b0 --- /dev/null +++ b/spec/test/tree_parser.go @@ -0,0 +1,638 @@ +// Code generated by vartan-go. DO NOT EDIT. 
+package test + +import ( + "fmt" + "io" +) + +type Grammar interface { + // Class returns a class of grammar. + Class() string + + // InitialState returns the initial state of a parser. + InitialState() int + + // StartProduction returns the start production of grammar. + StartProduction() int + + // Action returns an ACTION entry corresponding to a (state, terminal symbol) pair. + Action(state int, terminal int) int + + // GoTo returns a GOTO entry corresponding to a (state, non-terminal symbol) pair. + GoTo(state int, lhs int) int + + // ErrorTrapperState returns true when a state can shift the error symbol. + ErrorTrapperState(state int) bool + + // LHS returns a LHS symbol of a production. + LHS(prod int) int + + // AlternativeSymbolCount returns a symbol count of p production. + AlternativeSymbolCount(prod int) int + + // RecoverProduction returns true when a production has the recover directive. + RecoverProduction(prod int) bool + + // NonTerminal retuns a string representaion of a non-terminal symbol. + NonTerminal(nonTerminal int) string + + // TerminalCount returns a terminal symbol count of grammar. + TerminalCount() int + + // EOF returns the EOF symbol. + EOF() int + + // Error returns the error symbol. + Error() int + + // Terminal retuns a string representaion of a terminal symbol. + Terminal(terminal int) string + + // TerminalAlias returns an alias for a terminal. + TerminalAlias(terminal int) string + + // ASTAction returns an AST action entries. + ASTAction(prod int) []int +} + +type VToken interface { + // TerminalID returns a terminal ID. + TerminalID() int + + // Lexeme returns a lexeme. + Lexeme() []byte + + // EOF returns true when a token represents EOF. + EOF() bool + + // Invalid returns true when a token is invalid. + Invalid() bool + + // Position returns (row, column) pair. + Position() (int, int) + + // Skip returns true when a token must be skipped on syntax analysis. 
+ Skip() bool +} + +type TokenStream interface { + Next() (VToken, error) +} + +type SyntaxError struct { + Row int + Col int + Message string + Token VToken + ExpectedTerminals []string +} + +type ParserOption func(p *Parser) error + +// DisableLAC disables LAC (lookahead correction). When the grammar has the LALR class, LAC is enabled by default. +func DisableLAC() ParserOption { + return func(p *Parser) error { + p.disableLAC = true + return nil + } +} + +func SemanticAction(semAct SemanticActionSet) ParserOption { + return func(p *Parser) error { + p.semAct = semAct + return nil + } +} + +type Parser struct { + toks TokenStream + gram Grammar + stateStack *stateStack + semAct SemanticActionSet + disableLAC bool + onError bool + shiftCount int + synErrs []*SyntaxError +} + +func NewParser(toks TokenStream, gram Grammar, opts ...ParserOption) (*Parser, error) { + p := &Parser{ + toks: toks, + gram: gram, + stateStack: &stateStack{}, + } + + if p.gram.Class() != "lalr" { + p.disableLAC = true + } + + for _, opt := range opts { + err := opt(p) + if err != nil { + return nil, err + } + } + + return p, nil +} + +func (p *Parser) Parse() error { + p.stateStack.push(p.gram.InitialState()) + tok, err := p.nextToken() + if err != nil { + return err + } + +ACTION_LOOP: + for { + act := p.lookupAction(tok) + + switch { + case act < 0: // Shift + nextState := act * -1 + + recovered := false + if p.onError { + p.shiftCount++ + + // When the parser performs shift three times, the parser recovers from the error state. 
+ if p.shiftCount >= 3 { + p.onError = false + p.shiftCount = 0 + recovered = true + } + } + + p.shift(nextState) + + if p.semAct != nil { + p.semAct.Shift(tok, recovered) + } + + tok, err = p.nextToken() + if err != nil { + return err + } + case act > 0: // Reduce + prodNum := act + + recovered := false + if p.onError && p.gram.RecoverProduction(prodNum) { + p.onError = false + p.shiftCount = 0 + recovered = true + } + + accepted := p.reduce(prodNum) + if accepted { + if p.semAct != nil { + p.semAct.Accept() + } + + return nil + } + + if p.semAct != nil { + p.semAct.Reduce(prodNum, recovered) + } + default: // Error + if p.onError { + tok, err = p.nextToken() + if err != nil { + return err + } + if tok.EOF() { + if p.semAct != nil { + p.semAct.MissError(tok) + } + + return nil + } + + continue ACTION_LOOP + } + + row, col := tok.Position() + p.synErrs = append(p.synErrs, &SyntaxError{ + Row: row, + Col: col, + Message: "unexpected token", + Token: tok, + ExpectedTerminals: p.searchLookahead(p.stateStack.top()), + }) + + count, ok := p.trapError() + if !ok { + if p.semAct != nil { + p.semAct.MissError(tok) + } + + return nil + } + + p.onError = true + p.shiftCount = 0 + + act, err := p.lookupActionOnError() + if err != nil { + return err + } + + p.shift(act * -1) + + if p.semAct != nil { + p.semAct.TrapAndShiftError(tok, count) + } + } + } +} + +// validateLookahead validates whether `term` is a valid lookahead in the current context. When `term` is valid, +// this method returns `true`. 
+func (p *Parser) validateLookahead(term int) bool { + p.stateStack.enableExploratoryMode() + defer p.stateStack.disableExploratoryMode() + + for { + act := p.gram.Action(p.stateStack.topExploratorily(), term) + + switch { + case act < 0: // Shift + return true + case act > 0: // Reduce + prodNum := act + + lhs := p.gram.LHS(prodNum) + if lhs == p.gram.LHS(p.gram.StartProduction()) { + return true + } + n := p.gram.AlternativeSymbolCount(prodNum) + p.stateStack.popExploratorily(n) + state := p.gram.GoTo(p.stateStack.topExploratorily(), lhs) + p.stateStack.pushExploratorily(state) + default: // Error + return false + } + } +} + +func (p *Parser) nextToken() (VToken, error) { + for { + // We don't have to check whether the token is invalid because the kind ID of the invalid token is 0, + // and the parsing table doesn't have an entry corresponding to the kind ID 0. Thus we can detect + // a syntax error because the parser cannot find an entry corresponding to the invalid token. + tok, err := p.toks.Next() + if err != nil { + return nil, err + } + + if tok.Skip() { + continue + } + + return tok, nil + } +} + +func (p *Parser) tokenToTerminal(tok VToken) int { + if tok.EOF() { + return p.gram.EOF() + } + + return tok.TerminalID() +} + +func (p *Parser) lookupAction(tok VToken) int { + if !p.disableLAC { + term := p.tokenToTerminal(tok) + if !p.validateLookahead(term) { + return 0 + } + } + + return p.gram.Action(p.stateStack.top(), p.tokenToTerminal(tok)) +} + +func (p *Parser) lookupActionOnError() (int, error) { + act := p.gram.Action(p.stateStack.top(), p.gram.Error()) + if act >= 0 { + return 0, fmt.Errorf("an entry must be a shift action by the error symbol; entry: %v, state: %v, symbol: %v", act, p.stateStack.top(), p.gram.Terminal(p.gram.Error())) + } + + return act, nil +} + +func (p *Parser) shift(nextState int) { + p.stateStack.push(nextState) +} + +func (p *Parser) reduce(prodNum int) bool { + lhs := p.gram.LHS(prodNum) + if lhs == 
p.gram.LHS(p.gram.StartProduction()) { + return true + } + n := p.gram.AlternativeSymbolCount(prodNum) + p.stateStack.pop(n) + nextState := p.gram.GoTo(p.stateStack.top(), lhs) + p.stateStack.push(nextState) + return false +} + +func (p *Parser) trapError() (int, bool) { + count := 0 + for { + if p.gram.ErrorTrapperState(p.stateStack.top()) { + return count, true + } + + if p.stateStack.top() != p.gram.InitialState() { + p.stateStack.pop(1) + count++ + } else { + return 0, false + } + } +} + +func (p *Parser) SyntaxErrors() []*SyntaxError { + return p.synErrs +} + +func (p *Parser) searchLookahead(state int) []string { + kinds := []string{} + termCount := p.gram.TerminalCount() + for term := 0; term < termCount; term++ { + if p.disableLAC { + if p.gram.Action(p.stateStack.top(), term) == 0 { + continue + } + } else { + if !p.validateLookahead(term) { + continue + } + } + + // We don't add the error symbol to the look-ahead symbols because users cannot input the error symbol + // intentionally. 
+ if term == p.gram.Error() { + continue + } + + if alias := p.gram.TerminalAlias(term); alias != "" { + kinds = append(kinds, alias) + } else { + kinds = append(kinds, p.gram.Terminal(term)) + } + } + + return kinds +} + +type stateStack struct { + items []int + itemsExp []int +} + +func (s *stateStack) enableExploratoryMode() { + s.itemsExp = make([]int, len(s.items)) + copy(s.itemsExp, s.items) +} + +func (s *stateStack) disableExploratoryMode() { + s.itemsExp = nil +} + +func (s *stateStack) top() int { + return s.items[len(s.items)-1] +} + +func (s *stateStack) topExploratorily() int { + return s.itemsExp[len(s.itemsExp)-1] +} + +func (s *stateStack) push(state int) { + s.items = append(s.items, state) +} + +func (s *stateStack) pushExploratorily(state int) { + s.itemsExp = append(s.itemsExp, state) +} + +func (s *stateStack) pop(n int) { + s.items = s.items[:len(s.items)-n] +} + +func (s *stateStack) popExploratorily(n int) { + s.itemsExp = s.itemsExp[:len(s.itemsExp)-n] +} + +type grammarImpl struct { + recoverProductions []int + action []int + goTo []int + alternativeSymbolCounts []int + errorTrapperStates []int + nonTerminals []string + lhsSymbols []int + terminals []string + terminalAliases []string + astActions [][]int +} + +func NewGrammar() *grammarImpl { + return &grammarImpl{ + recoverProductions: []int{ + 0, 0, 0, 1, 0, 0, 0, + }, + action: []int{ + 0, 0, 0, 0, -2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, + -4, 0, 0, 0, 0, 0, -5, 0, 0, 6, 0, 0, -2, 6, 0, 0, 3, 0, 0, 3, + 3, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, -2, -9, 0, 0, 0, 0, 0, + 4, 4, 0, 0, 2, 0, 0, 2, 2, 0, + }, + goTo: []int{ + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }, + alternativeSymbolCounts: []int{ + 0, 1, 4, 3, 2, 1, 0, + }, + errorTrapperStates: []int{ + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + }, + nonTerminals: []string{ + "", + "tree'", + "tree", + "tree_list", + }, + lhsSymbols: []int{ + 0, 1, 2, 2, 3, 
3, 3, + }, + terminals: []string{ + "", + "", + "error", + "ws", + "l_paren", + "r_paren", + "identifier", + }, + terminalAliases: []string{ + "", + "", + "", + "", + "(", + ")", + "", + }, + astActions: [][]int{ + nil, + nil, + { + 2, -3, + }, + { + 2, + }, + { + -1, 2, + }, + nil, + nil, + }, + } +} + +func (g *grammarImpl) Class() string { + return "lalr" +} + +func (g *grammarImpl) InitialState() int { + return 0 +} + +func (g *grammarImpl) StartProduction() int { + return 1 +} + +func (g *grammarImpl) RecoverProduction(prod int) bool { + return g.recoverProductions[prod] != 0 +} + +func (g *grammarImpl) Action(state int, terminal int) int { + return g.action[state*7+terminal] +} + +func (g *grammarImpl) GoTo(state int, lhs int) int { + return g.goTo[state*4+lhs] +} + +func (g *grammarImpl) AlternativeSymbolCount(prod int) int { + return g.alternativeSymbolCounts[prod] +} + +func (g *grammarImpl) TerminalCount() int { + return 7 +} + +func (g *grammarImpl) ErrorTrapperState(state int) bool { + return g.errorTrapperStates[state] != 0 +} + +func (g *grammarImpl) NonTerminal(nonTerminal int) string { + return g.nonTerminals[nonTerminal] +} + +func (g *grammarImpl) LHS(prod int) int { + return g.lhsSymbols[prod] +} + +func (g *grammarImpl) EOF() int { + return 1 +} + +func (g *grammarImpl) Error() int { + return 2 +} + +func (g *grammarImpl) Terminal(terminal int) string { + return g.terminals[terminal] +} + +func (g *grammarImpl) TerminalAlias(terminal int) string { + return g.terminalAliases[terminal] +} + +func (g *grammarImpl) ASTAction(prod int) []int { + return g.astActions[prod] +} + +type vToken struct { + terminalID int + skip bool + tok *Token +} + +func (t *vToken) TerminalID() int { + return t.terminalID +} + +func (t *vToken) Lexeme() []byte { + return t.tok.Lexeme +} + +func (t *vToken) EOF() bool { + return t.tok.EOF +} + +func (t *vToken) Invalid() bool { + return t.tok.Invalid +} + +func (t *vToken) Skip() bool { + return t.skip +} + +func (t 
*vToken) Position() (int, int) { + return t.tok.Row, t.tok.Col +} + +var kindToTerminal = []int{ + 0, 3, 4, 5, 6, +} + +var skip = []int{ + 0, 1, 0, 0, 0, +} + +type tokenStream struct { + lex *Lexer + kindToTerminal []int + skip []int +} + +func NewTokenStream(src io.Reader) (*tokenStream, error) { + lex, err := NewLexer(NewLexSpec(), src) + if err != nil { + return nil, err + } + + return &tokenStream{ + lex: lex, + }, nil +} + +func (t *tokenStream) Next() (VToken, error) { + tok, err := t.lex.Next() + if err != nil { + return nil, err + } + return &vToken{ + terminalID: kindToTerminal[tok.KindID], + skip: skip[tok.KindID] > 0, + tok: tok, + }, nil +} diff --git a/spec/test/tree_semantic_action.go b/spec/test/tree_semantic_action.go new file mode 100644 index 0000000..9e240c2 --- /dev/null +++ b/spec/test/tree_semantic_action.go @@ -0,0 +1,353 @@ +// Code generated by vartan-go. DO NOT EDIT. +package test + +import ( + "encoding/json" + "fmt" + "io" +) + +// SemanticActionSet is a set of semantic actions a parser calls. +type SemanticActionSet interface { + // Shift runs when the parser shifts a symbol onto a state stack. `tok` is a token corresponding to the symbol. + // When the parser recovered from an error state by shifting the token, `recovered` is true. + Shift(tok VToken, recovered bool) + + // Reduce runs when the parser reduces an RHS of a production to its LHS. `prodNum` is a number of the production. + // When the parser recovered from an error state by reducing the production, `recovered` is true. + Reduce(prodNum int, recovered bool) + + // Accept runs when the parser accepts an input. + Accept() + + // TrapAndShiftError runs when the parser traps a syntax error and shifts a error symbol onto the state stack. + // `cause` is a token that caused a syntax error. `popped` is the number of frames that the parser discards + // from the state stack. 
+ // Unlike `Shift` function, this function doesn't take a token to be shifted as an argument because a token + // corresponding to the error symbol doesn't exist. + TrapAndShiftError(cause VToken, popped int) + + // MissError runs when the parser fails to trap a syntax error. `cause` is a token that caused a syntax error. + MissError(cause VToken) +} + +var _ SemanticActionSet = &SyntaxTreeActionSet{} + +// SyntaxTreeNode is a node of a syntax tree. A node type used in SyntaxTreeActionSet must implement SyntaxTreeNode interface. +type SyntaxTreeNode interface { + // ChildCount returns a child count of a node. A parser calls this method to know the child count to be expanded by an `#ast` + // directive with `...` operator. + ChildCount() int + + // ExpandChildren returns children of a node. A parser calls this method to fetch the children to be expanded by an `#ast` + // directive with `...` operator. + ExpandChildren() []SyntaxTreeNode +} + +var _ SyntaxTreeNode = &Node{} + +// SyntaxTreeBuilder allows you to construct a syntax tree containing arbitrary user-defined node types. +// The parser uses SyntaxTreeBuilder interface as a part of semantic actions via SyntaxTreeActionSet interface. +type SyntaxTreeBuilder interface { + Shift(kindName string, text string, row, col int) SyntaxTreeNode + ShiftError(kindName string) SyntaxTreeNode + Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode + Accept(f SyntaxTreeNode) +} + +var _ SyntaxTreeBuilder = &DefaulSyntaxTreeBuilder{} + +// DefaulSyntaxTreeBuilder is a implementation of SyntaxTreeBuilder. +type DefaulSyntaxTreeBuilder struct { + tree *Node +} + +// NewDefaultSyntaxTreeBuilder returns a new DefaultSyntaxTreeBuilder. +func NewDefaultSyntaxTreeBuilder() *DefaulSyntaxTreeBuilder { + return &DefaulSyntaxTreeBuilder{} +} + +// Shift is a implementation of SyntaxTreeBuilder.Shift. 
func (b *DefaulSyntaxTreeBuilder) Shift(kindName string, text string, row, col int) SyntaxTreeNode {
	return &Node{
		Type:     NodeTypeTerminal,
		KindName: kindName,
		Text:     text,
		Row:      row,
		Col:      col,
	}
}

// ShiftError is an implementation of SyntaxTreeBuilder.ShiftError.
// It builds a leaf node representing the error symbol; unlike a terminal
// node, an error node carries no lexeme or source position.
func (b *DefaulSyntaxTreeBuilder) ShiftError(kindName string) SyntaxTreeNode {
	return &Node{
		Type:     NodeTypeError,
		KindName: kindName,
	}
}

// Reduce is an implementation of SyntaxTreeBuilder.Reduce.
// It builds an internal node labeled kindName from the popped handle.
// Every child must be a *Node produced by this same builder; the type
// assertion panics on foreign SyntaxTreeNode implementations.
func (b *DefaulSyntaxTreeBuilder) Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode {
	cNodes := make([]*Node, len(children))
	for i, c := range children {
		cNodes[i] = c.(*Node)
	}
	return &Node{
		Type:     NodeTypeNonTerminal,
		KindName: kindName,
		Children: cNodes,
	}
}

// Accept is an implementation of SyntaxTreeBuilder.Accept.
// It records the root of the completed tree, which Tree exposes afterwards.
func (b *DefaulSyntaxTreeBuilder) Accept(f SyntaxTreeNode) {
	b.tree = f.(*Node)
}

// Tree returns a syntax tree when the parser has accepted an input. If a syntax error occurs, the return value is nil.
func (b *DefaulSyntaxTreeBuilder) Tree() *Node {
	return b.tree
}

// SyntaxTreeActionSet is an implementation of the SemanticActionSet interface and constructs a syntax tree.
type SyntaxTreeActionSet struct {
	gram     Grammar
	builder  SyntaxTreeBuilder
	semStack *semanticStack
	// disableASTAction makes the action set ignore `#ast` directives so the
	// resulting tree mirrors the concrete syntax (CST) rather than an AST.
	disableASTAction bool
}

// NewASTActionSet returns a new SyntaxTreeActionSet that constructs an AST (Abstract Syntax Tree).
// When grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns interprets them.
func NewASTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet {
	return &SyntaxTreeActionSet{
		gram:     gram,
		builder:  builder,
		semStack: newSemanticStack(),
	}
}

// NewCSTActionSet returns a new SyntaxTreeActionSet that constructs a CST (Concrete Syntax Tree).
// Even if grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns ignores them.
func NewCSTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet {
	return &SyntaxTreeActionSet{
		gram:             gram,
		builder:          builder,
		semStack:         newSemanticStack(),
		disableASTAction: true,
	}
}

// Shift is an implementation of the SemanticActionSet.Shift method.
// It pushes a leaf node for the shifted token onto the semantic stack.
// `recovered` is accepted to satisfy the interface but is not used here.
func (a *SyntaxTreeActionSet) Shift(tok VToken, recovered bool) {
	term := a.tokenToTerminal(tok)
	row, col := tok.Position()
	a.semStack.push(a.builder.Shift(a.gram.Terminal(term), string(tok.Lexeme()), row, col))
}

// Reduce is an implementation of the SemanticActionSet.Reduce method.
// It pops the handle of production `prodNum` off the semantic stack, builds
// the children list (interpreting the production's `#ast` directive unless
// disableASTAction is set), and pushes the node built for the LHS symbol.
// `recovered` is accepted to satisfy the interface but is not used here.
func (a *SyntaxTreeActionSet) Reduce(prodNum int, recovered bool) {
	lhs := a.gram.LHS(prodNum)

	// When an alternative is empty, `n` will be 0, and `handle` will be empty slice.
	n := a.gram.AlternativeSymbolCount(prodNum)
	handle := a.semStack.pop(n)

	var astAct []int
	if !a.disableASTAction {
		astAct = a.gram.ASTAction(prodNum)
	}
	var children []SyntaxTreeNode
	if astAct != nil {
		// An AST action entry `e` is 1-based: e > 0 keeps handle[e-1] as a
		// single child, while e < 0 splices the children of handle[-e-1] in
		// place (the `...` expansion operator).
		// Count the number of children in advance to avoid frequent growth in a slice for children.
		{
			l := 0
			for _, e := range astAct {
				if e > 0 {
					l++
				} else {
					offset := e*-1 - 1
					l += handle[offset].ChildCount()
				}
			}

			children = make([]SyntaxTreeNode, l)
		}

		p := 0
		for _, e := range astAct {
			if e > 0 {
				offset := e - 1
				children[p] = handle[offset]
				p++
			} else {
				offset := e*-1 - 1
				for _, c := range handle[offset].ExpandChildren() {
					children[p] = c
					p++
				}
			}
		}
	} else {
		// If an alternative has no AST action, a driver generates
		// a node with the same structure as a CST.
		children = handle
	}

	a.semStack.push(a.builder.Reduce(a.gram.NonTerminal(lhs), children))
}

// Accept is an implementation of the SemanticActionSet.Accept method.
// It hands the single remaining node (the root of the tree) to the builder.
func (a *SyntaxTreeActionSet) Accept() {
	top := a.semStack.pop(1)
	a.builder.Accept(top[0])
}

// TrapAndShiftError is an implementation of the SemanticActionSet.TrapAndShiftError method.
+func (a *SyntaxTreeActionSet) TrapAndShiftError(cause VToken, popped int) { + a.semStack.pop(popped) + a.semStack.push(a.builder.ShiftError(a.gram.Terminal(a.gram.Error()))) +} + +// MissError is a implementation of SemanticActionSet.MissError method. +func (a *SyntaxTreeActionSet) MissError(cause VToken) { +} + +func (a *SyntaxTreeActionSet) tokenToTerminal(tok VToken) int { + if tok.EOF() { + return a.gram.EOF() + } + + return tok.TerminalID() +} + +type semanticStack struct { + frames []SyntaxTreeNode +} + +func newSemanticStack() *semanticStack { + return &semanticStack{ + frames: make([]SyntaxTreeNode, 0, 100), + } +} + +func (s *semanticStack) push(f SyntaxTreeNode) { + s.frames = append(s.frames, f) +} + +func (s *semanticStack) pop(n int) []SyntaxTreeNode { + fs := s.frames[len(s.frames)-n:] + s.frames = s.frames[:len(s.frames)-n] + + return fs +} + +type NodeType int + +const ( + NodeTypeError = 0 + NodeTypeTerminal = 1 + NodeTypeNonTerminal = 2 +) + +// Node is a implementation of SyntaxTreeNode interface. 
+type Node struct { + Type NodeType + KindName string + Text string + Row int + Col int + Children []*Node +} + +func (n *Node) MarshalJSON() ([]byte, error) { + switch n.Type { + case NodeTypeError: + return json.Marshal(struct { + Type NodeType `json:"type"` + KindName string `json:"kind_name"` + }{ + Type: n.Type, + KindName: n.KindName, + }) + case NodeTypeTerminal: + return json.Marshal(struct { + Type NodeType `json:"type"` + KindName string `json:"kind_name"` + Text string `json:"text"` + Row int `json:"row"` + Col int `json:"col"` + }{ + Type: n.Type, + KindName: n.KindName, + Text: n.Text, + Row: n.Row, + Col: n.Col, + }) + case NodeTypeNonTerminal: + return json.Marshal(struct { + Type NodeType `json:"type"` + KindName string `json:"kind_name"` + Children []*Node `json:"children"` + }{ + Type: n.Type, + KindName: n.KindName, + Children: n.Children, + }) + default: + return nil, fmt.Errorf("invalid node type: %v", n.Type) + } +} + +// ChildCount is a implementation of SyntaxTreeNode.ChildCount. +func (n *Node) ChildCount() int { + return len(n.Children) +} + +// ExpandChildren is a implementation of SyntaxTreeNode.ExpandChildren. +func (n *Node) ExpandChildren() []SyntaxTreeNode { + fs := make([]SyntaxTreeNode, len(n.Children)) + for i, n := range n.Children { + fs[i] = n + } + return fs +} + +// PrintTree prints a syntax tree whose root is `node`. 
+func PrintTree(w io.Writer, node *Node) { + printTree(w, node, "", "") +} + +func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix string) { + if node == nil { + return + } + + switch node.Type { + case NodeTypeError: + fmt.Fprintf(w, "%v!%v\n", ruledLine, node.KindName) + case NodeTypeTerminal: + fmt.Fprintf(w, "%v%v %#v\n", ruledLine, node.KindName, node.Text) + case NodeTypeNonTerminal: + fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName) + + num := len(node.Children) + for i, child := range node.Children { + var line string + if num > 1 && i < num-1 { + line = "├─ " + } else { + line = "└─ " + } + + var prefix string + if i >= num-1 { + prefix = " " + } else { + prefix = "│ " + } + + printTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix) + } + } +} diff --git a/tester/tester.go b/tester/tester.go new file mode 100644 index 0000000..ef3ca61 --- /dev/null +++ b/tester/tester.go @@ -0,0 +1,177 @@ +package tester + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "runtime/debug" + "strings" + + "github.com/nihei9/vartan/driver" + gspec "github.com/nihei9/vartan/spec/grammar" + tspec "github.com/nihei9/vartan/spec/test" +) + +type TestResult struct { + TestCasePath string + Error error + Diffs []*tspec.TreeDiff +} + +func (r *TestResult) String() string { + if r.Error != nil { + const indent1 = " " + const indent2 = indent1 + indent1 + + msgLines := strings.Split(r.Error.Error(), "\n") + msg := fmt.Sprintf("Failed %v:\n%v%v", r.TestCasePath, indent1, strings.Join(msgLines, "\n"+indent1)) + if len(r.Diffs) == 0 { + return msg + } + var diffLines []string + for _, diff := range r.Diffs { + diffLines = append(diffLines, diff.Message) + diffLines = append(diffLines, fmt.Sprintf("%vexpected path: %v", indent1, diff.ExpectedPath)) + diffLines = append(diffLines, fmt.Sprintf("%vactual path: %v", indent1, diff.ActualPath)) + } + return fmt.Sprintf("%v\n%v%v", msg, indent2, strings.Join(diffLines, "\n"+indent2)) + } + 
return fmt.Sprintf("Passed %v", r.TestCasePath) +} + +type TestCaseWithMetadata struct { + TestCase *tspec.TestCase + FilePath string + Error error +} + +func ListTestCases(testPath string) []*TestCaseWithMetadata { + fi, err := os.Stat(testPath) + if err != nil { + return []*TestCaseWithMetadata{ + { + FilePath: testPath, + Error: err, + }, + } + } + if !fi.IsDir() { + c, err := parseTestCase(testPath) + return []*TestCaseWithMetadata{ + { + TestCase: c, + FilePath: testPath, + Error: err, + }, + } + } + + es, err := os.ReadDir(testPath) + if err != nil { + return []*TestCaseWithMetadata{ + { + FilePath: testPath, + Error: err, + }, + } + } + var cases []*TestCaseWithMetadata + for _, e := range es { + cs := ListTestCases(filepath.Join(testPath, e.Name())) + cases = append(cases, cs...) + } + return cases +} + +func parseTestCase(testCasePath string) (*tspec.TestCase, error) { + f, err := os.Open(testCasePath) + if err != nil { + return nil, err + } + defer f.Close() + return tspec.ParseTestCase(f) +} + +type Tester struct { + Grammar *gspec.CompiledGrammar + Cases []*TestCaseWithMetadata +} + +func (t *Tester) Run() []*TestResult { + var rs []*TestResult + for _, c := range t.Cases { + rs = append(rs, runTest(t.Grammar, c)) + } + return rs +} + +func runTest(g *gspec.CompiledGrammar, c *TestCaseWithMetadata) *TestResult { + var p *driver.Parser + var tb *driver.DefaulSyntaxTreeBuilder + { + gram := driver.NewGrammar(g) + toks, err := driver.NewTokenStream(g, bytes.NewReader(c.TestCase.Source)) + if err != nil { + return &TestResult{ + TestCasePath: c.FilePath, + Error: err, + } + } + tb = driver.NewDefaultSyntaxTreeBuilder() + p, err = driver.NewParser(toks, gram, driver.SemanticAction(driver.NewASTActionSet(gram, tb))) + if err != nil { + return &TestResult{ + TestCasePath: c.FilePath, + Error: err, + } + } + } + + err := p.Parse() + if err != nil { + return &TestResult{ + TestCasePath: c.FilePath, + Error: err, + } + } + + if tb.Tree() == nil { + var err error 
+ if len(p.SyntaxErrors()) > 0 { + err = fmt.Errorf("parse tree was not generated: syntax error occurred") + } else { + // The parser should always generate a parse tree in the vartan-test command, so if there is no parse + // tree, it is a bug. We also include a stack trace in the error message to be sure. + err = fmt.Errorf("parse tree was not generated: no syntax error:\n%v", string(debug.Stack())) + } + return &TestResult{ + TestCasePath: c.FilePath, + Error: err, + } + } + + // When a parse tree exists, the test continues regardless of whether or not syntax errors occurred. + diffs := tspec.DiffTree(genTree(tb.Tree()).Fill(), c.TestCase.Output) + if len(diffs) > 0 { + return &TestResult{ + TestCasePath: c.FilePath, + Error: fmt.Errorf("output mismatch"), + Diffs: diffs, + } + } + return &TestResult{ + TestCasePath: c.FilePath, + } +} + +func genTree(dTree *driver.Node) *tspec.Tree { + var children []*tspec.Tree + if len(dTree.Children) > 0 { + children = make([]*tspec.Tree, len(dTree.Children)) + for i, c := range dTree.Children { + children[i] = genTree(c) + } + } + return tspec.NewTree(dTree.KindName, children...) +} diff --git a/tester/tester_test.go b/tester/tester_test.go new file mode 100644 index 0000000..957d739 --- /dev/null +++ b/tester/tester_test.go @@ -0,0 +1,170 @@ +package tester + +import ( + "strings" + "testing" + + "github.com/nihei9/vartan/grammar" + gspec "github.com/nihei9/vartan/spec/grammar" + tspec "github.com/nihei9/vartan/spec/test" +) + +func TestTester_Run(t *testing.T) { + grammarSrc1 := ` +#name test; + +s + : foo bar baz + | foo error baz #recover + ; + +ws #skip + : "[\u{0009}\u{0020}]+"; +foo + : 'foo'; +bar + : 'bar'; +baz + : 'baz'; +` + + grammarSrc2 := ` +#name test; + +s + : foos + ; +foos + : foos foo #ast foos... 
foo + | foo + ; + +ws #skip + : "[\u{0009}\u{0020}]+"; +foo + : 'foo'; +` + + tests := []struct { + grammarSrc string + testSrc string + error bool + }{ + { + grammarSrc: grammarSrc1, + testSrc: ` +Test +--- +foo bar baz +--- +(s + (foo) (bar) (baz)) +`, + }, + { + grammarSrc: grammarSrc1, + testSrc: ` +Test +--- +foo ? baz +--- +(s + (foo) (error) (baz)) +`, + }, + { + grammarSrc: grammarSrc1, + testSrc: ` +Test +--- +foo bar baz +--- +(s) +`, + error: true, + }, + { + grammarSrc: grammarSrc1, + testSrc: ` +Test +--- +foo bar baz +--- +(s + (foo) (bar)) +`, + error: true, + }, + { + grammarSrc: grammarSrc1, + testSrc: ` +Test +--- +foo bar baz +--- +(s + (foo) (bar) (xxx)) +`, + error: true, + }, + { + grammarSrc: grammarSrc2, + testSrc: ` +Test +--- +foo foo foo +--- +(s + (foos + (foo) (foo) (foo))) +`, + }, + } + for _, tt := range tests { + ast, err := gspec.Parse(strings.NewReader(tt.grammarSrc)) + if err != nil { + t.Fatal(err) + } + b := grammar.GrammarBuilder{ + AST: ast, + } + g, err := b.Build() + if err != nil { + t.Fatal(err) + } + cg, _, err := grammar.Compile(g) + if err != nil { + t.Fatal(err) + } + c, err := tspec.ParseTestCase(strings.NewReader(tt.testSrc)) + if err != nil { + t.Fatal(err) + } + tester := &Tester{ + Grammar: cg, + Cases: []*TestCaseWithMetadata{ + { + TestCase: c, + }, + }, + } + rs := tester.Run() + if tt.error { + errOccurred := false + for _, r := range rs { + if r.Error != nil { + errOccurred = true + } + } + if !errOccurred { + t.Fatal("this test must fail, but it passed") + } + } else { + for _, r := range rs { + if r.Error != nil { + t.Fatalf("unexpected error occurred: %v", r.Error) + } + } + } + } +}