Add vartan-test command

nihei9 · May 29, 2022 · ceb6649 · ceb6649
1 parent 4c0f7eb
commit ceb6649
Show file tree

Hide file tree

Showing 12 changed files with 2,535 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -121,7 +121,36 @@ When `vartan parse` command successfully parses the input data, it prints a CST
 $ vartan show expr-report.json
 ```
 
-### 4. Generate a parser
+### 4. Test
+
+`vartan-test` command allows you to test whether your grammar recognizes an input text as a syntax tree with an expected structure. To do so, you need to define a test case as follows.
+
+```
+This is an example.
+---
+a / b * 100
+---
+(expr
+	(expr
+		(expr (id))
+		(div)
+		(expr (id)))
+	(mul)
+	(expr (int)))
+```
+
+The test case consists of a description, an input text, and a syntax tree you expect. Each part is separated by the delimiter `---`. The syntax tree is represented by the syntax like an [S-expression](https://en.wikipedia.org/wiki/S-expression).
+
+Save the above test case to `test.txt` file and run the following command.
+
+```sh
+$ vartan-test expr.vartan test.txt
+Passed test.txt
+```
+
+When you specify a directory as the 2nd argument of `vartan-test` command, it will run all test cases in the directory.
+
+### 5. Generate a parser
 
 Using `vartan-go` command, you can generate a source code of a parser to recognize your grammar.
 

diff --git a/cmd/vartan/test.go b/cmd/vartan/test.go
@@ -0,0 +1,65 @@
+package main
+
+import (
+	"errors"
+	"fmt"
+	"os"
+
+	"github.com/nihei9/vartan/grammar"
+	"github.com/nihei9/vartan/tester"
+	"github.com/spf13/cobra"
+)
+
+func init() {
+	cmd := &cobra.Command{
+		Use:     "test <grammar file path> <test file path>|<test directory path>",
+		Short:   "Test a grammar",
+		Example: `  vartan test grammar.vartan test`,
+		Args:    cobra.ExactArgs(2),
+		RunE:    runTest,
+	}
+	rootCmd.AddCommand(cmd)
+}
+
+func runTest(cmd *cobra.Command, args []string) error {
+	g, err := readGrammar(args[0])
+	if err != nil {
+		return fmt.Errorf("Cannot read a grammar: %w", err)
+	}
+	cg, _, err := grammar.Compile(g)
+	if err != nil {
+		return fmt.Errorf("Cannot read a compiled grammar: %w", err)
+	}
+
+	var cs []*tester.TestCaseWithMetadata
+	{
+		cs = tester.ListTestCases(args[1])
+		errOccurred := false
+		for _, c := range cs {
+			if c.Error != nil {
+				fmt.Fprintf(os.Stderr, "Failed to read a test case or a directory: %v\n%v\n", c.FilePath, c.Error)
+				errOccurred = true
+			}
+		}
+		if errOccurred {
+			return errors.New("Cannot run test")
+		}
+	}
+
+	t := &tester.Tester{
+		Grammar: cg,
+		Cases:   cs,
+	}
+	rs := t.Run()
+	testFailed := false
+	for _, r := range rs {
+		fmt.Fprintln(os.Stdout, r)
+		if r.Error != nil {
+			testFailed = true
+		}
+	}
+	if testFailed {
+		return errors.New("Test failed")
+	}
+	return nil
+}
diff --git a/spec/test/parser.go b/spec/test/parser.go
@@ -0,0 +1,226 @@
+//go:generate vartan compile tree.vartan -o tree.json
+//go:generate vartan-go tree.json --package test
+
+package test
+
+import (
+	"bufio"
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"regexp"
+	"strings"
+)
+
+type TreeDiff struct {
+	ExpectedPath string
+	ActualPath   string
+	Message      string
+}
+
+func newTreeDiff(expected, actual *Tree, message string) *TreeDiff {
+	return &TreeDiff{
+		ExpectedPath: expected.path(),
+		ActualPath:   actual.path(),
+		Message:      message,
+	}
+}
+
+type Tree struct {
+	Parent   *Tree
+	Offset   int
+	Kind     string
+	Children []*Tree
+}
+
+func NewTree(kind string, children ...*Tree) *Tree {
+	return &Tree{
+		Kind:     kind,
+		Children: children,
+	}
+}
+
+func (t *Tree) Fill() *Tree {
+	for i, c := range t.Children {
+		c.Parent = t
+		c.Offset = i
+		c.Fill()
+	}
+	return t
+}
+
+func (t *Tree) path() string {
+	if t.Parent == nil {
+		return t.Kind
+	}
+	return fmt.Sprintf("%v.[%v]%v", t.Parent.path(), t.Offset, t.Kind)
+}
+
+func DiffTree(expected, actual *Tree) []*TreeDiff {
+	if expected == nil && actual == nil {
+		return nil
+	}
+	if actual.Kind != expected.Kind {
+		msg := fmt.Sprintf("unexpected kind: expected '%v' but got '%v'", expected.Kind, actual.Kind)
+		return []*TreeDiff{
+			newTreeDiff(expected, actual, msg),
+		}
+	}
+	if len(actual.Children) != len(expected.Children) {
+		msg := fmt.Sprintf("unexpected node count: expected %v but got %v", len(expected.Children), len(actual.Children))
+		return []*TreeDiff{
+			newTreeDiff(expected, actual, msg),
+		}
+	}
+	var diffs []*TreeDiff
+	for i, exp := range expected.Children {
+		if ds := DiffTree(actual.Children[i], exp); len(ds) > 0 {
+			diffs = append(diffs, ds...)
+		}
+	}
+	return diffs
+}
+
+type TestCase struct {
+	Description string
+	Source      []byte
+	Output      *Tree
+}
+
+func ParseTestCase(r io.Reader) (*TestCase, error) {
+	bufs, err := splitIntoParts(r)
+	if err != nil {
+		return nil, err
+	}
+	if len(bufs) != 3 {
+		return nil, fmt.Errorf("too many or too few part delimiters: a test case consists of just tree parts: %v parts found", len(bufs))
+	}
+
+	tree, err := parseTree(bytes.NewReader(bufs[2]))
+	if err != nil {
+		return nil, err
+	}
+
+	return &TestCase{
+		Description: string(bufs[0]),
+		Source:      bufs[1],
+		Output:      tree,
+	}, nil
+}
+
+func splitIntoParts(r io.Reader) ([][]byte, error) {
+	var bufs [][]byte
+	s := bufio.NewScanner(r)
+	for {
+		buf, err := readPart(s)
+		if err != nil {
+			return nil, err
+		}
+		if buf == nil {
+			break
+		}
+		bufs = append(bufs, buf)
+	}
+	if err := s.Err(); err != nil {
+		return nil, err
+	}
+	return bufs, nil
+}
+
+var reDelim = regexp.MustCompile(`^\s*---+\s*$`)
+
+func readPart(s *bufio.Scanner) ([]byte, error) {
+	if !s.Scan() {
+		return nil, s.Err()
+	}
+	buf := &bytes.Buffer{}
+	line := s.Bytes()
+	if reDelim.Match(line) {
+		// Return an empty slice because (*bytes.Buffer).Bytes() returns nil if we have never written data.
+		return []byte{}, nil
+	}
+	_, err := buf.Write(line)
+	if err != nil {
+		return nil, err
+	}
+	for s.Scan() {
+		line := s.Bytes()
+		if reDelim.Match(line) {
+			return buf.Bytes(), nil
+		}
+		_, err := buf.Write([]byte("\n"))
+		if err != nil {
+			return nil, err
+		}
+		_, err = buf.Write(line)
+		if err != nil {
+			return nil, err
+		}
+	}
+	if err := s.Err(); err != nil {
+		return nil, err
+	}
+	return buf.Bytes(), nil
+}
+
+func parseTree(src io.Reader) (*Tree, error) {
+	toks, err := NewTokenStream(src)
+	if err != nil {
+		return nil, err
+	}
+	gram := NewGrammar()
+	tb := NewDefaultSyntaxTreeBuilder()
+	p, err := NewParser(toks, gram, SemanticAction(NewASTActionSet(gram, tb)))
+	if err != nil {
+		return nil, err
+	}
+	err = p.Parse()
+	if err != nil {
+		return nil, err
+	}
+	synErrs := p.SyntaxErrors()
+	if len(synErrs) > 0 {
+		var b strings.Builder
+		b.WriteString("syntax error:")
+		for _, synErr := range synErrs {
+			b.WriteRune('\n')
+			b.Write(formatSyntaxError(synErr, gram))
+		}
+		return nil, errors.New(b.String())
+	}
+	return genTree(tb.Tree()).Fill(), nil
+}
+
+func formatSyntaxError(synErr *SyntaxError, gram Grammar) []byte {
+	var b bytes.Buffer
+
+	b.WriteString(fmt.Sprintf("%v:%v: %v: ", synErr.Row+1, synErr.Col+1, synErr.Message))
+
+	tok := synErr.Token
+	switch {
+	case tok.EOF():
+		b.WriteString("<eof>")
+	case tok.Invalid():
+		b.WriteString(fmt.Sprintf("'%v' (<invalid>)", string(tok.Lexeme())))
+	default:
+		b.WriteString(fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), gram.Terminal(tok.TerminalID())))
+	}
+	b.WriteString(fmt.Sprintf("; expected: %v", synErr.ExpectedTerminals[0]))
+	for _, t := range synErr.ExpectedTerminals[1:] {
+		b.WriteString(fmt.Sprintf(", %v", t))
+	}
+
+	return b.Bytes()
+}
+
+func genTree(node *Node) *Tree {
+	var children []*Tree
+	if len(node.Children) > 1 {
+		children = make([]*Tree, len(node.Children)-1)
+		for i, c := range node.Children[1:] {
+			children[i] = genTree(c)
+		}
+	}
+	return NewTree(node.Children[0].Text, children...)
+}