Skip to content

Commit

Permalink
Handle block comments /* */ (#81)
Browse files Browse the repository at this point in the history
* Handle block comments /* */

* almost there

* wip cleaning up the newline and block comments issue

* better test output and focus on first block comment issue until fixed, added make target showing env vars to setup logger in tests

* SameLineAsNext:true for /*inline*/ shows something is off with what parser/lexer sets

* finally got it working... sigh

* simplify if now that it works

* add failing commented out tests about inline comment in the middle of an expression (not working)

* treat comments as NULL object when trying to evaluate them

* add a block comment example in the wasm / online demo
  • Loading branch information
ldemailly authored Aug 1, 2024
1 parent d4321a2 commit aa14201
Show file tree
Hide file tree
Showing 13 changed files with 236 additions and 80 deletions.
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ tinygo: Makefile *.go */*.go $(GEN)
strip grol.tiny
ls -lh grol.tiny

parser-test:
LOGGER_LOG_FILE_AND_LINE=false LOGGER_IGNORE_CLI_MODE=true LOGGER_LEVEL=debug go test \
-v -run '^TestFormat$$' ./parser | logc

TINYGO_STACKS:=-stack-size=40mb

wasm: Makefile *.go */*.go $(GEN) wasm/wasm_exec.js wasm/wasm_exec.html wasm/grol_wasm.html
Expand Down
25 changes: 21 additions & 4 deletions ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"strconv"
"strings"

"fortio.org/log"
"grol.io/grol/token"
)

Expand Down Expand Up @@ -97,32 +98,47 @@ type Statements struct {
Statements []Node
}

func sameLine(node Node) bool {
func keepSameLineAsPrevious(node Node) bool {
switch n := node.(type) { //nolint:exahustive // we may add more later
case *Comment:
return n.SameLine
return n.SameLineAsPrevious
default:
return false
}
}

// needNewLineAfter reports whether a newline should be printed after node.
// A comment that the parser marked as being on the same line as the next
// token suppresses the newline so the following statement stays attached;
// every other node type gets a newline.
func needNewLineAfter(node Node) bool {
	switch n := node.(type) { //nolint:exhaustive // we may add more later
	case *Comment:
		return !n.SameLineAsNext
	default:
		return true
	}
}

func (p Statements) PrettyPrint(ps *PrintState) *PrintState {
oldExpressionLevel := ps.ExpressionLevel
if ps.IndentLevel > 0 {
ps.Print("{") // first statement might be a comment on same line.
}
ps.IndentLevel++
ps.ExpressionLevel = 0
var prev Node
for i, s := range p.Statements {
log.Debugf("PrettyPrint statement %T %s i %d\tcurSameLine=%v,\tcurHadNewline=%v,\tprevHadNewline=%v",
s, s.Value().Literal(), i, keepSameLineAsPrevious(s), needNewLineAfter(s), needNewLineAfter(prev))
if i > 0 || ps.IndentLevel > 1 {
if sameLine(s) {
if keepSameLineAsPrevious(s) || !needNewLineAfter(prev) {
log.Debugf("=> PrettyPrint adding just a space")
_, _ = ps.Out.Write([]byte{' '})
ps.IndentationDone = true
} else {
log.Debugf("=> PrettyPrint adding newline")
ps.Println()
}
}
s.PrettyPrint(ps)
prev = s
}
ps.Println()
ps.IndentLevel--
Expand All @@ -144,7 +160,8 @@ func (i Identifier) PrettyPrint(out *PrintState) *PrintState {

type Comment struct {
Base
SameLine bool
SameLineAsPrevious bool
SameLineAsNext bool
}

func (c Comment) PrettyPrint(out *PrintState) *PrintState {
Expand Down
5 changes: 5 additions & 0 deletions eval/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ func (s *State) evalInternal(node any) object.Object {
left := s.evalInternal(node.Left)
index := s.evalInternal(node.Index)
return evalIndexExpression(left, index)
case *ast.Comment:
return object.NULL
}
return object.Error{Value: fmt.Sprintf("unknown node type: %T", node)}
}
Expand Down Expand Up @@ -406,6 +408,9 @@ func (s *State) evalStatements(stmts []ast.Node) object.Object {

func (s *State) evalPrefixExpression(operator token.Type, right object.Object) object.Object {
switch operator { //nolint:exhaustive // we have default.
case token.BLOCKCOMMENT:
// /* comment */ treated as identity operator. TODO: implement in parser.
return right
case token.BANG:
return s.evalBangOperatorExpression(right)
case token.MINUS:
Expand Down
5 changes: 4 additions & 1 deletion eval/eval_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ func TestEvalIntegerExpression(t *testing.T) {
{"10", 10},
{"-5", -5},
{"-10", -10},
{"5 + 5 + 5 + 5 - 10", 10},
{"5 + 5 + 5 + 5 - 10 /* some block comment */", 10},
/* These don't work, we need to make comment an identity operator or prune them entirely from the AST. */
// {"5 + /* block comment in middle of expression */ 2", 7},
// {" - /* inline of prefix */ 5", -5},
{"2 * 2 * 2 * 2 * 2", 32},
{"-50 + 100 + -50", 0},
{"5 * 2 + 10", 20},
Expand Down
6 changes: 4 additions & 2 deletions examples/pi2.gr
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// Γ(x + 1/2) ~ Γ(x)x^(1/2) = (x-1)!√x
// Γ(x + 1/2) = (2x - 1)!! * 2^-x * √π
/*
Γ(x + 1/2) ~ Γ(x)x^(1/2) = (x-1)!√x
Γ(x + 1/2) = (2x - 1)!! * 2^-x * √π
*/
f=func(i,n, prod) {
//log(i, prod)
if (i==n+1) {
Expand Down
11 changes: 6 additions & 5 deletions examples/sample.gr
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
// Sample file that our gorepl can interpret
// <--- comments
// See also the other *.gr files

/*
Sample file that our gorepl can interpret
This is a block comment
See also the other *.gr files
*/
unless = macro(cond, iffalse, iftrue) {
quote(if (!(unquote(cond))) {
unquote(iffalse)
Expand All @@ -17,7 +18,7 @@ fact=func(n) { // function
if (n<=1) {
return 1
}
n*fact(n-1) // recursion, also last evaluated expression is returned (ie return at the end is optional)
/* recursion: */ n*fact(n-1) // also last evaluated expression is returned (ie return at the end is optional)
}

a=[fact(5), "abc", 76-3] // array can contain different types
Expand Down
39 changes: 31 additions & 8 deletions lexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ type Lexer struct {
pos int
lineMode bool
hadWhitespace bool
hadNewline bool
hadNewline bool // newline was seen before current token
}

// Mode with input expected the be complete (multiline/file).
Expand All @@ -31,32 +31,36 @@ func NewBytes(input []byte) *Lexer {
func (l *Lexer) NextToken() *token.Token {
l.skipWhitespace()
ch := l.readChar()
nextChar := l.peekChar()
switch ch { // Maybe benchmark and do our own lookup table?
case '=', '!', ':':
if l.peekChar() == '=' {
nextChar := l.readChar()
if nextChar == '=' {
l.pos++
// := is aliased directly to ASSIGN (with = as literal), a bit hacky but
// so we normalize := like it didn't exist.
return token.ConstantTokenChar2(ch, nextChar)
}
return token.ConstantTokenChar(ch)
case '+', '-':
if l.peekChar() == ch {
nextChar := l.readChar()
if nextChar == ch {
l.pos++
return token.ConstantTokenChar2(ch, nextChar) // increment/decrement
}
return token.ConstantTokenChar(ch)
case '%', '*', ';', ',', '{', '}', '(', ')', '[', ']':
// TODO maybe reorder so it's a continuous range for pure single character tokens
return token.ConstantTokenChar(ch)
case '/':
if l.peekChar() == '/' {
if nextChar == '/' {
return token.Intern(token.LINECOMMENT, l.readLineComment())
}
if nextChar == '*' {
return token.Intern(token.BLOCKCOMMENT, l.readBlockComment())
}
return token.ConstantTokenChar(ch)
case '<', '>':
if l.peekChar() == '=' {
nextChar := l.readChar()
if nextChar == '=' {
l.pos++
return token.ConstantTokenChar2(ch, nextChar)
}
return token.ConstantTokenChar(ch)
Expand Down Expand Up @@ -168,6 +172,25 @@ func (l *Lexer) readLineComment() string {
return strings.TrimSpace(string(l.input[pos:l.pos]))
}

// endBlockComment reports whether ch, combined with the upcoming (not yet
// consumed) character, forms the "*/" terminator of a block comment.
func (l *Lexer) endBlockComment(ch byte) bool {
	if ch != '*' {
		return false
	}
	return l.peekChar() == '/'
}

// readBlockComment consumes a /* ... */ block comment and returns its raw
// text including both delimiters. The opening '/' was already consumed by
// NextToken (which also peeked the '*'), so the comment starts one byte
// before the current position. If input runs out before "*/" is seen, the
// partial comment is returned without the terminator; the parser uses the
// missing "*/" suffix to request a continuation line.
func (l *Lexer) readBlockComment() string {
	pos1 := l.pos - 1 // back up to include the already-read opening '/'.
	l.pos++ // skip the '*' of the opening "/*".
	ch := l.readChar()
	for ch != 0 && !l.endBlockComment(ch) {
		ch = l.readChar()
	}
	if ch == 0 {
		l.pos-- // end of input: undo the advance past the last byte (assumes readChar returns 0 at EOF — TODO confirm).
	} else {
		l.pos++ // terminator found: advance to include the closing '/'.
	}
	return string(l.input[pos1:l.pos])
}

func (l *Lexer) readNumber(ch byte) (token.Type, string) {
t := token.INT
if ch == '.' {
Expand Down
37 changes: 33 additions & 4 deletions lexer/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ a3:=5
4>=3.1
i++
j--
/*/*/
/* This is a
multiline comment */
@
`
tests := []struct {
Expand Down Expand Up @@ -156,6 +159,8 @@ j--
{token.INCR, "++"},
{token.IDENT, "j"},
{token.DECR, "--"},
{token.BLOCKCOMMENT, "/*/*/"},
{token.BLOCKCOMMENT, "/* This is a\n multiline comment */"},
{token.ILLEGAL, "@"},
{token.EOF, ""},
}
Expand All @@ -172,15 +177,14 @@ j--
}

if tok.Literal() != tt.expectedLiteral {
t.Fatalf("tests[%d] - literal wrong. expected=%q, got=%v",
i, tt.expectedLiteral, tok)
t.Fatalf("tests[%d] - literal wrong. expected=%q, got=%q",
i, tt.expectedLiteral, tok.Literal())
}
}
}

func TestNextTokenEOLMode(t *testing.T) {
input := `if .5 { x (
`
input := "if .5 { x ( \n "
l := NewLineMode(input)
tests := []struct {
expectedType token.Type
Expand Down Expand Up @@ -225,3 +229,28 @@ func TestNextTokenEOLMode(t *testing.T) {
}
}
}

// TestNextTokenCommentEOLMode checks that, in line (REPL) mode, an
// unterminated block comment is emitted as a BLOCKCOMMENT token whose
// literal is the partial text without "*/", followed by EOL. The parser
// relies on the missing terminator to ask for a continuation line.
func TestNextTokenCommentEOLMode(t *testing.T) {
	input := `/* incomplete`
	l := NewLineMode(input)
	tests := []struct {
		expectedType    token.Type
		expectedLiteral string
	}{
		{token.BLOCKCOMMENT, "/* incomplete"}, // no closing */ in the literal.
		{token.EOL, ""},
	}
	for i, tt := range tests {
		tok := l.NextToken()

		if tok.Type() != tt.expectedType {
			t.Fatalf("tests[%d] - tokentype wrong. expected=%q and %q, got=%v",
				i, tt.expectedType, tt.expectedLiteral, tok)
		}

		if tok.Literal() != tt.expectedLiteral {
			t.Fatalf("tests[%d] - literal wrong. expected=%q, got=%q",
				i, tt.expectedLiteral, tok.Literal())
		}
	}
}
22 changes: 20 additions & 2 deletions parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package parser
import (
"fmt"
"strconv"
"strings"

"fortio.org/log"
"grol.io/grol/ast"
Expand Down Expand Up @@ -42,6 +43,7 @@ type Parser struct {
peekToken *token.Token

prevNewline bool
nextNewline bool

errors []string
continuationNeeded bool
Expand Down Expand Up @@ -91,6 +93,7 @@ func New(l *lexer.Lexer) *Parser {
p.registerPrefix(token.LBRACKET, p.parseArrayLiteral)
p.registerPrefix(token.LBRACE, p.parseMapLiteral)
p.registerPrefix(token.LINECOMMENT, p.parseComment)
p.registerPrefix(token.BLOCKCOMMENT, p.parseComment)
p.registerPrefix(token.PRINT, p.parseBuiltin)
p.registerPrefix(token.LOG, p.parseBuiltin)
p.registerPrefix(token.MACRO, p.parseMacroLiteral)
Expand Down Expand Up @@ -131,10 +134,11 @@ func (p *Parser) Errors() []string {
}

func (p *Parser) nextToken() {
p.prevNewline = p.l.HadNewline()
p.prevToken = p.curToken
p.curToken = p.peekToken
p.peekToken = p.l.NextToken()
p.prevNewline = p.nextNewline
p.nextNewline = p.l.HadNewline()
}

func (p *Parser) ParseProgram() *ast.Statements {
Expand Down Expand Up @@ -168,7 +172,21 @@ func (p *Parser) parseStringLiteral() ast.Node {
func (p *Parser) parseComment() ast.Node {
r := &ast.Comment{}
r.Token = p.curToken
r.SameLine = !p.prevNewline
r.SameLineAsPrevious = !p.prevNewline
r.SameLineAsNext = !p.nextNewline
isBlockComment := (p.curToken.Type() == token.BLOCKCOMMENT)
log.Debugf("parseComment: %#v", r)
if isBlockComment {
if !strings.HasSuffix(p.curToken.Literal(), "*/") {
log.LogVf("parseComment: block comment not closed: %s", p.curToken.DebugString())
p.continuationNeeded = true
return nil
}
} else {
if r.SameLineAsNext && !p.peekTokenIs(token.EOF) && !p.peekTokenIs(token.EOL) {
panic("parseComment for line comment: same line as next and not EOL/EOF")
}
}
return r
}

Expand Down
Loading

0 comments on commit aa14201

Please sign in to comment.