libnixf: generate token spelling from python script (#648)
inclyc authored Feb 1, 2025
1 parent 76c63a2 commit 06d1c85
Showing 13 changed files with 203 additions and 166 deletions.
2 changes: 1 addition & 1 deletion libnixf/include/nixf/Basic/Nodes/Op.h
@@ -1,9 +1,9 @@
#pragma once

#include "Basic.h"
#include "Tokens.h"

#include "nixf/Basic/Nodes/Attrs.h"
#include "nixf/Basic/TokenKinds.h"

#include <memory>

11 changes: 0 additions & 11 deletions libnixf/include/nixf/Basic/TokenKinds.h

This file was deleted.

100 changes: 0 additions & 100 deletions libnixf/include/nixf/Basic/TokenKinds.inc

This file was deleted.

9 changes: 0 additions & 9 deletions libnixf/include/nixf/Basic/Tokens.inc

This file was deleted.

2 changes: 1 addition & 1 deletion libnixf/src/Parse/Lexer.cpp
@@ -320,7 +320,7 @@ void Lexer::maybeKW() {
    Tok = tok_kw_##NAME; \
    return; \
  }
#include "nixf/Basic/TokenKinds.inc"
#include "TokenKinds.inc"
#undef TOK_KEYWORD
}
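For context, a sketch of what the #define/#include pair above expands to once the preprocessor runs, assuming the generated TokenKinds.inc lists one TOK_KEYWORD(NAME) entry per keyword in tokens.py; the identifier being compared (shown here as IdStr) is a hypothetical stand-in, since the beginning of the macro lies outside this hunk:

// Illustrative expansion only: one branch is stamped out per keyword entry.
if (IdStr == "if") {
  Tok = tok_kw_if;
  return;
}
if (IdStr == "then") {
  Tok = tok_kw_then;
  return;
}
// ... and so on for the remaining TOK_KEYWORD entries ...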

2 changes: 1 addition & 1 deletion libnixf/src/Parse/ParseOp.cpp
@@ -108,7 +108,7 @@ std::shared_ptr<Expr> Parser::parseExprOpBP(unsigned LeftRBP) {
  for (;;) {
    switch (Token Tok = peek(); Tok.kind()) {
#define TOK_BIN_OP(NAME) case tok_op_##NAME:
#include "nixf/Basic/TokenKinds.inc"
#include "TokenKinds.inc"
#undef TOK_BIN_OP
    {
      // For all binary ops:
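For context, the same X-macro trick stamps out case labels here: assuming the generated TokenKinds.inc lists one TOK_BIN_OP(NAME) entry per operator in tokens.py, the include above expands to roughly the following, with every label falling through into the shared binary-operator block:

case tok_op_not:
case tok_op_impl:
case tok_op_or:
// ... one case label per TOK_BIN_OP entry ...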
2 changes: 1 addition & 1 deletion libnixf/src/Parse/ParseSupport.cpp
@@ -3,7 +3,7 @@

#include "Parser.h"

#include "nixf/Basic/TokenKinds.h"
#include "Tokens.h"
#include "nixf/Parse/Parser.h"

using namespace nixf;
43 changes: 1 addition & 42 deletions libnixf/src/Parse/Token.h
@@ -1,54 +1,13 @@
#pragma once

#include "Tokens.h"
#include "nixf/Basic/Range.h"
#include "nixf/Basic/TokenKinds.h"

#include <cassert>
#include <string_view>

namespace nixf {

namespace tok {

constexpr std::string_view spelling(TokenKind Kind) {
  switch (Kind) {
#define TOK_KEYWORD(NAME) \
  case tok_kw_##NAME: \
    return #NAME;
#include "nixf/Basic/TokenKinds.inc"
#undef TOK_KEYWORD
  case tok_dquote:
    return "\"";
  case tok_quote2:
    return "''";
  case tok_dollar_curly:
    return "${";
  case tok_l_curly:
    return "{";
  case tok_r_curly:
    return "}";
  case tok_l_paren:
    return "(";
  case tok_r_paren:
    return ")";
  case tok_eq:
    return "=";
  case tok_semi_colon:
    return ";";
  case tok_l_bracket:
    return "[";
  case tok_r_bracket:
    return "]";
  case tok_colon:
    return ":";
  default:
    assert(false && "Not yet implemented!");
  }
  __builtin_unreachable();
}

} // namespace tok

/// \brief A token. With it's kind, and the range in source code.
///
/// This class is trivially copyable.
29 changes: 29 additions & 0 deletions libnixf/src/Parse/TokenKinds.inc.py
@@ -0,0 +1,29 @@
from tokens import bin_op_tokens, keyword_tokens, tokens


def generate_token_section(section_name: str, tokens: list) -> str:
    if not tokens:
        return ""

    section = [f"#ifdef {section_name}"]
    section.extend(f"{section_name}({token.name})" for token in tokens)
    section.append(f"#endif // {section_name}\n")

    return "\n".join(section)


def generate_token_kinds_inc() -> str:
    sections = [
        generate_token_section("TOK_KEYWORD", keyword_tokens),
        generate_token_section("TOK", tokens),
        generate_token_section("TOK_BIN_OP", bin_op_tokens),
    ]

    return "\n".join(filter(None, sections)).strip()


if __name__ == "__main__":
    import sys

    with open(sys.argv[1], "w") as f:
        f.write(generate_token_kinds_inc())
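Run against the definitions in tokens.py (added below), this script emits a TokenKinds.inc along these lines (abridged sketch):

#ifdef TOK_KEYWORD
TOK_KEYWORD(if)
TOK_KEYWORD(then)
TOK_KEYWORD(else)
// ... remaining keywords ...
#endif // TOK_KEYWORD

#ifdef TOK
TOK(if)
// ... one entry per token in tokens.tokens, keywords and operators included ...
TOK(pipe_from)
#endif // TOK

#ifdef TOK_BIN_OP
TOK_BIN_OP(not)
TOK_BIN_OP(impl)
// ... remaining binary operators ...
#endif // TOK_BIN_OP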
49 changes: 49 additions & 0 deletions libnixf/src/Parse/Tokens.h.py
@@ -0,0 +1,49 @@
import tokens


def tok_id(tok: tokens.Token):
    prefix = "tok"
    if isinstance(tok, tokens.KwToken):
        return f"{prefix}_kw_{tok.name}"
    if isinstance(tok, tokens.OpToken):
        return f"{prefix}_op_{tok.name}"
    return f"{prefix}_{tok.name}"


def generate_tokens_h() -> str:
    header = """#pragma once
#include <string_view>
namespace nixf::tok {
enum TokenKind {
"""
    for token in tokens.tokens:
        header += f" {tok_id(token)},\n"

    header += "};\n\n"

    header += """constexpr std::string_view spelling(int Kind) {
using namespace std::literals;
switch (Kind) {
"""

    for token in tokens.tokens:
        header += f' case {tok_id(token)}: return R"({token.spelling})"sv;\n'

    header += """ default: return ""sv;
}
}
"""

    header += "} // namespace nixf::tok"

    return header


if __name__ == "__main__":
    import sys

    with open(sys.argv[1], "w") as f:
        f.write(generate_tokens_h())
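Given the token list in tokens.py (added below), the emitted Tokens.h looks roughly like this (abridged; whitespace approximate):

#pragma once
#include <string_view>
namespace nixf::tok {
enum TokenKind {
 tok_kw_if,
 tok_kw_then,
 // ... one enumerator per entry in tokens.tokens ...
 tok_op_pipe_from,
};

constexpr std::string_view spelling(int Kind) {
using namespace std::literals;
switch (Kind) {
 case tok_kw_if: return R"(if)"sv;
 case tok_dquote: return R"(")"sv;
 // ... one case per token ...
 default: return ""sv;
}
}
} // namespace nixf::tok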
87 changes: 87 additions & 0 deletions libnixf/src/Parse/tokens.py
@@ -0,0 +1,87 @@
from dataclasses import dataclass
from typing import List


@dataclass
class Token:
    name: str
    spelling: str


class KwToken(Token):
    def __init__(self, name):
        self.name = name
        self.spelling = name


keyword_tokens: List[Token] = [
    KwToken("if"),
    KwToken("then"),
    KwToken("else"),
    KwToken("assert"),
    KwToken("with"),
    KwToken("let"),
    KwToken("in"),
    KwToken("rec"),
    KwToken("inherit"),
    KwToken("or"),
]


class OpToken(Token):
    pass


bin_op_tokens: List[Token] = [
    OpToken("not", "!"),
    OpToken("impl", "->"),
    OpToken("or", "||"),
    OpToken("and", "&&"),
    OpToken("eq", "=="),
    OpToken("neq", "!="),
    OpToken("lt", "<"),
    OpToken("gt", ">"),
    OpToken("le", "<="),
    OpToken("ge", ">="),
    OpToken("update", "//"),
    OpToken("add", "+"),
    OpToken("negate", "-"),
    OpToken("mul", "*"),
    OpToken("div", "/"),
    OpToken("concat", "++"),
    OpToken("pipe_into", "|>"),
    OpToken("pipe_from", "<|"),
]

tokens: List[Token] = [
    *keyword_tokens,
    Token("eof", "eof"),
    Token("id", "id"),
    Token("int", "int"),
    Token("float", "float"),
    Token("dquote", '"'),
    Token("string_part", "string_part"),
    Token("string_escape", "string_escape"),
    Token("quote2", "''"),
    Token("path_fragment", "path_fragment"),
    Token("spath", "<path>"),
    Token("uri", "uri"),
    Token("r_curly", "}"),
    Token("dollar_curly", "${"),
    Token("ellipsis", "..."),
    Token("comma", ","),
    Token("dot", "."),
    Token("semi_colon", ";"),
    Token("eq", "="),
    Token("l_curly", "{"),
    Token("l_paren", "("),
    Token("r_paren", ")"),
    Token("l_bracket", "["),
    Token("r_bracket", "]"),
    Token("question", "?"),
    Token("at", "@"),
    Token("colon", ":"),
    Token("unknown", "unknown"),
    Token("path_end", "path_end"),
    *bin_op_tokens,
]
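Because spelling() is generated as a constexpr function, callers can query spellings at compile time. A minimal usage sketch, assuming the generated Tokens.h above is on the include path:

#include "Tokens.h"

static_assert(nixf::tok::spelling(nixf::tok::tok_kw_let) == "let");
static_assert(nixf::tok::spelling(nixf::tok::tok_semi_colon) == ";");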