libnixf: generate token spelling from python script (#648)
inclyc authored Feb 1, 2025
1 parent 76c63a2 commit 06d1c85
Showing 13 changed files with 203 additions and 166 deletions.
2 changes: 1 addition & 1 deletion libnixf/include/nixf/Basic/Nodes/Op.h
@@ -1,9 +1,9 @@
#pragma once

#include "Basic.h"
#include "Tokens.h"

#include "nixf/Basic/Nodes/Attrs.h"
#include "nixf/Basic/TokenKinds.h"

#include <memory>

11 changes: 0 additions & 11 deletions libnixf/include/nixf/Basic/TokenKinds.h

This file was deleted.

100 changes: 0 additions & 100 deletions libnixf/include/nixf/Basic/TokenKinds.inc

This file was deleted.

9 changes: 0 additions & 9 deletions libnixf/include/nixf/Basic/Tokens.inc

This file was deleted.

2 changes: 1 addition & 1 deletion libnixf/src/Parse/Lexer.cpp
@@ -320,7 +320,7 @@ void Lexer::maybeKW() {
    Tok = tok_kw_##NAME; \
    return; \
  }
#include "nixf/Basic/TokenKinds.inc"
#include "TokenKinds.inc"
#undef TOK_KEYWORD
}
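For context, a sketch of what the #define/#include pair above expands to once the preprocessor runs, assuming the generated TokenKinds.inc lists one TOK_KEYWORD(NAME) entry per keyword in tokens.py; the identifier being compared (shown here as IdStr) is a hypothetical stand-in, since the beginning of the macro lies outside this hunk:

// Illustrative expansion only: one branch is stamped out per keyword entry.
if (IdStr == "if") {
  Tok = tok_kw_if;
  return;
}
if (IdStr == "then") {
  Tok = tok_kw_then;
  return;
}
// ... and so on for the remaining TOK_KEYWORD entries ...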

2 changes: 1 addition & 1 deletion libnixf/src/Parse/ParseOp.cpp
@@ -108,7 +108,7 @@ std::shared_ptr<Expr> Parser::parseExprOpBP(unsigned LeftRBP) {
  for (;;) {
    switch (Token Tok = peek(); Tok.kind()) {
#define TOK_BIN_OP(NAME) case tok_op_##NAME:
#include "nixf/Basic/TokenKinds.inc"
#include "TokenKinds.inc"
#undef TOK_BIN_OP
    {
      // For all binary ops:
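For context, the same X-macro trick stamps out case labels here: assuming the generated TokenKinds.inc lists one TOK_BIN_OP(NAME) entry per operator in tokens.py, the include above expands to roughly the following, with every label falling through into the shared binary-operator block:

case tok_op_not:
case tok_op_impl:
case tok_op_or:
// ... one case label per TOK_BIN_OP entry ...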
2 changes: 1 addition & 1 deletion libnixf/src/Parse/ParseSupport.cpp
@@ -3,7 +3,7 @@

#include "Parser.h"

#include "nixf/Basic/TokenKinds.h"
#include "Tokens.h"
#include "nixf/Parse/Parser.h"

using namespace nixf;
43 changes: 1 addition & 42 deletions libnixf/src/Parse/Token.h
@@ -1,54 +1,13 @@
#pragma once

#include "Tokens.h"
#include "nixf/Basic/Range.h"
#include "nixf/Basic/TokenKinds.h"

#include <cassert>
#include <string_view>

namespace nixf {

namespace tok {

constexpr std::string_view spelling(TokenKind Kind) {
  switch (Kind) {
#define TOK_KEYWORD(NAME) \
  case tok_kw_##NAME: \
    return #NAME;
#include "nixf/Basic/TokenKinds.inc"
#undef TOK_KEYWORD
  case tok_dquote:
    return "\"";
  case tok_quote2:
    return "''";
  case tok_dollar_curly:
    return "${";
  case tok_l_curly:
    return "{";
  case tok_r_curly:
    return "}";
  case tok_l_paren:
    return "(";
  case tok_r_paren:
    return ")";
  case tok_eq:
    return "=";
  case tok_semi_colon:
    return ";";
  case tok_l_bracket:
    return "[";
  case tok_r_bracket:
    return "]";
  case tok_colon:
    return ":";
  default:
    assert(false && "Not yet implemented!");
  }
  __builtin_unreachable();
}

} // namespace tok

/// \brief A token. With it's kind, and the range in source code.
///
/// This class is trivially copyable.
29 changes: 29 additions & 0 deletions libnixf/src/Parse/TokenKinds.inc.py
@@ -0,0 +1,29 @@
from tokens import bin_op_tokens, keyword_tokens, tokens


def generate_token_section(section_name: str, tokens: list) -> str:
    if not tokens:
        return ""

    section = [f"#ifdef {section_name}"]
    section.extend(f"{section_name}({token.name})" for token in tokens)
    section.append(f"#endif // {section_name}\n")

    return "\n".join(section)


def generate_token_kinds_inc() -> str:
    sections = [
        generate_token_section("TOK_KEYWORD", keyword_tokens),
        generate_token_section("TOK", tokens),
        generate_token_section("TOK_BIN_OP", bin_op_tokens),
    ]

    return "\n".join(filter(None, sections)).strip()


if __name__ == "__main__":
    import sys

    with open(sys.argv[1], "w") as f:
        f.write(generate_token_kinds_inc())
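Run against the definitions in tokens.py (added below), this script emits a TokenKinds.inc along these lines (abridged sketch):

#ifdef TOK_KEYWORD
TOK_KEYWORD(if)
TOK_KEYWORD(then)
TOK_KEYWORD(else)
// ... remaining keywords ...
#endif // TOK_KEYWORD

#ifdef TOK
TOK(if)
// ... one entry per token in tokens.tokens, keywords and operators included ...
TOK(pipe_from)
#endif // TOK

#ifdef TOK_BIN_OP
TOK_BIN_OP(not)
TOK_BIN_OP(impl)
// ... remaining binary operators ...
#endif // TOK_BIN_OP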
49 changes: 49 additions & 0 deletions libnixf/src/Parse/Tokens.h.py
@@ -0,0 +1,49 @@
import tokens


def tok_id(tok: tokens.Token):
    prefix = "tok"
    if isinstance(tok, tokens.KwToken):
        return f"{prefix}_kw_{tok.name}"
    if isinstance(tok, tokens.OpToken):
        return f"{prefix}_op_{tok.name}"
    return f"{prefix}_{tok.name}"


def generate_tokens_h() -> str:
    header = """#pragma once
#include <string_view>
namespace nixf::tok {
enum TokenKind {
"""
    for token in tokens.tokens:
        header += f" {tok_id(token)},\n"

    header += "};\n\n"

    header += """constexpr std::string_view spelling(int Kind) {
using namespace std::literals;
switch (Kind) {
"""

    for token in tokens.tokens:
        header += f' case {tok_id(token)}: return R"({token.spelling})"sv;\n'

    header += """ default: return ""sv;
}
}
"""

    header += "} // namespace nixf::tok"

    return header


if __name__ == "__main__":
    import sys

    with open(sys.argv[1], "w") as f:
        f.write(generate_tokens_h())
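Given the token list in tokens.py (added below), the emitted Tokens.h looks roughly like this (abridged; whitespace approximate):

#pragma once
#include <string_view>
namespace nixf::tok {
enum TokenKind {
 tok_kw_if,
 tok_kw_then,
 // ... one enumerator per entry in tokens.tokens ...
 tok_op_pipe_from,
};

constexpr std::string_view spelling(int Kind) {
using namespace std::literals;
switch (Kind) {
 case tok_kw_if: return R"(if)"sv;
 case tok_dquote: return R"(")"sv;
 // ... one case per token ...
 default: return ""sv;
}
}
} // namespace nixf::tok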
87 changes: 87 additions & 0 deletions libnixf/src/Parse/tokens.py
@@ -0,0 +1,87 @@
from dataclasses import dataclass
from typing import List


@dataclass
class Token:
    name: str
    spelling: str


class KwToken(Token):
    def __init__(self, name):
        self.name = name
        self.spelling = name


keyword_tokens: List[Token] = [
    KwToken("if"),
    KwToken("then"),
    KwToken("else"),
    KwToken("assert"),
    KwToken("with"),
    KwToken("let"),
    KwToken("in"),
    KwToken("rec"),
    KwToken("inherit"),
    KwToken("or"),
]


class OpToken(Token):
    pass


bin_op_tokens: List[Token] = [
    OpToken("not", "!"),
    OpToken("impl", "->"),
    OpToken("or", "||"),
    OpToken("and", "&&"),
    OpToken("eq", "=="),
    OpToken("neq", "!="),
    OpToken("lt", "<"),
    OpToken("gt", ">"),
    OpToken("le", "<="),
    OpToken("ge", ">="),
    OpToken("update", "//"),
    OpToken("add", "+"),
    OpToken("negate", "-"),
    OpToken("mul", "*"),
    OpToken("div", "/"),
    OpToken("concat", "++"),
    OpToken("pipe_into", "|>"),
    OpToken("pipe_from", "<|"),
]

tokens: List[Token] = [
    *keyword_tokens,
    Token("eof", "eof"),
    Token("id", "id"),
    Token("int", "int"),
    Token("float", "float"),
    Token("dquote", '"'),
    Token("string_part", "string_part"),
    Token("string_escape", "string_escape"),
    Token("quote2", "''"),
    Token("path_fragment", "path_fragment"),
    Token("spath", "<path>"),
    Token("uri", "uri"),
    Token("r_curly", "}"),
    Token("dollar_curly", "${"),
    Token("ellipsis", "..."),
    Token("comma", ","),
    Token("dot", "."),
    Token("semi_colon", ";"),
    Token("eq", "="),
    Token("l_curly", "{"),
    Token("l_paren", "("),
    Token("r_paren", ")"),
    Token("l_bracket", "["),
    Token("r_bracket", "]"),
    Token("question", "?"),
    Token("at", "@"),
    Token("colon", ":"),
    Token("unknown", "unknown"),
    Token("path_end", "path_end"),
    *bin_op_tokens,
]
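Because spelling() is generated as a constexpr function, callers can query spellings at compile time. A minimal usage sketch, assuming the generated Tokens.h above is on the include path:

#include "Tokens.h"

static_assert(nixf::tok::spelling(nixf::tok::tok_kw_let) == "let");
static_assert(nixf::tok::spelling(nixf::tok::tok_semi_colon) == ";");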