From 86f6987e1d8a0f0ce12db0db204ba1792cf75f26 Mon Sep 17 00:00:00 2001 From: CaptainZippy Date: Sat, 24 Aug 2024 12:05:08 +0100 Subject: [PATCH] Don't require inputs to be null terminated. --- src/littlelambda.cpp | 81 ++++++++++++++++++++++++-------------------- src/littlelambda.h | 14 ++++---- src/test.cpp | 39 ++++++++++++--------- 3 files changed, 76 insertions(+), 58 deletions(-) diff --git a/src/littlelambda.cpp b/src/littlelambda.cpp index ccd9fb3..ed2fd1f 100644 --- a/src/littlelambda.cpp +++ b/src/littlelambda.cpp @@ -1,5 +1,6 @@ #include "littlelambda.h" #include +#include #include #include #include @@ -32,6 +33,18 @@ static bool is_word_boundary(char c) { static bool is_newline(char c) { return c == '\r' || c == '\n'; } +static bool is_alpha(char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +template +static bool _try_parse_as(const char* start, const char* end, T& out, V... v) { + auto [ptr, rc] = std::from_chars(start, end, out, v...); + if (rc == std::errc{} && ptr == end) { + return true; + } + return false; +} struct lam_vm { ugc_t gc; @@ -56,15 +69,15 @@ static T* callocPlus(size_t extra) { // Parse null terminated 'input' // Set 'restart' to the end of parsing. -lam_result lam_parse(const char* input, const char** restart) { +lam_result lam_parse(const char* input, const char* endInput, const char** restart) { *restart = input; // No recursion - explicit stack for lists. std::vector> stack; - for (const char* cur = input; true;) { - const char* startCur = cur; + for (const char* cur = input; cur < endInput;) { std::optional parsed{}; + const char* startCur = cur; - // Try to parse an single va;ie & put it into 'parsed' + // Try to parse an single value & put it into 'parsed' // After this switch, there is a common block to manage the stack // and update the restart point etc. switch (*cur++) { @@ -86,15 +99,15 @@ lam_result lam_parse(const char* input, const char** restart) { } // parse_comment ";;" to eol is comment case ';': { - if (*cur != ';') { + if (cur >= endInput || *cur != ';') { return lam_result::fail(ParseUnexpectedSemiColon, "Unexpected single ';'"); } // Look for the start of any newline sequence \r, \n, \r\n - while (*cur && !is_newline(*cur)) { + while (cur < endInput && !is_newline(*cur)) { ++cur; } // Consume any sequence of \r,\n - while (*cur && is_newline(*cur)) { + while (cur < endInput && is_newline(*cur)) { ++cur; } break; @@ -118,9 +131,9 @@ lam_result lam_parse(const char* input, const char** restart) { lam_value v = curList.back(); if (v.type() == lam_type::Symbol && strcmp(v.as_symbol()->val(), ".") == 0) { std::vector tail; - for (bool slurp = true; slurp;) { + for (bool slurp = true; slurp && cur < endInput;) { const char* next = nullptr; - lam_result res = lam_parse(cur, &next); + lam_result res = lam_parse(cur, endInput, &next); cur = next; switch (res.code) { case 0: @@ -145,17 +158,17 @@ lam_result lam_parse(const char* input, const char** restart) { // parse_string case '"': { const char* start = cur; // start of the current run - std::string res; - while (!parsed.has_value()) { + std::string acc; // accumulator for current string + while (!parsed.has_value() && cur < endInput) { switch (char c = *cur++) { case 0: { return lam_result::fail(ParseUnexpectedNull, "Unexpected null when parsing string"); } case '\\': { - if (*cur == 'n') { - res.append(start, cur - 1); - res.push_back('\n'); + if (cur < endInput && *cur == 'n') { + acc.append(start, cur - 1); + acc.push_back('\n'); cur += 1; start = cur; } else { @@ -165,8 +178,8 @@ lam_result lam_parse(const char* input, const char** restart) { break; } case '"': { - res.append(start, cur - 1); - auto s = lam_make_string(res.data(), res.size()); + acc.append(start, cur - 1); + auto s = lam_make_string(acc.data(), acc.size()); parsed.emplace(s); break; } @@ -179,7 +192,7 @@ lam_result lam_parse(const char* input, const char** restart) { // parse_quote case '\'': { const char* after = nullptr; - lam_result quoted = lam_parse(cur, &after); + lam_result quoted = lam_parse(cur, endInput, &after); if (quoted.code != 0) { return quoted; } @@ -209,28 +222,27 @@ lam_result lam_parse(const char* input, const char** restart) { //} // parse_number parse_symbol default: { - while (!is_word_boundary(*cur)) { + while (cur < endInput && !is_word_boundary(*cur)) { ++cur; } - const char* end = cur; - char* endparse; - long asInt = strtol(startCur, &endparse, 10); - if (endparse == end) { + + // Todo: tighten these checks. Numbers must begin with - or . or digit? + if (cur > startCur && is_alpha(startCur[0])) { + parsed.emplace(lam_make_symbol(startCur, cur - startCur)); + } else if (long asInt; _try_parse_as(startCur, cur, asInt, 10)) { parsed.emplace(lam_make_int(asInt)); + } else if (double asDbl; _try_parse_as(startCur, cur, asDbl)) { + parsed.emplace(lam_make_double(asDbl)); } else { - double asDbl = strtold(startCur, &endparse); - if (endparse == end) { - parsed.emplace(lam_make_double(asDbl)); - } else { - parsed.emplace(lam_make_symbol(startCur, end - startCur)); - } + parsed.emplace(lam_make_symbol(startCur, cur - startCur)); } + break; } } // Consume any whitespace & advance restart point - while (is_white(*cur)) { + while (cur < endInput && is_white(*cur)) { ++cur; } *restart = cur; @@ -246,13 +258,10 @@ lam_result lam_parse(const char* input, const char** restart) { } } } -} - -lam_result lam_parse(const char* input) { - const char* restart = nullptr; - auto r = lam_parse(input, &restart); - assert(*restart == 0); - return r; + if (stack.size()) { + return lam_result::fail(ParseEndOfInput, "Unexpected Eof"); + } + return lam_result::ok(lam_make_int(0)); } lam_value lam_make_symbol(const char* s, size_t n) { diff --git a/src/littlelambda.h b/src/littlelambda.h index c63442a..7bcfebb 100644 --- a/src/littlelambda.h +++ b/src/littlelambda.h @@ -21,6 +21,10 @@ extern void __debugbreak(); // Compiler Intrinsic if (!(COND)) { \ lam_debugbreak(); \ } +#define assert2(COND, MSG)\ + if (!(COND)) { \ + lam_debugbreak(); \ + } /// Types a lam_value can contain. enum class lam_type { @@ -307,12 +311,10 @@ static inline lam_value lam_make_value(lam_obj* obj) { /// Evaluate the given value in the given environment. lam_value lam_eval(lam_value val, lam_env* env); -/// Parse and return a single possibly-compound value from the given input. -lam_result lam_parse(const char* input); - -/// Parse and return a single possibly-compound value from the given input. -/// Sets the 'restart' pointer to the end of the input consumed. -lam_result lam_parse(const char* input, const char** restart); +/// Parse and return the first value from the given input. +/// Sets the 'restart' pointer to past the end of the input consumed. +/// Call this multiple times to consume all input. +lam_result lam_parse(const char* input, const char* end, const char** restart); /// Print the given value. void lam_print(lam_value val, const char* end = nullptr); diff --git a/src/test.cpp b/src/test.cpp index b758ae6..f5a9fb8 100644 --- a/src/test.cpp +++ b/src/test.cpp @@ -16,7 +16,6 @@ static int slurp_file(const char* path, std::vector& buf) { buf.insert(buf.end(), &b[0], &b[n]); } fclose(fin); - buf.push_back(0); return 0; } @@ -51,10 +50,10 @@ lam_result read_and_eval(const char* path) { std::vector buf; if (slurp_file(path, buf) == 0) { auto env = lam_make_env_default(); - - for (const char* cur = buf.data(); *cur;) { + const char* bufEnd = buf.data() + buf.size(); + for (const char* cur = buf.data(); cur < bufEnd;) { const char* next = nullptr; - lam_result res = lam_parse(cur, &next); + lam_result res = lam_parse(cur, bufEnd, &next); if (res.code != 0) { return res; } @@ -67,10 +66,18 @@ lam_result read_and_eval(const char* path) { return lam_result::fail(10000, "module not found"); } -static lam_value lam_parse_or_die(const char* input) { - lam_result res = lam_parse(input); - assert(res.code == 0); - return res.value; +static lam_value _lam_parse_or_die(const char* input, int N) { + const char* restart = nullptr; + const char* end = input + N; + auto r = lam_parse(input, end, &restart); + assert2(restart == end, "Input was not consumed"); + assert(r.code == 0); + return r.value; +} + +template +static lam_value lam_parse_or_die(const char (&input)[N]) { + return _lam_parse_or_die(input, N-1); // not null terminator } int main() { @@ -78,14 +85,14 @@ int main() { read_and_eval("module.ll"); read_and_eval("test.ll"); if (1) { - lam_parse("hello"); - lam_parse("\"world\""); - lam_parse("12"); - lam_parse("12.2"); - lam_parse("(hello world)"); - lam_parse("(hello (* num 141.0) world)"); - lam_parse("(begin ($define r 10) (* 3.4 (* r r)))"); - lam_parse("(begin ($define r null) (print r))"); + lam_parse_or_die("hello"); + lam_parse_or_die("\"world\""); + lam_parse_or_die("12"); + lam_parse_or_die("12.2"); + lam_parse_or_die("(hello world)"); + lam_parse_or_die("(hello (* num 141.0) world)"); + lam_parse_or_die("(begin ($define r 10) (* 3.4 (* r r)))"); + lam_parse_or_die("(begin ($define r null) (print r))"); } read_and_eval("01-Basic.ll");