Skip to content

Commit

Permalink
Don't require inputs to be null terminated.
Browse files Browse the repository at this point in the history
  • Loading branch information
CaptainZippy committed Aug 24, 2024
1 parent edc42cf commit 86f6987
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 58 deletions.
81 changes: 45 additions & 36 deletions src/littlelambda.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "littlelambda.h"
#include <inttypes.h>
#include <charconv>
#include <cstring>
#include <optional>
#include <span>
Expand Down Expand Up @@ -32,6 +33,18 @@ static bool is_word_boundary(char c) {
/// True when 'c' is one of the line-ending characters: carriage return or line feed.
static bool is_newline(char c) {
    switch (c) {
        case '\r':
        case '\n':
            return true;
        default:
            return false;
    }
}
/// True when 'c' is an ASCII letter, i.e. in 'a'..'z' or 'A'..'Z'.
static bool is_alpha(char c) {
    const bool isLower = ('a' <= c) && (c <= 'z');
    const bool isUpper = ('A' <= c) && (c <= 'Z');
    return isLower || isUpper;
}

/// Try to convert the whole character range [start, end) to a T via std::from_chars.
/// Extra arguments 'v' (e.g. an integer base) are forwarded to std::from_chars.
/// Returns true only if the conversion succeeded AND consumed every character
/// up to 'end'; a valid prefix followed by trailing characters yields false.
/// Note: 'out' may have been written even when false is returned.
template <typename T, typename... V>
static bool _try_parse_as(const char* start, const char* end, T& out, V... v) {
    const std::from_chars_result res = std::from_chars(start, end, out, v...);
    const bool converted = (res.ec == std::errc{});
    const bool consumedAll = (res.ptr == end);
    return converted && consumedAll;
}

struct lam_vm {
ugc_t gc;
Expand All @@ -56,15 +69,15 @@ static T* callocPlus(size_t extra) {

// Parse the character range ['input', 'endInput'); the input need not be null terminated.
// Set 'restart' to the end of parsing.
lam_result lam_parse(const char* input, const char** restart) {
lam_result lam_parse(const char* input, const char* endInput, const char** restart) {
*restart = input;
// No recursion - explicit stack for lists.
std::vector<std::vector<lam_value>> stack;
for (const char* cur = input; true;) {
const char* startCur = cur;
for (const char* cur = input; cur < endInput;) {
std::optional<lam_value> parsed{};
const char* startCur = cur;

// Try to parse an single va;ie & put it into 'parsed'
// Try to parse a single value & put it into 'parsed'
// After this switch, there is a common block to manage the stack
// and update the restart point etc.
switch (*cur++) {
Expand All @@ -86,15 +99,15 @@ lam_result lam_parse(const char* input, const char** restart) {
}
// parse_comment ";;" to eol is comment
case ';': {
if (*cur != ';') {
if (cur >= endInput || *cur != ';') {
return lam_result::fail(ParseUnexpectedSemiColon, "Unexpected single ';'");
}
// Look for the start of any newline sequence \r, \n, \r\n
while (*cur && !is_newline(*cur)) {
while (cur < endInput && !is_newline(*cur)) {
++cur;
}
// Consume any sequence of \r,\n
while (*cur && is_newline(*cur)) {
while (cur < endInput && is_newline(*cur)) {
++cur;
}
break;
Expand All @@ -118,9 +131,9 @@ lam_result lam_parse(const char* input, const char** restart) {
lam_value v = curList.back();
if (v.type() == lam_type::Symbol && strcmp(v.as_symbol()->val(), ".") == 0) {
std::vector<lam_value> tail;
for (bool slurp = true; slurp;) {
for (bool slurp = true; slurp && cur < endInput;) {
const char* next = nullptr;
lam_result res = lam_parse(cur, &next);
lam_result res = lam_parse(cur, endInput, &next);
cur = next;
switch (res.code) {
case 0:
Expand All @@ -145,17 +158,17 @@ lam_result lam_parse(const char* input, const char** restart) {
// parse_string
case '"': {
const char* start = cur; // start of the current run
std::string res;
while (!parsed.has_value()) {
std::string acc; // accumulator for current string
while (!parsed.has_value() && cur < endInput) {
switch (char c = *cur++) {
case 0: {
return lam_result::fail(ParseUnexpectedNull,
"Unexpected null when parsing string");
}
case '\\': {
if (*cur == 'n') {
res.append(start, cur - 1);
res.push_back('\n');
if (cur < endInput && *cur == 'n') {
acc.append(start, cur - 1);
acc.push_back('\n');
cur += 1;
start = cur;
} else {
Expand All @@ -165,8 +178,8 @@ lam_result lam_parse(const char* input, const char** restart) {
break;
}
case '"': {
res.append(start, cur - 1);
auto s = lam_make_string(res.data(), res.size());
acc.append(start, cur - 1);
auto s = lam_make_string(acc.data(), acc.size());
parsed.emplace(s);
break;
}
Expand All @@ -179,7 +192,7 @@ lam_result lam_parse(const char* input, const char** restart) {
// parse_quote
case '\'': {
const char* after = nullptr;
lam_result quoted = lam_parse(cur, &after);
lam_result quoted = lam_parse(cur, endInput, &after);
if (quoted.code != 0) {
return quoted;
}
Expand Down Expand Up @@ -209,28 +222,27 @@ lam_result lam_parse(const char* input, const char** restart) {
//}
// parse_number parse_symbol
default: {
while (!is_word_boundary(*cur)) {
while (cur < endInput && !is_word_boundary(*cur)) {
++cur;
}
const char* end = cur;
char* endparse;
long asInt = strtol(startCur, &endparse, 10);
if (endparse == end) {

// Todo: tighten these checks. Numbers must begin with - or . or digit?
if (cur > startCur && is_alpha(startCur[0])) {
parsed.emplace(lam_make_symbol(startCur, cur - startCur));
} else if (long asInt; _try_parse_as<long>(startCur, cur, asInt, 10)) {
parsed.emplace(lam_make_int(asInt));
} else if (double asDbl; _try_parse_as<double>(startCur, cur, asDbl)) {
parsed.emplace(lam_make_double(asDbl));
} else {
double asDbl = strtold(startCur, &endparse);
if (endparse == end) {
parsed.emplace(lam_make_double(asDbl));
} else {
parsed.emplace(lam_make_symbol(startCur, end - startCur));
}
parsed.emplace(lam_make_symbol(startCur, cur - startCur));
}

break;
}
}

// Consume any whitespace & advance restart point
while (is_white(*cur)) {
while (cur < endInput && is_white(*cur)) {
++cur;
}
*restart = cur;
Expand All @@ -246,13 +258,10 @@ lam_result lam_parse(const char* input, const char** restart) {
}
}
}
}

lam_result lam_parse(const char* input) {
const char* restart = nullptr;
auto r = lam_parse(input, &restart);
assert(*restart == 0);
return r;
if (stack.size()) {
return lam_result::fail(ParseEndOfInput, "Unexpected Eof");
}
return lam_result::ok(lam_make_int(0));
}

lam_value lam_make_symbol(const char* s, size_t n) {
Expand Down
14 changes: 8 additions & 6 deletions src/littlelambda.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ extern void __debugbreak(); // Compiler Intrinsic
if (!(COND)) { \
lam_debugbreak(); \
}
/// Assertion with an accompanying message: breaks into the debugger via
/// lam_debugbreak() when COND is false.
/// MSG is neither evaluated nor emitted; it documents the check at the call site.
/// The body is wrapped in do/while(0) so the macro expands to exactly one
/// statement and is safe inside an unbraced if/else.
#define assert2(COND, MSG)    \
    do {                      \
        if (!(COND)) {        \
            lam_debugbreak(); \
        }                     \
    } while (0)

/// Types a lam_value can contain.
enum class lam_type {
Expand Down Expand Up @@ -307,12 +311,10 @@ static inline lam_value lam_make_value(lam_obj* obj) {
/// Evaluate the given value in the given environment.
lam_value lam_eval(lam_value val, lam_env* env);

/// Parse and return a single possibly-compound value from the given input.
lam_result lam_parse(const char* input);

/// Parse and return a single possibly-compound value from the given input.
/// Sets the 'restart' pointer to the end of the input consumed.
lam_result lam_parse(const char* input, const char** restart);
/// Parse and return the first value from the given input.
/// Sets the 'restart' pointer to past the end of the input consumed.
/// Call this multiple times to consume all input.
lam_result lam_parse(const char* input, const char* end, const char** restart);

/// Print the given value.
void lam_print(lam_value val, const char* end = nullptr);
Expand Down
39 changes: 23 additions & 16 deletions src/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ static int slurp_file(const char* path, std::vector<char>& buf) {
buf.insert(buf.end(), &b[0], &b[n]);
}
fclose(fin);
buf.push_back(0);
return 0;
}

Expand Down Expand Up @@ -51,10 +50,10 @@ lam_result read_and_eval(const char* path) {
std::vector<char> buf;
if (slurp_file(path, buf) == 0) {
auto env = lam_make_env_default();

for (const char* cur = buf.data(); *cur;) {
const char* bufEnd = buf.data() + buf.size();
for (const char* cur = buf.data(); cur < bufEnd;) {
const char* next = nullptr;
lam_result res = lam_parse(cur, &next);
lam_result res = lam_parse(cur, bufEnd, &next);
if (res.code != 0) {
return res;
}
Expand All @@ -67,25 +66,33 @@ lam_result read_and_eval(const char* path) {
return lam_result::fail(10000, "module not found");
}

static lam_value lam_parse_or_die(const char* input) {
lam_result res = lam_parse(input);
assert(res.code == 0);
return res.value;
/// Parse the first N characters of 'input' and return the parsed value.
/// Asserts that the parser consumed the input right up to its end and that
/// the parse reported success (code 0).
static lam_value _lam_parse_or_die(const char* input, int N) {
    const char* const inputEnd = input + N;
    const char* consumedTo = nullptr;
    auto outcome = lam_parse(input, inputEnd, &consumedTo);
    assert2(consumedTo == inputEnd, "Input was not consumed");
    assert(outcome.code == 0);
    return outcome.value;
}

/// Convenience wrapper for character arrays such as string literals: deduces
/// the array length N and parses everything except the final element, which
/// for a string literal is the trailing '\0'.
template <int N>
static lam_value lam_parse_or_die(const char (&input)[N]) {
    constexpr int length = N - 1;  // exclude the null terminator
    return _lam_parse_or_die(input, length);
}

int main() {
lam_init();
read_and_eval("module.ll");
read_and_eval("test.ll");
if (1) {
lam_parse("hello");
lam_parse("\"world\"");
lam_parse("12");
lam_parse("12.2");
lam_parse("(hello world)");
lam_parse("(hello (* num 141.0) world)");
lam_parse("(begin ($define r 10) (* 3.4 (* r r)))");
lam_parse("(begin ($define r null) (print r))");
lam_parse_or_die("hello");
lam_parse_or_die("\"world\"");
lam_parse_or_die("12");
lam_parse_or_die("12.2");
lam_parse_or_die("(hello world)");
lam_parse_or_die("(hello (* num 141.0) world)");
lam_parse_or_die("(begin ($define r 10) (* 3.4 (* r r)))");
lam_parse_or_die("(begin ($define r null) (print r))");
}

read_and_eval("01-Basic.ll");
Expand Down

0 comments on commit 86f6987

Please sign in to comment.