From 2a800d3af768c3841b74f75e210c987b507d21fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafael=20Garc=C3=ADa?= Date: Wed, 22 May 2024 16:26:36 +0000 Subject: [PATCH] Streamed Json Parser --- .github/workflows/ci.yml | 4 +- cli-player/Cargo.toml | 4 +- lib/Cargo.toml | 10 +- lib/src/callstack.rs | 2 +- lib/src/flow.rs | 2 +- lib/src/{ => json}/json_read.rs | 85 +++- lib/src/json/json_read_stream.rs | 646 +++++++++++++++++++++++++++++++ lib/src/json/json_tokenizer.rs | 311 +++++++++++++++ lib/src/{ => json}/json_write.rs | 0 lib/src/json/mod.rs | 4 + lib/src/lib.rs | 9 +- lib/src/story/mod.rs | 63 +-- lib/src/story_error.rs | 6 + lib/src/story_state.rs | 2 +- lib/src/variables_state.rs | 2 +- 15 files changed, 1073 insertions(+), 77 deletions(-) rename lib/src/{ => json}/json_read.rs (82%) create mode 100644 lib/src/json/json_read_stream.rs create mode 100644 lib/src/json/json_tokenizer.rs rename lib/src/{ => json}/json_write.rs (100%) create mode 100644 lib/src/json/mod.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 672f6a7..6fa0ffe 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,4 +26,6 @@ jobs: run: cargo fmt -- --check - name: Test - run: cargo test + run: | + cargo test + cargo test --features stream-json-parser diff --git a/cli-player/Cargo.toml b/cli-player/Cargo.toml index ae5ddeb..816b35a 100644 --- a/cli-player/Cargo.toml +++ b/cli-player/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "binkplayer" -version = "1.0.3" +version = "1.1.0" description = """ Console player for compiled .json Ink story files. 
""" @@ -16,7 +16,7 @@ path = "src/main.rs" [dependencies] anyhow = "1.0.75" -bladeink = { path = "../lib", "version" = "1.0.3" } +bladeink = { path = "../lib", "version" = "1.1.0" } clap = { "version" = "4.4.6", features = ["derive"] } rand = "0.8.5" diff --git a/lib/Cargo.toml b/lib/Cargo.toml index c1741fd..09f069a 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bladeink" -version = "1.0.3" +version = "1.1.0" authors = ["Rafael Garcia "] description = """ This is a Rust port of inkle's ink, a scripting language for writing interactive narrative. @@ -19,11 +19,13 @@ path = "src/lib.rs" [dependencies] serde = { version = "1.0.152", features = ["derive"] } serde_json = "1.0.93" -strum = { version = "0.25.0", features = ["derive"] } +strum = { version = "0.26.2", features = ["derive"] } as-any = "0.3.0" rand = "0.8.5" instant = "0.1.12" +stringreader = "0.1.1" [features] -stdweb = [ "instant/stdweb" ] -wasm-bindgen = [ "instant/wasm-bindgen" ] +stream-json-parser = [] +stdweb = ["instant/stdweb"] +wasm-bindgen = ["instant/wasm-bindgen"] diff --git a/lib/src/callstack.rs b/lib/src/callstack.rs index 81722c2..e1856a5 100644 --- a/lib/src/callstack.rs +++ b/lib/src/callstack.rs @@ -4,7 +4,7 @@ use serde_json::{json, Map}; use crate::{ container::Container, - json_read, json_write, + json::{json_read, json_write}, object::Object, path::Path, pointer::{self, Pointer}, diff --git a/lib/src/flow.rs b/lib/src/flow.rs index 21bac13..db07aa1 100644 --- a/lib/src/flow.rs +++ b/lib/src/flow.rs @@ -6,7 +6,7 @@ use crate::{ callstack::{CallStack, Thread}, choice::Choice, container::Container, - json_read, json_write, + json::{json_read, json_write}, object::RTObject, story_error::StoryError, }; diff --git a/lib/src/json_read.rs b/lib/src/json/json_read.rs similarity index 82% rename from lib/src/json_read.rs rename to lib/src/json/json_read.rs index a40e899..0b37599 100644 --- a/lib/src/json_read.rs +++ b/lib/src/json/json_read.rs @@ -3,15 +3,88 
@@ use std::{collections::HashMap, rc::Rc}; use serde_json::Map; use crate::{ - choice::Choice, choice_point::ChoicePoint, container::Container, - control_command::ControlCommand, divert::Divert, glue::Glue, ink_list::InkList, - ink_list_item::InkListItem, list_definition::ListDefinition, - list_definitions_origin::ListDefinitionsOrigin, native_function_call::NativeFunctionCall, - object::RTObject, path::Path, push_pop::PushPopType, story_error::StoryError, tag::Tag, - value::Value, variable_assigment::VariableAssignment, variable_reference::VariableReference, + choice::Choice, + choice_point::ChoicePoint, + container::Container, + control_command::ControlCommand, + divert::Divert, + glue::Glue, + ink_list::InkList, + ink_list_item::InkListItem, + list_definition::ListDefinition, + list_definitions_origin::ListDefinitionsOrigin, + native_function_call::NativeFunctionCall, + object::RTObject, + path::Path, + push_pop::PushPopType, + story::{INK_VERSION_CURRENT, INK_VERSION_MINIMUM_COMPATIBLE}, + story_error::StoryError, + tag::Tag, + value::Value, + variable_assigment::VariableAssignment, + variable_reference::VariableReference, void::Void, }; +pub fn load_from_string( + s: &str, +) -> Result<(i32, Rc, Rc), StoryError> { + let json: serde_json::Value = match serde_json::from_str(s) { + Ok(value) => value, + Err(_) => return Err(StoryError::BadJson("Story not in JSON format.".to_owned())), + }; + + let version_opt = json.get("inkVersion"); + + if version_opt.is_none() || !version_opt.unwrap().is_number() { + return Err(StoryError::BadJson( + "ink version number not found. 
Are you sure it's a valid .ink.json file?".to_owned(), + )); + } + + let version: i32 = version_opt.unwrap().as_i64().unwrap().try_into().unwrap(); + + if version > INK_VERSION_CURRENT { + return Err(StoryError::BadJson( + "Version of ink used to build story was newer than the current version of the engine" + .to_owned(), + )); + } else if version < INK_VERSION_MINIMUM_COMPATIBLE { + return Err(StoryError::BadJson("Version of ink used to build story is too old to be loaded by this version of the engine".to_owned())); + } + + let root_token = match json.get("root") { + Some(value) => value, + None => { + return Err(StoryError::BadJson( + "Root node for ink not found. Are you sure it's a valid .ink.json file?".to_owned(), + )) + } + }; + + let list_definitions = match json.get("listDefs") { + Some(def) => Rc::new(jtoken_to_list_definitions(def)?), + None => return Err(StoryError::BadJson( + "List Definitions node for ink not found. Are you sure it's a valid .ink.json file?" + .to_owned(), + )), + }; + + let main_content_container = jtoken_to_runtime_object(root_token, None)?; + + let main_content_container = main_content_container.into_any().downcast::(); + + if main_content_container.is_err() { + return Err(StoryError::BadJson( + "Root node for ink is not a container?".to_owned(), + )); + }; + + let main_content_container = main_content_container.unwrap(); // unwrap: checked for err above + + Ok((version, main_content_container, list_definitions)) +} + pub fn jtoken_to_runtime_object( token: &serde_json::Value, name: Option, diff --git a/lib/src/json/json_read_stream.rs b/lib/src/json/json_read_stream.rs new file mode 100644 index 0000000..01f52d5 --- /dev/null +++ b/lib/src/json/json_read_stream.rs @@ -0,0 +1,646 @@ +//! This is a JSON parser that process the JSON in a streaming fashion. It can be used as a replacement for the Serde based parser. +//! 
This is useful for large JSON files that don't fit in memory, hence the JSON is not loaded all at once as Serde does. +//! This parser has been used to load 'The Intercept' example story in an ESP32-s2 microcontroller with an external RAM of 2MB. With the Serde based parser, it is impossible: it does not have enough memory to load the story. + +use std::{collections::HashMap, rc::Rc}; + +use crate::{ + choice_point::ChoicePoint, + container::Container, + control_command::ControlCommand, + divert::Divert, + glue::Glue, + ink_list::InkList, + ink_list_item::InkListItem, + list_definition::ListDefinition, + list_definitions_origin::ListDefinitionsOrigin, + native_function_call::NativeFunctionCall, + object::RTObject, + path::Path, + push_pop::PushPopType, + story::{INK_VERSION_CURRENT, INK_VERSION_MINIMUM_COMPATIBLE}, + story_error::StoryError, + tag::Tag, + value::Value, + variable_assigment::VariableAssignment, + variable_reference::VariableReference, + void::Void, +}; + +use super::json_tokenizer::{JsonTokenizer, JsonValue}; + +pub fn load_from_string( + s: &str, +) -> Result<(i32, Rc, Rc), StoryError> { + let mut tok = JsonTokenizer::new_from_str(s); + + parse(&mut tok) +} + +fn parse( + tok: &mut JsonTokenizer, +) -> Result<(i32, Rc, Rc), StoryError> { + tok.expect('{')?; + + let version_key = tok.read_obj_key()?; + + if version_key != "inkVersion" { + return Err(StoryError::BadJson( + "ink version number not found. 
Are you sure it's a valid .ink.json file?".to_owned(), + )); + } + + let version: i32 = tok.read_number().unwrap().as_integer().unwrap(); + + if version > INK_VERSION_CURRENT { + return Err(StoryError::BadJson( + "Version of ink used to build story was newer than the current version of the engine" + .to_owned(), + )); + } else if version < INK_VERSION_MINIMUM_COMPATIBLE { + return Err(StoryError::BadJson( + "Version of ink used to build story is too old to be loaded by this version of the engine".to_owned(), + )); + } + + tok.expect(',')?; + + let root_key = tok.read_obj_key()?; + + if root_key != "root" { + return Err(StoryError::BadJson( + "Root node for ink not found. Are you sure it's a valid .ink.json file?".to_owned(), + )); + } + + let root_value = tok.read_value()?; + let main_content_container = match jtoken_to_runtime_object(tok, root_value, None)? { + ArrayElement::RTObject(rt_obj) => rt_obj, + _ => { + return Err(StoryError::BadJson( + "Root node for ink is not a container?".to_owned(), + )) + } + }; + + let main_content_container = main_content_container.into_any().downcast::(); + + if main_content_container.is_err() { + return Err(StoryError::BadJson( + "Root node for ink is not a container?".to_owned(), + )); + }; + + let main_content_container = main_content_container.unwrap(); // unwrap: checked for err above + + tok.expect(',')?; + let list_defs_key = tok.read_obj_key()?; + + if list_defs_key != "listDefs" { + return Err(StoryError::BadJson( + "List Definitions node for ink not found. Are you sure it's a valid .ink.json file?" 
+ .to_owned(), + )); + } + + let list_defs = Rc::new(jtoken_to_list_definitions(tok)?); + + tok.expect('}')?; + + Ok((version, main_content_container, list_defs)) +} + +enum ArrayElement { + RTObject(Rc), + LastElement(i32, Option, HashMap>), + NullElement, +} + +fn jtoken_to_runtime_object( + tok: &mut JsonTokenizer, + value: JsonValue, + name: Option, +) -> Result { + match value { + JsonValue::Null => Ok(ArrayElement::NullElement), + JsonValue::Boolean(value) => Ok(ArrayElement::RTObject(Rc::new(Value::new_bool(value)))), + JsonValue::Number(value) => { + if value.is_integer() { + let val: i32 = value.as_integer().unwrap(); + Ok(ArrayElement::RTObject(Rc::new(Value::new_int(val)))) + } else { + let val: f32 = value.as_float().unwrap(); + Ok(ArrayElement::RTObject(Rc::new(Value::new_float(val)))) + } + } + JsonValue::String(value) => { + let str = value.as_str(); + + // String value + let first_char = str.chars().next().unwrap(); + if first_char == '^' { + return Ok(ArrayElement::RTObject(Rc::new(Value::new_string( + &str[1..], + )))); + } else if first_char == '\n' && str.len() == 1 { + return Ok(ArrayElement::RTObject(Rc::new(Value::new_string("\n")))); + } + + // Glue + if "<>".eq(str) { + return Ok(ArrayElement::RTObject(Rc::new(Glue::new()))); + } + + if let Some(control_command) = ControlCommand::new_from_name(str) { + return Ok(ArrayElement::RTObject(Rc::new(control_command))); + } + + // Native functions + // "^" conflicts with the way to identify strings, so now + // we know it's not a string, we can convert back to the proper + // symbol for the operator. 
+ let mut call_str = str; + if "L^".eq(str) { + call_str = "^"; + } + if let Some(native_function_call) = NativeFunctionCall::new_from_name(call_str) { + return Ok(ArrayElement::RTObject(Rc::new(native_function_call))); + } + + // Void + if "void".eq(str) { + return Ok(ArrayElement::RTObject(Rc::new(Void::new()))); + } + + Err(StoryError::BadJson(format!( + "Failed to convert token to runtime RTObject: {}", + str + ))) + } + JsonValue::Array => Ok(ArrayElement::RTObject(jarray_to_container(tok, name)?)), + JsonValue::Object => { + let prop = tok.read_obj_key()?; + let prop_value = tok.read_value()?; + + // Divert target value to path + if prop == "^->" { + tok.expect('}')?; + return Ok(ArrayElement::RTObject(Rc::new(Value::new_divert_target( + Path::new_with_components_string(prop_value.as_str()), + )))); + } + + // // VariablePointerValue + if prop == "^var" { + let variable_name = prop_value.as_str().unwrap(); + let mut contex_index = -1; + + if tok.peek()? == ',' { + tok.expect(',')?; + tok.expect_obj_key("ci")?; + contex_index = tok.read_number().unwrap().as_integer().unwrap(); + } + + let var_ptr = Rc::new(Value::new_variable_pointer(variable_name, contex_index)); + tok.expect('}')?; + return Ok(ArrayElement::RTObject(var_ptr)); + } + + // // Divert + let mut is_divert = false; + let mut pushes_to_stack = false; + let mut div_push_type = PushPopType::Function; + let mut external = false; + + if prop == "->" { + is_divert = true; + } else if prop == "f()" { + is_divert = true; + pushes_to_stack = true; + div_push_type = PushPopType::Function; + } else if prop == "->t->" { + is_divert = true; + pushes_to_stack = true; + div_push_type = PushPopType::Tunnel; + } else if prop == "x()" { + is_divert = true; + external = true; + pushes_to_stack = false; + div_push_type = PushPopType::Function; + } + + if is_divert { + let target = prop_value.as_str().unwrap().to_string(); + + let mut var_divert_name: Option = None; + let mut target_path: Option = None; + + let mut 
conditional = false; + let mut external_args = 0; + + while tok.peek()? == ',' { + tok.expect(',')?; + let prop = tok.read_obj_key()?; + let prop_value = tok.read_value()?; + + // Variable target + if prop == "var" { + var_divert_name = Some(target.clone()); + } else if prop == "c" { + conditional = true; + } else if prop == "exArgs" { + external_args = prop_value.as_integer().unwrap() as usize; + } + } + + if var_divert_name.is_none() { + target_path = Some(target); + } + + tok.expect('}')?; + return Ok(ArrayElement::RTObject(Rc::new(Divert::new( + pushes_to_stack, + div_push_type, + external, + external_args, + conditional, + var_divert_name, + target_path.as_deref(), + )))); + } + + // Choice + if prop == "*" { + let mut flags = 0; + let path_string_on_choice = prop_value.as_str().unwrap(); + + if tok.peek()? == ',' { + tok.expect(',')?; + tok.expect_obj_key("flg")?; + flags = tok.read_number().unwrap().as_integer().unwrap(); + } + + tok.expect('}')?; + return Ok(ArrayElement::RTObject(Rc::new(ChoicePoint::new( + flags, + path_string_on_choice, + )))); + } + + // Variable reference + if prop == "VAR?" { + tok.expect('}')?; + return Ok(ArrayElement::RTObject(Rc::new(VariableReference::new( + prop_value.as_str().unwrap(), + )))); + } + + if prop == "CNT?" { + tok.expect('}')?; + return Ok(ArrayElement::RTObject(Rc::new( + VariableReference::from_path_for_count(prop_value.as_str().unwrap()), + ))); + } + + // Variable assignment + let mut is_var_ass = false; + let mut is_global_var = false; + + if prop == "VAR=" { + is_var_ass = true; + is_global_var = true; + } else if prop == "temp=" { + is_var_ass = true; + is_global_var = false; + } + + if is_var_ass { + let var_name = prop_value.as_str().unwrap(); + let mut is_new_decl = true; + + if tok.peek()? 
== ',' { + tok.expect(',')?; + tok.expect_obj_key("re")?; + let _ = tok.read_boolean()?; + is_new_decl = false; + } + + let var_ass = Rc::new(VariableAssignment::new( + var_name, + is_new_decl, + is_global_var, + )); + tok.expect('}')?; + return Ok(ArrayElement::RTObject(var_ass)); + } + + // // Legacy Tag + if prop == "#" { + tok.expect('}')?; + return Ok(ArrayElement::RTObject(Rc::new(Tag::new( + prop_value.as_str().unwrap(), + )))); + } + + // List value + if prop == "list" { + let list_content = parse_list(tok)?; + let mut raw_list = InkList::new(); + + if tok.peek()? == ',' { + tok.expect(',')?; + tok.expect_obj_key("origins")?; + + // read array of strings + tok.expect('[')?; + + let mut names = Vec::new(); + while tok.peek()? != ']' { + let name = tok.read_string()?; + names.push(name); + + if tok.peek()? != ']' { + tok.expect(',')?; + } + } + + tok.expect(']')?; + + raw_list.set_initial_origin_names(names); + } + + for (k, v) in list_content { + let item = InkListItem::from_full_name(k.as_str()); + raw_list.items.insert(item, v); + } + + tok.expect('}')?; + return Ok(ArrayElement::RTObject(Rc::new(Value::new_list(raw_list)))); + } + + // Used when serialising save state only + if prop == "originalChoicePath" { + todo!("originalChoicePath"); + // return jobject_to_choice(obj); // TODO + } + + // Last Element + let mut flags = 0; + let mut name: Option = None; + let mut named_only_content: HashMap> = HashMap::new(); + + let mut p = prop.clone(); + let mut pv = prop_value; + + loop { + if p == "#f" { + flags = pv.as_integer().unwrap(); + } else if p == "#n" { + name = Some(pv.as_str().unwrap().to_string()); + } else { + let named_content_item = jtoken_to_runtime_object(tok, pv, Some(p.clone()))?; + + let named_content_item = match named_content_item { + ArrayElement::RTObject(rt_obj) => rt_obj, + _ => { + return Err(StoryError::BadJson( + "Named content is not a runtime object".to_owned(), + )) + } + }; + + let named_sub_container = named_content_item + 
.into_any() + .downcast::() + .unwrap(); + + named_only_content.insert(p, named_sub_container); + } + + if tok.peek()? == ',' { + tok.expect(',')?; + p = tok.read_obj_key()?; + pv = tok.read_value()?; + } else if tok.peek()? == '}' { + tok.expect('}')?; + return Ok(ArrayElement::LastElement(flags, name, named_only_content)); + } else { + break; + } + } + + Err(StoryError::BadJson(format!( + "Failed to convert token to runtime RTObject: {}", + prop + ))) + } + } +} + +fn parse_list(tok: &mut JsonTokenizer) -> Result, StoryError> { + let mut list_content: HashMap = HashMap::new(); + + while tok.peek()? != '}' { + let key = tok.read_obj_key()?; + let value = tok.read_number().unwrap().as_integer().unwrap(); + list_content.insert(key, value); + + if tok.peek()? != '}' { + tok.expect(',')?; + } + } + + tok.expect('}')?; + + Ok(list_content) +} + +fn jarray_to_container( + tok: &mut JsonTokenizer, + name: Option, +) -> Result, StoryError> { + let (content, named) = jarray_to_runtime_obj_list(tok)?; + + // Final object in the array is always a combination of + // - named content + // - a "#f" key with the countFlags + + // (if either exists at all, otherwise null) + // let terminating_obj = jarray[jarray.len() - 1].as_object(); + let mut name: Option = name; + let mut flags = 0; + let mut named_only_content: HashMap> = HashMap::new(); + + if let Some(ArrayElement::LastElement(f, n, named_content)) = named { + flags = f; + + if n.is_some() { + name = n; + } + + named_only_content = named_content; + } + + let container = Container::new(name, flags, content, named_only_content); + Ok(container) +} + +fn jarray_to_runtime_obj_list( + tok: &mut JsonTokenizer, +) -> Result<(Vec>, Option), StoryError> { + let mut list: Vec> = Vec::new(); + let mut last_element: Option = None; + + while tok.peek()? 
!= ']' { + let val = tok.read_value()?; + let runtime_obj = jtoken_to_runtime_object(tok, val, None)?; + + match runtime_obj { + ArrayElement::LastElement(flags, name, named_only_content) => { + last_element = Some(ArrayElement::LastElement(flags, name, named_only_content)); + break; + } + ArrayElement::RTObject(rt_obj) => list.push(rt_obj), + ArrayElement::NullElement => { + // Only the last element can be null + if tok.peek()? != ']' { + return Err(StoryError::BadJson( + "Only the last element can be null".to_owned(), + )); + } + } + } + + if tok.peek()? != ']' { + tok.expect(',')?; + } + } + + tok.expect(']')?; + + Ok((list, last_element)) +} + +fn jtoken_to_list_definitions( + tok: &mut JsonTokenizer, +) -> Result { + let mut all_defs: Vec = Vec::with_capacity(0); + + tok.expect('{')?; + + while tok.peek()? != '}' { + let name = tok.read_obj_key()?; + tok.expect('{')?; + + let items = parse_list(tok)?; + let def = ListDefinition::new(name, items); + all_defs.push(def); + + if tok.peek()? 
!= '}' { + tok.expect(',')?; + } + } + + tok.expect('}')?; + + Ok(ListDefinitionsOrigin::new(&mut all_defs)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn simple_load() { + let s = r##"{"inkVersion":21,"root":[["^Line.","\n",["done",{"#n":"g-0"}],null],"done",null],"listDefs":{}}"##; + let _ = load_from_string(s).unwrap(); + } + + #[test] + fn load_list() { + let s = r##" + { + "inkVersion": 21, + "root": [ + [ + "ev", + { + "VAR?": "A" + }, + { + "VAR?": "B" + }, + "+", + "LIST_ALL", + "out", + "/ev", + "\n", + [ + "done", + { + "#f": 5, + "#n": "g-0" + } + ], + null + ], + "done", + { + "global decl": [ + "ev", + { + "list": {}, + "origins": [ + "a" + ] + }, + { + "VAR=": "a" + }, + { + "list": {}, + "origins": [ + "b" + ] + }, + { + "VAR=": "b" + }, + "/ev", + "end", + null + ], + "#f": 1 + } + ], + "listDefs": { + "a": { + "A": 1 + }, + "b": { + "B": 1 + } + } + } + "##; + let _ = load_from_string(s).unwrap(); + } + + #[test] + fn load_choice() { + let s = r##"{"inkVersion":21,"root":[["^Hello world!","\n","ev","str","^Hello back!","/str","/ev",{"*":"0.c-0","flg":20},{"c-0":["\n","done",{"->":"0.g-0"},{"#f":5}],"g-0":["done",null]}],"done",null],"listDefs":{}}"##; + let (_, container, _) = load_from_string(s).unwrap(); + let mut sb = String::new(); + container.build_string_of_hierarchy(&mut sb, 0, None); + println!("{}", sb); + } + + #[test] + fn load_iffalse() { + let s = r##"{"inkVersion":21,"root":[["ev",{"VAR?":"x"},0,">","/ev",[{"->":".^.b","c":true},{"b":["\n","ev",{"VAR?":"x"},1,"-","/ev",{"VAR=":"y","re":true},{"->":"0.6"},null]}],"nop","\n","^The value is ","ev",{"VAR?":"y"},"out","/ev","^. 
","end","\n",["done",{"#n":"g-0"}],null],"done",{"global decl":["ev",0,{"VAR=":"x"},3,{"VAR=":"y"},"/ev","end",null]}],"listDefs":{}}"##; + let (_, container, _) = load_from_string(s).unwrap(); + let mut sb = String::new(); + container.build_string_of_hierarchy(&mut sb, 0, None); + println!("{}", sb); + } +} diff --git a/lib/src/json/json_tokenizer.rs b/lib/src/json/json_tokenizer.rs new file mode 100644 index 0000000..8a914cd --- /dev/null +++ b/lib/src/json/json_tokenizer.rs @@ -0,0 +1,311 @@ +//! Tokenizer for the streamed JSON parser. +use std::io::{self, Read}; + +use stringreader::StringReader; + +#[derive(Debug)] +pub(super) enum Number { + Int(i32), + Float(f32), +} + +impl Number { + pub(super) fn as_integer(&self) -> Option { + match self { + Number::Int(n) => Some(*n), + Number::Float(n) => Some(*n as i32), + } + } + + pub(super) fn as_float(&self) -> Option { + match self { + Number::Int(n) => Some(*n as f32), + Number::Float(n) => Some(*n), + } + } + + pub(super) fn is_integer(&self) -> bool { + match self { + Number::Int(_) => true, + Number::Float(_) => false, + } + } +} + +#[derive(Debug)] +pub(super) enum JsonValue { + Array, + Object, + String(String), + Number(Number), + Boolean(bool), + Null, +} + +impl JsonValue { + pub(super) fn as_str(&self) -> Option<&str> { + match self { + JsonValue::String(s) => Some(s), + _ => None, + } + } + + pub(super) fn as_integer(&self) -> Option { + match self { + JsonValue::Number(n) => n.as_integer(), + _ => None, + } + } +} + +pub(super) struct JsonTokenizer<'a> { + reader: Box, + lookahead: Option, + skip_whitespaces: bool, +} + +impl<'a> JsonTokenizer<'a> { + pub(super) fn new_from_str(s: &'a str) -> JsonTokenizer<'a> { + JsonTokenizer { + reader: Box::new(StringReader::new(s)) as Box, + lookahead: None, + skip_whitespaces: true, + } + } + + pub(super) fn read(&mut self) -> io::Result { + let c = match self.lookahead { + Some(c) => { + self.lookahead = None; + c + } + None => self.read_no_lookahead()?, + }; + 
+ Ok(c) + } + + fn read_no_lookahead(&mut self) -> io::Result { + let c = loop { + let c = self.read_utf8_char()?; + + if !self.skip_whitespaces || !c.is_whitespace() { + break c; + } + }; + + Ok(c) + } + + fn read_utf8_char(&mut self) -> io::Result { + let mut temp_buf = [0; 1]; + let mut utf8_char = Vec::new(); + + // Read bytes until a valid UTF-8 character is formed + loop { + self.reader.read_exact(&mut temp_buf)?; + utf8_char.push(temp_buf[0]); + + if let Ok(utf8_str) = std::str::from_utf8(&utf8_char) { + if let Some(ch) = utf8_str.chars().next() { + return Ok(ch); + } + } + + // If we have read 4 bytes and still not a valid character, return an error + if utf8_char.len() >= 4 { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "Invalid UTF-8 sequence", + )); + } + } + } + + pub(super) fn peek(&mut self) -> io::Result { + match self.lookahead { + Some(c) => Ok(c), + None => { + let c = self.read_no_lookahead()?; + self.lookahead = Some(c); + Ok(c) + } + } + } + + pub(super) fn read_boolean(&mut self) -> io::Result { + let string = self.read_until_separator()?; + + match string.trim() { + "true" => Ok(true), + "false" => Ok(false), + _ => Err(io::Error::new( + io::ErrorKind::InvalidData, + "Invalid boolean format", + )), + } + } + + pub(super) fn read_null(&mut self) -> io::Result<()> { + let string = self.read_until_separator()?; + + if string.trim() == "null" { + Ok(()) + } else { + Err(io::Error::new( + io::ErrorKind::InvalidData, + "Invalid null format", + )) + } + } + + pub(super) fn read_number(&mut self) -> io::Result { + let number_str = self.read_until_separator()?; + let number_str = number_str.trim(); + + // Check if the number is an integer + if let Ok(num) = number_str.parse::() { + return Ok(Number::Int(num)); + } + + // Convert the accumulated string to a f32 + match number_str.parse::() { + Ok(num) => Ok(Number::Float(num)), + Err(_) => Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Invalid number format: '{}'", 
number_str), + )), + } + } + + pub(super) fn read_string(&mut self) -> io::Result { + let mut result = String::new(); + let mut escape = false; + + self.expect('"')?; + self.skip_whitespaces = false; + + while let Ok(c) = self.read() { + if escape { + // Handle escape sequences + match c { + '\\' => result.push('\\'), + '"' => result.push('"'), + 'n' => result.push('\n'), + // 't' => result.push('\t'), + // 'r' => result.push('\r'), + // Add other escape sequences as needed + // _ => result.push(c), // Push the character as is if unknown escape + _ => {} + } + escape = false; + } else if c == '\\' { + escape = true; + } else if c == '"' { + self.skip_whitespaces = true; + break; // End of the quoted string + } else { + result.push(c); + } + } + + if !escape { + Ok(result) + } else { + Err(io::Error::new( + io::ErrorKind::InvalidData, + "Unterminated string", + )) + } + } + + fn read_until_separator(&mut self) -> io::Result { + let mut result = String::new(); + + self.skip_whitespaces = false; + + while !self.next_is_separator() { + let c = self.read()?; + result.push(c); + } + + self.skip_whitespaces = true; + + Ok(result) + } + + fn next_is_separator(&mut self) -> bool { + match self.peek() { + Ok(c) => c == ',' || c == '}' || c == ']', + Err(_) => true, + } + } + + pub(super) fn expect(&mut self, c: char) -> io::Result<()> { + while let Ok(c2) = self.read() { + if !c2.is_whitespace() { + if c2 == c { + return Ok(()); + } else { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Expected '{}', found '{}'", c, c2), + )); + } + } + } + + Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "Unexpected end of file", + )) + } + + pub(super) fn read_obj_key(&mut self) -> io::Result { + let s = self.read_string(); + self.expect(':')?; + s + } + + pub(super) fn expect_obj_key(&mut self, expected: &str) -> io::Result<()> { + let s = self.read_string()?; + if s != expected { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("Expected 
'{}', found '{}'", expected, s), + )); + } + let _ = self.expect(':'); + Ok(()) + } + + pub(super) fn read_value(&mut self) -> io::Result { + //self.skip_whitespaces()?; + match self.peek()? { + '[' => { + self.read()?; + Ok(JsonValue::Array) + } + '{' => { + self.read()?; + Ok(JsonValue::Object) + } + '"' => { + let s = self.read_string()?; + Ok(JsonValue::String(s)) + } + 't' | 'f' => { + let b = self.read_boolean()?; + Ok(JsonValue::Boolean(b)) + } + 'n' => { + self.read_null()?; + Ok(JsonValue::Null) + } + _ => { + let n = self.read_number()?; + Ok(JsonValue::Number(n)) + } + } + } +} diff --git a/lib/src/json_write.rs b/lib/src/json/json_write.rs similarity index 100% rename from lib/src/json_write.rs rename to lib/src/json/json_write.rs diff --git a/lib/src/json/mod.rs b/lib/src/json/mod.rs new file mode 100644 index 0000000..b5745e6 --- /dev/null +++ b/lib/src/json/mod.rs @@ -0,0 +1,4 @@ +pub mod json_read; +pub mod json_read_stream; +mod json_tokenizer; +pub mod json_write; diff --git a/lib/src/lib.rs b/lib/src/lib.rs index bc51376..dab3c4a 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -10,8 +10,8 @@ //! ``` //! # use bladeink::{story::Story, story_error::StoryError}; //! # fn main() -> Result<(), StoryError> { -//! # let json_string = r##"{"root":["done",null],"listDefs":{},"inkVersion":21}"##; -//! # let read_input = |_:&_| Ok(0); +//! # let json_string = r##"{"inkVersion":21, "root":["done",null],"listDefs":{}}"##; +//! # let read_input = |_:&_| 0; //! // story is the entry point of the `bladeink` lib. //! // json_string is a string with all the contents of the .ink.json file. //! let mut story = Story::new(json_string)?; @@ -27,7 +27,7 @@ //! if !choices.is_empty() { //! // read_input is a method that you should implement //! // to get the choice selected by the user. -//! let choice_idx = read_input(&choices)?; +//! let choice_idx:usize = read_input(&choices); //! // set the option selected by the user //! 
story.choose_choice_index(choice_idx)?; //! } else { @@ -55,8 +55,7 @@ mod flow; mod glue; mod ink_list; mod ink_list_item; -mod json_read; -mod json_write; +mod json; mod list_definition; mod list_definitions_origin; mod native_function_call; diff --git a/lib/src/story/mod.rs b/lib/src/story/mod.rs index 1f39dbe..8d80490 100644 --- a/lib/src/story/mod.rs +++ b/lib/src/story/mod.rs @@ -14,7 +14,7 @@ use std::{cell::RefCell, collections::HashMap, rc::Rc}; pub const INK_VERSION_CURRENT: i32 = 21; /// The minimum legacy version of ink that can be loaded by the current version /// of the code. -const INK_VERSION_MINIMUM_COMPATIBLE: i32 = 18; +pub const INK_VERSION_MINIMUM_COMPATIBLE: i32 = 18; #[derive(PartialEq)] pub(crate) enum OutputStateChange { @@ -44,10 +44,9 @@ pub struct Story { } mod misc { use crate::{ - container::Container, - json_read, + json::{json_read, json_read_stream}, object::{Object, RTObject}, - story::{Story, INK_VERSION_CURRENT, INK_VERSION_MINIMUM_COMPATIBLE}, + story::{Story, INK_VERSION_CURRENT}, story_error::StoryError, story_state::StoryState, value::Value, @@ -59,59 +58,13 @@ mod misc { /// Construct a `Story` out of a JSON string that was compiled with /// `inklecate`. pub fn new(json_string: &str) -> Result { - let json: serde_json::Value = match serde_json::from_str(json_string) { - Ok(value) => value, - Err(_) => return Err(StoryError::BadJson("Story not in JSON format.".to_owned())), - }; - - let version_opt = json.get("inkVersion"); - - if version_opt.is_none() || !version_opt.unwrap().is_number() { - return Err(StoryError::BadJson( - "ink version number not found. Are you sure it's a valid .ink.json file?" 
- .to_owned(), - )); - } - - let version: i32 = version_opt.unwrap().as_i64().unwrap().try_into().unwrap(); - - if version > INK_VERSION_CURRENT { - return Err(StoryError::BadJson("Version of ink used to build story was newer than the current version of the engine".to_owned())); - } else if version < INK_VERSION_MINIMUM_COMPATIBLE { - return Err(StoryError::BadJson("Version of ink used to build story is too old to be loaded by this version of the engine".to_owned())); - } - - let root_token = - match json.get("root") { - Some(value) => value, - None => return Err(StoryError::BadJson( - "Root node for ink not found. Are you sure it's a valid .ink.json file?" - .to_owned(), - )), + let (version, main_content_container, list_definitions) = + if cfg!(feature = "stream-json-parser") { + json_read_stream::load_from_string(json_string)? + } else { + json_read::load_from_string(json_string)? }; - let list_definitions = match json.get("listDefs") { - Some(def) => Rc::new(json_read::jtoken_to_list_definitions(def)?), - None => { - return Err( - StoryError::BadJson("List Definitions node for ink not found. Are you sure it's a valid .ink.json file?" 
- .to_owned()), - ) - } - }; - - let main_content_container = json_read::jtoken_to_runtime_object(root_token, None)?; - - let main_content_container = main_content_container.into_any().downcast::(); - - if main_content_container.is_err() { - return Err(StoryError::BadJson( - "Root node for ink is not a container?".to_owned(), - )); - }; - - let main_content_container = main_content_container.unwrap(); // unwrap: checked for err above - let mut story = Story { main_content_container: main_content_container.clone(), state: StoryState::new(main_content_container.clone(), list_definitions.clone()), diff --git a/lib/src/story_error.rs b/lib/src/story_error.rs index 96e2e01..ea265e0 100644 --- a/lib/src/story_error.rs +++ b/lib/src/story_error.rs @@ -26,6 +26,12 @@ impl StoryError { impl std::error::Error for StoryError {} +impl std::convert::From for StoryError { + fn from(err: std::io::Error) -> StoryError { + StoryError::BadJson(err.to_string()) + } +} + impl fmt::Display for StoryError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { diff --git a/lib/src/story_state.rs b/lib/src/story_state.rs index 1c1431d..37f848c 100644 --- a/lib/src/story_state.rs +++ b/lib/src/story_state.rs @@ -7,7 +7,7 @@ use crate::{ control_command::{CommandType, ControlCommand}, flow::Flow, glue::Glue, - json_read, json_write, + json::{json_read, json_write}, list_definitions_origin::ListDefinitionsOrigin, object::{Object, RTObject}, path::Path, diff --git a/lib/src/variables_state.rs b/lib/src/variables_state.rs index 2a54524..2e80838 100644 --- a/lib/src/variables_state.rs +++ b/lib/src/variables_state.rs @@ -8,7 +8,7 @@ use serde_json::Map; use crate::{ callstack::CallStack, - json_read, json_write, + json::{json_read, json_write}, list_definitions_origin::ListDefinitionsOrigin, state_patch::StatePatch, story_error::StoryError,