diff --git a/pyproject.toml b/pyproject.toml index 4cf0ebf..9535dc3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "scikit-learn==1.2.2", "torch==2.0.1", "torchvision==0.15.2", "PyYAML==6.0", "transformers==4.30.1", "sympy==1.12", "openai==0.27.8", "google-search-results==2.4.2", "PyPDF2==3.0.1", "ipython==8.14.0", "accelerate==0.20.3", "sentencepiece==0.1.99", "beautifulsoup4==4.12.2", "selenium==4.10.0", "webdriver-manager==3.8.6", "whisper==1.1.10", "pinecone-client==2.2.2", "tiktoken==0.4.0", - "chromedriver-autoinstaller==0.4.0" + "chromedriver-autoinstaller==0.4.0", "tika==2.6.0" ] [tool.setuptools.dynamic] diff --git a/symai/__init__.py b/symai/__init__.py index 14218c5..d4371b4 100644 --- a/symai/__init__.py +++ b/symai/__init__.py @@ -9,7 +9,7 @@ logging.getLogger("requests").setLevel(logging.WARNING) -SYMAI_VERSION = "0.2.22" +SYMAI_VERSION = "0.2.23" __version__ = SYMAI_VERSION __root_dir__ = Path.home() / '.symai' diff --git a/symai/backend/engine_file.py b/symai/backend/engine_file.py index ed40252..5695382 100644 --- a/symai/backend/engine_file.py +++ b/symai/backend/engine_file.py @@ -1,7 +1,6 @@ from typing import List - import PyPDF2 - +from tika import unpack from .base import Engine @@ -17,7 +16,7 @@ def forward(self, *args, **kwargs) -> List[str]: if 'pdf' in path: rsp = '' - with open(path, 'rb') as f: + with open(str(path), 'rb') as f: # creating a pdf reader object pdf_reader = PyPDF2.PdfReader(f) n_pages = len(pdf_reader.pages) @@ -27,8 +26,7 @@ def forward(self, *args, **kwargs) -> List[str]: page = pdf_reader.pages[i] rsp += page.extract_text() else: - with open(path, 'r') as f: - rsp = f.read() + rsp = unpack.from_file(str(path))['content'] output_handler = kwargs['output_handler'] if 'output_handler' in kwargs else None if output_handler: diff --git a/symai/components.py b/symai/components.py index 1484b08..8c6aad5 100644 --- a/symai/components.py +++ b/symai/components.py @@ -33,6 +33,7 @@ def __init__(self, expr: Expression, retries: int = 1): self.retries: int = retries def forward(self, sym: Symbol, **kwargs) -> Symbol: + sym = self._to_symbol(sym) return sym.ftry(self.expr, retries=self.retries, **kwargs) @@ -58,6 +59,7 @@ def __init__(self, cases: List[str], default: Optional[str] = None): self.default: Optional[str] = default def forward(self, sym: Symbol, *args, **kwargs) -> Symbol: + sym = self._to_symbol(sym) return sym.choice(cases=self.cases, default=self.default, *args, **kwargs) @@ -94,7 +96,8 @@ def __init__(self, expr: Expression, force: bool = False): self.expr: Expression = expr self.force: bool = force - def forward(self, sym: Expression, **kwargs) -> Iterator[Symbol]: + def forward(self, sym: Symbol, **kwargs) -> Iterator[Symbol]: + sym = self._to_symbol(sym) if self.force: return sym.fstream(expr=self.expr, max_tokens=self.max_tokens, @@ -148,7 +151,8 @@ def __init__(self, template: str = "{{placeholder}}", self.placeholder = placeholder self.template_ = template - def forward(self, sym: Symbol, *args, **kwargs) -> Symbol: + def forward(self, sym: Symbol, **kwargs) -> Symbol: + sym = self._to_symbol(sym) return sym.template(self.template_, self.placeholder, **kwargs) @@ -158,7 +162,8 @@ def __init__(self, description: str, libraries: List[str] = []): self.description: str = description self.libraries: List[str] = libraries - def forward(self, sym: Symbol, *args, **kwargs) -> Symbol: + def forward(self, sym: Symbol, **kwargs) -> Symbol: + sym = self._to_symbol(sym) return sym.style(description=self.description, libraries=self.libraries, **kwargs) @@ -168,21 +173,25 @@ def __init__(self, prompt: str): self.prompt: str = prompt def forward(self, sym: Symbol, context: Symbol = None, *args, **kwargs) -> Symbol: + sym = self._to_symbol(sym) return sym.query(prompt=self.prompt, context=context, **kwargs) class Outline(Expression): def forward(self, sym: Symbol, **kwargs) -> Symbol: + sym = self._to_symbol(sym) return sym.outline(**kwargs) class Clean(Expression): def forward(self, sym: Symbol, **kwargs) -> Symbol: + sym = self._to_symbol(sym) return sym.clean(**kwargs) class Execute(Expression): def forward(self, sym: Symbol, **kwargs) -> Symbol: + sym = self._to_symbol(sym) return sym.execute(**kwargs) @@ -192,26 +201,31 @@ def __init__(self, format: str = 'Python'): self.format = format def forward(self, sym: Symbol, **kwargs) -> Symbol: + sym = self._to_symbol(sym) return sym.convert(format=self.format, **kwargs) class Embed(Expression): def forward(self, sym: Symbol, **kwargs) -> Symbol: + sym = self._to_symbol(sym) return sym.embed(**kwargs) class Cluster(Expression): def forward(self, sym: Symbol, **kwargs) -> Symbol: + sym = self._to_symbol(sym) return sym.cluster(**kwargs) class Compose(Expression): def forward(self, sym: Symbol, **kwargs) -> Symbol: + sym = self._to_symbol(sym) return sym.compose(**kwargs) class Map(Expression): def forward(self, sym: Symbol, **kwargs) -> Symbol: + sym = self._to_symbol(sym) return sym.map(**kwargs) @@ -221,6 +235,7 @@ def __init__(self, language: str = 'English'): self.language = language def forward(self, sym: Symbol, **kwargs) -> Symbol: + sym = self._to_symbol(sym) if sym.isinstanceof(f'{self.language} text'): return sym return sym.translate(language=self.language, **kwargs) @@ -232,6 +247,7 @@ def __init__(self, include: str): self.include = include def forward(self, sym: Symbol, **kwargs) -> Symbol: + sym = self._to_symbol(sym) return sym.filter(self.include, include=True, **kwargs) @@ -241,12 +257,13 @@ def __init__(self, exclude: str): self.exclude = exclude def forward(self, sym: Symbol, **kwargs) -> Symbol: + sym = self._to_symbol(sym) return sym.filter(self.exclude, include=False, **kwargs) class Open(Expression): def forward(self, path: str, **kwargs) -> Expression: - return Expression.open(path, **kwargs) + return self.open(path, **kwargs) class FileQuery(Expression): @@ -260,6 +277,7 @@ def __init__(self, path: str, filter: str): self.file = file_open(path) def forward(self, sym: Symbol, **kwargs) -> Symbol: + sym = self._to_symbol(sym) res = Symbol(list(self.query_stream(self.file))) return res.query(prompt=sym, context=res, **kwargs) diff --git a/symai/symbol.py b/symai/symbol.py index 02300d8..4473402 100644 --- a/symai/symbol.py +++ b/symai/symbol.py @@ -644,7 +644,7 @@ def fstream(self, expr: "Expression", return self._sym_return_type(list(self.stream(expr, max_tokens, char_token_ratio, **kwargs))) def ftry(self, expr: "Expression", retries: int = 1, **kwargs) -> "Symbol": - prompt = {} + prompt = {'message': ''} def output_handler(input_): prompt['message'] = input_ kwargs['output_handler'] = output_handler @@ -652,7 +652,6 @@ def output_handler(input_): sym = self while True: try: - print(expr, sym, kwargs) sym = expr(sym, **kwargs) retry_cnt = 0 return sym