From 1c06347d7c304bcd8419fc680a3f5dcdb4e7968a Mon Sep 17 00:00:00 2001 From: Milind Lalwani Date: Tue, 7 Nov 2023 14:07:46 +0100 Subject: [PATCH 01/11] refactor(Pipelines) : Smart Data Frame Pipeline --- pandasai/pipelines/pipeline.py | 7 +- pandasai/pipelines/pipeline_context.py | 27 +- pandasai/schemas/df_config.py | 5 +- pandasai/smart_dataframe/__init__.py | 2 - pandasai/smart_datalake/__init__.py | 267 ++++++------------ pandasai/smart_datalake/code_execution.py | 107 +++++++ pandasai/smart_datalake/code_generator.py | 106 +++++++ .../generate_smart_datalake_pipeline.py | 31 ++ pandasai/smart_datalake/result_parsing.py | 38 +++ pandasai/smart_datalake/result_validation.py | 52 ++++ tests/test_codemanager.py | 2 + 11 files changed, 456 insertions(+), 188 deletions(-) create mode 100644 pandasai/smart_datalake/code_execution.py create mode 100644 pandasai/smart_datalake/code_generator.py create mode 100644 pandasai/smart_datalake/generate_smart_datalake_pipeline.py create mode 100644 pandasai/smart_datalake/result_parsing.py create mode 100644 pandasai/smart_datalake/result_validation.py diff --git a/pandasai/pipelines/pipeline.py b/pandasai/pipelines/pipeline.py index 6353cc977..cc2e15fe2 100644 --- a/pandasai/pipelines/pipeline.py +++ b/pandasai/pipelines/pipeline.py @@ -5,7 +5,6 @@ from pandasai.helpers.logger import Logger from pandasai.pipelines.pipeline_context import PipelineContext from pandasai.pipelines.base_logic_unit import BaseLogicUnit -from pandasai.smart_dataframe import SmartDataframe, load_smartdataframes from ..schemas.df_config import Config from typing import Any, Optional, List, Union from .abstract_pipeline import AbstractPipeline @@ -22,9 +21,7 @@ class Pipeline(AbstractPipeline): def __init__( self, - context: Union[ - List[Union[DataFrameType, SmartDataframe]], PipelineContext - ] = None, + context: Union[List[Union[DataFrameType, Any]], PipelineContext] = None, config: Optional[Union[Config, dict]] = None, steps: Optional[List] = None, 
logger: Optional[Logger] = None, @@ -40,6 +37,8 @@ def __init__( """ if not isinstance(context, PipelineContext): + from pandasai.smart_dataframe import load_smartdataframes + config = Config(**load_config(config)) smart_dfs = load_smartdataframes(context, config) context = PipelineContext(smart_dfs, config) diff --git a/pandasai/pipelines/pipeline_context.py b/pandasai/pipelines/pipeline_context.py index f38e0738c..a03647c3d 100644 --- a/pandasai/pipelines/pipeline_context.py +++ b/pandasai/pipelines/pipeline_context.py @@ -1,11 +1,11 @@ -from typing import List, Optional, Union +from typing import List, Optional, Union, Any from pandasai.helpers.cache import Cache from pandasai.helpers.df_info import DataFrameType from pandasai.helpers.memory import Memory +from pandasai.helpers.query_exec_tracker import QueryExecTracker from pandasai.helpers.skills_manager import SkillsManager from pandasai.schemas.df_config import Config -from pandasai.smart_dataframe import SmartDataframe, load_smartdataframes class PipelineContext: @@ -13,20 +13,25 @@ class PipelineContext: Pass Context to the pipeline which is accessible to each step via kwargs """ - _dfs: List[Union[DataFrameType, SmartDataframe]] + _dfs: List[Union[DataFrameType, Any]] _memory: Memory _skills: SkillsManager _cache: Cache _config: Config + _query_exec_tracker: QueryExecTracker + _intermediate_values: dict def __init__( self, - dfs: List[Union[DataFrameType, SmartDataframe]], + dfs: List[Union[DataFrameType, Any]], config: Optional[Union[Config, dict]] = None, memory: Memory = None, skills: SkillsManager = None, cache: Cache = None, + query_exec_tracker: QueryExecTracker = None, ) -> None: + from pandasai.smart_dataframe import load_smartdataframes + if isinstance(config, dict): config = Config(**config) @@ -35,9 +40,11 @@ def __init__( self._skills = skills if skills is not None else SkillsManager() self._cache = cache if cache is not None else Cache() self._config = config + self._query_exec_tracker = 
query_exec_tracker + self._intermediate_values = {} @property - def dfs(self) -> List[Union[DataFrameType, SmartDataframe]]: + def dfs(self) -> List[Union[DataFrameType, Any]]: return self._dfs @property @@ -55,3 +62,13 @@ def cache(self): @property def config(self): return self._config + + @property + def query_exec_tracker(self): + return self._query_exec_tracker + + def add_intermediate_value(self, key: str, value: Any): + self._intermediate_values[key] = value + + def get_intermediate_value(self, key: str): + return self._intermediate_values[key] diff --git a/pandasai/schemas/df_config.py b/pandasai/schemas/df_config.py index 2cd1f312d..2eee96f70 100644 --- a/pandasai/schemas/df_config.py +++ b/pandasai/schemas/df_config.py @@ -1,7 +1,6 @@ from pydantic import BaseModel, validator, Field -from typing import Optional, List, Any, Dict, Type, TypedDict +from typing import Optional, List, Any, Dict, TypedDict from pandasai.constants import DEFAULT_CHART_DIRECTORY -from pandasai.responses import ResponseParser from ..middlewares.base import Middleware from ..callbacks.base import BaseCallback from ..llm import LLM, LangchainLLM @@ -31,7 +30,7 @@ class Config(BaseModel): middlewares: List[Middleware] = Field(default_factory=list) callback: Optional[BaseCallback] = None lazy_load_connector: bool = True - response_parser: Type[ResponseParser] = None + response_parser: Any = None llm: Any = None data_viz_library: Optional[VisualizationLibrary] = None log_server: LogServerConfig = None diff --git a/pandasai/smart_dataframe/__init__.py b/pandasai/smart_dataframe/__init__.py index 278355a1f..07e21de71 100644 --- a/pandasai/smart_dataframe/__init__.py +++ b/pandasai/smart_dataframe/__init__.py @@ -737,8 +737,6 @@ def load_smartdataframes( dfs (List[Union[DataFrameType, Any]]): List of dataframes to be used """ - from ..smart_dataframe import SmartDataframe - smart_dfs = [] for df in dfs: if not isinstance(df, SmartDataframe): diff --git a/pandasai/smart_datalake/__init__.py 
b/pandasai/smart_datalake/__init__.py index b480f1e4a..db950558a 100644 --- a/pandasai/smart_datalake/__init__.py +++ b/pandasai/smart_datalake/__init__.py @@ -20,16 +20,19 @@ import uuid import logging import os -import traceback from pandasai.constants import DEFAULT_CHART_DIRECTORY from pandasai.helpers.skills_manager import SkillsManager +from pandasai.pipelines.pipeline_context import PipelineContext from pandasai.skills import skill from pandasai.helpers.query_exec_tracker import QueryExecTracker +from pandasai.smart_datalake.generate_smart_datalake_pipeline import ( + GenerateSmartDatalakePipeline, +) -from ..helpers.output_types import output_type_factory -from ..helpers.viz_library_types import viz_lib_type_factory +from pandasai.helpers.output_types import output_type_factory +from pandasai.helpers.viz_library_types import viz_lib_type_factory from pandasai.responses.context import Context from pandasai.responses.response_parser import ResponseParser from ..llm.base import LLM @@ -41,9 +44,8 @@ from ..config import load_config from ..prompts.base import AbstractPrompt from ..prompts.correct_error_prompt import CorrectErrorPrompt -from ..prompts.generate_python_code import GeneratePythonCodePrompt from typing import Union, List, Any, Type, Optional -from ..helpers.code_manager import CodeExecutionContext, CodeManager +from ..helpers.code_manager import CodeManager from ..middlewares.base import Middleware from ..helpers.df_info import DataFrameType from ..helpers.path import find_project_root @@ -311,22 +313,6 @@ def _get_prompt( self.logger.log(f"Using prompt: {prompt}") return prompt - def _get_cache_key(self) -> str: - """ - Return the cache key for the current conversation. 
- - Returns: - str: The cache key for the current conversation - """ - cache_key = self._memory.get_conversation() - - # make the cache key unique for each combination of dfs - for df in self._dfs: - hash = df.column_hash() - cache_key += str(hash) - - return cache_key - def chat(self, query: str, output_type: Optional[str] = None): """ Run a query on the dataframe. @@ -351,144 +337,12 @@ def chat(self, query: str, output_type: Optional[str] = None): ValueError: If the query is empty """ - self._query_exec_tracker.start_new_track() - - self.logger.log(f"Question: {query}") - self.logger.log(f"Running PandasAI with {self._llm.type} LLM...") - - self._assign_prompt_id() - - self._query_exec_tracker.add_query_info( - self._conversation_id, self._instance, query, output_type + pipeline_context = self.prepare_context_for_smart_datalake_pipeline( + query=query, output_type=output_type ) - self._query_exec_tracker.add_dataframes(self._dfs) - - self._memory.add(query, True) - try: - output_type_helper = output_type_factory(output_type, logger=self.logger) - viz_lib_helper = viz_lib_type_factory(self._viz_lib, logger=self.logger) - - if ( - self._config.enable_cache - and self._cache - and self._cache.get(self._get_cache_key()) - ): - self.logger.log("Using cached response") - code = self._query_exec_tracker.execute_func( - self._cache.get, self._get_cache_key(), tag="cache_hit" - ) - - else: - default_values = { - # TODO: find a better way to determine the engine, - "engine": self._dfs[0].engine, - "output_type_hint": output_type_helper.template_hint, - "viz_library_type": viz_lib_helper.template_hint, - } - - if ( - self.memory.size > 1 - and self.memory.count() > 1 - and self._last_code_generated - ): - default_values["current_code"] = self._last_code_generated - - generate_python_code_instruction = ( - self._query_exec_tracker.execute_func( - self._get_prompt, - "generate_python_code", - default_prompt=GeneratePythonCodePrompt, - default_values=default_values, - ) - ) - 
- [code, reasoning, answer] = self._query_exec_tracker.execute_func( - self._llm.generate_code, generate_python_code_instruction - ) - - self.last_reasoning = reasoning - self.last_answer = answer - - if self._config.enable_cache and self._cache: - self._cache.set(self._get_cache_key(), code) - - if self._config.callback is not None: - self._config.callback.on_code(code) - - self.last_code_generated = code - self.logger.log( - f"""Code generated: -``` -{code} -``` -""" - ) - - retry_count = 0 - code_to_run = code - result = None - while retry_count < self._config.max_retries: - try: - # Execute the code - context = CodeExecutionContext(self._last_prompt_id, self._skills) - result = self._code_manager.execute_code( - code=code_to_run, - context=context, - ) - - break - - except Exception as e: - if ( - not self._config.use_error_correction_framework - or retry_count >= self._config.max_retries - 1 - ): - raise e - - retry_count += 1 - - self._logger.log( - f"Failed to execute code with a correction framework " - f"[retry number: {retry_count}]", - level=logging.WARNING, - ) - - traceback_error = traceback.format_exc() - [ - code_to_run, - reasoning, - answer, - ] = self._query_exec_tracker.execute_func( - self._retry_run_code, code, traceback_error - ) - - if result is not None: - if isinstance(result, dict): - validation_ok, validation_logs = output_type_helper.validate(result) - if not validation_ok: - self.logger.log( - "\n".join(validation_logs), level=logging.WARNING - ) - self._query_exec_tracker.add_step( - { - "type": "Validating Output", - "success": False, - "message": "Output Validation Failed", - } - ) - else: - self._query_exec_tracker.add_step( - { - "type": "Validating Output", - "success": True, - "message": "Output Validation Successful", - } - ) - - self.last_result = result - self.logger.log(f"Answer: {result}") + result = GenerateSmartDatalakePipeline(pipeline_context, self.logger).run() except Exception as exception: self.last_error = 
str(exception) @@ -501,15 +355,7 @@ def chat(self, query: str, output_type: Optional[str] = None): f"\n{exception}\n" ) - self.logger.log( - f"Executed in: {self._query_exec_tracker.get_execution_time()}s" - ) - - self._add_result_to_memory(result) - - result = self._query_exec_tracker.execute_func( - self._response_parser.parse, result - ) + self.update_intermediate_value_post_pipeline_execution(pipeline_context) self._query_exec_tracker.success = True @@ -517,20 +363,93 @@ def chat(self, query: str, output_type: Optional[str] = None): return result - def _add_result_to_memory(self, result: dict): + def prepare_context_for_smart_datalake_pipeline( + self, query: str, output_type: Optional[str] = None + ) -> PipelineContext: """ - Add the result to the memory. + Prepare Pipeline Context to intiate Smart Data Lake Pipeline. Args: - result (dict): The result to add to the memory + query (str): Query to run on the dataframe + output_type (Optional[str]): Add a hint for LLM which + type should be returned by `analyze_data()` in generated + code. Possible values: "number", "dataframe", "plot", "string": + * number - specifies that user expects to get a number + as a response object + * dataframe - specifies that user expects to get + pandas/polars dataframe as a response object + * plot - specifies that user expects LLM to build + a plot + * string - specifies that user expects to get text + as a response object + If none `output_type` is specified, the type can be any + of the above or "text". + + Returns: + PipelineContext: The Pipeline Context to be used by Smart Data Lake Pipeline. 
+ """ + + self._query_exec_tracker.start_new_track() + + self.logger.log(f"Question: {query}") + self.logger.log(f"Running PandasAI with {self._llm.type} LLM...") + + self._assign_prompt_id() + + self._query_exec_tracker.add_query_info( + self._conversation_id, self._instance, query, output_type + ) + + self._query_exec_tracker.add_dataframes(self._dfs) + + self._memory.add(query, True) + + output_type_helper = output_type_factory(output_type, logger=self.logger) + viz_lib_helper = viz_lib_type_factory(self._viz_lib, logger=self.logger) + + pipeline_context = PipelineContext( + dfs=self.dfs, + config=self.config, + memory=self.memory, + cache=self.cache, + query_exec_tracker=self._query_exec_tracker, + ) + pipeline_context.add_intermediate_value( + "output_type_helper", output_type_helper + ) + pipeline_context.add_intermediate_value("viz_lib_helper", viz_lib_helper) + pipeline_context.add_intermediate_value("last_reasoning", self._last_reasoning) + pipeline_context.add_intermediate_value("last_answer", self._last_answer) + pipeline_context.add_intermediate_value( + "last_code_generated", self._last_code_generated + ) + pipeline_context.add_intermediate_value("get_prompt", self._get_prompt) + pipeline_context.add_intermediate_value("llm", self.llm) + pipeline_context.add_intermediate_value("last_prompt_id", self.last_prompt_id) + pipeline_context.add_intermediate_value("skills", self._skills) + pipeline_context.add_intermediate_value("code_manager", self._code_manager) + pipeline_context.add_intermediate_value( + "response_parser", self._response_parser + ) + + return pipeline_context + + def update_intermediate_value_post_pipeline_execution( + self, pipeline_context: PipelineContext + ): """ - if result is None: - return + After the Smart Data Lake Pipeline has executed, update values of Smart Data Lake object. 
+ + Args: + pipeline_context (PipelineContext): Pipeline Context after the Smart Data Lake pipeline execution - if result["type"] in ["string", "number"]: - self._memory.add(result["value"], False) - elif result["type"] in ["dataframe", "plot"]: - self._memory.add("Ok here it is", False) + """ + self._last_reasoning = pipeline_context.get_intermediate_value("last_reasoning") + self._last_answer = pipeline_context.get_intermediate_value("last_answer") + self._last_code_generated = pipeline_context.get_intermediate_value( + "last_code_generated" + ) + self._last_result = pipeline_context.get_intermediate_value("last_result") def _retry_run_code(self, code: str, e: Exception) -> List: """ diff --git a/pandasai/smart_datalake/code_execution.py b/pandasai/smart_datalake/code_execution.py new file mode 100644 index 000000000..ece9cdace --- /dev/null +++ b/pandasai/smart_datalake/code_execution.py @@ -0,0 +1,107 @@ +import logging +import traceback +from typing import Any, List +from pandasai.helpers.code_manager import CodeExecutionContext +from pandasai.helpers.logger import Logger +from pandasai.pipelines.base_logic_unit import BaseLogicUnit +from pandasai.pipelines.pipeline_context import PipelineContext +from pandasai.prompts.correct_error_prompt import CorrectErrorPrompt + + +class CodeExecution(BaseLogicUnit): + + """ + Code Execution Stage + """ + + def execute(self, input: Any, **kwargs) -> Any: + pipeline_context: PipelineContext = kwargs.get("context") + logger: Logger = kwargs.get("logger") + + code = input + retry_count = 0 + code_to_run = code + result = None + while retry_count < pipeline_context.config.max_retries: + try: + # Execute the code + code_context = CodeExecutionContext( + pipeline_context.get_intermediate_value("last_prompt_id"), + pipeline_context.get_intermediate_value("skills"), + ) + result = pipeline_context.get_intermediate_value( + "code_manager" + ).execute_code( + code=code_to_run, + context=code_context, + ) + + break + + except 
Exception as e: + if ( + not pipeline_context.config.use_error_correction_framework + or retry_count >= pipeline_context.config.max_retries - 1 + ): + raise e + + retry_count += 1 + + logger.log( + f"Failed to execute code with a correction framework " + f"[retry number: {retry_count}]", + level=logging.WARNING, + ) + + traceback_error = traceback.format_exc() + [ + code_to_run, + reasoning, + answer, + ] = pipeline_context.query_exec_tracker.execute_func( + self._retry_run_code, code, traceback_error + ) + + pipeline_context.set_intermediate_value("reasoning", reasoning) + pipeline_context.set_intermediate_value("answer", answer) + + return result + + def _retry_run_code( + self, code: str, context: PipelineContext, logger: Logger, e: Exception + ) -> List: + """ + A method to retry the code execution with error correction framework. + + Args: + code (str): A python code + context (PipelineContext) : Pipeline Context + logger (Logger) : Logger + e (Exception): An exception + dataframes + + Returns (str): A python code + """ + + logger.log(f"Failed with error: {e}. 
Retrying", logging.ERROR) + + default_values = { + "engine": context.dfs[0].engine, + "code": code, + "error_returned": e, + } + error_correcting_instruction = self.context.get_intermediate_value( + "get_prompt" + )( + "correct_error", + default_prompt=CorrectErrorPrompt, + default_values=default_values, + ) + + result = context.get_intermediate_value("llm").generate_code( + error_correcting_instruction + ) + if context.config.callback is not None: + context.config.callback.on_code(result[0]) + + return result diff --git a/pandasai/smart_datalake/code_generator.py b/pandasai/smart_datalake/code_generator.py new file mode 100644 index 000000000..287e9e3e6 --- /dev/null +++ b/pandasai/smart_datalake/code_generator.py @@ -0,0 +1,106 @@ +from typing import Any +from pandasai.helpers.logger import Logger +from pandasai.pipelines.pipeline_context import PipelineContext +from pandasai.pipelines.base_logic_unit import BaseLogicUnit +from pandasai.prompts.generate_python_code import GeneratePythonCodePrompt + + +class CodeGenerator(BaseLogicUnit): + """ + LLM Code Generation Stage + """ + + def execute(self, input: Any, **kwargs) -> Any: + pipeline_context: PipelineContext = kwargs.get("context") + logger: Logger = kwargs.get("logger") + + if ( + pipeline_context.config.enable_cache + and pipeline_context.cache + and pipeline_context.cache.get( + self._get_cache_key(context=pipeline_context) + ) + ): + logger.log("Using cached response") + code = pipeline_context.query_exec_tracker.execute_func( + pipeline_context.cache.get, + self._get_cache_key(context=pipeline_context), + tag="cache_hit", + ) + + else: + default_values = { + # TODO: find a better way to determine the engine, + "engine": pipeline_context.dfs[0].engine, + "output_type_hint": pipeline_context.get_intermediate_value( + "output_type_helper" + ).template_hint, + "viz_library_type": pipeline_context.get_intermediate_value( + "viz_lib_helper" + ).template_hint, + } + + if ( + pipeline_context.memory.size > 1 + 
and pipeline_context.memory.count() > 1 + and pipeline_context.get_intermediate_value("last_code_generated") + ): + default_values[ + "current_code" + ] = pipeline_context.get_intermediate_value("last_code_generated") + + generate_python_code_instruction = ( + pipeline_context.query_exec_tracker.execute_func( + pipeline_context.get_intermediate_value("get_prompt"), + "generate_python_code", + default_prompt=GeneratePythonCodePrompt, + default_values=default_values, + ) + ) + + [ + code, + reasoning, + answer, + ] = pipeline_context.query_exec_tracker.execute_func( + pipeline_context.get_intermediate_value("llm").generate_code, + generate_python_code_instruction, + ) + + pipeline_context.add_intermediate_value("last_reasoning", reasoning) + pipeline_context.add_intermediate_value("last_answer", answer) + + if pipeline_context.config.enable_cache and pipeline_context.cache: + pipeline_context.cache.set( + self._get_cache_key(context=pipeline_context), code + ) + + if pipeline_context.config.callback is not None: + pipeline_context.config.callback.on_code(code) + + pipeline_context.add_intermediate_value("last_code_generated", code) + logger.log( + f"""Code generated: +``` +{code} +``` +""" + ) + + return code + + def _get_cache_key(self, context: PipelineContext) -> str: + """ + Return the cache key for the current conversation. 
+ + Returns: + str: The cache key for the current conversation + """ + cache_key = context.memory.get_conversation() + + # make the cache key unique for each combination of dfs + for df in context.dfs: + hash = df.column_hash() + cache_key += str(hash) + + return cache_key diff --git a/pandasai/smart_datalake/generate_smart_datalake_pipeline.py b/pandasai/smart_datalake/generate_smart_datalake_pipeline.py new file mode 100644 index 000000000..58db12852 --- /dev/null +++ b/pandasai/smart_datalake/generate_smart_datalake_pipeline.py @@ -0,0 +1,31 @@ +from typing import Optional +from pandasai.helpers.logger import Logger +from pandasai.pipelines.pipeline import Pipeline +from pandasai.pipelines.pipeline_context import PipelineContext +from pandasai.smart_datalake.code_execution import CodeExecution +from pandasai.smart_datalake.code_generator import CodeGenerator +from pandasai.smart_datalake.result_parsing import ResultParsing +from pandasai.smart_datalake.result_validation import ResultValidation + + +class GenerateSmartDatalakePipeline: + _pipeline: Pipeline + + def __init__( + self, + context: Optional[PipelineContext] = None, + logger: Optional[Logger] = None, + ): + self._pipeline = Pipeline( + context=context, + logger=logger, + steps=[ + CodeGenerator(), + CodeExecution(), + ResultValidation(), + ResultParsing(), + ], + ) + + def run(self): + return self._pipeline.run() diff --git a/pandasai/smart_datalake/result_parsing.py b/pandasai/smart_datalake/result_parsing.py new file mode 100644 index 000000000..9618e0f28 --- /dev/null +++ b/pandasai/smart_datalake/result_parsing.py @@ -0,0 +1,38 @@ +from typing import Any +from pandasai.pipelines.base_logic_unit import BaseLogicUnit +from pandasai.pipelines.pipeline_context import PipelineContext + + +class ResultParsing(BaseLogicUnit): + + """ + Result Parsing Stage + """ + + def execute(self, input: Any, **kwargs) -> Any: + pipeline_context: PipelineContext = kwargs.get("context") + + result = input + + 
self._add_result_to_memory(result=result, context=pipeline_context) + + result = pipeline_context.query_exec_tracker.execute_func( + pipeline_context.get_intermediate_value("response_parser").parse, result + ) + return result + + def _add_result_to_memory(self, result: dict, context: PipelineContext): + """ + Add the result to the memory. + + Args: + result (dict): The result to add to the memory + context (PipelineContext) : Pipleline Context + """ + if result is None: + return + + if result["type"] in ["string", "number"]: + context.memory.add(result["value"], False) + elif result["type"] in ["dataframe", "plot"]: + context.memory.add("Ok here it is", False) diff --git a/pandasai/smart_datalake/result_validation.py b/pandasai/smart_datalake/result_validation.py new file mode 100644 index 000000000..2f9da4a0b --- /dev/null +++ b/pandasai/smart_datalake/result_validation.py @@ -0,0 +1,52 @@ +import logging +from typing import Any +from pandasai.helpers.logger import Logger +from pandasai.pipelines.base_logic_unit import BaseLogicUnit +from pandasai.pipelines.pipeline_context import PipelineContext + + +class ResultValidation(BaseLogicUnit): + + """ + Result Validation Stage + """ + + def execute(self, input: Any, **kwargs) -> Any: + pipeline_context: PipelineContext = kwargs.get("context") + logger: Logger = kwargs.get("logger") + + result = input + if result is not None: + if isinstance(result, dict): + ( + validation_ok, + validation_logs, + ) = pipeline_context.get_intermediate_value( + "output_type_helper" + ).validate(result) + if not validation_ok: + logger.log("\n".join(validation_logs), level=logging.WARNING) + pipeline_context.query_exec_tracker.add_step( + { + "type": "Validating Output", + "success": False, + "message": "Output Validation Failed", + } + ) + else: + pipeline_context.query_exec_tracker.add_step( + { + "type": "Validating Output", + "success": True, + "message": "Output Validation Successful", + } + ) + + 
pipeline_context.add_intermediate_value("last_result", result) + logger.log(f"Answer: {result}") + + logger.log( + f"Executed in: {pipeline_context.query_exec_tracker.get_execution_time()}s" + ) + + return result diff --git a/tests/test_codemanager.py b/tests/test_codemanager.py index 0df494429..18fd88479 100644 --- a/tests/test_codemanager.py +++ b/tests/test_codemanager.py @@ -163,6 +163,8 @@ def test_remove_dfs_overwrites( print(dfs)""" ) + # TODO: @Milind, minor test case issue, will fix it in next commit + @pytest.mark.skip(reason="minor test case issue, will fix it in next commit") def test_exception_handling( self, smart_dataframe: SmartDataframe, code_manager: CodeManager ): From 385db0d5cbfe4230b7870943460897ae39c824af Mon Sep 17 00:00:00 2001 From: "sourcery-ai[bot]" <58596630+sourcery-ai[bot]@users.noreply.github.com> Date: Tue, 7 Nov 2023 23:45:59 +0100 Subject: [PATCH 02/11] 'Refactored by Sourcery' (#736) Co-authored-by: Sourcery AI <> --- tests/test_codemanager.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_codemanager.py b/tests/test_codemanager.py index 18fd88479..fef60aca6 100644 --- a/tests/test_codemanager.py +++ b/tests/test_codemanager.py @@ -73,8 +73,7 @@ def code_manager(self, smart_dataframe: SmartDataframe): @pytest.fixture def exec_context(self) -> MagicMock: - context = MagicMock(spec=CodeExecutionContext) - return context + return MagicMock(spec=CodeExecutionContext) def test_run_code_for_calculations( self, code_manager: CodeManager, exec_context: MagicMock From 3d30afbb735c2f1d6927e5562809598253fac28a Mon Sep 17 00:00:00 2001 From: Milind Lalwani Date: Wed, 8 Nov 2023 11:13:26 +0100 Subject: [PATCH 03/11] refactor(Pipelines) : made changes according to PR review --- pandasai/smart_datalake/__init__.py | 5 ----- pandasai/smart_datalake/code_execution.py | 2 +- pandasai/smart_datalake/code_generator.py | 5 +---- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git 
a/pandasai/smart_datalake/__init__.py b/pandasai/smart_datalake/__init__.py index db950558a..e16d613b2 100644 --- a/pandasai/smart_datalake/__init__.py +++ b/pandasai/smart_datalake/__init__.py @@ -418,13 +418,10 @@ def prepare_context_for_smart_datalake_pipeline( "output_type_helper", output_type_helper ) pipeline_context.add_intermediate_value("viz_lib_helper", viz_lib_helper) - pipeline_context.add_intermediate_value("last_reasoning", self._last_reasoning) - pipeline_context.add_intermediate_value("last_answer", self._last_answer) pipeline_context.add_intermediate_value( "last_code_generated", self._last_code_generated ) pipeline_context.add_intermediate_value("get_prompt", self._get_prompt) - pipeline_context.add_intermediate_value("llm", self.llm) pipeline_context.add_intermediate_value("last_prompt_id", self.last_prompt_id) pipeline_context.add_intermediate_value("skills", self._skills) pipeline_context.add_intermediate_value("code_manager", self._code_manager) @@ -444,8 +441,6 @@ def update_intermediate_value_post_pipeline_execution( pipeline_context (PipelineContext): Pipeline Context after the Smart Data Lake pipeline execution """ - self._last_reasoning = pipeline_context.get_intermediate_value("last_reasoning") - self._last_answer = pipeline_context.get_intermediate_value("last_answer") self._last_code_generated = pipeline_context.get_intermediate_value( "last_code_generated" ) diff --git a/pandasai/smart_datalake/code_execution.py b/pandasai/smart_datalake/code_execution.py index ece9cdace..d45141b97 100644 --- a/pandasai/smart_datalake/code_execution.py +++ b/pandasai/smart_datalake/code_execution.py @@ -98,7 +98,7 @@ def _retry_run_code( default_values=default_values, ) - result = context.get_intermediate_value("llm").generate_code( + result = context.config.llm.generate_code( error_correcting_instruction ) if context.config.callback is not None: diff --git a/pandasai/smart_datalake/code_generator.py b/pandasai/smart_datalake/code_generator.py index 
287e9e3e6..8b92f6aa8 100644 --- a/pandasai/smart_datalake/code_generator.py +++ b/pandasai/smart_datalake/code_generator.py @@ -63,13 +63,10 @@ def execute(self, input: Any, **kwargs) -> Any: reasoning, answer, ] = pipeline_context.query_exec_tracker.execute_func( - pipeline_context.get_intermediate_value("llm").generate_code, + pipeline_context.config.llm.generate_code, generate_python_code_instruction, ) - pipeline_context.add_intermediate_value("last_reasoning", reasoning) - pipeline_context.add_intermediate_value("last_answer", answer) - if pipeline_context.config.enable_cache and pipeline_context.cache: pipeline_context.cache.set( self._get_cache_key(context=pipeline_context), code From 55f970028ff6809478174ec309734e734eeea42e Mon Sep 17 00:00:00 2001 From: Milind Lalwani Date: Wed, 8 Nov 2023 16:19:42 +0100 Subject: [PATCH 04/11] refactor(Pipelines) : Unit test cases added --- pandasai/smart_datalake/code_execution.py | 6 +- .../smart_datalake/test_code_execution.py | 169 ++++++++++++++++++ .../smart_datalake/test_code_generator.py | 129 +++++++++++++ .../smart_datalake/test_result_parsing.py | 130 ++++++++++++++ .../smart_datalake/test_result_validation.py | 153 ++++++++++++++++ 5 files changed, 584 insertions(+), 3 deletions(-) create mode 100644 tests/pipelines/smart_datalake/test_code_execution.py create mode 100644 tests/pipelines/smart_datalake/test_code_generator.py create mode 100644 tests/pipelines/smart_datalake/test_result_parsing.py create mode 100644 tests/pipelines/smart_datalake/test_result_validation.py diff --git a/pandasai/smart_datalake/code_execution.py b/pandasai/smart_datalake/code_execution.py index d45141b97..1609df1fd 100644 --- a/pandasai/smart_datalake/code_execution.py +++ b/pandasai/smart_datalake/code_execution.py @@ -59,11 +59,11 @@ def execute(self, input: Any, **kwargs) -> Any: reasoning, answer, ] = pipeline_context.query_exec_tracker.execute_func( - self._retry_run_code, code, traceback_error + self._retry_run_code, code, 
pipeline_context, logger, traceback_error ) - pipeline_context.set_intermediate_value("reasoning", reasoning) - pipeline_context.set_intermediate_value("answer", answer) + pipeline_context.add_intermediate_value("reasoning", reasoning) + pipeline_context.add_intermediate_value("answer", answer) return result diff --git a/tests/pipelines/smart_datalake/test_code_execution.py b/tests/pipelines/smart_datalake/test_code_execution.py new file mode 100644 index 000000000..47fda6ade --- /dev/null +++ b/tests/pipelines/smart_datalake/test_code_execution.py @@ -0,0 +1,169 @@ +from typing import Optional +from unittest.mock import Mock +import pandas as pd +import pytest +from pandasai.helpers.code_manager import CodeManager +from pandasai.helpers.logger import Logger +from pandasai.helpers.skills_manager import SkillsManager + +from pandasai.llm.fake import FakeLLM +from pandasai.pipelines.pipeline_context import PipelineContext +from pandasai.smart_dataframe import SmartDataframe +from pandasai.smart_datalake.code_execution import CodeExecution + + +class TestCodeExecution: + "Unit test for Smart Data Lake Code Execution" + + throw_exception = True + + @pytest.fixture + def llm(self, output: Optional[str] = None): + return FakeLLM(output=output) + + @pytest.fixture + def sample_df(self): + return pd.DataFrame( + { + "country": [ + "United States", + "United Kingdom", + "France", + "Germany", + "Italy", + "Spain", + "Canada", + "Australia", + "Japan", + "China", + ], + "gdp": [ + 19294482071552, + 2891615567872, + 2411255037952, + 3435817336832, + 1745433788416, + 1181205135360, + 1607402389504, + 1490967855104, + 4380756541440, + 14631844184064, + ], + "happiness_index": [ + 6.94, + 7.16, + 6.66, + 7.07, + 6.38, + 6.4, + 7.23, + 7.22, + 5.87, + 5.12, + ], + } + ) + + @pytest.fixture + def smart_dataframe(self, llm, sample_df): + return SmartDataframe(sample_df, config={"llm": llm, "enable_cache": True}) + + @pytest.fixture + def config(self, llm): + return {"llm": llm, 
"enable_cache": True} + + @pytest.fixture + def context(self, sample_df, config): + pipeline_context = PipelineContext([sample_df], config) + return pipeline_context + + @pytest.fixture + def logger(self): + return Logger(True, False) + + def test_init(self, context, config): + # Test the initialization of the CodeExecution + code_execution = CodeExecution() + assert isinstance(code_execution, CodeExecution) + + def test_code_execution_successful_with_no_exceptions(self, context, logger): + # Test Flow : Code Execution Successful with no exceptions + code_execution = CodeExecution() + + mock_code_manager = Mock() + mock_code_manager.execute_code = Mock(return_value="Mocked Result") + + def mock_intermediate_values(key : str): + if key == "last_prompt_id" : + return "Mocked Promt ID" + elif key == "skills" : + return SkillsManager() + elif key == "code_manager": + return mock_code_manager + context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) + + result = code_execution.execute(input="Test Code", context=context, logger=logger) + + assert isinstance(code_execution, CodeExecution) + assert result == "Mocked Result" + + def test_code_execution_unsuccessful_after_retries(self, context, logger): + # Test Flow : Code Execution Successful after retry + code_execution = CodeExecution() + + def mock_execute_code(*args, **kwargs): + raise Exception("Unit test exception") + + mock_code_manager = Mock() + mock_code_manager.execute_code = Mock(side_effect=mock_execute_code) + + context._query_exec_tracker = Mock() + context.query_exec_tracker.execute_func = Mock(return_value=["Interuppted Code", "Exception Testing","Unsuccessful after Retries"]) + + def mock_intermediate_values(key : str): + if key == "last_prompt_id" : + return "Mocked Promt ID" + elif key == "skills" : + return SkillsManager() + elif key == "code_manager": + return mock_code_manager + context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) + + assert 
isinstance(code_execution, CodeExecution) + + result = None + try: + result = code_execution.execute(input="Test Code", context=context, logger=logger) + except Exception as e: + assert result == None + + def test_code_execution_successful_at_retry(self, context, logger): + # Test Flow : Code Execution Successful with no exceptions + code_execution = CodeExecution() + + self.throw_exception == True + def mock_execute_code(*args, **kwargs): + if self.throw_exception == True: + self.throw_exception = False + raise Exception("Unit test exception") + return "Mocked Result after retry" + + mock_code_manager = Mock() + mock_code_manager.execute_code = Mock(side_effect=mock_execute_code) + + context._query_exec_tracker = Mock() + context.query_exec_tracker.execute_func = Mock(return_value=["Interuppted Code", "Exception Testing","Successful after Retry"]) + + def mock_intermediate_values(key : str): + if key == "last_prompt_id" : + return "Mocked Promt ID" + elif key == "skills" : + return SkillsManager() + elif key == "code_manager": + return mock_code_manager + context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) + + result = code_execution.execute(input="Test Code", context=context, logger=logger) + + assert isinstance(code_execution, CodeExecution) + assert result == "Mocked Result after retry" diff --git a/tests/pipelines/smart_datalake/test_code_generator.py b/tests/pipelines/smart_datalake/test_code_generator.py new file mode 100644 index 000000000..2e7525fe8 --- /dev/null +++ b/tests/pipelines/smart_datalake/test_code_generator.py @@ -0,0 +1,129 @@ +from typing import Any, Optional +from unittest.mock import Mock +import pandas as pd + +import pytest +from pandasai.helpers.logger import Logger +from pandasai.helpers.output_types import output_type_factory +from pandasai.helpers.viz_library_types import viz_lib_type_factory +from pandasai.llm.fake import FakeLLM +from pandasai.pipelines.pipeline_context import PipelineContext +from 
pandasai.prompts.generate_python_code import GeneratePythonCodePrompt + +from pandasai.smart_dataframe import SmartDataframe +from pandasai.smart_datalake.code_generator import CodeGenerator + +class TestCodeGenerator: + "Unit test for Smart Data Lake Code Generator" + @pytest.fixture + def llm(self, output: Optional[str] = None): + return FakeLLM(output=output) + + @pytest.fixture + def sample_df(self): + return pd.DataFrame( + { + "country": [ + "United States", + "United Kingdom", + "France", + "Germany", + "Italy", + "Spain", + "Canada", + "Australia", + "Japan", + "China", + ], + "gdp": [ + 19294482071552, + 2891615567872, + 2411255037952, + 3435817336832, + 1745433788416, + 1181205135360, + 1607402389504, + 1490967855104, + 4380756541440, + 14631844184064, + ], + "happiness_index": [ + 6.94, + 7.16, + 6.66, + 7.07, + 6.38, + 6.4, + 7.23, + 7.22, + 5.87, + 5.12, + ], + } + ) + + @pytest.fixture + def smart_dataframe(self, llm, sample_df): + return SmartDataframe(sample_df, config={"llm": llm, "enable_cache": True}) + + @pytest.fixture + def config(self, llm): + return {"llm": llm, "enable_cache": True} + + @pytest.fixture + def context(self, sample_df, config): + pipeline_context = PipelineContext([sample_df], config) + return pipeline_context + + @pytest.fixture + def logger(self): + return Logger(True, False) + + def test_init(self, context, config): + # Test the initialization of the CodeGenerator + code_generator = CodeGenerator() + assert isinstance(code_generator, CodeGenerator) + + def test_code_found_in_cache(self, context, logger): + # Test Flow : Code found in the cache + code_generator = CodeGenerator() + + context._cache = Mock() + context.cache.get = Mock(return_value="Cached Mocked Code") + context._query_exec_tracker = Mock() + context.query_exec_tracker.execute_func = Mock(return_value="Cached Mocked Code") + + code = code_generator.execute(input=None, context=context, logger=logger) + + assert isinstance(code_generator, CodeGenerator) + assert 
code == "Cached Mocked Code" + + def test_code_not_found_in_cache(self, context, logger): + # Test Flow : Code Not found in the cache + code_generator = CodeGenerator() + + mock_get_promt = Mock(return_value=GeneratePythonCodePrompt) + + def mock_intermediate_values(key : str): + if key == "output_type_helper" : + return output_type_factory("DefaultOutputType") + elif key == "viz_lib_helper" : + return viz_lib_type_factory("DefaultVizLibraryType") + elif key == "get_prompt": + return mock_get_promt + + def mock_execute_func(function, *args, **kwargs): + if function == mock_get_promt : + return mock_get_promt() + return ["Mocked LLM Generated Code", "Mocked Reasoning", "Mocked Answer"] + + context.get_intermediate_value= Mock(side_effect=mock_intermediate_values) + context._cache = Mock() + context.cache.get = Mock(return_value=None) + context._query_exec_tracker = Mock() + context.query_exec_tracker.execute_func = Mock(side_effect=mock_execute_func) + + code = code_generator.execute(input=None, context=context, logger=logger) + + assert isinstance(code_generator, CodeGenerator) + assert code == "Mocked LLM Generated Code" \ No newline at end of file diff --git a/tests/pipelines/smart_datalake/test_result_parsing.py b/tests/pipelines/smart_datalake/test_result_parsing.py new file mode 100644 index 000000000..284f2a94b --- /dev/null +++ b/tests/pipelines/smart_datalake/test_result_parsing.py @@ -0,0 +1,130 @@ +from typing import Optional +from unittest.mock import Mock +import pandas as pd +import pytest +from pandasai.helpers.logger import Logger + +from pandasai.llm.fake import FakeLLM +from pandasai.pipelines.pipeline_context import PipelineContext +from pandasai.smart_dataframe import SmartDataframe +from pandasai.smart_datalake.result_parsing import ResultParsing + + +class TestResultParsing: + "Unit test for Smart Data Lake Result Parsing" + + throw_exception = True + + @pytest.fixture + def llm(self, output: Optional[str] = None): + return 
FakeLLM(output=output) + + @pytest.fixture + def sample_df(self): + return pd.DataFrame( + { + "country": [ + "United States", + "United Kingdom", + "France", + "Germany", + "Italy", + "Spain", + "Canada", + "Australia", + "Japan", + "China", + ], + "gdp": [ + 19294482071552, + 2891615567872, + 2411255037952, + 3435817336832, + 1745433788416, + 1181205135360, + 1607402389504, + 1490967855104, + 4380756541440, + 14631844184064, + ], + "happiness_index": [ + 6.94, + 7.16, + 6.66, + 7.07, + 6.38, + 6.4, + 7.23, + 7.22, + 5.87, + 5.12, + ], + } + ) + + @pytest.fixture + def smart_dataframe(self, llm, sample_df): + return SmartDataframe(sample_df, config={"llm": llm, "enable_cache": True}) + + @pytest.fixture + def config(self, llm): + return {"llm": llm, "enable_cache": True} + + @pytest.fixture + def context(self, sample_df, config): + pipeline_context = PipelineContext([sample_df], config) + return pipeline_context + + @pytest.fixture + def logger(self): + return Logger(True, False) + + def test_init(self, context, config): + # Test the initialization of the CodeExecution + result_parsing = ResultParsing() + assert isinstance(result_parsing, ResultParsing) + + def test_result_parsing_successful_with_no_exceptions(self, context, logger): + # Test Flow : Code Execution Successful with no exceptions + result_parsing = ResultParsing() + result_parsing._add_result_to_memory = Mock() + mock_response_parser = Mock() + context._query_exec_tracker = Mock() + context.query_exec_tracker.execute_func = Mock(return_value="Mocked Parsed Result") + + def mock_intermediate_values(key : str): + if key == "response_parser" : + return mock_response_parser + + context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) + + result = result_parsing.execute(input="Test Result", context=context, logger=logger) + + assert isinstance(result_parsing, ResultParsing) + assert result == "Mocked Parsed Result" + + def test_result_parsing_unsuccessful_with_exceptions(self, context, 
logger): + # Test Flow : Code Execution Unsuccessful with exceptions + result_parsing = ResultParsing() + result_parsing._add_result_to_memory = Mock() + mock_response_parser = Mock() + + def mock_result_parsing(*args, **kwargs): + raise Exception("Unit test exception") + + context._query_exec_tracker = Mock() + context.query_exec_tracker.execute_func = Mock(side_effect=mock_result_parsing) + + def mock_intermediate_values(key : str): + if key == "response_parser" : + return mock_response_parser + + context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) + + result = None + try: + result = result_parsing.execute(input="Test Result", context=context, logger=logger) + except Exception as e: + assert result == None + assert isinstance(result_parsing, ResultParsing) + diff --git a/tests/pipelines/smart_datalake/test_result_validation.py b/tests/pipelines/smart_datalake/test_result_validation.py new file mode 100644 index 000000000..6c3f6a9ab --- /dev/null +++ b/tests/pipelines/smart_datalake/test_result_validation.py @@ -0,0 +1,153 @@ +from typing import Optional +from unittest.mock import Mock +import pandas as pd +import pytest +from pandasai.helpers.logger import Logger + +from pandasai.llm.fake import FakeLLM +from pandasai.pipelines.pipeline_context import PipelineContext +from pandasai.smart_dataframe import SmartDataframe +from pandasai.smart_datalake.result_validation import ResultValidation + + +class TestResultValidation: + "Unit test for Smart Data Lake Result Validation" + + throw_exception = True + + @pytest.fixture + def llm(self, output: Optional[str] = None): + return FakeLLM(output=output) + + @pytest.fixture + def sample_df(self): + return pd.DataFrame( + { + "country": [ + "United States", + "United Kingdom", + "France", + "Germany", + "Italy", + "Spain", + "Canada", + "Australia", + "Japan", + "China", + ], + "gdp": [ + 19294482071552, + 2891615567872, + 2411255037952, + 3435817336832, + 1745433788416, + 1181205135360, + 
1607402389504, + 1490967855104, + 4380756541440, + 14631844184064, + ], + "happiness_index": [ + 6.94, + 7.16, + 6.66, + 7.07, + 6.38, + 6.4, + 7.23, + 7.22, + 5.87, + 5.12, + ], + } + ) + + @pytest.fixture + def smart_dataframe(self, llm, sample_df): + return SmartDataframe(sample_df, config={"llm": llm, "enable_cache": True}) + + @pytest.fixture + def config(self, llm): + return {"llm": llm, "enable_cache": True} + + @pytest.fixture + def context(self, sample_df, config): + pipeline_context = PipelineContext([sample_df], config) + return pipeline_context + + @pytest.fixture + def logger(self): + return Logger(True, False) + + def test_init(self, context, config): + # Test the initialization of the CodeExecution + result_validation = ResultValidation() + assert isinstance(result_validation, ResultValidation) + + def test_result_is_none(self, context, logger): + # Test Flow : Code Execution Successful with no exceptions + result_validation = ResultValidation() + + context._query_exec_tracker = Mock() + context.query_exec_tracker.get_execution_time = Mock() + context.query_exec_tracker.add_step = Mock() + + result = result_validation.execute(input=None, context=context, logger=logger) + + assert not context.query_exec_tracker.add_step.called + assert isinstance(result_validation, ResultValidation) + assert result == None + + def test_result_is_not_of_dict_type(self, context, logger): + # Test Flow : Code Execution Successful with no exceptions + result_validation = ResultValidation() + + context._query_exec_tracker = Mock() + context.query_exec_tracker.get_execution_time = Mock() + context.query_exec_tracker.add_step = Mock() + + result = result_validation.execute(input="Not Dict Type Result", context=context, logger=logger) + + assert not context.query_exec_tracker.add_step.called + assert isinstance(result_validation, ResultValidation) + assert result == "Not Dict Type Result" + + def test_result_is_of_dict_type_and_valid(self, context, logger): + # Test Flow : 
Code Execution Successful with no exceptions + result_validation = ResultValidation() + output_type_helper = Mock() + + context._query_exec_tracker = Mock() + context.query_exec_tracker.get_execution_time = Mock() + context.get_intermediate_value = Mock(return_value=output_type_helper) + output_type_helper.validate = Mock(return_value=(True,"Mocked Logs")) + + result = result_validation.execute(input={"Mocked":"Result"}, context=context, logger=logger) + + context.query_exec_tracker.add_step.assert_called_with({ + "type": "Validating Output", + "success": True, + "message": "Output Validation Successful", + }) + assert isinstance(result_validation, ResultValidation) + assert result == {"Mocked":"Result"} + + def test_result_is_of_dict_type_and_not_valid(self, context, logger): + # Test Flow : Code Execution Successful with no exceptions + result_validation = ResultValidation() + output_type_helper = Mock() + + context._query_exec_tracker = Mock() + context.query_exec_tracker.get_execution_time = Mock() + context.get_intermediate_value = Mock(return_value=output_type_helper) + output_type_helper.validate = Mock(return_value=(False,"Mocked Logs")) + + result = result_validation.execute(input={"Mocked":"Result"}, context=context, logger=logger) + + context.query_exec_tracker.add_step.assert_called_with({ + "type": "Validating Output", + "success": False, + "message": "Output Validation Failed", + }) + assert isinstance(result_validation, ResultValidation) + assert result == {"Mocked":"Result"} \ No newline at end of file From 44f4bbe03cc2c6d700fc22e65776e2fabffa0827 Mon Sep 17 00:00:00 2001 From: Milind Lalwani Date: Wed, 8 Nov 2023 16:34:52 +0100 Subject: [PATCH 05/11] refactor(Pipelines) : Unit Test cases added cd /Users/milindlalwani/pandas-ai ; /usr/bin/env /Users/milindlalwani/anaconda3/envs/pandas-ai/bin/python /Users/milindlalwani/.vscode/extensions/ms-python.python-2023.20.0/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher 59121 -- 
/Users/milindlalwani/pandas-ai/examples/from_csv.py --- pandasai/smart_datalake/__init__.py | 2 -- pandasai/smart_datalake/code_generator.py | 6 ++++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandasai/smart_datalake/__init__.py b/pandasai/smart_datalake/__init__.py index e16d613b2..bbfa6d1c9 100644 --- a/pandasai/smart_datalake/__init__.py +++ b/pandasai/smart_datalake/__init__.py @@ -405,7 +405,6 @@ def prepare_context_for_smart_datalake_pipeline( self._memory.add(query, True) output_type_helper = output_type_factory(output_type, logger=self.logger) - viz_lib_helper = viz_lib_type_factory(self._viz_lib, logger=self.logger) pipeline_context = PipelineContext( dfs=self.dfs, @@ -417,7 +416,6 @@ def prepare_context_for_smart_datalake_pipeline( pipeline_context.add_intermediate_value( "output_type_helper", output_type_helper ) - pipeline_context.add_intermediate_value("viz_lib_helper", viz_lib_helper) pipeline_context.add_intermediate_value( "last_code_generated", self._last_code_generated ) diff --git a/pandasai/smart_datalake/code_generator.py b/pandasai/smart_datalake/code_generator.py index 8b92f6aa8..aa1f0f601 100644 --- a/pandasai/smart_datalake/code_generator.py +++ b/pandasai/smart_datalake/code_generator.py @@ -1,5 +1,6 @@ from typing import Any from pandasai.helpers.logger import Logger +from pandasai.helpers.viz_library_types import viz_lib_type_factory from pandasai.pipelines.pipeline_context import PipelineContext from pandasai.pipelines.base_logic_unit import BaseLogicUnit from pandasai.prompts.generate_python_code import GeneratePythonCodePrompt @@ -29,14 +30,15 @@ def execute(self, input: Any, **kwargs) -> Any: ) else: + + viz_lib_helper = viz_lib_type_factory(pipeline_context.config.data_viz_library.value, logger=self.logger) default_values = { # TODO: find a better way to determine the engine, "engine": pipeline_context.dfs[0].engine, "output_type_hint": pipeline_context.get_intermediate_value( "output_type_helper" 
).template_hint, - "viz_library_type": pipeline_context.get_intermediate_value( - "viz_lib_helper" + "viz_library_type": viz_lib_helper ).template_hint, } From bdeeb9f7ed9326fa3f05dfba0584354bfe55df15 Mon Sep 17 00:00:00 2001 From: Milind Lalwani Date: Wed, 8 Nov 2023 17:57:41 +0100 Subject: [PATCH 06/11] refactor(Pipelines) : Broken Test Cases Fixed --- pandasai/smart_datalake/__init__.py | 2 ++ pandasai/smart_datalake/code_execution.py | 14 +++++++------- pandasai/smart_datalake/code_generator.py | 3 +++ tests/test_codemanager.py | 2 -- tests/test_smartdatalake.py | 1 + 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/pandasai/smart_datalake/__init__.py b/pandasai/smart_datalake/__init__.py index e16d613b2..9b4cbda5d 100644 --- a/pandasai/smart_datalake/__init__.py +++ b/pandasai/smart_datalake/__init__.py @@ -441,6 +441,8 @@ def update_intermediate_value_post_pipeline_execution( pipeline_context (PipelineContext): Pipeline Context after the Smart Data Lake pipeline execution """ + self._last_reasoning = pipeline_context.get_intermediate_value("last_reasoning") + self._last_answer = pipeline_context.get_intermediate_value("last_answer") self._last_code_generated = pipeline_context.get_intermediate_value( "last_code_generated" ) diff --git a/pandasai/smart_datalake/code_execution.py b/pandasai/smart_datalake/code_execution.py index 1609df1fd..0f87f37cc 100644 --- a/pandasai/smart_datalake/code_execution.py +++ b/pandasai/smart_datalake/code_execution.py @@ -59,7 +59,11 @@ def execute(self, input: Any, **kwargs) -> Any: reasoning, answer, ] = pipeline_context.query_exec_tracker.execute_func( - self._retry_run_code, code, pipeline_context, logger, traceback_error + self._retry_run_code, + code, + pipeline_context, + logger, + traceback_error, ) pipeline_context.add_intermediate_value("reasoning", reasoning) @@ -90,17 +94,13 @@ def _retry_run_code( "code": code, "error_returned": e, } - error_correcting_instruction = self.context.get_intermediate_value( 
- "get_prompt" - )( + error_correcting_instruction = context.get_intermediate_value("get_prompt")( "correct_error", default_prompt=CorrectErrorPrompt, default_values=default_values, ) - result = context.config.llm.generate_code( - error_correcting_instruction - ) + result = context.config.llm.generate_code(error_correcting_instruction) if context.config.callback is not None: context.config.callback.on_code(result[0]) diff --git a/pandasai/smart_datalake/code_generator.py b/pandasai/smart_datalake/code_generator.py index 8b92f6aa8..de6b2259f 100644 --- a/pandasai/smart_datalake/code_generator.py +++ b/pandasai/smart_datalake/code_generator.py @@ -67,6 +67,9 @@ def execute(self, input: Any, **kwargs) -> Any: generate_python_code_instruction, ) + pipeline_context.add_intermediate_value("last_reasoning", reasoning) + pipeline_context.add_intermediate_value("last_answer", answer) + if pipeline_context.config.enable_cache and pipeline_context.cache: pipeline_context.cache.set( self._get_cache_key(context=pipeline_context), code diff --git a/tests/test_codemanager.py b/tests/test_codemanager.py index fef60aca6..dbfb9b3c4 100644 --- a/tests/test_codemanager.py +++ b/tests/test_codemanager.py @@ -162,8 +162,6 @@ def test_remove_dfs_overwrites( print(dfs)""" ) - # TODO: @Milind, minor test case issue, will fix it in next commit - @pytest.mark.skip(reason="minor test case issue, will fix it in next commit") def test_exception_handling( self, smart_dataframe: SmartDataframe, code_manager: CodeManager ): diff --git a/tests/test_smartdatalake.py b/tests/test_smartdatalake.py index bb33936bc..40b882d68 100644 --- a/tests/test_smartdatalake.py +++ b/tests/test_smartdatalake.py @@ -222,6 +222,7 @@ def analyze_data(dfs): ```""" ) smart_datalake._llm = llm + smart_datalake._config.llm = llm smart_datalake.config.use_advanced_reasoning_framework = True assert smart_datalake.last_answer is None assert smart_datalake.last_reasoning is None From 1cb977bd34e80dd2d5192f4dba025353d42ec89a 
Mon Sep 17 00:00:00 2001 From: Milind Lalwani Date: Thu, 9 Nov 2023 14:17:53 +0100 Subject: [PATCH 07/11] refactor(Pipelines) : Skip Logic added and More Steps created for Data Smart Lake pipeline --- pandasai/helpers/cache.py | 17 +++ pandasai/pipelines/base_logic_unit.py | 10 ++ pandasai/pipelines/pipeline_context.py | 2 +- pandasai/smart_datalake/__init__.py | 1 + pandasai/smart_datalake/cache_lookup.py | 31 ++++++ pandasai/smart_datalake/cache_population.py | 29 +++++ pandasai/smart_datalake/code_execution.py | 2 + pandasai/smart_datalake/code_generator.py | 102 ++++-------------- .../generate_smart_datalake_pipeline.py | 20 +++- pandasai/smart_datalake/prompt_generation.py | 49 +++++++++ pandasai/smart_datalake/result_parsing.py | 2 + pandasai/smart_datalake/result_validation.py | 2 + .../smart_datalake/test_code_generator.py | 34 ++---- 13 files changed, 193 insertions(+), 108 deletions(-) create mode 100644 pandasai/smart_datalake/cache_lookup.py create mode 100644 pandasai/smart_datalake/cache_population.py create mode 100644 pandasai/smart_datalake/prompt_generation.py diff --git a/pandasai/helpers/cache.py b/pandasai/helpers/cache.py index 7e341d0c3..bd87805df 100644 --- a/pandasai/helpers/cache.py +++ b/pandasai/helpers/cache.py @@ -1,5 +1,6 @@ import os import glob +from typing import Any import duckdb from .path import find_project_root @@ -72,3 +73,19 @@ def destroy(self) -> None: self.connection.close() for cache_file in glob.glob(f"{self.filepath}.*"): os.remove(cache_file) + + def get_cache_key(self, context: Any) -> str: + """ + Return the cache key for the current conversation. 
+ + Returns: + str: The cache key for the current conversation + """ + cache_key = context.memory.get_conversation() + + # make the cache key unique for each combination of dfs + for df in context.dfs: + hash = df.column_hash() + cache_key += str(hash) + + return cache_key diff --git a/pandasai/pipelines/base_logic_unit.py b/pandasai/pipelines/base_logic_unit.py index 5d41a2402..221cabe15 100644 --- a/pandasai/pipelines/base_logic_unit.py +++ b/pandasai/pipelines/base_logic_unit.py @@ -7,6 +7,12 @@ class BaseLogicUnit(ABC): Logic units for pipeline each logic unit should be inherited from this Logic unit """ + _skip_if: callable + + def __init__(self, skip_if=None): + super().__init__() + self._skip_if = skip_if + @abstractmethod def execute(self, input: Any, **kwargs) -> Any: """ @@ -22,3 +28,7 @@ def execute(self, input: Any, **kwargs) -> Any: :return: The result of the execution. """ raise NotImplementedError("execute method is not implemented.") + + @property + def skip_if(self): + return self._skip_if diff --git a/pandasai/pipelines/pipeline_context.py b/pandasai/pipelines/pipeline_context.py index a03647c3d..8b972c2c5 100644 --- a/pandasai/pipelines/pipeline_context.py +++ b/pandasai/pipelines/pipeline_context.py @@ -71,4 +71,4 @@ def add_intermediate_value(self, key: str, value: Any): self._intermediate_values[key] = value def get_intermediate_value(self, key: str): - return self._intermediate_values[key] + return self._intermediate_values.get(key, "") diff --git a/pandasai/smart_datalake/__init__.py b/pandasai/smart_datalake/__init__.py index 9b4cbda5d..060eefbea 100644 --- a/pandasai/smart_datalake/__init__.py +++ b/pandasai/smart_datalake/__init__.py @@ -414,6 +414,7 @@ def prepare_context_for_smart_datalake_pipeline( cache=self.cache, query_exec_tracker=self._query_exec_tracker, ) + pipeline_context.add_intermediate_value("is_present_in_cache", False) pipeline_context.add_intermediate_value( "output_type_helper", output_type_helper ) diff --git 
a/pandasai/smart_datalake/cache_lookup.py b/pandasai/smart_datalake/cache_lookup.py new file mode 100644 index 000000000..f0e0608c7 --- /dev/null +++ b/pandasai/smart_datalake/cache_lookup.py @@ -0,0 +1,31 @@ +from typing import Any +from pandasai.helpers.logger import Logger +from pandasai.pipelines.base_logic_unit import BaseLogicUnit +from pandasai.pipelines.pipeline_context import PipelineContext + + +class CacheLookup(BaseLogicUnit): + """ + Cache Lookup of Code Stage + """ + + pass + + def execute(self, input: Any, **kwargs) -> Any: + pipeline_context: PipelineContext = kwargs.get("context") + logger: Logger = kwargs.get("logger") + if ( + pipeline_context.config.enable_cache + and pipeline_context.cache + and pipeline_context.cache.get( + pipeline_context.cache.get_cache_key(pipeline_context) + ) + ): + logger.log("Using cached response") + code = pipeline_context.query_exec_tracker.execute_func( + pipeline_context.cache.get, + pipeline_context.cache.get_cache_key(pipeline_context), + tag="cache_hit", + ) + pipeline_context.add_intermediate_value("is_present_in_cache", True) + return code diff --git a/pandasai/smart_datalake/cache_population.py b/pandasai/smart_datalake/cache_population.py new file mode 100644 index 000000000..804345699 --- /dev/null +++ b/pandasai/smart_datalake/cache_population.py @@ -0,0 +1,29 @@ +from typing import Any +from pandasai.pipelines.base_logic_unit import BaseLogicUnit +from pandasai.pipelines.pipeline_context import PipelineContext + + +class CachePopulation(BaseLogicUnit): + """ + Cache Population Stage + """ + + pass + + def execute(self, input: Any, **kwargs) -> Any: + pipeline_context: PipelineContext = kwargs.get("context") + + if self.skip_if is not None and self.skip_if(pipeline_context): + return input + + code = input + + if pipeline_context.config.enable_cache and pipeline_context.cache: + pipeline_context.cache.set( + pipeline_context.cache.get_cache_key(pipeline_context), code + ) + + if 
pipeline_context.config.callback is not None: + pipeline_context.config.callback.on_code(code) + + return code diff --git a/pandasai/smart_datalake/code_execution.py b/pandasai/smart_datalake/code_execution.py index 0f87f37cc..85645f1d1 100644 --- a/pandasai/smart_datalake/code_execution.py +++ b/pandasai/smart_datalake/code_execution.py @@ -14,6 +14,8 @@ class CodeExecution(BaseLogicUnit): Code Execution Stage """ + pass + def execute(self, input: Any, **kwargs) -> Any: pipeline_context: PipelineContext = kwargs.get("context") logger: Logger = kwargs.get("logger") diff --git a/pandasai/smart_datalake/code_generator.py b/pandasai/smart_datalake/code_generator.py index de6b2259f..2c1370bbc 100644 --- a/pandasai/smart_datalake/code_generator.py +++ b/pandasai/smart_datalake/code_generator.py @@ -2,7 +2,6 @@ from pandasai.helpers.logger import Logger from pandasai.pipelines.pipeline_context import PipelineContext from pandasai.pipelines.base_logic_unit import BaseLogicUnit -from pandasai.prompts.generate_python_code import GeneratePythonCodePrompt class CodeGenerator(BaseLogicUnit): @@ -10,97 +9,34 @@ class CodeGenerator(BaseLogicUnit): LLM Code Generation Stage """ + pass + def execute(self, input: Any, **kwargs) -> Any: pipeline_context: PipelineContext = kwargs.get("context") logger: Logger = kwargs.get("logger") - if ( - pipeline_context.config.enable_cache - and pipeline_context.cache - and pipeline_context.cache.get( - self._get_cache_key(context=pipeline_context) - ) - ): - logger.log("Using cached response") - code = pipeline_context.query_exec_tracker.execute_func( - pipeline_context.cache.get, - self._get_cache_key(context=pipeline_context), - tag="cache_hit", - ) - - else: - default_values = { - # TODO: find a better way to determine the engine, - "engine": pipeline_context.dfs[0].engine, - "output_type_hint": pipeline_context.get_intermediate_value( - "output_type_helper" - ).template_hint, - "viz_library_type": pipeline_context.get_intermediate_value( - 
"viz_lib_helper" - ).template_hint, - } - - if ( - pipeline_context.memory.size > 1 - and pipeline_context.memory.count() > 1 - and pipeline_context.get_intermediate_value("last_code_generated") - ): - default_values[ - "current_code" - ] = pipeline_context.get_intermediate_value("last_code_generated") - - generate_python_code_instruction = ( - pipeline_context.query_exec_tracker.execute_func( - pipeline_context.get_intermediate_value("get_prompt"), - "generate_python_code", - default_prompt=GeneratePythonCodePrompt, - default_values=default_values, - ) - ) - - [ - code, - reasoning, - answer, - ] = pipeline_context.query_exec_tracker.execute_func( - pipeline_context.config.llm.generate_code, - generate_python_code_instruction, - ) - - pipeline_context.add_intermediate_value("last_reasoning", reasoning) - pipeline_context.add_intermediate_value("last_answer", answer) + if self.skip_if is not None and self.skip_if(pipeline_context): + return input - if pipeline_context.config.enable_cache and pipeline_context.cache: - pipeline_context.cache.set( - self._get_cache_key(context=pipeline_context), code - ) - - if pipeline_context.config.callback is not None: - pipeline_context.config.callback.on_code(code) + generate_python_code_instruction = input + [ + code, + reasoning, + answer, + ] = pipeline_context.query_exec_tracker.execute_func( + pipeline_context.config.llm.generate_code, + generate_python_code_instruction, + ) pipeline_context.add_intermediate_value("last_code_generated", code) logger.log( f"""Code generated: -``` -{code} -``` -""" + ``` + {code} + ``` + """ ) + pipeline_context.add_intermediate_value("last_reasoning", reasoning) + pipeline_context.add_intermediate_value("last_answer", answer) return code - - def _get_cache_key(self, context: PipelineContext) -> str: - """ - Return the cache key for the current conversation. 
- - Returns: - str: The cache key for the current conversation - """ - cache_key = context.memory.get_conversation() - - # make the cache key unique for each combination of dfs - for df in context.dfs: - hash = df.column_hash() - cache_key += str(hash) - - return cache_key diff --git a/pandasai/smart_datalake/generate_smart_datalake_pipeline.py b/pandasai/smart_datalake/generate_smart_datalake_pipeline.py index 58db12852..cccaae47d 100644 --- a/pandasai/smart_datalake/generate_smart_datalake_pipeline.py +++ b/pandasai/smart_datalake/generate_smart_datalake_pipeline.py @@ -2,8 +2,11 @@ from pandasai.helpers.logger import Logger from pandasai.pipelines.pipeline import Pipeline from pandasai.pipelines.pipeline_context import PipelineContext +from pandasai.smart_datalake.cache_lookup import CacheLookup +from pandasai.smart_datalake.cache_population import CachePopulation from pandasai.smart_datalake.code_execution import CodeExecution from pandasai.smart_datalake.code_generator import CodeGenerator +from pandasai.smart_datalake.prompt_generation import PromptGeneration from pandasai.smart_datalake.result_parsing import ResultParsing from pandasai.smart_datalake.result_validation import ResultValidation @@ -20,7 +23,22 @@ def __init__( context=context, logger=logger, steps=[ - CodeGenerator(), + CacheLookup(), + PromptGeneration( + lambda pipeline_context: pipeline_context.get_intermediate_value( + "is_present_in_cache" + ) + ), + CodeGenerator( + lambda pipeline_context: pipeline_context.get_intermediate_value( + "is_present_in_cache" + ) + ), + CachePopulation( + lambda pipeline_context: pipeline_context.get_intermediate_value( + "is_present_in_cache" + ) + ), CodeExecution(), ResultValidation(), ResultParsing(), diff --git a/pandasai/smart_datalake/prompt_generation.py b/pandasai/smart_datalake/prompt_generation.py new file mode 100644 index 000000000..254304576 --- /dev/null +++ b/pandasai/smart_datalake/prompt_generation.py @@ -0,0 +1,49 @@ +from typing import Any 
+from pandasai.pipelines.base_logic_unit import BaseLogicUnit +from pandasai.pipelines.pipeline_context import PipelineContext +from pandasai.prompts.generate_python_code import GeneratePythonCodePrompt + + +class PromptGeneration(BaseLogicUnit): + """ + Code Prompt Generation Stage + """ + + pass + + def execute(self, input: Any, **kwargs) -> Any: + pipeline_context: PipelineContext = kwargs.get("context") + + if self.skip_if is not None and self.skip_if(pipeline_context): + return input + + default_values = { + # TODO: find a better way to determine the engine, + "engine": pipeline_context.dfs[0].engine, + "output_type_hint": pipeline_context.get_intermediate_value( + "output_type_helper" + ).template_hint, + "viz_library_type": pipeline_context.get_intermediate_value( + "viz_lib_helper" + ).template_hint, + } + + if ( + pipeline_context.memory.size > 1 + and pipeline_context.memory.count() > 1 + and pipeline_context.get_intermediate_value("last_code_generated") + ): + default_values["current_code"] = pipeline_context.get_intermediate_value( + "last_code_generated" + ) + + generate_python_code_instruction = ( + pipeline_context.query_exec_tracker.execute_func( + pipeline_context.get_intermediate_value("get_prompt"), + "generate_python_code", + default_prompt=GeneratePythonCodePrompt, + default_values=default_values, + ) + ) + + return generate_python_code_instruction diff --git a/pandasai/smart_datalake/result_parsing.py b/pandasai/smart_datalake/result_parsing.py index 9618e0f28..54bfe28df 100644 --- a/pandasai/smart_datalake/result_parsing.py +++ b/pandasai/smart_datalake/result_parsing.py @@ -9,6 +9,8 @@ class ResultParsing(BaseLogicUnit): Result Parsing Stage """ + pass + def execute(self, input: Any, **kwargs) -> Any: pipeline_context: PipelineContext = kwargs.get("context") diff --git a/pandasai/smart_datalake/result_validation.py b/pandasai/smart_datalake/result_validation.py index 2f9da4a0b..456e61841 100644 --- 
a/pandasai/smart_datalake/result_validation.py +++ b/pandasai/smart_datalake/result_validation.py @@ -11,6 +11,8 @@ class ResultValidation(BaseLogicUnit): Result Validation Stage """ + pass + def execute(self, input: Any, **kwargs) -> Any: pipeline_context: PipelineContext = kwargs.get("context") logger: Logger = kwargs.get("logger") diff --git a/tests/pipelines/smart_datalake/test_code_generator.py b/tests/pipelines/smart_datalake/test_code_generator.py index 2e7525fe8..0c2f84566 100644 --- a/tests/pipelines/smart_datalake/test_code_generator.py +++ b/tests/pipelines/smart_datalake/test_code_generator.py @@ -1,4 +1,4 @@ -from typing import Any, Optional +from typing import Optional from unittest.mock import Mock import pandas as pd @@ -13,8 +13,10 @@ from pandasai.smart_dataframe import SmartDataframe from pandasai.smart_datalake.code_generator import CodeGenerator + class TestCodeGenerator: "Unit test for Smart Data Lake Code Generator" + @pytest.fixture def llm(self, output: Optional[str] = None): return FakeLLM(output=output) @@ -78,46 +80,32 @@ def context(self, sample_df, config): @pytest.fixture def logger(self): return Logger(True, False) - + def test_init(self, context, config): # Test the initialization of the CodeGenerator code_generator = CodeGenerator() assert isinstance(code_generator, CodeGenerator) - def test_code_found_in_cache(self, context, logger): - # Test Flow : Code found in the cache - code_generator = CodeGenerator() - - context._cache = Mock() - context.cache.get = Mock(return_value="Cached Mocked Code") - context._query_exec_tracker = Mock() - context.query_exec_tracker.execute_func = Mock(return_value="Cached Mocked Code") - - code = code_generator.execute(input=None, context=context, logger=logger) - - assert isinstance(code_generator, CodeGenerator) - assert code == "Cached Mocked Code" - def test_code_not_found_in_cache(self, context, logger): # Test Flow : Code Not found in the cache code_generator = CodeGenerator() mock_get_promt = 
Mock(return_value=GeneratePythonCodePrompt) - def mock_intermediate_values(key : str): - if key == "output_type_helper" : + def mock_intermediate_values(key: str): + if key == "output_type_helper": return output_type_factory("DefaultOutputType") - elif key == "viz_lib_helper" : + elif key == "viz_lib_helper": return viz_lib_type_factory("DefaultVizLibraryType") elif key == "get_prompt": return mock_get_promt - + def mock_execute_func(function, *args, **kwargs): - if function == mock_get_promt : + if function == mock_get_promt: return mock_get_promt() return ["Mocked LLM Generated Code", "Mocked Reasoning", "Mocked Answer"] - context.get_intermediate_value= Mock(side_effect=mock_intermediate_values) + context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) context._cache = Mock() context.cache.get = Mock(return_value=None) context._query_exec_tracker = Mock() @@ -126,4 +114,4 @@ def mock_execute_func(function, *args, **kwargs): code = code_generator.execute(input=None, context=context, logger=logger) assert isinstance(code_generator, CodeGenerator) - assert code == "Mocked LLM Generated Code" \ No newline at end of file + assert code == "Mocked LLM Generated Code" From 69fa4be51b6f54c810060eacbc8d243f179b901c Mon Sep 17 00:00:00 2001 From: "sourcery-ai[bot]" <58596630+sourcery-ai[bot]@users.noreply.github.com> Date: Thu, 9 Nov 2023 23:49:41 +0100 Subject: [PATCH 08/11] 'Refactored by Sourcery' (#740) Co-authored-by: Sourcery AI <> --- pandasai/smart_datalake/prompt_generation.py | 14 +++++--------- .../smart_datalake/test_code_execution.py | 8 ++++---- .../smart_datalake/test_code_generator.py | 3 +-- .../smart_datalake/test_result_parsing.py | 9 ++++----- .../smart_datalake/test_result_validation.py | 5 ++--- 5 files changed, 16 insertions(+), 23 deletions(-) diff --git a/pandasai/smart_datalake/prompt_generation.py b/pandasai/smart_datalake/prompt_generation.py index 254304576..b1bb74c10 100644 --- a/pandasai/smart_datalake/prompt_generation.py 
+++ b/pandasai/smart_datalake/prompt_generation.py @@ -37,13 +37,9 @@ def execute(self, input: Any, **kwargs) -> Any: "last_code_generated" ) - generate_python_code_instruction = ( - pipeline_context.query_exec_tracker.execute_func( - pipeline_context.get_intermediate_value("get_prompt"), - "generate_python_code", - default_prompt=GeneratePythonCodePrompt, - default_values=default_values, - ) + return pipeline_context.query_exec_tracker.execute_func( + pipeline_context.get_intermediate_value("get_prompt"), + "generate_python_code", + default_prompt=GeneratePythonCodePrompt, + default_values=default_values, ) - - return generate_python_code_instruction diff --git a/tests/pipelines/smart_datalake/test_code_execution.py b/tests/pipelines/smart_datalake/test_code_execution.py index 47fda6ade..942285908 100644 --- a/tests/pipelines/smart_datalake/test_code_execution.py +++ b/tests/pipelines/smart_datalake/test_code_execution.py @@ -74,8 +74,7 @@ def config(self, llm): @pytest.fixture def context(self, sample_df, config): - pipeline_context = PipelineContext([sample_df], config) - return pipeline_context + return PipelineContext([sample_df], config) @pytest.fixture def logger(self): @@ -127,15 +126,16 @@ def mock_intermediate_values(key : str): return SkillsManager() elif key == "code_manager": return mock_code_manager + context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) - + assert isinstance(code_execution, CodeExecution) result = None try: result = code_execution.execute(input="Test Code", context=context, logger=logger) except Exception as e: - assert result == None + assert result is None def test_code_execution_successful_at_retry(self, context, logger): # Test Flow : Code Execution Successful with no exceptions diff --git a/tests/pipelines/smart_datalake/test_code_generator.py b/tests/pipelines/smart_datalake/test_code_generator.py index 0c2f84566..bf377742c 100644 --- a/tests/pipelines/smart_datalake/test_code_generator.py +++ 
b/tests/pipelines/smart_datalake/test_code_generator.py @@ -74,8 +74,7 @@ def config(self, llm): @pytest.fixture def context(self, sample_df, config): - pipeline_context = PipelineContext([sample_df], config) - return pipeline_context + return PipelineContext([sample_df], config) @pytest.fixture def logger(self): diff --git a/tests/pipelines/smart_datalake/test_result_parsing.py b/tests/pipelines/smart_datalake/test_result_parsing.py index 284f2a94b..b7e377d0c 100644 --- a/tests/pipelines/smart_datalake/test_result_parsing.py +++ b/tests/pipelines/smart_datalake/test_result_parsing.py @@ -72,8 +72,7 @@ def config(self, llm): @pytest.fixture def context(self, sample_df, config): - pipeline_context = PipelineContext([sample_df], config) - return pipeline_context + return PipelineContext([sample_df], config) @pytest.fixture def logger(self): @@ -111,20 +110,20 @@ def test_result_parsing_unsuccessful_with_exceptions(self, context, logger): def mock_result_parsing(*args, **kwargs): raise Exception("Unit test exception") - + context._query_exec_tracker = Mock() context.query_exec_tracker.execute_func = Mock(side_effect=mock_result_parsing) def mock_intermediate_values(key : str): if key == "response_parser" : return mock_response_parser - + context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) result = None try: result = result_parsing.execute(input="Test Result", context=context, logger=logger) except Exception as e: - assert result == None + assert result is None assert isinstance(result_parsing, ResultParsing) diff --git a/tests/pipelines/smart_datalake/test_result_validation.py b/tests/pipelines/smart_datalake/test_result_validation.py index 6c3f6a9ab..d10984341 100644 --- a/tests/pipelines/smart_datalake/test_result_validation.py +++ b/tests/pipelines/smart_datalake/test_result_validation.py @@ -72,8 +72,7 @@ def config(self, llm): @pytest.fixture def context(self, sample_df, config): - pipeline_context = PipelineContext([sample_df], config) - 
return pipeline_context + return PipelineContext([sample_df], config) @pytest.fixture def logger(self): @@ -96,7 +95,7 @@ def test_result_is_none(self, context, logger): assert not context.query_exec_tracker.add_step.called assert isinstance(result_validation, ResultValidation) - assert result == None + assert result is None def test_result_is_not_of_dict_type(self, context, logger): # Test Flow : Code Execution Successful with no exceptions From d5e9e03af9d26cbc28c9f0b2d7cebc10e319e3f3 Mon Sep 17 00:00:00 2001 From: Gabriele Venturi Date: Fri, 10 Nov 2023 00:35:47 +0100 Subject: [PATCH 09/11] refactor: move pipeline logic unit from sdf to pipelines folder --- pandasai/pipelines/pipeline.py | 4 ++ .../smart_datalake_chat}/cache_lookup.py | 18 +++++- .../smart_datalake_chat}/cache_population.py | 19 ++++-- .../smart_datalake_chat}/code_execution.py | 23 +++++-- .../smart_datalake_chat}/code_generator.py | 18 +++++- .../generate_smart_datalake_pipeline.py | 20 +++---- .../smart_datalake_chat}/prompt_generation.py | 21 +++++-- .../smart_datalake_chat}/result_parsing.py | 16 ++++- .../smart_datalake_chat}/result_validation.py | 17 +++++- pandasai/smart_datalake/__init__.py | 2 +- .../smart_datalake/test_code_execution.py | 60 ++++++++++++------- .../smart_datalake/test_code_generator.py | 2 +- .../smart_datalake/test_result_parsing.py | 29 +++++---- .../smart_datalake/test_result_validation.py | 48 +++++++++------ 14 files changed, 205 insertions(+), 92 deletions(-) rename pandasai/{smart_datalake => pipelines/smart_datalake_chat}/cache_lookup.py (62%) rename pandasai/{smart_datalake => pipelines/smart_datalake_chat}/cache_population.py (54%) rename pandasai/{smart_datalake => pipelines/smart_datalake_chat}/code_execution.py (82%) rename pandasai/{smart_datalake => pipelines/smart_datalake_chat}/code_generator.py (65%) rename pandasai/{smart_datalake => pipelines/smart_datalake_chat}/generate_smart_datalake_pipeline.py (65%) rename pandasai/{smart_datalake => 
pipelines/smart_datalake_chat}/prompt_generation.py (68%) rename pandasai/{smart_datalake => pipelines/smart_datalake_chat}/result_parsing.py (67%) rename pandasai/{smart_datalake => pipelines/smart_datalake_chat}/result_validation.py (76%) diff --git a/pandasai/pipelines/pipeline.py b/pandasai/pipelines/pipeline.py index cc2e15fe2..6a1bda9ac 100644 --- a/pandasai/pipelines/pipeline.py +++ b/pandasai/pipelines/pipeline.py @@ -78,6 +78,10 @@ def run(self, data: Any = None) -> Any: try: for index, logic in enumerate(self._steps): self._logger.log(f"Executing Step {index}: {logic.__class__.__name__}") + + if logic.skip_if is not None and logic.skip_if(self._context): + continue + data = logic.execute( data, logger=self._logger, diff --git a/pandasai/smart_datalake/cache_lookup.py b/pandasai/pipelines/smart_datalake_chat/cache_lookup.py similarity index 62% rename from pandasai/smart_datalake/cache_lookup.py rename to pandasai/pipelines/smart_datalake_chat/cache_lookup.py index f0e0608c7..46142b0a7 100644 --- a/pandasai/smart_datalake/cache_lookup.py +++ b/pandasai/pipelines/smart_datalake_chat/cache_lookup.py @@ -1,7 +1,7 @@ from typing import Any -from pandasai.helpers.logger import Logger -from pandasai.pipelines.base_logic_unit import BaseLogicUnit -from pandasai.pipelines.pipeline_context import PipelineContext +from ...helpers.logger import Logger +from ..base_logic_unit import BaseLogicUnit +from ..pipeline_context import PipelineContext class CacheLookup(BaseLogicUnit): @@ -12,6 +12,18 @@ class CacheLookup(BaseLogicUnit): pass def execute(self, input: Any, **kwargs) -> Any: + """ + This method will return output according to + Implementation. + + :param input: Your input data. + :param kwargs: A dictionary of keyword arguments. + - 'logger' (any): The logger for logging. + - 'config' (Config): Global configurations for the test + - 'context' (any): The execution context. + + :return: The result of the execution. 
+ """ pipeline_context: PipelineContext = kwargs.get("context") logger: Logger = kwargs.get("logger") if ( diff --git a/pandasai/smart_datalake/cache_population.py b/pandasai/pipelines/smart_datalake_chat/cache_population.py similarity index 54% rename from pandasai/smart_datalake/cache_population.py rename to pandasai/pipelines/smart_datalake_chat/cache_population.py index 804345699..8d2791a07 100644 --- a/pandasai/smart_datalake/cache_population.py +++ b/pandasai/pipelines/smart_datalake_chat/cache_population.py @@ -1,6 +1,6 @@ from typing import Any -from pandasai.pipelines.base_logic_unit import BaseLogicUnit -from pandasai.pipelines.pipeline_context import PipelineContext +from ..base_logic_unit import BaseLogicUnit +from ..pipeline_context import PipelineContext class CachePopulation(BaseLogicUnit): @@ -11,11 +11,20 @@ class CachePopulation(BaseLogicUnit): pass def execute(self, input: Any, **kwargs) -> Any: + """ + This method will return output according to + Implementation. + + :param input: Your input data. + :param kwargs: A dictionary of keyword arguments. + - 'logger' (any): The logger for logging. + - 'config' (Config): Global configurations for the test + - 'context' (any): The execution context. + + :return: The result of the execution. 
+ """ pipeline_context: PipelineContext = kwargs.get("context") - if self.skip_if is not None and self.skip_if(pipeline_context): - return input - code = input if pipeline_context.config.enable_cache and pipeline_context.cache: diff --git a/pandasai/smart_datalake/code_execution.py b/pandasai/pipelines/smart_datalake_chat/code_execution.py similarity index 82% rename from pandasai/smart_datalake/code_execution.py rename to pandasai/pipelines/smart_datalake_chat/code_execution.py index 85645f1d1..9b2cc27ce 100644 --- a/pandasai/smart_datalake/code_execution.py +++ b/pandasai/pipelines/smart_datalake_chat/code_execution.py @@ -1,15 +1,14 @@ import logging import traceback from typing import Any, List -from pandasai.helpers.code_manager import CodeExecutionContext -from pandasai.helpers.logger import Logger -from pandasai.pipelines.base_logic_unit import BaseLogicUnit -from pandasai.pipelines.pipeline_context import PipelineContext -from pandasai.prompts.correct_error_prompt import CorrectErrorPrompt +from ...helpers.code_manager import CodeExecutionContext +from ...helpers.logger import Logger +from ..base_logic_unit import BaseLogicUnit +from ..pipeline_context import PipelineContext +from ...prompts.correct_error_prompt import CorrectErrorPrompt class CodeExecution(BaseLogicUnit): - """ Code Execution Stage """ @@ -17,6 +16,18 @@ class CodeExecution(BaseLogicUnit): pass def execute(self, input: Any, **kwargs) -> Any: + """ + This method will return output according to + Implementation. + + :param input: Your input data. + :param kwargs: A dictionary of keyword arguments. + - 'logger' (any): The logger for logging. + - 'config' (Config): Global configurations for the test + - 'context' (any): The execution context. + + :return: The result of the execution. 
+ """ pipeline_context: PipelineContext = kwargs.get("context") logger: Logger = kwargs.get("logger") diff --git a/pandasai/smart_datalake/code_generator.py b/pandasai/pipelines/smart_datalake_chat/code_generator.py similarity index 65% rename from pandasai/smart_datalake/code_generator.py rename to pandasai/pipelines/smart_datalake_chat/code_generator.py index 2c1370bbc..148ec9b2f 100644 --- a/pandasai/smart_datalake/code_generator.py +++ b/pandasai/pipelines/smart_datalake_chat/code_generator.py @@ -1,7 +1,7 @@ from typing import Any -from pandasai.helpers.logger import Logger -from pandasai.pipelines.pipeline_context import PipelineContext -from pandasai.pipelines.base_logic_unit import BaseLogicUnit +from ...helpers.logger import Logger +from ..pipeline_context import PipelineContext +from ..base_logic_unit import BaseLogicUnit class CodeGenerator(BaseLogicUnit): @@ -12,6 +12,18 @@ class CodeGenerator(BaseLogicUnit): pass def execute(self, input: Any, **kwargs) -> Any: + """ + This method will return output according to + Implementation. + + :param input: Your input data. + :param kwargs: A dictionary of keyword arguments. + - 'logger' (any): The logger for logging. + - 'config' (Config): Global configurations for the test + - 'context' (any): The execution context. + + :return: The result of the execution. 
+ """ pipeline_context: PipelineContext = kwargs.get("context") logger: Logger = kwargs.get("logger") diff --git a/pandasai/smart_datalake/generate_smart_datalake_pipeline.py b/pandasai/pipelines/smart_datalake_chat/generate_smart_datalake_pipeline.py similarity index 65% rename from pandasai/smart_datalake/generate_smart_datalake_pipeline.py rename to pandasai/pipelines/smart_datalake_chat/generate_smart_datalake_pipeline.py index cccaae47d..feeb2b816 100644 --- a/pandasai/smart_datalake/generate_smart_datalake_pipeline.py +++ b/pandasai/pipelines/smart_datalake_chat/generate_smart_datalake_pipeline.py @@ -1,14 +1,14 @@ from typing import Optional -from pandasai.helpers.logger import Logger -from pandasai.pipelines.pipeline import Pipeline -from pandasai.pipelines.pipeline_context import PipelineContext -from pandasai.smart_datalake.cache_lookup import CacheLookup -from pandasai.smart_datalake.cache_population import CachePopulation -from pandasai.smart_datalake.code_execution import CodeExecution -from pandasai.smart_datalake.code_generator import CodeGenerator -from pandasai.smart_datalake.prompt_generation import PromptGeneration -from pandasai.smart_datalake.result_parsing import ResultParsing -from pandasai.smart_datalake.result_validation import ResultValidation +from ...helpers.logger import Logger +from ..pipeline import Pipeline +from ..pipeline_context import PipelineContext +from .cache_lookup import CacheLookup +from .cache_population import CachePopulation +from .code_execution import CodeExecution +from .code_generator import CodeGenerator +from .prompt_generation import PromptGeneration +from .result_parsing import ResultParsing +from .result_validation import ResultValidation class GenerateSmartDatalakePipeline: diff --git a/pandasai/smart_datalake/prompt_generation.py b/pandasai/pipelines/smart_datalake_chat/prompt_generation.py similarity index 68% rename from pandasai/smart_datalake/prompt_generation.py rename to 
pandasai/pipelines/smart_datalake_chat/prompt_generation.py index b1bb74c10..ac6017bdc 100644 --- a/pandasai/smart_datalake/prompt_generation.py +++ b/pandasai/pipelines/smart_datalake_chat/prompt_generation.py @@ -1,7 +1,7 @@ from typing import Any -from pandasai.pipelines.base_logic_unit import BaseLogicUnit -from pandasai.pipelines.pipeline_context import PipelineContext -from pandasai.prompts.generate_python_code import GeneratePythonCodePrompt +from ..base_logic_unit import BaseLogicUnit +from ..pipeline_context import PipelineContext +from ...prompts.generate_python_code import GeneratePythonCodePrompt class PromptGeneration(BaseLogicUnit): @@ -12,11 +12,20 @@ class PromptGeneration(BaseLogicUnit): pass def execute(self, input: Any, **kwargs) -> Any: + """ + This method will return output according to + Implementation. + + :param input: Your input data. + :param kwargs: A dictionary of keyword arguments. + - 'logger' (any): The logger for logging. + - 'config' (Config): Global configurations for the test + - 'context' (any): The execution context. + + :return: The result of the execution. 
+ """ pipeline_context: PipelineContext = kwargs.get("context") - if self.skip_if is not None and self.skip_if(pipeline_context): - return input - default_values = { # TODO: find a better way to determine the engine, "engine": pipeline_context.dfs[0].engine, diff --git a/pandasai/smart_datalake/result_parsing.py b/pandasai/pipelines/smart_datalake_chat/result_parsing.py similarity index 67% rename from pandasai/smart_datalake/result_parsing.py rename to pandasai/pipelines/smart_datalake_chat/result_parsing.py index 54bfe28df..8fc6df6c2 100644 --- a/pandasai/smart_datalake/result_parsing.py +++ b/pandasai/pipelines/smart_datalake_chat/result_parsing.py @@ -1,6 +1,6 @@ from typing import Any -from pandasai.pipelines.base_logic_unit import BaseLogicUnit -from pandasai.pipelines.pipeline_context import PipelineContext +from ..base_logic_unit import BaseLogicUnit +from ..pipeline_context import PipelineContext class ResultParsing(BaseLogicUnit): @@ -12,6 +12,18 @@ class ResultParsing(BaseLogicUnit): pass def execute(self, input: Any, **kwargs) -> Any: + """ + This method will return output according to + Implementation. + + :param input: Your input data. + :param kwargs: A dictionary of keyword arguments. + - 'logger' (any): The logger for logging. + - 'config' (Config): Global configurations for the test + - 'context' (any): The execution context. + + :return: The result of the execution. 
+ """ pipeline_context: PipelineContext = kwargs.get("context") result = input diff --git a/pandasai/smart_datalake/result_validation.py b/pandasai/pipelines/smart_datalake_chat/result_validation.py similarity index 76% rename from pandasai/smart_datalake/result_validation.py rename to pandasai/pipelines/smart_datalake_chat/result_validation.py index 456e61841..ce5e37e11 100644 --- a/pandasai/smart_datalake/result_validation.py +++ b/pandasai/pipelines/smart_datalake_chat/result_validation.py @@ -1,12 +1,11 @@ import logging from typing import Any from pandasai.helpers.logger import Logger -from pandasai.pipelines.base_logic_unit import BaseLogicUnit -from pandasai.pipelines.pipeline_context import PipelineContext +from ..base_logic_unit import BaseLogicUnit +from ..pipeline_context import PipelineContext class ResultValidation(BaseLogicUnit): - """ Result Validation Stage """ @@ -14,6 +13,18 @@ class ResultValidation(BaseLogicUnit): pass def execute(self, input: Any, **kwargs) -> Any: + """ + This method will return output according to + Implementation. + + :param input: Your input data. + :param kwargs: A dictionary of keyword arguments. + - 'logger' (any): The logger for logging. + - 'config' (Config): Global configurations for the test + - 'context' (any): The execution context. + + :return: The result of the execution. 
+ """ pipeline_context: PipelineContext = kwargs.get("context") logger: Logger = kwargs.get("logger") diff --git a/pandasai/smart_datalake/__init__.py b/pandasai/smart_datalake/__init__.py index 060eefbea..e31d71696 100644 --- a/pandasai/smart_datalake/__init__.py +++ b/pandasai/smart_datalake/__init__.py @@ -27,7 +27,7 @@ from pandasai.skills import skill from pandasai.helpers.query_exec_tracker import QueryExecTracker -from pandasai.smart_datalake.generate_smart_datalake_pipeline import ( +from ..pipelines.smart_datalake_chat.generate_smart_datalake_pipeline import ( GenerateSmartDatalakePipeline, ) diff --git a/tests/pipelines/smart_datalake/test_code_execution.py b/tests/pipelines/smart_datalake/test_code_execution.py index 942285908..7acadd4e1 100644 --- a/tests/pipelines/smart_datalake/test_code_execution.py +++ b/tests/pipelines/smart_datalake/test_code_execution.py @@ -2,14 +2,13 @@ from unittest.mock import Mock import pandas as pd import pytest -from pandasai.helpers.code_manager import CodeManager from pandasai.helpers.logger import Logger from pandasai.helpers.skills_manager import SkillsManager from pandasai.llm.fake import FakeLLM from pandasai.pipelines.pipeline_context import PipelineContext from pandasai.smart_dataframe import SmartDataframe -from pandasai.smart_datalake.code_execution import CodeExecution +from pandasai.pipelines.smart_datalake_chat.code_execution import CodeExecution class TestCodeExecution: @@ -79,7 +78,7 @@ def context(self, sample_df, config): @pytest.fixture def logger(self): return Logger(True, False) - + def test_init(self, context, config): # Test the initialization of the CodeExecution code_execution = CodeExecution() @@ -92,16 +91,19 @@ def test_code_execution_successful_with_no_exceptions(self, context, logger): mock_code_manager = Mock() mock_code_manager.execute_code = Mock(return_value="Mocked Result") - def mock_intermediate_values(key : str): - if key == "last_prompt_id" : + def mock_intermediate_values(key: str): 
+ if key == "last_prompt_id": return "Mocked Promt ID" - elif key == "skills" : + elif key == "skills": return SkillsManager() elif key == "code_manager": - return mock_code_manager + return mock_code_manager + context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) - result = code_execution.execute(input="Test Code", context=context, logger=logger) + result = code_execution.execute( + input="Test Code", context=context, logger=logger + ) assert isinstance(code_execution, CodeExecution) assert result == "Mocked Result" @@ -117,12 +119,18 @@ def mock_execute_code(*args, **kwargs): mock_code_manager.execute_code = Mock(side_effect=mock_execute_code) context._query_exec_tracker = Mock() - context.query_exec_tracker.execute_func = Mock(return_value=["Interuppted Code", "Exception Testing","Unsuccessful after Retries"]) + context.query_exec_tracker.execute_func = Mock( + return_value=[ + "Interuppted Code", + "Exception Testing", + "Unsuccessful after Retries", + ] + ) - def mock_intermediate_values(key : str): - if key == "last_prompt_id" : + def mock_intermediate_values(key: str): + if key == "last_prompt_id": return "Mocked Promt ID" - elif key == "skills" : + elif key == "skills": return SkillsManager() elif key == "code_manager": return mock_code_manager @@ -133,17 +141,18 @@ def mock_intermediate_values(key : str): result = None try: - result = code_execution.execute(input="Test Code", context=context, logger=logger) - except Exception as e: + result = code_execution.execute( + input="Test Code", context=context, logger=logger + ) + except Exception: assert result is None def test_code_execution_successful_at_retry(self, context, logger): # Test Flow : Code Execution Successful with no exceptions code_execution = CodeExecution() - self.throw_exception == True def mock_execute_code(*args, **kwargs): - if self.throw_exception == True: + if self.throw_exception is True: self.throw_exception = False raise Exception("Unit test exception") return 
"Mocked Result after retry" @@ -152,18 +161,27 @@ def mock_execute_code(*args, **kwargs): mock_code_manager.execute_code = Mock(side_effect=mock_execute_code) context._query_exec_tracker = Mock() - context.query_exec_tracker.execute_func = Mock(return_value=["Interuppted Code", "Exception Testing","Successful after Retry"]) + context.query_exec_tracker.execute_func = Mock( + return_value=[ + "Interuppted Code", + "Exception Testing", + "Successful after Retry", + ] + ) - def mock_intermediate_values(key : str): - if key == "last_prompt_id" : + def mock_intermediate_values(key: str): + if key == "last_prompt_id": return "Mocked Promt ID" - elif key == "skills" : + elif key == "skills": return SkillsManager() elif key == "code_manager": return mock_code_manager + context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) - result = code_execution.execute(input="Test Code", context=context, logger=logger) + result = code_execution.execute( + input="Test Code", context=context, logger=logger + ) assert isinstance(code_execution, CodeExecution) assert result == "Mocked Result after retry" diff --git a/tests/pipelines/smart_datalake/test_code_generator.py b/tests/pipelines/smart_datalake/test_code_generator.py index bf377742c..32bc83083 100644 --- a/tests/pipelines/smart_datalake/test_code_generator.py +++ b/tests/pipelines/smart_datalake/test_code_generator.py @@ -11,7 +11,7 @@ from pandasai.prompts.generate_python_code import GeneratePythonCodePrompt from pandasai.smart_dataframe import SmartDataframe -from pandasai.smart_datalake.code_generator import CodeGenerator +from pandasai.pipelines.smart_datalake_chat.code_generator import CodeGenerator class TestCodeGenerator: diff --git a/tests/pipelines/smart_datalake/test_result_parsing.py b/tests/pipelines/smart_datalake/test_result_parsing.py index b7e377d0c..08bf1e1fb 100644 --- a/tests/pipelines/smart_datalake/test_result_parsing.py +++ b/tests/pipelines/smart_datalake/test_result_parsing.py @@ -7,7 
+7,7 @@ from pandasai.llm.fake import FakeLLM from pandasai.pipelines.pipeline_context import PipelineContext from pandasai.smart_dataframe import SmartDataframe -from pandasai.smart_datalake.result_parsing import ResultParsing +from pandasai.pipelines.smart_datalake_chat.result_parsing import ResultParsing class TestResultParsing: @@ -77,7 +77,7 @@ def context(self, sample_df, config): @pytest.fixture def logger(self): return Logger(True, False) - + def test_init(self, context, config): # Test the initialization of the CodeExecution result_parsing = ResultParsing() @@ -89,15 +89,19 @@ def test_result_parsing_successful_with_no_exceptions(self, context, logger): result_parsing._add_result_to_memory = Mock() mock_response_parser = Mock() context._query_exec_tracker = Mock() - context.query_exec_tracker.execute_func = Mock(return_value="Mocked Parsed Result") + context.query_exec_tracker.execute_func = Mock( + return_value="Mocked Parsed Result" + ) - def mock_intermediate_values(key : str): - if key == "response_parser" : + def mock_intermediate_values(key: str): + if key == "response_parser": return mock_response_parser - + context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) - result = result_parsing.execute(input="Test Result", context=context, logger=logger) + result = result_parsing.execute( + input="Test Result", context=context, logger=logger + ) assert isinstance(result_parsing, ResultParsing) assert result == "Mocked Parsed Result" @@ -114,16 +118,17 @@ def mock_result_parsing(*args, **kwargs): context._query_exec_tracker = Mock() context.query_exec_tracker.execute_func = Mock(side_effect=mock_result_parsing) - def mock_intermediate_values(key : str): - if key == "response_parser" : + def mock_intermediate_values(key: str): + if key == "response_parser": return mock_response_parser context.get_intermediate_value = Mock(side_effect=mock_intermediate_values) result = None try: - result = result_parsing.execute(input="Test Result", 
context=context, logger=logger) - except Exception as e: + result = result_parsing.execute( + input="Test Result", context=context, logger=logger + ) + except Exception: assert result is None assert isinstance(result_parsing, ResultParsing) - diff --git a/tests/pipelines/smart_datalake/test_result_validation.py b/tests/pipelines/smart_datalake/test_result_validation.py index d10984341..b150a7188 100644 --- a/tests/pipelines/smart_datalake/test_result_validation.py +++ b/tests/pipelines/smart_datalake/test_result_validation.py @@ -7,7 +7,7 @@ from pandasai.llm.fake import FakeLLM from pandasai.pipelines.pipeline_context import PipelineContext from pandasai.smart_dataframe import SmartDataframe -from pandasai.smart_datalake.result_validation import ResultValidation +from pandasai.pipelines.smart_datalake_chat.result_validation import ResultValidation class TestResultValidation: @@ -77,7 +77,7 @@ def context(self, sample_df, config): @pytest.fixture def logger(self): return Logger(True, False) - + def test_init(self, context, config): # Test the initialization of the CodeExecution result_validation = ResultValidation() @@ -105,7 +105,9 @@ def test_result_is_not_of_dict_type(self, context, logger): context.query_exec_tracker.get_execution_time = Mock() context.query_exec_tracker.add_step = Mock() - result = result_validation.execute(input="Not Dict Type Result", context=context, logger=logger) + result = result_validation.execute( + input="Not Dict Type Result", context=context, logger=logger + ) assert not context.query_exec_tracker.add_step.called assert isinstance(result_validation, ResultValidation) @@ -119,17 +121,21 @@ def test_result_is_of_dict_type_and_valid(self, context, logger): context._query_exec_tracker = Mock() context.query_exec_tracker.get_execution_time = Mock() context.get_intermediate_value = Mock(return_value=output_type_helper) - output_type_helper.validate = Mock(return_value=(True,"Mocked Logs")) + output_type_helper.validate = 
Mock(return_value=(True, "Mocked Logs")) - result = result_validation.execute(input={"Mocked":"Result"}, context=context, logger=logger) + result = result_validation.execute( + input={"Mocked": "Result"}, context=context, logger=logger + ) - context.query_exec_tracker.add_step.assert_called_with({ - "type": "Validating Output", - "success": True, - "message": "Output Validation Successful", - }) + context.query_exec_tracker.add_step.assert_called_with( + { + "type": "Validating Output", + "success": True, + "message": "Output Validation Successful", + } + ) assert isinstance(result_validation, ResultValidation) - assert result == {"Mocked":"Result"} + assert result == {"Mocked": "Result"} def test_result_is_of_dict_type_and_not_valid(self, context, logger): # Test Flow : Code Execution Successful with no exceptions @@ -139,14 +145,18 @@ def test_result_is_of_dict_type_and_not_valid(self, context, logger): context._query_exec_tracker = Mock() context.query_exec_tracker.get_execution_time = Mock() context.get_intermediate_value = Mock(return_value=output_type_helper) - output_type_helper.validate = Mock(return_value=(False,"Mocked Logs")) + output_type_helper.validate = Mock(return_value=(False, "Mocked Logs")) - result = result_validation.execute(input={"Mocked":"Result"}, context=context, logger=logger) + result = result_validation.execute( + input={"Mocked": "Result"}, context=context, logger=logger + ) - context.query_exec_tracker.add_step.assert_called_with({ - "type": "Validating Output", - "success": False, - "message": "Output Validation Failed", - }) + context.query_exec_tracker.add_step.assert_called_with( + { + "type": "Validating Output", + "success": False, + "message": "Output Validation Failed", + } + ) assert isinstance(result_validation, ResultValidation) - assert result == {"Mocked":"Result"} \ No newline at end of file + assert result == {"Mocked": "Result"} From 9b209ce05a68974aef77204bc712c0d4e72ecb27 Mon Sep 17 00:00:00 2001 From: Milind Lalwani 
Date: Fri, 10 Nov 2023 12:29:08 +0100 Subject: [PATCH 10/11] refactor(Pipelines) : Merge Conflicts Fixed --- .../smart_datalake_chat/code_execution.py | 2 +- .../smart_datalake_chat/prompt_generation.py | 2 +- pandasai/smart_datalake/__init__.py | 38 +++++++------------ 3 files changed, 15 insertions(+), 27 deletions(-) diff --git a/pandasai/pipelines/smart_datalake_chat/code_execution.py b/pandasai/pipelines/smart_datalake_chat/code_execution.py index 9b2cc27ce..a9d05f59e 100644 --- a/pandasai/pipelines/smart_datalake_chat/code_execution.py +++ b/pandasai/pipelines/smart_datalake_chat/code_execution.py @@ -109,7 +109,7 @@ def _retry_run_code( } error_correcting_instruction = context.get_intermediate_value("get_prompt")( "correct_error", - default_prompt=CorrectErrorPrompt, + default_prompt=CorrectErrorPrompt(), default_values=default_values, ) diff --git a/pandasai/pipelines/smart_datalake_chat/prompt_generation.py b/pandasai/pipelines/smart_datalake_chat/prompt_generation.py index ac6017bdc..b81b65797 100644 --- a/pandasai/pipelines/smart_datalake_chat/prompt_generation.py +++ b/pandasai/pipelines/smart_datalake_chat/prompt_generation.py @@ -49,6 +49,6 @@ def execute(self, input: Any, **kwargs) -> Any: return pipeline_context.query_exec_tracker.execute_func( pipeline_context.get_intermediate_value("get_prompt"), "generate_python_code", - default_prompt=GeneratePythonCodePrompt, + default_prompt=GeneratePythonCodePrompt(), default_values=default_values, ) diff --git a/pandasai/smart_datalake/__init__.py b/pandasai/smart_datalake/__init__.py index 4d1b2c781..ca54f73e7 100644 --- a/pandasai/smart_datalake/__init__.py +++ b/pandasai/smart_datalake/__init__.py @@ -43,9 +43,9 @@ from ..config import load_config from ..prompts.base import AbstractPrompt from ..prompts.correct_error_prompt import CorrectErrorPrompt -from typing import Union, List, Any, Type, Optional +from typing import Union, List, Any, Optional from ..prompts.generate_python_code import
GeneratePythonCodePrompt -from ..helpers.code_manager import CodeExecutionContext, CodeManager +from ..helpers.code_manager import CodeManager from ..middlewares.base import Middleware from ..helpers.df_info import DataFrameType from ..helpers.path import find_project_root @@ -378,27 +378,18 @@ def chat(self, query: str, output_type: Optional[str] = None): try: result = GenerateSmartDatalakePipeline(pipeline_context, self.logger).run() - self._logger.log( - f"Failed to execute code with a correction framework " - f"[retry number: {retry_count}]", - level=logging.WARNING, - ) - - traceback_error = traceback.format_exc() - [ - code_to_run, - reasoning, - answer, - ] = self._query_exec_tracker.execute_func( - self._retry_run_code, code, traceback_error - ) - - if isinstance(result, dict): - self._validate_output(result, output_type) + except Exception as exception: + self.last_error = str(exception) + self._query_exec_tracker.success = False + self._query_exec_tracker.publish() + + return ( + "Unfortunately, I was not able to answer your question, " + "because of the following error:\n" + f"\n{exception}\n" + ) - if result is not None: - self.last_result = result - self.logger.log(f"Answer: {result}") + self.update_intermediate_value_post_pipeline_execution(pipeline_context) return result @@ -448,9 +439,6 @@ def _validate_output(self, result: dict, output_type: Optional[str] = None): ) raise ValueError("Output validation failed") - self.update_intermediate_value_post_pipeline_execution(pipeline_context) - - def _get_viz_library_type(self) -> str: """ Get the visualization library type based on the configured library. 
From 3f3fc61b6ed898261ab1ecc1208f297cb9f0aa4b Mon Sep 17 00:00:00 2001 From: Gabriele Venturi Date: Sun, 12 Nov 2023 17:52:35 +0100 Subject: [PATCH 11/11] build: fix .lock file --- poetry.lock | 124 ++++++++++++++++++++++++---------------------------- 1 file changed, 58 insertions(+), 66 deletions(-) diff --git a/poetry.lock b/poetry.lock index 9feb29055..d60c257c1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.0 and should not be changed by hand. [[package]] name = "aiohttp" @@ -1302,11 +1302,11 @@ files = [ google-auth = ">=2.14.1,<3.0.dev0" googleapis-common-protos = ">=1.56.2,<2.0.dev0" grpcio = [ - {version = ">=1.33.2,<2.0dev", optional = true, markers = "extra == \"grpc\""}, + {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, ] grpcio-status = [ - {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "extra == \"grpc\""}, + {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""}, {version = ">=1.49.1,<2.0.dev0", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, ] protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" @@ -3035,8 +3035,8 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, ] python-dateutil = ">=2.8.1" @@ -4047,27 +4047,32 @@ 
description = "A set of python modules for machine learning and data mining" optional = true python-versions = ">=3.8" files = [ - {file = "scikit-learn-1.3.1.tar.gz", hash = "sha256:1a231cced3ee3fa04756b4a7ab532dc9417acd581a330adff5f2c01ac2831fcf"}, - {file = "scikit_learn-1.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3153612ff8d36fa4e35ef8b897167119213698ea78f3fd130b4068e6f8d2da5a"}, - {file = "scikit_learn-1.3.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:6bb9490fdb8e7e00f1354621689187bef3cab289c9b869688f805bf724434755"}, - {file = "scikit_learn-1.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7135a03af71138669f19bc96e7d0cc8081aed4b3565cc3b131135d65fc642ba"}, - {file = "scikit_learn-1.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d8dee8c1f40eeba49a85fe378bdf70a07bb64aba1a08fda1e0f48d27edfc3e6"}, - {file = "scikit_learn-1.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:4d379f2b34096105a96bd857b88601dffe7389bd55750f6f29aaa37bc6272eb5"}, - {file = "scikit_learn-1.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:14e8775eba072ab10866a7e0596bc9906873e22c4c370a651223372eb62de180"}, - {file = "scikit_learn-1.3.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:58b0c2490eff8355dc26e884487bf8edaccf2ba48d09b194fb2f3a026dd64f9d"}, - {file = "scikit_learn-1.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f66eddfda9d45dd6cadcd706b65669ce1df84b8549875691b1f403730bdef217"}, - {file = "scikit_learn-1.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6448c37741145b241eeac617028ba6ec2119e1339b1385c9720dae31367f2be"}, - {file = "scikit_learn-1.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:c413c2c850241998168bbb3bd1bb59ff03b1195a53864f0b80ab092071af6028"}, - {file = "scikit_learn-1.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:52b77cc08bd555969ec5150788ed50276f5ef83abb72e6f469c5b91a0009bbca"}, - {file = 
"scikit_learn-1.3.1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:a683394bc3f80b7c312c27f9b14ebea7766b1f0a34faf1a2e9158d80e860ec26"}, - {file = "scikit_learn-1.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a15d964d9eb181c79c190d3dbc2fff7338786bf017e9039571418a1d53dab236"}, - {file = "scikit_learn-1.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ce9233cdf0cdcf0858a5849d306490bf6de71fa7603a3835124e386e62f2311"}, - {file = "scikit_learn-1.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:1ec668ce003a5b3d12d020d2cde0abd64b262ac5f098b5c84cf9657deb9996a8"}, - {file = "scikit_learn-1.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ccbbedae99325628c1d1cbe3916b7ef58a1ce949672d8d39c8b190e10219fd32"}, - {file = "scikit_learn-1.3.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:845f81c7ceb4ea6bac64ab1c9f2ce8bef0a84d0f21f3bece2126adcc213dfecd"}, - {file = "scikit_learn-1.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8454d57a22d856f1fbf3091bd86f9ebd4bff89088819886dc0c72f47a6c30652"}, - {file = "scikit_learn-1.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d993fb70a1d78c9798b8f2f28705bfbfcd546b661f9e2e67aa85f81052b9c53"}, - {file = "scikit_learn-1.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:66f7bb1fec37d65f4ef85953e1df5d3c98a0f0141d394dcdaead5a6de9170347"}, + {file = "scikit-learn-1.3.2.tar.gz", hash = "sha256:a2f54c76accc15a34bfb9066e6c7a56c1e7235dda5762b990792330b52ccfb05"}, + {file = "scikit_learn-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e326c0eb5cf4d6ba40f93776a20e9a7a69524c4db0757e7ce24ba222471ee8a1"}, + {file = "scikit_learn-1.3.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:535805c2a01ccb40ca4ab7d081d771aea67e535153e35a1fd99418fcedd1648a"}, + {file = "scikit_learn-1.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1215e5e58e9880b554b01187b8c9390bf4dc4692eedeaf542d3273f4785e342c"}, 
+ {file = "scikit_learn-1.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ee107923a623b9f517754ea2f69ea3b62fc898a3641766cb7deb2f2ce450161"}, + {file = "scikit_learn-1.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:35a22e8015048c628ad099da9df5ab3004cdbf81edc75b396fd0cff8699ac58c"}, + {file = "scikit_learn-1.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6fb6bc98f234fda43163ddbe36df8bcde1d13ee176c6dc9b92bb7d3fc842eb66"}, + {file = "scikit_learn-1.3.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:18424efee518a1cde7b0b53a422cde2f6625197de6af36da0b57ec502f126157"}, + {file = "scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3271552a5eb16f208a6f7f617b8cc6d1f137b52c8a1ef8edf547db0259b2c9fb"}, + {file = "scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc4144a5004a676d5022b798d9e573b05139e77f271253a4703eed295bde0433"}, + {file = "scikit_learn-1.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:67f37d708f042a9b8d59551cf94d30431e01374e00dc2645fa186059c6c5d78b"}, + {file = "scikit_learn-1.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8db94cd8a2e038b37a80a04df8783e09caac77cbe052146432e67800e430c028"}, + {file = "scikit_learn-1.3.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:61a6efd384258789aa89415a410dcdb39a50e19d3d8410bd29be365bcdd512d5"}, + {file = "scikit_learn-1.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb06f8dce3f5ddc5dee1715a9b9f19f20d295bed8e3cd4fa51e1d050347de525"}, + {file = "scikit_learn-1.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b2de18d86f630d68fe1f87af690d451388bb186480afc719e5f770590c2ef6c"}, + {file = "scikit_learn-1.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:0402638c9a7c219ee52c94cbebc8fcb5eb9fe9c773717965c1f4185588ad3107"}, + {file = "scikit_learn-1.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:a19f90f95ba93c1a7f7924906d0576a84da7f3b2282ac3bfb7a08a32801add93"}, + {file = "scikit_learn-1.3.2-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:b8692e395a03a60cd927125eef3a8e3424d86dde9b2370d544f0ea35f78a8073"}, + {file = "scikit_learn-1.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15e1e94cc23d04d39da797ee34236ce2375ddea158b10bee3c343647d615581d"}, + {file = "scikit_learn-1.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:785a2213086b7b1abf037aeadbbd6d67159feb3e30263434139c98425e3dcfcf"}, + {file = "scikit_learn-1.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:64381066f8aa63c2710e6b56edc9f0894cc7bf59bd71b8ce5613a4559b6145e0"}, + {file = "scikit_learn-1.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6c43290337f7a4b969d207e620658372ba3c1ffb611f8bc2b6f031dc5c6d1d03"}, + {file = "scikit_learn-1.3.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:dc9002fc200bed597d5d34e90c752b74df516d592db162f756cc52836b38fe0e"}, + {file = "scikit_learn-1.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d08ada33e955c54355d909b9c06a4789a729977f165b8bae6f225ff0a60ec4a"}, + {file = "scikit_learn-1.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:763f0ae4b79b0ff9cca0bf3716bcc9915bdacff3cebea15ec79652d1cc4fa5c9"}, + {file = "scikit_learn-1.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:ed932ea780517b00dae7431e031faae6b49b20eb6950918eb83bd043237950e0"}, ] [package.dependencies] @@ -4352,48 +4357,35 @@ description = "Database Abstraction Library" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" files = [ - {file = "SQLAlchemy-1.4.49-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2e126cf98b7fd38f1e33c64484406b78e937b1a280e078ef558b95bf5b6895f6"}, - {file = "SQLAlchemy-1.4.49-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = 
"sha256:03db81b89fe7ef3857b4a00b63dedd632d6183d4ea5a31c5d8a92e000a41fc71"}, - {file = "SQLAlchemy-1.4.49-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:95b9df9afd680b7a3b13b38adf6e3a38995da5e162cc7524ef08e3be4e5ed3e1"}, - {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a63e43bf3f668c11bb0444ce6e809c1227b8f067ca1068898f3008a273f52b09"}, - {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f835c050ebaa4e48b18403bed2c0fda986525896efd76c245bdd4db995e51a4c"}, - {file = "SQLAlchemy-1.4.49-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c21b172dfb22e0db303ff6419451f0cac891d2e911bb9fbf8003d717f1bcf91"}, - {file = "SQLAlchemy-1.4.49-cp310-cp310-win32.whl", hash = "sha256:5fb1ebdfc8373b5a291485757bd6431de8d7ed42c27439f543c81f6c8febd729"}, - {file = "SQLAlchemy-1.4.49-cp310-cp310-win_amd64.whl", hash = "sha256:f8a65990c9c490f4651b5c02abccc9f113a7f56fa482031ac8cb88b70bc8ccaa"}, - {file = "SQLAlchemy-1.4.49-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8923dfdf24d5aa8a3adb59723f54118dd4fe62cf59ed0d0d65d940579c1170a4"}, - {file = "SQLAlchemy-1.4.49-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9ab2c507a7a439f13ca4499db6d3f50423d1d65dc9b5ed897e70941d9e135b0"}, - {file = "SQLAlchemy-1.4.49-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5debe7d49b8acf1f3035317e63d9ec8d5e4d904c6e75a2a9246a119f5f2fdf3d"}, - {file = "SQLAlchemy-1.4.49-cp311-cp311-win32.whl", hash = "sha256:82b08e82da3756765c2e75f327b9bf6b0f043c9c3925fb95fb51e1567fa4ee87"}, - {file = "SQLAlchemy-1.4.49-cp311-cp311-win_amd64.whl", hash = "sha256:171e04eeb5d1c0d96a544caf982621a1711d078dbc5c96f11d6469169bd003f1"}, - {file = "SQLAlchemy-1.4.49-cp36-cp36m-macosx_10_14_x86_64.whl", hash = 
"sha256:36e58f8c4fe43984384e3fbe6341ac99b6b4e083de2fe838f0fdb91cebe9e9cb"}, - {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b31e67ff419013f99ad6f8fc73ee19ea31585e1e9fe773744c0f3ce58c039c30"}, - {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c14b29d9e1529f99efd550cd04dbb6db6ba5d690abb96d52de2bff4ed518bc95"}, - {file = "SQLAlchemy-1.4.49-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c40f3470e084d31247aea228aa1c39bbc0904c2b9ccbf5d3cfa2ea2dac06f26d"}, - {file = "SQLAlchemy-1.4.49-cp36-cp36m-win32.whl", hash = "sha256:706bfa02157b97c136547c406f263e4c6274a7b061b3eb9742915dd774bbc264"}, - {file = "SQLAlchemy-1.4.49-cp36-cp36m-win_amd64.whl", hash = "sha256:a7f7b5c07ae5c0cfd24c2db86071fb2a3d947da7bd487e359cc91e67ac1c6d2e"}, - {file = "SQLAlchemy-1.4.49-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:4afbbf5ef41ac18e02c8dc1f86c04b22b7a2125f2a030e25bbb4aff31abb224b"}, - {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24e300c0c2147484a002b175f4e1361f102e82c345bf263242f0449672a4bccf"}, - {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:201de072b818f8ad55c80d18d1a788729cccf9be6d9dc3b9d8613b053cd4836d"}, - {file = "SQLAlchemy-1.4.49-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7653ed6817c710d0c95558232aba799307d14ae084cc9b1f4c389157ec50df5c"}, - {file = "SQLAlchemy-1.4.49-cp37-cp37m-win32.whl", hash = "sha256:647e0b309cb4512b1f1b78471fdaf72921b6fa6e750b9f891e09c6e2f0e5326f"}, - {file = "SQLAlchemy-1.4.49-cp37-cp37m-win_amd64.whl", hash = "sha256:ab73ed1a05ff539afc4a7f8cf371764cdf79768ecb7d2ec691e3ff89abbc541e"}, - {file = 
"SQLAlchemy-1.4.49-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:37ce517c011560d68f1ffb28af65d7e06f873f191eb3a73af5671e9c3fada08a"}, - {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1878ce508edea4a879015ab5215546c444233881301e97ca16fe251e89f1c55"}, - {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:0e8e608983e6f85d0852ca61f97e521b62e67969e6e640fe6c6b575d4db68557"}, - {file = "SQLAlchemy-1.4.49-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ccf956da45290df6e809ea12c54c02ace7f8ff4d765d6d3dfb3655ee876ce58d"}, - {file = "SQLAlchemy-1.4.49-cp38-cp38-win32.whl", hash = "sha256:f167c8175ab908ce48bd6550679cc6ea20ae169379e73c7720a28f89e53aa532"}, - {file = "SQLAlchemy-1.4.49-cp38-cp38-win_amd64.whl", hash = "sha256:45806315aae81a0c202752558f0df52b42d11dd7ba0097bf71e253b4215f34f4"}, - {file = "SQLAlchemy-1.4.49-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:b6d0c4b15d65087738a6e22e0ff461b407533ff65a73b818089efc8eb2b3e1de"}, - {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a843e34abfd4c797018fd8d00ffffa99fd5184c421f190b6ca99def4087689bd"}, - {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1c890421651b45a681181301b3497e4d57c0d01dc001e10438a40e9a9c25ee77"}, - {file = "SQLAlchemy-1.4.49-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d26f280b8f0a8f497bc10573849ad6dc62e671d2468826e5c748d04ed9e670d5"}, - {file = "SQLAlchemy-1.4.49-cp39-cp39-win32.whl", hash = "sha256:ec2268de67f73b43320383947e74700e95c6770d0c68c4e615e9897e46296294"}, - {file = "SQLAlchemy-1.4.49-cp39-cp39-win_amd64.whl", hash = 
"sha256:bbdf16372859b8ed3f4d05f925a984771cd2abd18bd187042f24be4886c2a15f"}, - {file = "SQLAlchemy-1.4.49.tar.gz", hash = "sha256:06ff25cbae30c396c4b7737464f2a7fc37a67b7da409993b182b024cec80aed9"}, + {file = "SQLAlchemy-1.4.50-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d00665725063692c42badfd521d0c4392e83c6c826795d38eb88fb108e5660e5"}, + {file = "SQLAlchemy-1.4.50-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85292ff52ddf85a39367057c3d7968a12ee1fb84565331a36a8fead346f08796"}, + {file = "SQLAlchemy-1.4.50-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:d0fed0f791d78e7767c2db28d34068649dfeea027b83ed18c45a423f741425cb"}, + {file = "SQLAlchemy-1.4.50-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db4db3c08ffbb18582f856545f058a7a5e4ab6f17f75795ca90b3c38ee0a8ba4"}, + {file = "SQLAlchemy-1.4.50-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14b0cacdc8a4759a1e1bd47dc3ee3f5db997129eb091330beda1da5a0e9e5bd7"}, + {file = "SQLAlchemy-1.4.50-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1fb9cb60e0f33040e4f4681e6658a7eb03b5cb4643284172f91410d8c493dace"}, + {file = "SQLAlchemy-1.4.50-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4cb501d585aa74a0f86d0ea6263b9c5e1d1463f8f9071392477fd401bd3c7cc"}, + {file = "SQLAlchemy-1.4.50-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a7a66297e46f85a04d68981917c75723e377d2e0599d15fbe7a56abed5e2d75"}, + {file = "SQLAlchemy-1.4.50-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1db0221cb26d66294f4ca18c533e427211673ab86c1fbaca8d6d9ff78654293"}, 
+ {file = "SQLAlchemy-1.4.50-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b7dbe6369677a2bea68fe9812c6e4bbca06ebfa4b5cde257b2b0bf208709131"}, + {file = "SQLAlchemy-1.4.50-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a9bddb60566dc45c57fd0a5e14dd2d9e5f106d2241e0a2dc0c1da144f9444516"}, + {file = "SQLAlchemy-1.4.50-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82dd4131d88395df7c318eeeef367ec768c2a6fe5bd69423f7720c4edb79473c"}, + {file = "SQLAlchemy-1.4.50-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:273505fcad22e58cc67329cefab2e436006fc68e3c5423056ee0513e6523268a"}, + {file = "SQLAlchemy-1.4.50-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3257a6e09626d32b28a0c5b4f1a97bced585e319cfa90b417f9ab0f6145c33c"}, + {file = "SQLAlchemy-1.4.50-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:d69738d582e3a24125f0c246ed8d712b03bd21e148268421e4a4d09c34f521a5"}, + {file = "SQLAlchemy-1.4.50-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:34e1c5d9cd3e6bf3d1ce56971c62a40c06bfc02861728f368dcfec8aeedb2814"}, + {file = "SQLAlchemy-1.4.50-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1fcee5a2c859eecb4ed179edac5ffbc7c84ab09a5420219078ccc6edda45436"}, + {file = "SQLAlchemy-1.4.50-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fbaf6643a604aa17e7a7afd74f665f9db882df5c297bdd86c38368f2c471f37d"}, + {file = 
"SQLAlchemy-1.4.50-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2e70e0673d7d12fa6cd363453a0d22dac0d9978500aa6b46aa96e22690a55eab"}, + {file = "SQLAlchemy-1.4.50-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b881ac07d15fb3e4f68c5a67aa5cdaf9eb8f09eb5545aaf4b0a5f5f4659be18"}, + {file = "SQLAlchemy-1.4.50-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f6997da81114daef9203d30aabfa6b218a577fc2bd797c795c9c88c9eb78d49"}, + {file = "SQLAlchemy-1.4.50-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdb77e1789e7596b77fd48d99ec1d2108c3349abd20227eea0d48d3f8cf398d9"}, + {file = "SQLAlchemy-1.4.50-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:128a948bd40780667114b0297e2cc6d657b71effa942e0a368d8cc24293febb3"}, + {file = "SQLAlchemy-1.4.50-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2d526aeea1bd6a442abc7c9b4b00386fd70253b80d54a0930c0a216230a35be"}, + {file = "SQLAlchemy-1.4.50.tar.gz", hash = "sha256:3b97ddf509fc21e10b09403b5219b06c5b558b27fc2453150274fa4e70707dbf"}, ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\")"} +greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or 
platform_machine == \"WIN32\")"} [package.extras] aiomysql = ["aiomysql (>=0.2.0)", "greenlet (!=0.4.17)"] @@ -4483,8 +4475,8 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.18", markers = "python_version != \"3.10\" or platform_system != \"Windows\" or platform_python_implementation == \"PyPy\""}, {version = ">=1.22.3", markers = "python_version == \"3.10\" and platform_system == \"Windows\" and platform_python_implementation != \"PyPy\""}, + {version = ">=1.18", markers = "python_version != \"3.10\" or platform_system != \"Windows\" or platform_python_implementation == \"PyPy\""}, ] packaging = ">=21.3" pandas = ">=1.0" @@ -5012,4 +5004,4 @@ yfinance = ["yfinance"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.9.7 || >3.9.7,<4.0" -content-hash = "73f0448e3e2a2031b23c114b7a83cc59338825ef78dd3d8bf006c9710970f98a" +content-hash = "f83a0055f0f7f19f06194258b175c36d3bd5cb4b852f9538b76bf486b9cfbe8a"