From 314ba224ca484542e095852bb43fc1988ca0d705 Mon Sep 17 00:00:00 2001 From: scaliseraoul-sinaptik Date: Fri, 31 Jan 2025 10:58:37 +0100 Subject: [PATCH 1/5] feature(SqlLoader): transformations in SqlLoader --- pandasai/data_loader/loader.py | 16 +- pandasai/data_loader/local_loader.py | 7 - pandasai/data_loader/sql_loader.py | 7 +- .../data_loader/transformation_manager.py | 31 ++-- pandasai/dataframe/virtual_dataframe.py | 7 +- tests/unit_tests/data_loader/test_loader.py | 87 ----------- .../unit_tests/data_loader/test_sql_loader.py | 140 ++++++++++++++++++ 7 files changed, 166 insertions(+), 129 deletions(-) create mode 100644 tests/unit_tests/data_loader/test_sql_loader.py diff --git a/pandasai/data_loader/loader.py b/pandasai/data_loader/loader.py index e75f44292..cabc76e1e 100644 --- a/pandasai/data_loader/loader.py +++ b/pandasai/data_loader/loader.py @@ -1,5 +1,6 @@ import os +import pandas as pd import yaml from pandasai.dataframe.base import DataFrame @@ -12,6 +13,7 @@ ) from .query_builder import QueryBuilder from .semantic_layer_schema import SemanticLayerSchema +from .transformation_manager import TransformationManager from .view_query_builder import ViewQueryBuilder @@ -72,16 +74,12 @@ def load(self) -> DataFrame: """ raise MethodNotImplementedError("Loader not instantiated") - def _build_dataset( - self, schema: SemanticLayerSchema, dataset_path: str - ) -> DataFrame: - self.schema = schema - self.dataset_path = dataset_path - is_view = schema.view + def _apply_transformations(self, df: pd.DataFrame) -> pd.DataFrame: + if not self.schema.transformations: + return df - self.query_builder = ( - ViewQueryBuilder(schema) if is_view else QueryBuilder(schema) - ) + transformation_manager = TransformationManager(df) + return transformation_manager.apply_transformations(self.schema.transformations) def _get_abs_dataset_path(self): return os.path.join(find_project_root(), "datasets", self.dataset_path) diff --git a/pandasai/data_loader/local_loader.py b/pandasai/data_loader/local_loader.py index 6edfe0b80..e58c514cf 100644 --- a/pandasai/data_loader/local_loader.py +++ b/pandasai/data_loader/local_loader.py @@ -69,10 +69,3 @@ def _filter_columns(self, df: pd.DataFrame) -> pd.DataFrame: df_columns = df.columns.tolist() columns_to_keep = [col for col in df_columns if col in schema_columns] return df[columns_to_keep] - - def _apply_transformations(self, df: pd.DataFrame) -> pd.DataFrame: - if not self.schema.transformations: - return df - - transformation_manager = TransformationManager(df) - return transformation_manager.apply_transformations(self.schema.transformations) diff --git a/pandasai/data_loader/sql_loader.py b/pandasai/data_loader/sql_loader.py index 82dcacd16..4b2a47189 100644 --- a/pandasai/data_loader/sql_loader.py +++ b/pandasai/data_loader/sql_loader.py @@ -24,7 +24,6 @@ def __init__(self, schema: SemanticLayerSchema, dataset_path: str): self.query_builder: QueryBuilder = QueryBuilder(schema) def load(self) -> VirtualDataFrame: - self.query_builder = QueryBuilder(self.schema) return VirtualDataFrame( schema=self.schema, data_loader=SQLDatasetLoader(self.schema, self.dataset_path), @@ -37,9 +36,11 @@ def execute_query(self, query: str, params: Optional[list] = None) -> pd.DataFra formatted_query = self.query_builder.format_query(query) load_function = self._get_loader_function(source_type) - try: - return load_function(connection_info, formatted_query, params) + dataframe: pd.DataFrame = load_function( + connection_info, formatted_query, params + ) + return 
self._apply_transformations(dataframe) except Exception as e: raise RuntimeError( f"Failed to execute query for '{source_type}' with: {formatted_query}" diff --git a/pandasai/data_loader/transformation_manager.py b/pandasai/data_loader/transformation_manager.py index eb946475f..75d055bc3 100644 --- a/pandasai/data_loader/transformation_manager.py +++ b/pandasai/data_loader/transformation_manager.py @@ -1,9 +1,9 @@ from typing import Any, List, Optional, Union -import numpy as np import pandas as pd from ..exceptions import UnsupportedTransformation +from .semantic_layer_schema import Transformation class TransformationManager: @@ -268,12 +268,12 @@ def format_date(self, column: str, date_format: str) -> "TransformationManager": TransformationManager: Self for method chaining Example: - >>> df = pd.DataFrame({"date": ["2024-01-01 12:30:45"]}) + >>> df = pd.DataFrame({"date": ["2025-01-01 12:30:45"]}) >>> manager = TransformationManager(df) >>> result = manager.format_date("date", "%Y-%m-%d").df >>> print(result) date - 0 2024-01-01 + 0 2025-01-01 """ self.df[column] = self.df[column].dt.strftime(date_format) return self @@ -307,28 +307,28 @@ def to_numeric( return self def to_datetime( - self, column: str, format: Optional[str] = None, errors: str = "coerce" + self, column: str, _format: Optional[str] = None, errors: str = "coerce" ) -> "TransformationManager": """Convert values in a column to datetime type. Args: column (str): The column to transform - format (Optional[str]): Expected date format of the input + _format (Optional[str]): Expected date format of the input errors (str): How to handle parsing errors Returns: TransformationManager: Self for method chaining Example: - >>> df = pd.DataFrame({"date": ["2024-01-01", "invalid"]}) + >>> df = pd.DataFrame({"date": ["2025-01-01", "invalid"]}) >>> manager = TransformationManager(df) >>> result = manager.to_datetime("date", errors="coerce").df >>> print(result) date - 0 2024-01-01 + 0 2025-01-01 1 NaT """ - self.df[column] = pd.to_datetime(self.df[column], format=format, errors=errors) + self.df[column] = pd.to_datetime(self.df[column], format=_format, errors=errors) return self def fill_na(self, column: str, value: Any) -> "TransformationManager": @@ -884,27 +884,20 @@ def rename(self, column: str, new_name: str) -> "TransformationManager": return self def apply_transformations( - self, transformations: Optional[List[dict]] = None + self, transformations: List[Transformation] ) -> pd.DataFrame: """Apply a list of transformations to the DataFrame. 
Args:
-            transformations (Optional[List[dict]]): List of transformation configurations
+            transformations (List[Transformation]): List of transformation configurations
 
         Returns:
             pd.DataFrame: The transformed DataFrame
         """
-        if not transformations:
-            return self.df
 
         for transformation in transformations:
-            # Handle both dict and object transformations
-            if isinstance(transformation, dict):
-                transformation_type = transformation["type"]
-                params = transformation["params"]
-            else:
-                transformation_type = transformation.type
-                params = transformation.params
+            transformation_type = transformation.type
+            params = transformation.params
 
             handler = self.transformation_handlers.get(transformation_type)
             if not handler:
diff --git a/pandasai/dataframe/virtual_dataframe.py b/pandasai/dataframe/virtual_dataframe.py
index 2f68b21b7..ae446ba5f 100644
--- a/pandasai/dataframe/virtual_dataframe.py
+++ b/pandasai/dataframe/virtual_dataframe.py
@@ -1,15 +1,14 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, ClassVar
+from typing import TYPE_CHECKING, Optional
 
 import pandas as pd
 
-from pandasai.data_loader.semantic_layer_schema import SemanticLayerSchema
 from pandasai.dataframe.base import DataFrame
 from pandasai.exceptions import VirtualizationError
 
 if TYPE_CHECKING:
-    from pandasai.data_loader.loader import DatasetLoader
+    from pandasai.data_loader.sql_loader import SQLDatasetLoader
 
 
 class VirtualDataFrame(DataFrame):
@@ -25,7 +24,7 @@ class VirtualDataFrame(DataFrame):
     ]
 
     def __init__(self, *args, **kwargs):
-        self._loader: DatasetLoader = kwargs.pop("data_loader", None)
+        self._loader: Optional[SQLDatasetLoader] = kwargs.pop("data_loader", None)
         if not self._loader:
             raise VirtualizationError("Data loader is required for virtualization!")
         self._head = None
diff --git a/tests/unit_tests/data_loader/test_loader.py b/tests/unit_tests/data_loader/test_loader.py
index 4b20aaedf..1129d2d06 100644
--- a/tests/unit_tests/data_loader/test_loader.py
+++ b/tests/unit_tests/data_loader/test_loader.py
@@ -7,7 +7,6 @@
 from pandasai.data_loader.loader import DatasetLoader
 from pandasai.data_loader.local_loader import LocalDatasetLoader
 from pandasai.data_loader.semantic_layer_schema import SemanticLayerSchema
-from pandasai.data_loader.sql_loader import SQLDatasetLoader
 from pandasai.dataframe.base import DataFrame
 from pandasai.exceptions import InvalidDataSourceType
 
@@ -111,92 +110,6 @@ def test_apply_transformations(self, sample_schema):
         assert result.iloc[0]["email"] != "user1@example.com"
         assert result.iloc[0]["timestamp"].tzname() == "UTC"
 
-    def test_load_mysql_source(self, mysql_schema):
-        """Test loading data from a MySQL source creates a VirtualDataFrame and handles queries correctly."""
-        with patch("os.path.exists", return_value=True), patch(
-            "builtins.open", mock_open(read_data=str(mysql_schema.to_yaml()))
-        ), patch(
-            "pandasai.data_loader.sql_loader.SQLDatasetLoader.execute_query"
-        ) as mock_execute_query:
-            # Mock the query results
-            mock_execute_query.return_value = DataFrame(
-                pd.DataFrame(
-                    {
-                        "email": ["test@example.com"],
-                        "first_name": ["John"],
-                        "timestamp": [pd.Timestamp.now()],
-                    }
-                )
-            )
-
-            loader = SQLDatasetLoader(mysql_schema, "test/users")
-            logging.debug("Loading schema from dataset path: %s", loader)
-            result = loader.load()
-
-            # Test that we get a VirtualDataFrame
-            assert isinstance(result, DataFrame)
-            assert result.schema == mysql_schema
-
-            # Test that load_head() works
-            head_result = result.head()
-            assert isinstance(head_result, DataFrame)
-            assert "email" 
in head_result.columns - assert "first_name" in head_result.columns - assert "timestamp" in head_result.columns - - # Verify the SQL query was executed correctly - mock_execute_query.assert_called_once_with( - "SELECT email, first_name, timestamp FROM users ORDER BY RAND() LIMIT 5" - ) - - # Test executing a custom query - custom_query = "SELECT email FROM users WHERE first_name = 'John'" - result.execute_sql_query(custom_query) - mock_execute_query.assert_called_with(custom_query) - - def test_build_dataset_mysql_schema(self, mysql_schema): - """Test loading data from a MySQL schema directly and creates a VirtualDataFrame and handles queries correctly.""" - with patch("os.path.exists", return_value=True), patch( - "builtins.open", mock_open(read_data=str(mysql_schema.to_yaml())) - ), patch( - "pandasai.data_loader.sql_loader.SQLDatasetLoader.execute_query" - ) as mock_execute_query: - # Mock the query results - mock_execute_query.return_value = DataFrame( - pd.DataFrame( - { - "email": ["test@example.com"], - "first_name": ["John"], - "timestamp": [pd.Timestamp.now()], - } - ) - ) - - loader = SQLDatasetLoader(mysql_schema, "test/test") - logging.debug("Loading schema from dataset path: %s", loader) - result = loader.load() - - # Test that we get a VirtualDataFrame - assert isinstance(result, DataFrame) - assert result.schema == mysql_schema - - # Test that load_head() works - head_result = result.head() - assert isinstance(head_result, DataFrame) - assert "email" in head_result.columns - assert "first_name" in head_result.columns - assert "timestamp" in head_result.columns - - # Verify the SQL query was executed correctly - mock_execute_query.assert_called_once_with( - "SELECT email, first_name, timestamp FROM users ORDER BY RAND() LIMIT 5" - ) - - # Test executing a custom query - custom_query = "SELECT email FROM users WHERE first_name = 'John'" - result.execute_sql_query(custom_query) - mock_execute_query.assert_called_with(custom_query) - def test_build_dataset_csv_schema(self, sample_schema): """Test loading data from a CSV schema directly and creates a VirtualDataFrame and handles queries correctly.""" with patch("os.path.exists", return_value=True), patch( diff --git a/tests/unit_tests/data_loader/test_sql_loader.py b/tests/unit_tests/data_loader/test_sql_loader.py new file mode 100644 index 000000000..f433eddca --- /dev/null +++ b/tests/unit_tests/data_loader/test_sql_loader.py @@ -0,0 +1,140 @@ +import logging +from unittest.mock import MagicMock, mock_open, patch + +import pandas as pd +import pytest + +from pandasai import VirtualDataFrame +from pandasai.data_loader.loader import DatasetLoader +from pandasai.data_loader.local_loader import LocalDatasetLoader +from pandasai.data_loader.semantic_layer_schema import SemanticLayerSchema +from pandasai.data_loader.sql_loader import SQLDatasetLoader +from pandasai.dataframe.base import DataFrame +from pandasai.exceptions import InvalidDataSourceType + + +class TestSqlDatasetLoader: + def test_load_mysql_source(self, mysql_schema): + """Test loading data from a MySQL source creates a VirtualDataFrame and handles queries correctly.""" + with patch( + "pandasai.data_loader.sql_loader.SQLDatasetLoader.execute_query" + ) as mock_execute_query: + # Mock the query results + mock_execute_query.return_value = DataFrame( + pd.DataFrame( + { + "email": ["test@example.com"], + "first_name": ["John"], + "timestamp": [pd.Timestamp.now()], + } + ) + ) + + loader = SQLDatasetLoader(mysql_schema, "test/users") + result = loader.load() + + # Test 
that we get a VirtualDataFrame
+            assert isinstance(result, DataFrame)
+            assert result.schema == mysql_schema
+
+            # Test that load_head() works
+            head_result = result.head()
+            assert isinstance(head_result, DataFrame)
+            assert "email" in head_result.columns
+            assert "first_name" in head_result.columns
+            assert "timestamp" in head_result.columns
+
+            # Verify the SQL query was executed correctly
+            mock_execute_query.assert_called_once_with(
+                "SELECT email, first_name, timestamp FROM users ORDER BY RAND() LIMIT 5"
+            )
+
+            # Test executing a custom query
+            custom_query = "SELECT email FROM users WHERE first_name = 'John'"
+            result.execute_sql_query(custom_query)
+            mock_execute_query.assert_called_with(custom_query)
+
+    def test_mysql_schema(self, mysql_schema):
+        """Test that loading directly from a MySQL schema creates a VirtualDataFrame and handles queries correctly."""
+        with patch(
+            "pandasai.data_loader.sql_loader.SQLDatasetLoader.execute_query"
+        ) as mock_execute_query:
+            # Mock the query results
+            mock_execute_query.return_value = DataFrame(
+                pd.DataFrame(
+                    {
+                        "email": ["test@example.com"],
+                        "first_name": ["John"],
+                        "timestamp": [pd.Timestamp.now()],
+                    }
+                )
+            )
+
+            loader = SQLDatasetLoader(mysql_schema, "test/test")
+            logging.debug("Loading schema from dataset path: %s", loader)
+            result = loader.load()
+
+            # Test that we get a VirtualDataFrame
+            assert isinstance(result, DataFrame)
+            assert result.schema == mysql_schema
+
+            # Test that load_head() works
+            head_result = result.head()
+            assert isinstance(head_result, DataFrame)
+            assert "email" in head_result.columns
+            assert "first_name" in head_result.columns
+            assert "timestamp" in head_result.columns
+
+            # Verify the SQL query was executed correctly
+            mock_execute_query.assert_called_once_with(
+                "SELECT email, first_name, timestamp FROM users ORDER BY RAND() LIMIT 5"
+            )
+
+            # Test executing a custom query
+            custom_query = "SELECT email FROM users WHERE first_name = 'John'"
+            result.execute_sql_query(custom_query)
+            mock_execute_query.assert_called_with(custom_query)
+
+    def test_load_with_transformation(self, mysql_schema):
+        """Test that transformations declared in the schema are applied when loading from a MySQL source."""
+        with patch(
+            "pandasai.data_loader.sql_loader.SQLDatasetLoader._get_loader_function"
+        ) as mock_get_loader_function:
+            # Mock the query results
+
+            dti = pd.to_datetime(["2025-01-31 10:29:12.694309"])
+            dti = dti.tz_localize("Europe/Berlin")
+
+            loader_function = MagicMock()
+            loader_function.return_value = pd.DataFrame(
+                {
+                    "email": ["test@example.com"],
+                    "first_name": ["John"],
+                    "timestamp": dti,
+                }
+            )
+
+            mock_get_loader_function.return_value = loader_function
+
+            loader = SQLDatasetLoader(mysql_schema, "test/users")
+            result = loader.load()
+
+            # Test that we get a VirtualDataFrame
+            assert isinstance(result, VirtualDataFrame)
+            assert result.schema == mysql_schema
+
+            # Test that load_head() works
+            head_result = result.head()
+            assert isinstance(head_result, pd.DataFrame)
+            assert "email" in head_result.columns
+            assert head_result["email"][0] == "****@example.com"
+            assert head_result["timestamp"][0] == dti[0].tz_convert("UTC")
+            assert "first_name" in head_result.columns
+            assert "timestamp" in head_result.columns
+
+            # Verify the SQL query was executed correctly
+            loader_function.assert_called_once()
+            assert (
+                loader_function.call_args[0][1]
+                == "SELECT email, first_name, timestamp FROM users ORDER BY RAND() LIMIT 5"
+            )
From 25f44781660fe53f130d34132548b353aa48bcc6 Mon Sep 
17 00:00:00 2001
From: scaliseraoul-sinaptik
Date: Fri, 31 Jan 2025 17:26:07 +0100
Subject: [PATCH 2/5] feature(FileManager): add FileManager to make it feasible to work with the library in other environments

---
 pandasai/core/prompts/file_based_prompt.py | 40 ----------------
 pandasai/helpers/filemanager.py            | 53 ++++++++++++++++++++++
 2 files changed, 53 insertions(+), 40 deletions(-)
 delete mode 100644 pandasai/core/prompts/file_based_prompt.py
 create mode 100644 pandasai/helpers/filemanager.py

diff --git a/pandasai/core/prompts/file_based_prompt.py b/pandasai/core/prompts/file_based_prompt.py
deleted file mode 100644
index c3c3804b5..000000000
--- a/pandasai/core/prompts/file_based_prompt.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import os
-from pathlib import Path
-
-from ..exceptions import TemplateFileNotFoundError
-from .base import AbstractPrompt
-
-
-class FileBasedPrompt(AbstractPrompt):
-    """Base class for prompts supposed to read template content from a file.
-
-    `_path_to_template` attribute has to be specified.
-    """
-
-    _path_to_template: str
-
-    def __init__(self, **kwargs):
-        if (template_path := kwargs.pop("path_to_template", None)) is not None:
-            self._path_to_template = template_path
-        else:
-            current_dir_path = Path(__file__).parent
-            self._path_to_template = os.path.join(
-                current_dir_path, "..", self._path_to_template
-            )
-
-        self.conversation_text = self.template
-        super().__init__(**kwargs)
-
-    @property
-    def template(self) -> str:
-        try:
-            with open(self._path_to_template, encoding="utf-8") as fp:
-                return fp.read()
-        except FileNotFoundError as e:
-            raise TemplateFileNotFoundError(
-                self._path_to_template, self.__class__.__name__
-            ) from e
-        except IOError as exc:
-            raise RuntimeError(
-                f"Failed to read template file '{self._path_to_template}': {exc}"
-            ) from exc
diff --git a/pandasai/helpers/filemanager.py b/pandasai/helpers/filemanager.py
new file mode 100644
index 000000000..145b69da6
--- /dev/null
+++ b/pandasai/helpers/filemanager.py
@@ -0,0 +1,53 @@
+import os
+from abc import ABC, abstractmethod
+from pandasai.helpers.path import find_project_root
+
+class FileLoader(ABC):
+    """Abstract base class for file loaders, supporting local and remote backends."""
+
+    @abstractmethod
+    def load(self, file_path: str) -> str:
+        """Reads the content of a file."""
+        pass
+
+    @abstractmethod
+    def write(self, file_path: str, content: str) -> None:
+        """Writes content to a file."""
+        pass
+
+    @abstractmethod
+    def exists(self, file_path: str) -> bool:
+        """Checks if a file or directory exists."""
+        pass
+
+    @abstractmethod
+    def mkdir(self, dir_path: str) -> None:
+        """Creates a directory if it doesn't exist."""
+        pass
+
+
+class DefaultFileLoader(FileLoader):
+    """Local file system implementation of FileLoader."""
+
+    def __init__(self):
+        self.base_path = find_project_root()
+
+    def load(self, file_path: str) -> str:
+        full_path = self.base_path / file_path
+        with open(full_path, "r", encoding="utf-8") as f:
+            return f.read()
+
+    def write(self, file_path: str, content: str) -> None:
+        full_path = self.base_path / file_path
+        with open(full_path, "w", encoding="utf-8") as f:
+            f.write(content)
+
+    def exists(self, file_path: str) -> bool:
+        """Checks if a file or directory exists."""
+        full_path = self.base_path / file_path
+        return os.path.exists(full_path)
+
+    def mkdir(self, dir_path: str) -> None:
+        """Creates a directory if it doesn't exist."""
+        full_path = self.base_path / dir_path
+        os.makedirs(full_path, exist_ok=True)
\ No newline at end of file
From 
bcc04a9427037a4fbf73c19d976560c5ada73ca2 Mon Sep 17 00:00:00 2001 From: scaliseraoul-sinaptik Date: Fri, 31 Jan 2025 17:27:42 +0100 Subject: [PATCH 3/5] feature(FileManager): filemanager full implementation --- pandasai/__init__.py | 17 +- pandasai/config.py | 3 + pandasai/data_loader/loader.py | 26 ++- pandasai/data_loader/local_loader.py | 2 +- pandasai/dataframe/base.py | 57 +++--- pandasai/helpers/filemanager.py | 40 +++- pandasai/helpers/path.py | 1 + tests/unit_tests/agent/test_agent_chat.py | 3 +- .../unit_tests/agent/test_agent_llm_judge.py | 3 +- tests/unit_tests/conftest.py | 13 ++ tests/unit_tests/data_loader/test_loader.py | 8 +- tests/unit_tests/dataframe/test_dataframe.py | 18 +- tests/unit_tests/test_pandasai_init.py | 179 +++--------------- 13 files changed, 135 insertions(+), 235 deletions(-) diff --git a/pandasai/__init__.py b/pandasai/__init__.py index cbdbee2c6..51c878a7c 100644 --- a/pandasai/__init__.py +++ b/pandasai/__init__.py @@ -98,17 +98,17 @@ def create( org_name, dataset_name = get_validated_dataset_path(path) - dataset_directory = os.path.join( - find_project_root(), "datasets", org_name, dataset_name - ) + dataset_directory = str(os.path.join(org_name, dataset_name)) - schema_path = os.path.join(str(dataset_directory), "schema.yaml") - parquet_file_path = os.path.join(str(dataset_directory), "data.parquet") + schema_path = os.path.join(dataset_directory, "schema.yaml") + parquet_file_path = os.path.join(dataset_directory, "data.parquet") + + file_manager = config.get().file_manager # Check if dataset already exists - if os.path.exists(dataset_directory) and os.path.exists(schema_path): + if file_manager.exists(dataset_directory) and file_manager.exists(schema_path): raise ValueError(f"Dataset already exists at path: {path}") - os.makedirs(dataset_directory, exist_ok=True) + file_manager.mkdir(dataset_directory) if df is None and source is None and not view: raise InvalidConfigError( @@ -135,8 +135,7 @@ def create( if columns: schema.columns = [Column(**column) for column in columns] - with open(schema_path, "w") as yml_file: - yml_file.write(schema.to_yaml()) + file_manager.write(schema_path, schema.to_yaml()) print(f"Dataset saved successfully to path: {dataset_directory}") diff --git a/pandasai/config.py b/pandasai/config.py index fb13c3148..22b420fe5 100644 --- a/pandasai/config.py +++ b/pandasai/config.py @@ -1,9 +1,11 @@ import os +from abc import ABC, abstractmethod from importlib.util import find_spec from typing import Any, Dict, Optional from pydantic import BaseModel, ConfigDict +from pandasai.helpers.filemanager import DefaultFileManager, FileManager from pandasai.llm.base import LLM @@ -13,6 +15,7 @@ class Config(BaseModel): enable_cache: bool = True max_retries: int = 3 llm: Optional[LLM] = None + file_manager: FileManager = DefaultFileManager() model_config = ConfigDict(arbitrary_types_allowed=True) diff --git a/pandasai/data_loader/loader.py b/pandasai/data_loader/loader.py index cabc76e1e..fc50a580b 100644 --- a/pandasai/data_loader/loader.py +++ b/pandasai/data_loader/loader.py @@ -5,9 +5,9 @@ from pandasai.dataframe.base import DataFrame from pandasai.exceptions import MethodNotImplementedError -from pandasai.helpers.path import find_project_root from pandasai.helpers.sql_sanitizer import sanitize_sql_table_name +from .. 
import ConfigManager
 from ..constants import (
     LOCAL_SOURCE_TYPES,
 )
@@ -48,21 +48,22 @@ def create_loader_from_path(cls, dataset_path: str) -> "DatasetLoader":
         """
         Factory method to create the appropriate loader based on the dataset type.
         """
-        schema = cls._read_local_schema(dataset_path)
+        schema = cls._read_schema_file(dataset_path)
         return DatasetLoader.create_loader_from_schema(schema, dataset_path)
 
     @staticmethod
-    def _read_local_schema(dataset_path: str) -> SemanticLayerSchema:
-        schema_path = os.path.join(
-            find_project_root(), "datasets", dataset_path, "schema.yaml"
-        )
-        if not os.path.exists(schema_path):
+    def _read_schema_file(dataset_path: str) -> SemanticLayerSchema:
+        schema_path = os.path.join(dataset_path, "schema.yaml")
+
+        file_manager = ConfigManager.get().file_manager
+
+        if not file_manager.exists(schema_path):
             raise FileNotFoundError(f"Schema file not found: {schema_path}")
 
-        with open(schema_path, "r") as file:
-            raw_schema = yaml.safe_load(file)
-            raw_schema["name"] = sanitize_sql_table_name(raw_schema["name"])
-            return SemanticLayerSchema(**raw_schema)
+        schema_file = file_manager.load(schema_path)
+        raw_schema = yaml.safe_load(schema_file)
+        raw_schema["name"] = sanitize_sql_table_name(raw_schema["name"])
+        return SemanticLayerSchema(**raw_schema)
 
     def load(self) -> DataFrame:
         """
@@ -80,6 +81,3 @@ def _apply_transformations(self, df: pd.DataFrame) -> pd.DataFrame:
 
         transformation_manager = TransformationManager(df)
         return transformation_manager.apply_transformations(self.schema.transformations)
-
-    def _get_abs_dataset_path(self):
-        return os.path.join(find_project_root(), "datasets", self.dataset_path)
diff --git a/pandasai/data_loader/local_loader.py b/pandasai/data_loader/local_loader.py
index e58c514cf..69dc298a5 100644
--- a/pandasai/data_loader/local_loader.py
+++ b/pandasai/data_loader/local_loader.py
@@ -37,7 +37,7 @@ def _load_from_local_source(self) -> pd.DataFrame:
             )
 
         filepath = os.path.join(
-            str(self._get_abs_dataset_path()),
+            self.dataset_path,
             self.schema.source.path,
         )
 
diff --git a/pandasai/dataframe/base.py b/pandasai/dataframe/base.py
index a45d0587d..702b3a5cd 100644
--- a/pandasai/dataframe/base.py
+++ b/pandasai/dataframe/base.py
@@ -10,7 +10,7 @@
 from pandas._typing import Axes, Dtype
 
 import pandasai as pai
-from pandasai.config import Config
+from pandasai.config import Config, ConfigManager
 from pandasai.core.response import BaseResponse
 from pandasai.data_loader.semantic_layer_schema import (
     Column,
@@ -19,7 +19,6 @@
 )
 from pandasai.exceptions import DatasetNotFound, PandaAIApiKeyError
 from pandasai.helpers.dataframe_serializer import DataframeSerializer
-from pandasai.helpers.path import find_project_root
 from pandasai.helpers.session import get_pandaai_session
 
 if TYPE_CHECKING:
@@ -164,38 +163,32 @@ def push(self):
             "name": self.schema.name,
         }
 
-        dataset_directory = os.path.join(find_project_root(), "datasets", self.path)
-
+        dataset_directory = self.path
+        file_manager = ConfigManager.get().file_manager
         headers = {"accept": "application/json", "x-authorization": f"Bearer {api_key}"}
 
         files = []
        schema_file_path = os.path.join(dataset_directory, "schema.yaml")
         data_file_path = os.path.join(dataset_directory, "data.parquet")
 
-        try:
-            # Open schema.yaml
-            schema_file = open(schema_file_path, "rb")
-            files.append(("files", ("schema.yaml", schema_file, "application/x-yaml")))
-
-            # Check if data.parquet exists and open it
-            if os.path.exists(data_file_path):
-                data_file = open(data_file_path, "rb")
-                files.append(
- ("files", ("data.parquet", data_file, "application/octet-stream")) - ) - - # Send the POST request - request_session.post( - "/datasets/push", - files=files, - params=params, - headers=headers, + # Open schema.yaml + schema_file = file_manager.load_binary(schema_file_path) + files.append(("files", ("schema.yaml", schema_file, "application/x-yaml"))) + + # Check if data.parquet exists and open it + if file_manager.exists(data_file_path): + data_file = file_manager.load_binary(data_file_path) + files.append( + ("files", ("data.parquet", data_file, "application/octet-stream")) ) - finally: - # Ensure files are closed after the request - for _, (name, file, _) in files: - file.close() + # Send the POST request + request_session.post( + "/datasets/push", + files=files, + params=params, + headers=headers, + ) print("Your dataset was successfully pushed to the remote server!") print(f"🔗 URL: https://app.pandabi.ai/datasets/{self.path}") @@ -218,20 +211,18 @@ def pull(self): with ZipFile(BytesIO(file_data.content)) as zip_file: for file_name in zip_file.namelist(): - target_path = os.path.join( - find_project_root(), "datasets", self.path, file_name - ) + target_path = os.path.join(self.path, file_name) + file_manager = ConfigManager.get().file_manager # Check if the file already exists - if os.path.exists(target_path): + if file_manager.exists(target_path): print(f"Replacing existing file: {target_path}") # Ensure target directory exists - os.makedirs(os.path.dirname(target_path), exist_ok=True) + file_manager.mkdir(os.path.dirname(target_path)) # Extract the file - with open(target_path, "wb") as f: - f.write(zip_file.read(file_name)) + file_manager.write_binary(target_path, zip_file.read(file_name)) # Reloads the Dataframe from pandasai import DatasetLoader diff --git a/pandasai/helpers/filemanager.py b/pandasai/helpers/filemanager.py index 145b69da6..bf06fe7da 100644 --- a/pandasai/helpers/filemanager.py +++ b/pandasai/helpers/filemanager.py @@ -1,8 +1,10 @@ import os from abc import ABC, abstractmethod + from pandasai.helpers.path import find_project_root -class FileLoader(ABC): + +class FileManager(ABC): """Abstract base class for file loaders, supporting local and remote backends.""" @abstractmethod @@ -10,11 +12,21 @@ def load(self, file_path: str) -> str: """Reads the content of a file.""" pass + @abstractmethod + def load_binary(self, file_path: str) -> bytes: + """Reads the content of a file as bytes.""" + pass + @abstractmethod def write(self, file_path: str, content: str) -> None: """Writes content to a file.""" pass + @abstractmethod + def write_binary(self, file_path: str, content: bytes) -> None: + """Writes binary content to a file.""" + pass + @abstractmethod def exists(self, file_path: str) -> bool: """Checks if a file or directory exists.""" @@ -26,28 +38,36 @@ def mkdir(self, dir_path: str) -> None: pass -class DefaultFileLoader(FileLoader): +class DefaultFileManager(FileManager): """Local file system implementation of FileLoader.""" def __init__(self): - self.base_path = find_project_root() + self.base_path = os.path.join(find_project_root(), "datasets") def load(self, file_path: str) -> str: - full_path = self.base_path / file_path + full_path = os.path.join(self.base_path, file_path) with open(full_path, "r", encoding="utf-8") as f: return f.read() + def load_binary(self, file_path: str) -> bytes: + full_path = os.path.join(self.base_path, file_path) + with open(full_path, "rb") as f: + return f.read() + def write(self, file_path: str, content: str) -> None: - full_path = 
self.base_path / file_path + full_path = os.path.join(self.base_path, file_path) with open(full_path, "w", encoding="utf-8") as f: f.write(content) + def write_binary(self, file_path: str, content: bytes) -> None: + full_path = os.path.join(self.base_path, file_path) + with open(full_path, "wb") as f: + f.write(content) + def exists(self, file_path: str) -> bool: - """Checks if a file or directory exists.""" - full_path = self.base_path / file_path + full_path = os.path.join(self.base_path, file_path) return os.path.exists(full_path) def mkdir(self, dir_path: str) -> None: - """Creates a directory if it doesn't exist.""" - full_path = self.base_path / dir_path - os.makedirs(full_path, exist_ok=True) \ No newline at end of file + full_path = os.path.join(self.base_path, dir_path) + os.makedirs(full_path, exist_ok=True) diff --git a/pandasai/helpers/path.py b/pandasai/helpers/path.py index 612787ef5..58708d4c5 100644 --- a/pandasai/helpers/path.py +++ b/pandasai/helpers/path.py @@ -10,6 +10,7 @@ def find_project_root(filename=None): # Get the path of the file that is be # ing executed + current_file_path = os.path.abspath(os.getcwd()) # Navigate back until we either find a $filename file or there is no parent diff --git a/tests/unit_tests/agent/test_agent_chat.py b/tests/unit_tests/agent/test_agent_chat.py index f7a6f85c2..5b0c961d8 100644 --- a/tests/unit_tests/agent/test_agent_chat.py +++ b/tests/unit_tests/agent/test_agent_chat.py @@ -7,13 +7,14 @@ import pytest import pandasai as pai -from pandasai import DataFrame, find_project_root +from pandasai import DataFrame from pandasai.core.response import ( ChartResponse, DataFrameResponse, NumberResponse, StringResponse, ) +from pandasai.helpers.filemanager import find_project_root # Read the API key from an environment variable API_KEY = os.getenv("PANDABI_API_KEY_TEST_CHAT", None) diff --git a/tests/unit_tests/agent/test_agent_llm_judge.py b/tests/unit_tests/agent/test_agent_llm_judge.py index e7bdaf53f..c2d5e0c7e 100644 --- a/tests/unit_tests/agent/test_agent_llm_judge.py +++ b/tests/unit_tests/agent/test_agent_llm_judge.py @@ -7,7 +7,8 @@ from pydantic import BaseModel import pandasai as pai -from pandasai import DataFrame, find_project_root +from pandasai import DataFrame +from pandasai.helpers.path import find_project_root # Read the API key from an environment variable JUDGE_OPENAI_API_KEY = os.getenv("JUDGE_OPENAI_API_KEY", None) diff --git a/tests/unit_tests/conftest.py b/tests/unit_tests/conftest.py index c754ecb3a..b56456aa8 100644 --- a/tests/unit_tests/conftest.py +++ b/tests/unit_tests/conftest.py @@ -5,9 +5,11 @@ import pytest +from pandasai import ConfigManager from pandasai.data_loader.loader import DatasetLoader from pandasai.data_loader.semantic_layer_schema import SemanticLayerSchema from pandasai.dataframe.base import DataFrame +from pandasai.helpers.filemanager import DefaultFileManager from pandasai.helpers.path import find_project_root @@ -171,3 +173,14 @@ def mock_loader_instance(sample_df): mock_create_loader_from_schema.return_value = mock_loader_instance yield mock_loader_instance + + +@pytest.fixture +def mock_file_manager(): + """Fixture to mock FileManager and its methods.""" + with patch.object(ConfigManager, "get") as mock_config_get: + # Create a mock FileManager + mock_file_manager = MagicMock() + mock_file_manager.exists.return_value = False + mock_config_get.return_value.file_manager = mock_file_manager + yield mock_file_manager diff --git a/tests/unit_tests/data_loader/test_loader.py 
b/tests/unit_tests/data_loader/test_loader.py index 1129d2d06..7ff5f4b89 100644 --- a/tests/unit_tests/data_loader/test_loader.py +++ b/tests/unit_tests/data_loader/test_loader.py @@ -41,14 +41,14 @@ def test_load_schema(self, sample_schema): with patch("os.path.exists", return_value=True), patch( "builtins.open", mock_open(read_data=str(sample_schema.to_yaml())) ): - schema = DatasetLoader._read_local_schema("test/users") + schema = DatasetLoader._read_schema_file("test/users") assert schema == sample_schema def test_load_schema_mysql(self, mysql_schema): with patch("os.path.exists", return_value=True), patch( "builtins.open", mock_open(read_data=str(mysql_schema.to_yaml())) ): - schema = DatasetLoader._read_local_schema("test/users") + schema = DatasetLoader._read_schema_file("test/users") assert schema == mysql_schema def test_load_schema_mysql_sanitized_name(self, mysql_schema): @@ -57,13 +57,13 @@ def test_load_schema_mysql_sanitized_name(self, mysql_schema): with patch("os.path.exists", return_value=True), patch( "builtins.open", mock_open(read_data=str(mysql_schema.to_yaml())) ): - schema = DatasetLoader._read_local_schema("test/users") + schema = DatasetLoader._read_schema_file("test/users") assert schema.name == "non_sanitized_name" def test_load_schema_file_not_found(self): with patch("os.path.exists", return_value=False): with pytest.raises(FileNotFoundError): - DatasetLoader._read_local_schema("test/users") + DatasetLoader._read_schema_file("test/users") def test_read_parquet(self, sample_schema): loader = LocalDatasetLoader(sample_schema, "test") diff --git a/tests/unit_tests/dataframe/test_dataframe.py b/tests/unit_tests/dataframe/test_dataframe.py index 7c4ae0da6..3d64f6173 100644 --- a/tests/unit_tests/dataframe/test_dataframe.py +++ b/tests/unit_tests/dataframe/test_dataframe.py @@ -2,7 +2,6 @@ import pandas as pd import pytest -from numpy import False_ import pandasai from pandasai.agent import Agent @@ -79,10 +78,10 @@ def test_column_hash(self, sample_df): assert len(sample_df.column_hash) == 32 # MD5 hash length @patch("pandasai.dataframe.base.get_pandaai_session") - @patch("pandasai.dataframe.base.os.path.exists") - @patch("pandasai.dataframe.base.open", new_callable=mock_open) + @patch("pandasai.helpers.filemanager.os.path.exists") + @patch("pandasai.helpers.filemanager.open", new_callable=mock_open) @patch("pandasai.dataframe.base.os.environ") - @patch("pandasai.dataframe.base.find_project_root") + @patch("pandasai.helpers.path.find_project_root") def test_push_successful( self, mock_find_project_root, @@ -115,13 +114,13 @@ def test_push_successful( files=[ ( "files", - ("schema.yaml", mock_open.return_value, "application/x-yaml"), + ("schema.yaml", "", "application/x-yaml"), ), ( "files", ( "data.parquet", - mock_open.return_value, + "", "application/octet-stream", ), ), @@ -155,8 +154,8 @@ def test_push_raises_error_if_api_key_is_missing(self, mock_environ, sample_df): sample_df.path = "test/test" sample_df.push() - @patch("pandasai.dataframe.base.os.path.exists") - @patch("pandasai.dataframe.base.open", new_callable=mock_open) + @patch("pandasai.helpers.filemanager.os.path.exists") + @patch("pandasai.helpers.filemanager.open", new_callable=mock_open) @patch("pandasai.dataframe.base.get_pandaai_session") @patch("pandasai.dataframe.base.os.environ") def test_push_closes_files_on_completion( @@ -179,6 +178,3 @@ def test_push_closes_files_on_completion( # Call the method sample_df.path = "test/test" sample_df.push() - - # Assert that files were closed after the 
request - mock_open.return_value.close.assert_called() diff --git a/tests/unit_tests/test_pandasai_init.py b/tests/unit_tests/test_pandasai_init.py index a03a8e2ef..33bd058c1 100644 --- a/tests/unit_tests/test_pandasai_init.py +++ b/tests/unit_tests/test_pandasai_init.py @@ -275,27 +275,16 @@ def test_load_with_custom_api_url( params={"path": "org/dataset"}, ) - @patch("pandasai.helpers.path.find_project_root") - @patch("os.makedirs") def test_create_valid_dataset_no_params( - self, mock_makedirs, mock_find_project_root, sample_df, mock_loader_instance + self, sample_df, mock_loader_instance, mock_file_manager ): """Test creating a dataset with valid inputs.""" - mock_find_project_root.return_value = os.path.join("mock", "root") - - with patch("builtins.open", mock_open()) as mock_file, patch.object( - sample_df, "to_parquet" - ) as mock_to_parquet, patch( - "pandasai.find_project_root", return_value=os.path.join("mock", "root") - ): + with patch.object(sample_df, "to_parquet") as mock_to_parquet: result = pandasai.create("test-org/test-dataset", sample_df) # Check if directories were created - mock_makedirs.assert_called_once_with( - os.path.join( - os.path.join("mock", "root", "datasets", "test-org", "test-dataset") - ), - exist_ok=True, + mock_file_manager.mkdir.assert_called_once_with( + os.path.join("test-org", "test-dataset") ) # Check if DataFrame was saved @@ -304,17 +293,7 @@ def test_create_valid_dataset_no_params( assert mock_to_parquet.call_args[1]["index"] is False # Check if schema was saved - mock_file.assert_called_once_with( - os.path.join( - "mock", - "root", - "datasets", - "test-org", - "test-dataset", - "schema.yaml", - ), - "w", - ) + mock_file_manager.write.assert_called_once() # Check returned DataFrame assert isinstance(result, DataFrame) @@ -396,30 +375,29 @@ def mock_exists_side_effect(path): mock_file.assert_called_once() mock_loader_instance.load.assert_called_once() - @patch("pandasai.helpers.path.find_project_root") - @patch("os.makedirs") + @patch("pandasai.config.ConfigManager.get") def test_create_valid_dataset_with_description( - self, mock_makedirs, mock_find_project_root, sample_df, mock_loader_instance + self, mock_config_get, sample_df, mock_loader_instance ): """Test creating a dataset with valid inputs.""" - mock_find_project_root.return_value = os.path.join("mock", "root") mock_schema = MagicMock() sample_df.schema = mock_schema - with patch("builtins.open", mock_open()) as mock_file, patch.object( - sample_df, "to_parquet" - ) as mock_to_parquet, patch( - "pandasai.find_project_root", return_value=os.path.join("mock", "root") - ): + mock_file_manager = MagicMock() + mock_file_manager.exists.return_value = False + mock_config_get.return_value.file_manager = mock_file_manager + + with patch( + "pandasai.helpers.filemanager.open", mock_open() + ) as mock_file, patch.object(sample_df, "to_parquet") as mock_to_parquet: result = pandasai.create( "test-org/test-dataset", sample_df, description="test_description" ) # Check if directories were created - mock_makedirs.assert_called_once_with( - os.path.join("mock", "root", "datasets", "test-org", "test-dataset"), - exist_ok=True, + mock_file_manager.mkdir.assert_called_once_with( + os.path.join("test-org", "test-dataset") ) # Check if DataFrame was saved @@ -428,17 +406,7 @@ def test_create_valid_dataset_with_description( assert mock_to_parquet.call_args[1]["index"] is False # Check if schema was saved - mock_file.assert_called_once_with( - os.path.join( - "mock", - "root", - "datasets", - "test-org", - 
"test-dataset", - "schema.yaml", - ), - "w", - ) + mock_file_manager.write.assert_called_once() # Check returned DataFrame assert isinstance(result, DataFrame) @@ -446,28 +414,20 @@ def test_create_valid_dataset_with_description( assert mock_schema.description == "test_description" mock_loader_instance.load.assert_called_once() - @patch("pandasai.helpers.path.find_project_root") - @patch("os.makedirs") def test_create_valid_dataset_with_columns( - self, mock_makedirs, mock_find_project_root, sample_df, mock_loader_instance + self, sample_df, mock_loader_instance, mock_file_manager ): """Test creating a dataset with valid inputs.""" - mock_find_project_root.return_value = os.path.join("mock", "root") - with patch("builtins.open", mock_open()) as mock_file, patch.object( - sample_df, "to_parquet" - ) as mock_to_parquet, patch( - "pandasai.find_project_root", return_value=os.path.join("mock", "root") - ): + with patch.object(sample_df, "to_parquet") as mock_to_parquet: columns_dict = [{"name": "a"}, {"name": "b"}] result = pandasai.create( "test-org/test-dataset", sample_df, columns=columns_dict ) # Check if directories were created - mock_makedirs.assert_called_once_with( - os.path.join("mock", "root", "datasets", "test-org", "test-dataset"), - exist_ok=True, + mock_file_manager.mkdir.assert_called_once_with( + os.path.join("test-org", "test-dataset") ) # Check if DataFrame was saved @@ -476,17 +436,7 @@ def test_create_valid_dataset_with_columns( assert mock_to_parquet.call_args[1]["index"] is False # Check if schema was saved - mock_file.assert_called_once_with( - os.path.join( - "mock", - "root", - "datasets", - "test-org", - "test-dataset", - "schema.yaml", - ), - "w", - ) + mock_file_manager.write.assert_called_once() # Check returned DataFrame assert isinstance(result, DataFrame) @@ -500,7 +450,7 @@ def test_create_valid_dataset_with_columns( @patch("pandasai.helpers.path.find_project_root") @patch("os.makedirs") def test_create_dataset_wrong_columns( - self, mock_makedirs, mock_find_project_root, sample_df + self, mock_makedirs, mock_find_project_root, sample_df, mock_file_manager ): """Test creating a dataset with valid inputs.""" mock_find_project_root.return_value = os.path.join("mock", "root") @@ -517,18 +467,10 @@ def test_create_dataset_wrong_columns( "test-org/test-dataset", sample_df, columns=columns_dict ) - @patch("pandasai.helpers.path.find_project_root") - @patch("os.makedirs") def test_create_valid_dataset_with_mysql( - self, - mock_makedirs, - mock_find_project_root, - sample_df, - mysql_connection_json, - mock_loader_instance, + self, sample_df, mysql_connection_json, mock_loader_instance, mock_file_manager ): """Test creating a dataset with valid inputs.""" - mock_find_project_root.return_value = os.path.join("mock", "root") with patch("builtins.open", mock_open()) as mock_file, patch.object( sample_df, "to_parquet" @@ -543,22 +485,8 @@ def test_create_valid_dataset_with_mysql( ) # Check if directories were created - mock_makedirs.assert_called_once_with( - os.path.join("mock", "root", "datasets", "test-org", "test-dataset"), - exist_ok=True, - ) - - # Check if schema was saved - mock_file.assert_called_once_with( - os.path.join( - "mock", - "root", - "datasets", - "test-org", - "test-dataset", - "schema.yaml", - ), - "w", + mock_file_manager.mkdir.assert_called_once_with( + os.path.join("test-org", "test-dataset") ) # Check returned DataFrame @@ -567,19 +495,9 @@ def test_create_valid_dataset_with_mysql( assert result.schema.description is None assert 
mock_loader_instance.load.call_count == 1 - @patch("pandasai.helpers.path.find_project_root") - @patch("os.makedirs") def test_create_valid_dataset_with_postgres( - self, - mock_makedirs, - mock_find_project_root, - sample_df, - mysql_connection_json, - mock_loader_instance, + self, sample_df, mysql_connection_json, mock_loader_instance, mock_file_manager ): - """Test creating a dataset with valid inputs.""" - mock_find_project_root.return_value = os.path.join("mock", "root") - with patch("builtins.open", mock_open()) as mock_file, patch.object( sample_df, "to_parquet" ) as mock_to_parquet, patch( @@ -592,25 +510,6 @@ def test_create_valid_dataset_with_postgres( columns=columns_dict, ) - # Check if directories were created - mock_makedirs.assert_called_once_with( - os.path.join("mock", "root", "datasets", "test-org", "test-dataset"), - exist_ok=True, - ) - - # Check if schema was saved - mock_file.assert_called_once_with( - os.path.join( - "mock", - "root", - "datasets", - "test-org", - "test-dataset", - "schema.yaml", - ), - "w", - ) - # Check returned DataFrame assert isinstance(result, DataFrame) assert result.schema.name == sample_df.schema.name @@ -640,13 +539,10 @@ def test_create_with_no_dataframe_with_incorrect_type( with pytest.raises(ValueError, match="df must be a PandaAI DataFrame"): pandasai.create("test-org/test-dataset", df={"test": "test"}) - @patch("pandasai.helpers.path.find_project_root") - @patch("os.makedirs") def test_create_valid_view( - self, mock_makedirs, mock_find_project_root, sample_df, mock_loader_instance + self, sample_df, mock_loader_instance, mock_file_manager ): """Test creating a dataset with valid inputs.""" - mock_find_project_root.return_value = os.path.join("mock", "root") with patch("builtins.open", mock_open()) as mock_file, patch( "pandasai.find_project_root", return_value=os.path.join("mock", "root") @@ -669,25 +565,6 @@ def test_create_valid_view( "test-org/test-dataset", columns=columns, relations=relations, view=True ) - # Check if directories were created - mock_makedirs.assert_called_once_with( - os.path.join("mock", "root", "datasets", "test-org", "test-dataset"), - exist_ok=True, - ) - - # Check if schema was saved - mock_file.assert_called_once_with( - os.path.join( - "mock", - "root", - "datasets", - "test-org", - "test-dataset", - "schema.yaml", - ), - "w", - ) - # Check returned DataFrame assert isinstance(result, DataFrame) assert result.schema.name == sample_df.schema.name From cb1668b1499b8adebf580ab24af6ae5084d63638 Mon Sep 17 00:00:00 2001 From: scaliseraoul-sinaptik Date: Fri, 31 Jan 2025 17:36:58 +0100 Subject: [PATCH 4/5] feature(FileManager): format issues --- tests/unit_tests/data_loader/test_sql_loader.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/unit_tests/data_loader/test_sql_loader.py b/tests/unit_tests/data_loader/test_sql_loader.py index 776163bed..b46354eae 100644 --- a/tests/unit_tests/data_loader/test_sql_loader.py +++ b/tests/unit_tests/data_loader/test_sql_loader.py @@ -1,5 +1,4 @@ import logging - from unittest.mock import MagicMock, mock_open, patch import pandas as pd @@ -140,7 +139,6 @@ def test_load_with_transformation(self, mysql_schema): == "SELECT email, first_name, timestamp FROM users ORDER BY RAND() LIMIT 5" ) - def test_mysql_malicious_query(self, mysql_schema): """Test loading data from a MySQL source creates a VirtualDataFrame and handles queries correctly.""" with patch( From 459991a610ef5fb652db259d52c1f5904f1d003f Mon Sep 17 00:00:00 2001 From: scaliseraoul-sinaptik Date: Fri, 31 Jan 
2025 18:23:20 +0100 Subject: [PATCH 5/5] feature(FileManager): updated tests --- tests/unit_tests/test_pandasai_init.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/tests/unit_tests/test_pandasai_init.py b/tests/unit_tests/test_pandasai_init.py index 33bd058c1..46ff84bd5 100644 --- a/tests/unit_tests/test_pandasai_init.py +++ b/tests/unit_tests/test_pandasai_init.py @@ -375,22 +375,15 @@ def mock_exists_side_effect(path): mock_file.assert_called_once() mock_loader_instance.load.assert_called_once() - @patch("pandasai.config.ConfigManager.get") def test_create_valid_dataset_with_description( - self, mock_config_get, sample_df, mock_loader_instance + self, sample_df, mock_loader_instance, mock_file_manager ): """Test creating a dataset with valid inputs.""" mock_schema = MagicMock() sample_df.schema = mock_schema - mock_file_manager = MagicMock() - mock_file_manager.exists.return_value = False - mock_config_get.return_value.file_manager = mock_file_manager - - with patch( - "pandasai.helpers.filemanager.open", mock_open() - ) as mock_file, patch.object(sample_df, "to_parquet") as mock_to_parquet: + with patch.object(sample_df, "to_parquet") as mock_to_parquet: result = pandasai.create( "test-org/test-dataset", sample_df, description="test_description" )
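
---

A minimal usage sketch for the FileManager extension point introduced in this series, assuming only the interface shown in the patches above; the InMemoryFileManager name and the Config wiring are illustrative assumptions, not part of the series:

    from pandasai.config import Config
    from pandasai.helpers.filemanager import FileManager


    class InMemoryFileManager(FileManager):
        """Illustrative backend that keeps dataset files in a dict in memory."""

        def __init__(self):
            self._files: dict[str, bytes] = {}

        def load(self, file_path: str) -> str:
            return self._files[file_path].decode("utf-8")

        def load_binary(self, file_path: str) -> bytes:
            return self._files[file_path]

        def write(self, file_path: str, content: str) -> None:
            self._files[file_path] = content.encode("utf-8")

        def write_binary(self, file_path: str, content: bytes) -> None:
            self._files[file_path] = content

        def exists(self, file_path: str) -> bool:
            return file_path in self._files

        def mkdir(self, dir_path: str) -> None:
            # No-op: directories are implicit in a key-value store.
            pass


    # Config exposes a file_manager field (PATCH 3/5), so a custom backend
    # can be supplied wherever a Config is constructed.
    config = Config(file_manager=InMemoryFileManager())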