diff --git a/pandasai/__init__.py b/pandasai/__init__.py index a65ce95e0..ca978a737 100644 --- a/pandasai/__init__.py +++ b/pandasai/__init__.py @@ -212,11 +212,17 @@ def load(dataset_path: str) -> DataFrame: raise ValueError("The path must be in the format 'organization/dataset'.") dataset_full_path = os.path.join(find_project_root(), "datasets", dataset_path) - if not os.path.exists(dataset_full_path): + + local_dataset_exists = os.path.exists(dataset_full_path) + + if not local_dataset_exists: api_key = os.environ.get("PANDABI_API_KEY", None) api_url = os.environ.get("PANDABI_API_URL", DEFAULT_API_URL) + if not api_url or not api_key: - raise PandaAIApiKeyError() + raise PandaAIApiKeyError( + f'The dataset "{dataset_path}" does not exist in your local datasets directory. In addition, no API Key has been provided. Set an API key with valid permits if you want to fetch the dataset from the remote server.' + ) request_session = get_pandaai_session() @@ -232,7 +238,16 @@ def load(dataset_path: str) -> DataFrame: zip_file.extractall(dataset_full_path) loader = DatasetLoader.create_loader_from_path(dataset_path) - return loader.load() + df = loader.load() + + message = ( + "Dataset loaded successfully." + if local_dataset_exists + else "Dataset fetched successfully from the remote server." 
+ ) + print(message) + + return df def read_csv(filepath: str) -> DataFrame: diff --git a/tests/unit_tests/test_pandasai_init.py b/tests/unit_tests/test_pandasai_init.py index 3f1cce487..b8e783cbc 100644 --- a/tests/unit_tests/test_pandasai_init.py +++ b/tests/unit_tests/test_pandasai_init.py @@ -137,6 +137,25 @@ def test_load_dataset_not_found(self, mockenviron, mock_bytes_io, mock_zip_file) with pytest.raises(DatasetNotFound): pandasai.load(dataset_path) + @patch("pandasai.os.path.exists") + @patch("pandasai.os.environ", {}) + @patch("pandasai.get_pandaai_session") + def test_load_missing_not_found_locally_and_no_remote_key( + self, mock_session, mock_exists + ): + """Test loading when the dataset is missing locally and no API key is set.""" + mock_exists.return_value = False + mock_response = MagicMock() + mock_response.status_code = 404 + mock_session.return_value.get.return_value = mock_response + dataset_path = "org/dataset_name" + + with pytest.raises( + PandaAIApiKeyError, + match='The dataset "org/dataset_name" does not exist in your local datasets directory. In addition, no API Key has been provided. 
Set an API key with valid permits if you want to fetch the dataset from the remote server.', + ): + pandasai.load(dataset_path) + @patch("pandasai.os.path.exists") @patch("pandasai.os.environ", {"PANDABI_API_KEY": "key"}) def test_load_missing_api_url(self, mock_exists): @@ -144,13 +163,13 @@ def test_load_missing_api_url(self, mock_exists): mock_exists.return_value = False dataset_path = "org/dataset_name" - with pytest.raises(PandaAIApiKeyError): + with pytest.raises(DatasetNotFound): pandasai.load(dataset_path) @patch("pandasai.os.path.exists") @patch("pandasai.os.environ", {"PANDABI_API_KEY": "key"}) @patch("pandasai.get_pandaai_session") - def test_load_missing_api_url(self, mock_session, mock_exists): + def test_load_missing_not_found(self, mock_session, mock_exists): """Test loading when API URL is missing.""" mock_exists.return_value = False mock_response = MagicMock() @@ -202,7 +221,7 @@ def test_load_without_api_credentials( pandasai.load("test/dataset") assert ( str(exc_info.value) - == "PandaAI API key not found. Please set your API key using PandaAI.set_api_key() or by setting the PANDASAI_API_KEY environment variable." + == 'The dataset "test/dataset" does not exist in your local datasets directory. In addition, no API Key has been provided. Set an API key with valid permits if you want to fetch the dataset from the remote server.' ) def test_clear_cache(self):