fix(output_type): handle errors for wrong output type #866

Merged
merged 4 commits on Jan 11, 2024
Changes from 1 commit

@@ -0,0 +1,9 @@
{dataframes}

The user asked the following question:
{conversation}

You generated this python code:
{code}

Fix the python code above and return the new python code but the result type should be: {output_type_hint}
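
As a side note, here is a minimal, standalone sketch of how the placeholders in the template above (which appears to be the new assets/prompt_templates/correct_output_type_error_prompt.tmpl referenced later in this diff) might be rendered before being sent to the LLM; all example values below are made up:

```python
# Standalone sketch (not the library's own rendering code) of filling in the
# template placeholders; every value here is hypothetical.
template = """{dataframes}

The user asked the following question:
{conversation}

You generated this python code:
{code}

Fix the python code above and return the new python code but the result type should be: {output_type_hint}"""

rendered = template.format(
    dataframes="dfs[0]: 3x2 dataframe with columns country, gdp",
    conversation="How many countries are in the dataframe?",
    code="result = {'type': 'text', 'value': 3}",
    output_type_hint='{ "type": "number", "value": 3 }',
)
print(rendered)
```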
8 changes: 8 additions & 0 deletions pandasai/exceptions.py
@@ -178,3 +178,11 @@ class MaliciousQueryError(Exception):
Args:
Exception (Exception): MaliciousQueryError
"""


class InvalidLLMOutputType(Exception):
"""
Raise error if malicious query is generated
Args:
Exception (Exception): MaliciousQueryError
"""
Contributor
@coderabbitai coderabbitai bot Jan 10, 2024

The docstring for the InvalidLLMOutputType exception class incorrectly mentions malicious query generation, which seems unrelated to output type validation. This should be corrected to accurately reflect the purpose of the exception.

- Raise error if malicious query is generated
+ Raise error if the output type is invalid

Committable suggestion

IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation.

Suggested change
class InvalidLLMOutputType(Exception):
"""
Raise error if malicious query is generated
Args:
Exception (Exception): MaliciousQueryError
"""
class InvalidLLMOutputType(Exception):
"""
Raise error if the output type is invalid
Args:
Exception (Exception): MaliciousQueryError
"""

Collaborator

@ArslanSaleem leftover here!

Contributor

Seems like the humans are having a chat. I'll hop back into my burrow for now. If you need me again, just tag @coderabbitai in a new comment, and I'll come hopping out!

4 changes: 3 additions & 1 deletion pandasai/helpers/output_types/_output_types.py
@@ -118,6 +118,8 @@ def _validate_value(self, actual_value: Any) -> bool:


class DefaultOutputType(BaseOutputType):
default_types = ["string", "number", "dataframe", "plot"]

@property
def template_hint(self):
return """type (possible values "string", "number", "dataframe", "plot"). Examples: { "type": "string", "value": f"The highest salary is {highest_salary}." } or { "type": "number", "value": 125 } or { "type": "dataframe", "value": pd.DataFrame({...}) } or { "type": "plot", "value": "temp_chart.png" }""" # noqa E501
@@ -140,4 +142,4 @@ def validate(self, result: dict[str, Any]) -> tuple[bool, Iterable]:
(bool): True since the `DefaultOutputType`
is supposed to have no validation
"""
return True, ()
return result["type"] in self.default_types, ()
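
A small self-contained illustration (not importing pandasai) of the behavioural change here: validate() used to return True unconditionally, and now only accepts the four default result types, which is presumably also why the FakeLLM default output in the next file changes from 'text' to 'string':

```python
from typing import Any, Dict, Iterable, Tuple

DEFAULT_TYPES = ["string", "number", "dataframe", "plot"]


def validate(result: Dict[str, Any]) -> Tuple[bool, Iterable]:
    # Mirrors the new DefaultOutputType.validate: only known result types
    # pass, and no per-field errors are reported.
    return result["type"] in DEFAULT_TYPES, ()


print(validate({"type": "string", "value": "Hello World"}))  # (True, ())
print(validate({"type": "text", "value": "Hello World"}))    # (False, ())
```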
2 changes: 1 addition & 1 deletion pandasai/llm/fake.py
@@ -9,7 +9,7 @@
class FakeLLM(LLM):
"""Fake LLM"""

_output: str = """result = { 'type': 'text', 'value': "Hello World" }"""
_output: str = """result = { 'type': 'string', 'value': "Hello World" }"""

def __init__(self, output: Optional[str] = None):
if output is not None:
37 changes: 34 additions & 3 deletions pandasai/pipelines/smart_datalake_chat/code_execution.py
@@ -1,6 +1,12 @@
import logging
import traceback
from typing import Any, List

from pandasai.exceptions import InvalidLLMOutputType
from pandasai.prompts.base import AbstractPrompt
from pandasai.prompts.correct_output_type_error_prompt import (
CorrectOutputTypeErrorPrompt,
)
from ...helpers.code_manager import CodeExecutionContext
from ...helpers.logger import Logger
from ..base_logic_unit import BaseLogicUnit
@@ -51,6 +57,17 @@ def execute(self, input: Any, **kwargs) -> Any:
context=code_context,
)

output_helper = pipeline_context.get_intermediate_value(
"output_type_helper"
)
if output_helper := pipeline_context.get_intermediate_value(
"output_type_helper"
):
(validation_ok, validation_errors) = output_helper.validate(result)

if not validation_ok:
raise InvalidLLMOutputType(validation_errors)

break

except Exception as e:
@@ -69,18 +86,33 @@ def execute(self, input: Any, **kwargs) -> Any:
)

traceback_error = traceback.format_exc()

# Get Error Prompt for retry
error_prompt = self._get_error_prompt(e)
code_to_run = pipeline_context.query_exec_tracker.execute_func(
self._retry_run_code,
code,
pipeline_context,
logger,
traceback_error,
error_prompt,
)

return result

def _get_error_prompt(self, e: Exception) -> AbstractPrompt:
if isinstance(e, InvalidLLMOutputType):
return CorrectOutputTypeErrorPrompt()
else:
return CorrectErrorPrompt()

def _retry_run_code(
self, code: str, context: PipelineContext, logger: Logger, e: Exception
self,
code: str,
context: PipelineContext,
logger: Logger,
e: Exception,
error_prompt=CorrectErrorPrompt(),
) -> List:
"""
A method to retry the code execution with error correction framework.
@@ -94,7 +126,6 @@ def _retry_run_code(

Returns (str): A python code
"""

logger.log(f"Failed with error: {e}. Retrying", logging.ERROR)

default_values = {
@@ -107,7 +138,7 @@
}
error_correcting_instruction = context.get_intermediate_value("get_prompt")(
"correct_error",
default_prompt=CorrectErrorPrompt(),
default_prompt=error_prompt,
default_values=default_values,
)
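
Taken together, a hedged, standalone sketch of the retry flow this file now implements (the real pipeline uses prompt classes, the query execution tracker and intermediate values; plain strings and injected callables stand in for them here):

```python
class InvalidLLMOutputType(Exception):
    """Raised when the executed code returns a result of the wrong type."""


def pick_error_prompt(exc: Exception) -> str:
    # Stand-in for CodeExecution._get_error_prompt: a wrong output type
    # selects the output-type correction prompt, anything else keeps the
    # generic error-correction prompt.
    if isinstance(exc, InvalidLLMOutputType):
        return "CorrectOutputTypeErrorPrompt"
    return "CorrectErrorPrompt"


def run_with_retry(code, execute, validate, regenerate, max_retries=3):
    result = None
    for _ in range(max_retries):
        try:
            result = execute(code)
            ok, errors = validate(result)
            if not ok:
                raise InvalidLLMOutputType(errors)
            break
        except Exception as exc:
            # On failure, ask the LLM for corrected code using whichever
            # prompt matches the error, then try again.
            code = regenerate(code, pick_error_prompt(exc), exc)
    return result
```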

22 changes: 22 additions & 0 deletions pandasai/prompts/correct_output_type_error_prompt.py
@@ -0,0 +1,22 @@
""" Prompt to correct Output Type Python Code on Error
```
{dataframes}

{conversation}

You generated this python code:
{code}

It fails with the following error:
{error_returned}

Fix the python code above and return the new python code but the result type should be:
""" # noqa: E501

from .file_based_prompt import FileBasedPrompt


class CorrectOutputTypeErrorPrompt(FileBasedPrompt):
"""Prompt to Correct Python code on Error"""

_path_to_template = "assets/prompt_templates/correct_output_type_error_prompt.tmpl"
31 changes: 30 additions & 1 deletion tests/pipelines/smart_datalake/test_code_execution.py
@@ -1,12 +1,17 @@
from typing import Optional
from unittest.mock import Mock
from unittest.mock import MagicMock, Mock
import pandas as pd
import pytest
from pandasai.exceptions import InvalidLLMOutputType
from pandasai.helpers.logger import Logger
from pandasai.helpers.skills_manager import SkillsManager

from pandasai.llm.fake import FakeLLM
from pandasai.pipelines.pipeline_context import PipelineContext
from pandasai.prompts.correct_error_prompt import CorrectErrorPrompt
from pandasai.prompts.correct_output_type_error_prompt import (
CorrectOutputTypeErrorPrompt,
)
from pandasai.smart_dataframe import SmartDataframe
from pandasai.pipelines.smart_datalake_chat.code_execution import CodeExecution

@@ -194,3 +199,27 @@ def mock_intermediate_values(key: str):

assert isinstance(code_execution, CodeExecution)
assert result == "Mocked Result after retry"

def test_get_error_prompt_invalid_llm_output_type(self):
code_execution = CodeExecution()

# Mock the InvalidLLMOutputType exception
mock_exception = MagicMock(spec=InvalidLLMOutputType)

# Call the method with the mock exception
result = code_execution._get_error_prompt(mock_exception)

# Assert that the CorrectOutputTypeErrorPrompt is returned
assert isinstance(result, CorrectOutputTypeErrorPrompt)

def test_get_error_prompt_other_exception(self):
code_execution = CodeExecution()

# Mock a generic exception
mock_exception = MagicMock(spec=Exception)

# Call the method with the mock exception
result = code_execution._get_error_prompt(mock_exception)

# Assert that the CorrectErrorPrompt is returned
assert isinstance(result, CorrectErrorPrompt)
20 changes: 18 additions & 2 deletions tests/test_smartdataframe.py
@@ -133,6 +133,17 @@ def smart_dataframe(self, llm, sample_df, custom_head):
custom_head=custom_head,
)

@pytest.fixture
def llm_result_mocks(self, custom_head):
result_template = "result = {{ 'type': '{type}', 'value': {value} }}"

return {
"number": result_template.format(type="number", value=1),
"string": result_template.format(type="string", value="'Test'"),
"plot": result_template.format(type="plot", value="'temp_plot.png'"),
"dataframe": result_template.format(type="dataframe", value=custom_head),
}

@pytest.fixture
def smart_dataframe_mocked_df(self, llm, sample_df, custom_head):
smart_df = SmartDataframe(
@@ -225,7 +236,10 @@ def test_run_with_privacy_enforcement(self, llm):
],
],
)
def test_run_passing_output_type(self, llm, output_type, output_type_hint):
@patch("pandasai.responses.response_parser.ResponseParser.parse", autospec=True)
def test_run_passing_output_type(
self, parser_mock, llm, llm_result_mocks, output_type, output_type_hint
):
df = pd.DataFrame({"country": []})
df = SmartDataframe(df, config={"llm": llm, "enable_cache": False})

@@ -255,12 +269,14 @@ def test_run_passing_output_type(self, llm, output_type, output_type_hint):


Generate python code and return full updated code:"""
parser_mock.return_value = Mock()
type_ = output_type if output_type is not None else "string"
llm._output = llm_result_mocks[type_]

df.chat("How many countries are in the dataframe?", output_type=output_type)
last_prompt = df.last_prompt
if sys.platform.startswith("win"):
last_prompt = df.last_prompt.replace("\r\n", "\n")

assert last_prompt == expected_prompt

@pytest.mark.parametrize(
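
Finally, a quick illustration of what the llm_result_mocks fixture above produces; the patched test feeds one of these strings to the FakeLLM so the stricter output-type validation passes for each parametrized case (output shown in comments):

```python
result_template = "result = {{ 'type': '{type}', 'value': {value} }}"

print(result_template.format(type="number", value=1))
# result = { 'type': 'number', 'value': 1 }
print(result_template.format(type="string", value="'Test'"))
# result = { 'type': 'string', 'value': 'Test' }
print(result_template.format(type="plot", value="'temp_plot.png'"))
# result = { 'type': 'plot', 'value': 'temp_plot.png' }
```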