Refactor

aorwall · Aug 5, 2024 · 28b2242
1 parent 63d9f92
commit 28b2242
Show file tree

Hide file tree

Showing 24 changed files with 542 additions and 251 deletions.
diff --git a/moatless/__init__.py b/moatless/__init__.py
@@ -1,3 +1,4 @@
-from moatless.loop import AgenticLoop, TransitionRules
 from moatless.repository import FileRepository
 from moatless.workspace import Workspace
+from moatless.transition_rules import TransitionRules
+from moatless.loop import AgenticLoop
diff --git a/moatless/benchmark/evaluation.py b/moatless/benchmark/evaluation.py
diff --git a/moatless/codeblocks/parser/create.py b/moatless/codeblocks/parser/create.py
@@ -1,13 +1,25 @@
 from moatless.codeblocks.parser.parser import CodeParser
 from moatless.codeblocks.parser.python import PythonParser
+from moatless.codeblocks.parser.java import JavaParser
 
 
 def is_supported(language: str) -> bool:
     return language and language in ["python", "java", "typescript", "javascript"]
 
 
+def create_parser_by_ext(ext: str, **kwargs) -> CodeParser | None:
+    if ext == ".py":
+        return PythonParser(**kwargs)
+    elif ext == ".java":
+        return JavaParser(**kwargs)
+
+    raise NotImplementedError(f"Extension {ext} is not supported.")
+
+
 def create_parser(language: str, **kwargs) -> CodeParser | None:
     if language == "python":
         return PythonParser(**kwargs)
+    elif language == "java":
+        return JavaParser(**kwargs)
 
     raise NotImplementedError(f"Language {language} is not supported.")
diff --git a/moatless/edit/clarify.py b/moatless/edit/clarify.py
@@ -82,7 +82,7 @@ def init(self):
             outcomment_code_comment="... other code",
         )
 
-    def handle_action(self, request: LineNumberClarification) -> ActionResponse:
+    def _execute_action(self, request: LineNumberClarification) -> ActionResponse:
         logger.info(
             f"{self}: Got line number clarification: {request.start_line} - {request.end_line}"
         )

diff --git a/moatless/edit/edit.py b/moatless/edit/edit.py
@@ -148,7 +148,7 @@ def init(self):
         lines_to_replace = code_lines[self.start_line - 1 : self.end_line]
         self._code_to_replace = "\n".join(lines_to_replace)
 
-    def handle_action(self, content: Content) -> ActionResponse:
+    def _execute_action(self, content: Content) -> ActionResponse:
         self._messages.append(AssistantMessage(content=content.content))
 
         scratch_pad = None

diff --git a/moatless/edit/plan.py b/moatless/edit/plan.py
@@ -139,7 +139,7 @@ def init(self):
             )
             self.file_context.expand_small_classes(max_tokens=1000)
 
-    def handle_action(self, action: ApplyChange) -> ActionResponse:
+    def _execute_action(self, action: ApplyChange) -> ActionResponse:
         if action.action == "review":
             if self.diff and self.finish_on_review:
                 logger.info("Review suggested after diff, will finish")
@@ -177,6 +177,11 @@ def _request_for_change(self, rfc: ApplyChange) -> ActionResponse:
             f"request_for_change(file_path={rfc.file_path}, span_id={rfc.span_id})"
         )
 
+        if not rfc.instructions:
+            return ActionResponse.retry(
+                f"Please provide instructions for the code change."
+            )
+
         context_file = self.file_context.get_file(rfc.file_path)
         if not context_file:
             logger.warning(

diff --git a/moatless/edit/plan_lines.py b/moatless/edit/plan_lines.py
@@ -118,7 +118,7 @@ def init(self):
         ):
             self.file_context.expand_context_with_related_spans(max_tokens=4000)
 
-    def handle_action(self, action: ApplyChange) -> ActionResponse:
+    def _execute_action(self, action: ApplyChange) -> ActionResponse:
         if action.finish:
             self.file_context.save()
 

diff --git a/moatless/edit/review.py b/moatless/edit/review.py
@@ -170,7 +170,7 @@ def init(self) -> Optional[ActionResponse]:
 
         return None
 
-    def handle_action(self, action: ApplyChange) -> ActionResponse:
+    def _execute_action(self, action: ApplyChange) -> ActionResponse:
         if action.action == "review":
             if self.diff and self.finish_on_review:
                 logger.info(f"Review suggested after diff, will finish")

diff --git a/moatless/find/decide.py b/moatless/find/decide.py
@@ -92,7 +92,7 @@ def __init__(
             **data,
         )
 
-    def handle_action(self, action: Decision) -> ActionResponse:
+    def _execute_action(self, action: Decision) -> ActionResponse:
         if action.complete and action.relevant:
             return ActionResponse.transition("finish")
 

diff --git a/moatless/find/identify.py b/moatless/find/identify.py
@@ -98,7 +98,7 @@ def __init__(
     def model_dump(self, **kwargs):
         return super().model_dump(**kwargs)
 
-    def handle_action(self, action: Identify) -> ActionResponse:
+    def _execute_action(self, action: Identify) -> ActionResponse:
         if action.identified_spans:
             self.file_context.add_files_with_spans(action.identified_spans)
 

diff --git a/moatless/find/search.py b/moatless/find/search.py
@@ -332,7 +332,7 @@ def __init__(
             **data,
         )
 
-    def handle_action(self, action: Search) -> ActionResponse:
+    def _execute_action(self, action: Search) -> ActionResponse:
         if action.complete:
             return ActionResponse.transition(
                 "finish",
@@ -433,7 +433,7 @@ def messages(self) -> list[Message]:
                 query=self.loop.trajectory.initial_message,
                 exact_match_if_possible=False,
                 max_spans_per_file=5,
-                max_results=50,
+                max_results=100,
             )
 
             file_context = self.create_file_context(max_tokens=4000)

diff --git a/moatless/index/code_index.py b/moatless/index/code_index.py
@@ -67,7 +67,6 @@ def __init__(
         max_exact_results: int = 5,
     ):
         self._index_name = index_name
-
         self._settings = settings or IndexSettings()
 
         self.max_results = max_results
@@ -157,12 +156,12 @@ def from_index_name(
             logger.info(f"Loading existing index {index_name} from {persist_dir}.")
             return cls.from_persist_dir(persist_dir, file_repo=file_repo)
 
-        if not os.getenv("INDEX_STORE_URL"):
-            raise ValueError(
-                "INDEX_STORE_URL environment variable must be set to a index store URL to download the index."
-            )
+        if os.getenv("INDEX_STORE_URL"):
+            index_store_url = os.getenv("INDEX_STORE_URL")
+        else:
+            index_store_url = "https://stmoatless.blob.core.windows.net/indexstore/20240522-voyage-code-2"
 
-        store_url = os.path.join(os.getenv("INDEX_STORE_URL"), f"{index_name}.zip")
+        store_url = os.path.join(index_store_url, f"{index_name}.zip")
         logger.info(f"Downloading existing index {index_name} from {store_url}.")
         return cls.from_url(store_url, persist_dir, file_repo)
 
@@ -699,14 +698,23 @@ def file_metadata_func(file_path: str) -> dict:
                 "category": category,
             }
 
-        reader = SimpleDirectoryReader(
-            input_dir=repo_path,
-            file_metadata=file_metadata_func,
-            input_files=input_files,
-            filename_as_id=True,
-            required_exts=[".py"],  # TODO: Shouldn't be hardcoded and filtered
-            recursive=True,
-        )
+        if self._settings and self._settings.language == "java":
+            required_exts = [".java"]
+        else:
+            required_exts = [".py"]
+
+        try:
+            reader = SimpleDirectoryReader(
+                input_dir=repo_path,
+                file_metadata=file_metadata_func,
+                input_files=input_files,
+                filename_as_id=True,
+                required_exts=required_exts,
+                recursive=True,
+            )
+        except Exception as e:
+            logger.exception(f"Failed to create reader with input_dir {repo_path}, input_files {input_files} and required_exts {required_exts}.")
+            raise e
 
         embed_pipeline = IngestionPipeline(
             transformations=[self._embed_model],
@@ -737,6 +745,7 @@ def index_callback(codeblock: CodeBlock):
                 )
 
         splitter = EpicSplitter(
+            language=self._settings.language,
             min_chunk_size=self._settings.min_chunk_size,
             chunk_size=self._settings.chunk_size,
             hard_token_limit=self._settings.hard_token_limit,

diff --git a/moatless/index/epic_split.py b/moatless/index/epic_split.py
@@ -10,6 +10,7 @@
 from llama_index.core.schema import BaseNode, TextNode
 from llama_index.core.utils import get_tokenizer, get_tqdm_iterable
 
+from moatless.codeblocks import create_parser
 from moatless.codeblocks.codeblocks import CodeBlock, CodeBlockType, PathTree
 from moatless.codeblocks.parser.python import PythonParser
 from moatless.index.code_node import CodeNode
@@ -39,6 +40,10 @@ def count_parent_tokens(codeblock: CodeBlock) -> int:
 
 
 class EpicSplitter(NodeParser):
+    language: str = Field(
+        default="python", description="Language of the code blocks to parse."
+    )
+
     text_splitter: TextSplitter = Field(
         description="Text splitter to use for splitting non code documents into nodes."
     )
@@ -82,6 +87,7 @@ class EpicSplitter(NodeParser):
 
     def __init__(
         self,
+        language: str = "python",
         chunk_size: int = 750,
         min_chunk_size: int = 100,
         max_chunk_size: int = 1500,
@@ -106,6 +112,7 @@ def __init__(
         # self._fallback_code_splitter = fallback_code_splitter
 
         super().__init__(
+            language=language,
             chunk_size=chunk_size,
             chunk_overlap=0,
             text_splitter=text_splitter or TokenTextSplitter(),
@@ -142,10 +149,10 @@ def _parse_nodes(
             content = node.get_content()
 
             try:
-                # TODO: Derive language from file extension
                 starttime = time.time_ns()
 
-                parser = PythonParser(index_callback=self.index_callback)
+                # TODO: Derive the parser from the file's extension (e.g. via create_parser_by_ext) instead of self.language
+                parser = create_parser(language=self.language, index_callback=self.index_callback)
                 codeblock = parser.parse(content, file_path=file_path)
 
                 parse_time = time.time_ns() - starttime