Format

aorwall · Jan 17, 2025 · 33b1171 · 33b1171
1 parent d88fc78
commit 33b1171
Show file tree

Hide file tree

Showing 86 changed files with 1,218 additions and 3,015 deletions.
diff --git a/README.md b/README.md
@@ -146,23 +146,19 @@ Available dataset splits that can be specified with the `--split` argument:
 |------------|-------------|----------------|
 | lite | All instances from the lite dataset | 300 | 
 | verified | All instances from the verified dataset | 450 | 
-| verified_mini | A subset of SWEBench-verified that has approximately the same distribution of performance, test pass rates and difficulty as the original dataset ([source](https://huggingface.co/datasets/MariusHobbhahn/swe-bench-verified-mini)) | 50 |
+| verified_mini | [MariusHobbhahn/swe-bench-verified-mini](https://huggingface.co/datasets/MariusHobbhahn/swe-bench-verified-mini), a subset of SWE-Bench Verified  | 50 |
 | lite_and_verified_solvable | Instances that exist in both lite and verified datasets and have at least one solved submission to SWE-Bench | 80 |
 
-Default model configurations are provided for supported models. When specifying just the `--model` argument, the following configurations are used:
+Default model configurations are provided for models that have been verified to work. Other models may work but have not been extensively tested. When specifying only the `--model` argument, the following configurations are used:
 
 | Model | Response Format | Message History | Thoughts in Action |
 |-------|----------------|-----------------|-------------------|
 | claude-3-5-sonnet-20241022 | tool_call | messages | no |
 | claude-3-5-haiku-20241022 | tool_call | messages | no |
 | gpt-4o-2024-11-20 | tool_call | messages | yes |
 | gpt-4o-mini-2024-07-18 | tool_call | messages | yes |
-| o1-preview-2024-09-12 | react | react | no |
-| o1-mini-2024-09-12 | react | react | no |
 | deepseek/deepseek-chat | react | react | yes |
-| gemini/gemini-exp-1206 | tool_call | messages | no |
 | gemini/gemini-2.0-flash-exp | tool_call | messages | yes |
-| gemini/gemini-2.0-flash-thinking-exp | react | react | no |
 | openrouter/meta-llama/llama-3.1-70b-instruct | react | react | no |
 | openrouter/qwen/qwen-2.5-coder-32b-instruct | react | react | no |
 
@@ -178,7 +174,7 @@ poetry run python -m moatless.benchmark.run_evaluation \
 # Run specific instances with GPT-4o
 poetry run python -m moatless.benchmark.run_evaluation \
   --model gpt-4o \
-  --instance-ids django__django-16379 django__django-16380
+  --instance-ids "django__django-16379"
 ```
 
 # Code Example

diff --git a/moatless/actions/action.py b/moatless/actions/action.py
@@ -6,10 +6,7 @@
 
 from pydantic import BaseModel, ConfigDict
 
-from moatless.actions.schema import (
-    ActionArguments,
-    Observation, RewardScaleEntry, FewShotExample
-)
+from moatless.actions.schema import ActionArguments, Observation, RewardScaleEntry, FewShotExample
 from moatless.file_context import FileContext
 from moatless.index import CodeIndex
 from moatless.repository.repository import Repository
@@ -128,9 +125,7 @@ def get_few_shot_examples(cls) -> List[FewShotExample]:
         return []
 
     @classmethod
-    def get_action_by_args_class(
-        cls, args_class: Type[ActionArguments]
-    ) -> Optional[Type["Action"]]:
+    def get_action_by_args_class(cls, args_class: Type[ActionArguments]) -> Optional[Type["Action"]]:
         """
         Get the Action subclass corresponding to the given ActionArguments subclass.
 
@@ -142,10 +137,7 @@ def get_action_by_args_class(
         """
 
         def search_subclasses(current_class):
-            if (
-                hasattr(current_class, "args_schema")
-                and current_class.args_schema == args_class
-            ):
+            if hasattr(current_class, "args_schema") and current_class.args_schema == args_class:
                 return current_class
             for subclass in current_class.__subclasses__():
                 result = search_subclasses(subclass)

diff --git a/moatless/actions/append_string.py b/moatless/actions/append_string.py
@@ -23,10 +23,7 @@ class AppendStringArgs(ActionArguments):
     model_config = ConfigDict(title="AppendString")
 
     path: str = Field(..., description="Path to the file to append to")
-    new_str: str = Field(
-        ..., description="Text content to append at the end of the file"
-    )
-
+    new_str: str = Field(..., description="Text content to append at the end of the file")
 
     def format_args_for_llm(self) -> str:
         return f"""<path>{self.path}</path>
@@ -36,9 +33,7 @@ def format_args_for_llm(self) -> str:
 
     @classmethod
     def format_schema_for_llm(cls) -> str:
-        return cls.format_xml_schema(
-            {"path": "file/path.py", "new_str": "\ncontent to append at end of file\n"}
-        )
+        return cls.format_xml_schema({"path": "file/path.py", "new_str": "\ncontent to append at end of file\n"})
 
     @classmethod
     def get_few_shot_examples(cls) -> List[FewShotExample]:

diff --git a/moatless/actions/claude_text_editor.py b/moatless/actions/claude_text_editor.py
@@ -38,23 +38,22 @@ class EditActionArguments(ActionArguments):
     """
     A filesystem editor tool that allows the agent to view, create, and edit files.
     """
+
     model_config = ConfigDict(title="str_replace_editor")
 
     command: Command = Field(..., description="The edit command to execute")
     path: str = Field(..., description="The file path to edit")
-    file_text: Optional[str] = Field(
-        None, description="The text content for file creation"
-    )
-    view_range: Optional[List[int]] = Field(
-        None, description="Range of lines to view [start, end]"
-    )
+    file_text: Optional[str] = Field(None, description="The text content for file creation")
+    view_range: Optional[List[int]] = Field(None, description="Range of lines to view [start, end]")
     old_str: Optional[str] = Field(None, description="String to replace")
     new_str: Optional[str] = Field(None, description="Replacement string")
     insert_line: Optional[int] = Field(None, description="Line number for insertion")
 
     @classmethod
     def tool_schema(cls, thoughts_in_action: bool = False) -> ChatCompletionToolParam:
-        return ChatCompletionToolParam(type="text_editor_20241022", function=ChatCompletionToolParamFunctionChunk(name="str_replace_editor"))
+        return ChatCompletionToolParam(
+            type="text_editor_20241022", function=ChatCompletionToolParamFunctionChunk(name="str_replace_editor")
+        )
 
     @field_validator("file_text")
     @classmethod
@@ -74,7 +73,7 @@ def validate_old_str(cls, v, info):
     @classmethod
     def validate_new_str(cls, v, info):
         # TODO: This check is disabled to keep backward compatibility; re-enable it once callers always pass `new_str`.
-        #if info.data.get("command") == "str_replace" and v is None:
+        # if info.data.get("command") == "str_replace" and v is None:
         #    raise ValueError(
         #        "Parameter `new_str` cannot be null for command: str_replace. Return an empty string if your intention was to remove old_str."
         #    )
@@ -103,7 +102,7 @@ def validate_command(cls, v):
         if v not in valid_commands:
             raise ValueError(f"Unknown command: {v}")
         return v
-    
+
 
 class ClaudeEditTool(Action, CodeModificationMixin):
     """
@@ -113,9 +112,7 @@ class ClaudeEditTool(Action, CodeModificationMixin):
 
     args_schema = EditActionArguments
 
-    max_tokens_to_view: int = Field(
-        2000, description="Max tokens to view in one command"
-    )
+    max_tokens_to_view: int = Field(2000, description="Max tokens to view in one command")
 
     _str_replace: StringReplace = PrivateAttr()
     _create_file: CreateFile = PrivateAttr()
@@ -143,9 +140,7 @@ def __init__(
             code_index=self._code_index,
             repository=self._repository,
         )
-        self._view_code = ViewCode(
-            repository=self._repository, completion_model=completion_model
-        )
+        self._view_code = ViewCode(repository=self._repository, completion_model=completion_model)
 
     def execute(
         self,
@@ -195,9 +190,7 @@ def execute(
                 file_context,
             )
         elif args.command == "insert":
-            observation = self._insert(
-                file_context, path, args.insert_line, args.new_str
-            )
+            observation = self._insert(file_context, path, args.insert_line, args.new_str)
         else:
             raise Observation(
                 message=f"Unknown command: {args.command}",
@@ -229,9 +222,7 @@ def execute(
 
         return observation
 
-    def validate_path(
-        self, file_context: FileContext, command: str, path: Path
-    ) -> str | None:
+    def validate_path(self, file_context: FileContext, command: str, path: Path) -> str | None:
         """
         Check that the path/command combination is valid.
         """
@@ -256,9 +247,7 @@ def validate_path(
 
         return None
 
-    def _view(
-        self, file_context: FileContext, path: Path, args: EditActionArguments
-    ) -> Observation:
+    def _view(self, file_context: FileContext, path: Path, args: EditActionArguments) -> Observation:
         codespan = CodeSpan(file_path=str(path))
 
         view_range = args.view_range
@@ -268,9 +257,7 @@ def _view(
         view_code_args = ViewCodeArgs(thoughts=args.thoughts, files=[codespan])
         return self._view_code.execute(view_code_args, file_context=file_context)
 
-    def _create(
-        self, file_context: FileContext, path: Path, file_text: str
-    ) -> Observation:
+    def _create(self, file_context: FileContext, path: Path, file_text: str) -> Observation:
         if file_context.file_exists(str(path)):
             return Observation(
                 message=f"File already exists at: {path}",
@@ -287,9 +274,7 @@ def _create(
             properties={"diff": diff},
         )
 
-    def _insert(
-        self, file_context: FileContext, path: Path, insert_line: int, new_str: str
-    ) -> Observation:
+    def _insert(self, file_context: FileContext, path: Path, insert_line: int, new_str: str) -> Observation:
         context_file = file_context.get_context_file(str(path))
         if not context_file:
             return Observation(
@@ -316,11 +301,7 @@ def _insert(
             )
 
         new_str_lines = new_str.split("\n")
-        new_file_text_lines = (
-            file_text_lines[:insert_line]
-            + new_str_lines
-            + file_text_lines[insert_line:]
-        )
+        new_file_text_lines = file_text_lines[:insert_line] + new_str_lines + file_text_lines[insert_line:]
         snippet_lines = (
             file_text_lines[max(0, insert_line - SNIPPET_LINES) : insert_line]
             + new_str_lines
@@ -357,17 +338,8 @@ def _make_output(
         file_content = maybe_truncate(file_content)
         if expand_tabs:
             file_content = file_content.expandtabs()
-        file_content = "\n".join(
-            [
-                f"{i + init_line:6}\t{line}"
-                for i, line in enumerate(file_content.split("\n"))
-            ]
-        )
-        return (
-            f"Here's the result of running `cat -n` on {file_descriptor}:\n"
-            + file_content
-            + "\n"
-        )
+        file_content = "\n".join([f"{i + init_line:6}\t{line}" for i, line in enumerate(file_content.split("\n"))])
+        return f"Here's the result of running `cat -n` on {file_descriptor}:\n" + file_content + "\n"
 
     def span_id_list(self, span_ids: set[str]) -> str:
         list_str = ""

diff --git a/moatless/actions/code_modification_mixin.py b/moatless/actions/code_modification_mixin.py
@@ -72,9 +72,7 @@ def run_tests(
             file_context.add_test_file(file_path)
         elif self._code_index:
             # If the file is not a test file, find test files that might be related to the file
-            search_results = self._code_index.find_test_files(
-                file_path, query=file_path, max_results=2, max_spans=2
-            )
+            search_results = self._code_index.find_test_files(file_path, query=file_path, max_results=2, max_spans=2)
 
             for search_result in search_results:
                 file_context.add_test_file(search_result.file_path)
@@ -99,7 +97,7 @@ def run_tests(
         failure_details = file_context.get_test_failure_details()
         if failure_details:
             response_msg += f"\n{failure_details}"
-        
+
         summary = f"\n{file_context.get_test_summary()}"
         response_msg += summary
 
@@ -110,6 +108,4 @@ def run_tests(
 
     def format_snippet_with_lines(self, snippet: str, start_line: int) -> str:
         """Format a code snippet with line numbers"""
-        return "\n".join(
-            f"{i + start_line:6}\t{line}" for i, line in enumerate(snippet.split("\n"))
-        )
+        return "\n".join(f"{i + start_line:6}\t{line}" for i, line in enumerate(snippet.split("\n")))
diff --git a/moatless/actions/create_file.py b/moatless/actions/create_file.py
@@ -41,9 +41,7 @@ def format_args_for_llm(self) -> str:
 
     @classmethod
     def format_schema_for_llm(cls) -> str:
-        return cls.format_xml_schema(
-            {"path": "file/path.py", "file_text": "\ncomplete file content\n"}
-        )
+        return cls.format_xml_schema({"path": "file/path.py", "file_text": "\ncomplete file content\n"})
 
 
 class CreateFile(Action, CodeActionValueMixin, CodeModificationMixin):

diff --git a/moatless/actions/find_class.py b/moatless/actions/find_class.py
@@ -33,9 +33,7 @@ def validate_names(self) -> "FindClassArgs":
         if "." in self.class_name:
             original_name = self.class_name
             self.class_name = self.class_name.split(".")[-1]
-            logger.info(
-                f"Using class name '{self.class_name}' from fully qualified name '{original_name}'"
-            )
+            logger.info(f"Using class name '{self.class_name}' from fully qualified name '{original_name}'")
         return self
 
     model_config = ConfigDict(title="FindClass")
@@ -57,22 +55,16 @@ def to_prompt(self):
         return prompt
 
     def _search(self, args: FindClassArgs) -> SearchCodeResponse:
-        logger.info(
-            f"{self.name}: {args.class_name} (file_pattern: {args.file_pattern})"
-        )
-        return self._code_index.find_class(
-            args.class_name, file_pattern=args.file_pattern
-        )
+        logger.info(f"{self.name}: {args.class_name} (file_pattern: {args.file_pattern})")
+        return self._code_index.find_class(args.class_name, file_pattern=args.file_pattern)
 
     def _select_span_instructions(self, search_result: SearchCodeResponse) -> str:
         return (
             f"Here's the class structure."
             f"Use the function ViewCode and specify the SpanIDs of the relevant functions to view them.\n"
         )
 
-    def _search_for_alternative_suggestion(
-        self, args: FindClassArgs
-    ) -> SearchCodeResponse:
+    def _search_for_alternative_suggestion(self, args: FindClassArgs) -> SearchCodeResponse:
         if args.file_pattern:
             return self._code_index.find_class(args.class_name, file_pattern=None)
         return SearchCodeResponse()

diff --git a/moatless/actions/find_code_snippet.py b/moatless/actions/find_code_snippet.py
@@ -40,6 +40,15 @@ def validate_snippet(self) -> "FindCodeSnippetArgs":
             raise ValueError("code_snippet cannot be empty")
         return self
 
+    @classmethod
+    def format_schema_for_llm(cls) -> str:
+        return cls.format_xml_schema(
+            {
+                "code_snippet": "The exact code snippet to find",
+                "file_pattern": "A glob pattern to filter search results to specific file types or directories.",
+            }
+        )
+
     def to_prompt(self):
         prompt = f"Searching for code snippet: {self.code_snippet}"
         if self.file_pattern:
@@ -52,6 +61,12 @@ def short_summary(self) -> str:
             param_str += f", file_pattern={self.file_pattern}"
         return f"{self.name}({param_str})"
 
+    def format_args_for_llm(self) -> str:
+        return f"""<code_snippet>
+{self.code_snippet}
+</code_snippet>
+<file_pattern>{self.file_pattern if self.file_pattern else ''}</file_pattern>"""
+
 
 class FindCodeSnippet(SearchBaseAction):
     args_schema: ClassVar[Type[ActionArguments]] = FindCodeSnippetArgs
@@ -61,32 +76,22 @@ class FindCodeSnippet(SearchBaseAction):
         description="The maximum number of search results to return. Default is 10.",
     )
 
-    def _search_for_context(
-        self, args: FindCodeSnippetArgs
-    ) -> Tuple[FileContext, bool]:
-        logger.info(
-            f"{self.name}: {args.code_snippet} (file_pattern: {args.file_pattern})"
-        )
+    def _search_for_context(self, args: FindCodeSnippetArgs) -> Tuple[FileContext, bool]:
+        logger.info(f"{self.name}: {args.code_snippet} (file_pattern: {args.file_pattern})")
 
-        matches = self._repository.find_exact_matches(
-            search_text=args.code_snippet, file_pattern=args.file_pattern
-        )
+        matches = self._repository.find_exact_matches(search_text=args.code_snippet, file_pattern=args.file_pattern)
 
         if args.file_pattern and len(matches) > 1:
             matches = [
-                (file_path, line_num)
-                for file_path, line_num in matches
-                if fnmatch(file_path, args.file_pattern)
+                (file_path, line_num) for file_path, line_num in matches if fnmatch(file_path, args.file_pattern)
             ]
 
         search_result_context = FileContext(repo=self._repository)
         for file_path, start_line in matches[: self.max_hits]:
             num_lines = len(args.code_snippet.splitlines())
             end_line = start_line + num_lines - 1
 
-            search_result_context.add_line_span_to_context(
-                file_path, start_line, end_line, add_extra=False
-            )
+            search_result_context.add_line_span_to_context(file_path, start_line, end_line, add_extra=False)
 
         return search_result_context, False