Skip to content

Commit

Permalink
Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
aorwall committed Aug 5, 2024
1 parent 63d9f92 commit 28b2242
Show file tree
Hide file tree
Showing 24 changed files with 542 additions and 251 deletions.
3 changes: 2 additions & 1 deletion moatless/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from moatless.loop import AgenticLoop, TransitionRules
from moatless.repository import FileRepository
from moatless.workspace import Workspace
from moatless.transition_rules import TransitionRules
from moatless.loop import AgenticLoop
220 changes: 124 additions & 96 deletions moatless/benchmark/evaluation.py

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions moatless/codeblocks/parser/create.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,25 @@
from moatless.codeblocks.parser.parser import CodeParser
from moatless.codeblocks.parser.python import PythonParser
from moatless.codeblocks.parser.java import JavaParser


def is_supported(language: str) -> bool:
    """Report whether ``language`` is one of the recognised language names.

    Args:
        language: Language name such as ``"python"``. ``None`` or an empty
            string is treated as unsupported.

    Returns:
        ``True`` when ``language`` is "python", "java", "typescript" or
        "javascript"; ``False`` otherwise.
    """
    # Coerce with bool(): a bare ``language and ...`` hands back the falsy
    # operand itself ("" or None) rather than the annotated bool.
    # NOTE(review): "typescript" and "javascript" are accepted here, but
    # create_parser() in this module only builds python and java parsers and
    # raises NotImplementedError for anything else -- confirm this is intended.
    return bool(language) and language in (
        "python",
        "java",
        "typescript",
        "javascript",
    )


def create_parser_by_ext(ext: str, **kwargs) -> CodeParser | None:
if ext == ".py":
return PythonParser(**kwargs)
elif ext == ".java":
return JavaParser(**kwargs)

raise NotImplementedError(f"Extension {ext} is not supported.")


def create_parser(language: str, **kwargs) -> CodeParser | None:
if language == "python":
return PythonParser(**kwargs)
elif language == "java":
return JavaParser(**kwargs)

raise NotImplementedError(f"Language {language} is not supported.")
2 changes: 1 addition & 1 deletion moatless/edit/clarify.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def init(self):
outcomment_code_comment="... other code",
)

def handle_action(self, request: LineNumberClarification) -> ActionResponse:
def _execute_action(self, request: LineNumberClarification) -> ActionResponse:
logger.info(
f"{self}: Got line number clarification: {request.start_line} - {request.end_line}"
)
Expand Down
2 changes: 1 addition & 1 deletion moatless/edit/edit.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def init(self):
lines_to_replace = code_lines[self.start_line - 1 : self.end_line]
self._code_to_replace = "\n".join(lines_to_replace)

def handle_action(self, content: Content) -> ActionResponse:
def _execute_action(self, content: Content) -> ActionResponse:
self._messages.append(AssistantMessage(content=content.content))

scratch_pad = None
Expand Down
7 changes: 6 additions & 1 deletion moatless/edit/plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def init(self):
)
self.file_context.expand_small_classes(max_tokens=1000)

def handle_action(self, action: ApplyChange) -> ActionResponse:
def _execute_action(self, action: ApplyChange) -> ActionResponse:
if action.action == "review":
if self.diff and self.finish_on_review:
logger.info("Review suggested after diff, will finish")
Expand Down Expand Up @@ -177,6 +177,11 @@ def _request_for_change(self, rfc: ApplyChange) -> ActionResponse:
f"request_for_change(file_path={rfc.file_path}, span_id={rfc.span_id})"
)

if not rfc.instructions:
return ActionResponse.retry(
f"Please provide instructions for the code change."
)

context_file = self.file_context.get_file(rfc.file_path)
if not context_file:
logger.warning(
Expand Down
2 changes: 1 addition & 1 deletion moatless/edit/plan_lines.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def init(self):
):
self.file_context.expand_context_with_related_spans(max_tokens=4000)

def handle_action(self, action: ApplyChange) -> ActionResponse:
def _execute_action(self, action: ApplyChange) -> ActionResponse:
if action.finish:
self.file_context.save()

Expand Down
2 changes: 1 addition & 1 deletion moatless/edit/review.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def init(self) -> Optional[ActionResponse]:

return None

def handle_action(self, action: ApplyChange) -> ActionResponse:
def _execute_action(self, action: ApplyChange) -> ActionResponse:
if action.action == "review":
if self.diff and self.finish_on_review:
logger.info(f"Review suggested after diff, will finish")
Expand Down
2 changes: 1 addition & 1 deletion moatless/find/decide.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def __init__(
**data,
)

def handle_action(self, action: Decision) -> ActionResponse:
def _execute_action(self, action: Decision) -> ActionResponse:
if action.complete and action.relevant:
return ActionResponse.transition("finish")

Expand Down
2 changes: 1 addition & 1 deletion moatless/find/identify.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def __init__(
def model_dump(self, **kwargs):
return super().model_dump(**kwargs)

def handle_action(self, action: Identify) -> ActionResponse:
def _execute_action(self, action: Identify) -> ActionResponse:
if action.identified_spans:
self.file_context.add_files_with_spans(action.identified_spans)

Expand Down
4 changes: 2 additions & 2 deletions moatless/find/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ def __init__(
**data,
)

def handle_action(self, action: Search) -> ActionResponse:
def _execute_action(self, action: Search) -> ActionResponse:
if action.complete:
return ActionResponse.transition(
"finish",
Expand Down Expand Up @@ -433,7 +433,7 @@ def messages(self) -> list[Message]:
query=self.loop.trajectory.initial_message,
exact_match_if_possible=False,
max_spans_per_file=5,
max_results=50,
max_results=100,
)

file_context = self.create_file_context(max_tokens=4000)
Expand Down
37 changes: 23 additions & 14 deletions moatless/index/code_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ def __init__(
max_exact_results: int = 5,
):
self._index_name = index_name

self._settings = settings or IndexSettings()

self.max_results = max_results
Expand Down Expand Up @@ -157,12 +156,12 @@ def from_index_name(
logger.info(f"Loading existing index {index_name} from {persist_dir}.")
return cls.from_persist_dir(persist_dir, file_repo=file_repo)

if not os.getenv("INDEX_STORE_URL"):
raise ValueError(
"INDEX_STORE_URL environment variable must be set to a index store URL to download the index."
)
if os.getenv("INDEX_STORE_URL"):
index_store_url = os.getenv("INDEX_STORE_URL")
else:
index_store_url = "https://stmoatless.blob.core.windows.net/indexstore/20240522-voyage-code-2"

store_url = os.path.join(os.getenv("INDEX_STORE_URL"), f"{index_name}.zip")
store_url = os.path.join(index_store_url, f"{index_name}.zip")
logger.info(f"Downloading existing index {index_name} from {store_url}.")
return cls.from_url(store_url, persist_dir, file_repo)

Expand Down Expand Up @@ -699,14 +698,23 @@ def file_metadata_func(file_path: str) -> dict:
"category": category,
}

reader = SimpleDirectoryReader(
input_dir=repo_path,
file_metadata=file_metadata_func,
input_files=input_files,
filename_as_id=True,
required_exts=[".py"], # TODO: Shouldn't be hardcoded and filtered
recursive=True,
)
if self._settings and self._settings.language == "java":
required_exts = [".java"]
else:
required_exts = [".py"]

try:
reader = SimpleDirectoryReader(
input_dir=repo_path,
file_metadata=file_metadata_func,
input_files=input_files,
filename_as_id=True,
required_exts=required_exts,
recursive=True,
)
except Exception as e:
logger.exception(f"Failed to create reader with input_dir {repo_path}, input_files {input_files} and required_exts {required_exts}.")
raise e

embed_pipeline = IngestionPipeline(
transformations=[self._embed_model],
Expand Down Expand Up @@ -737,6 +745,7 @@ def index_callback(codeblock: CodeBlock):
)

splitter = EpicSplitter(
language=self._settings.language,
min_chunk_size=self._settings.min_chunk_size,
chunk_size=self._settings.chunk_size,
hard_token_limit=self._settings.hard_token_limit,
Expand Down
11 changes: 9 additions & 2 deletions moatless/index/epic_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from llama_index.core.schema import BaseNode, TextNode
from llama_index.core.utils import get_tokenizer, get_tqdm_iterable

from moatless.codeblocks import create_parser
from moatless.codeblocks.codeblocks import CodeBlock, CodeBlockType, PathTree
from moatless.codeblocks.parser.python import PythonParser
from moatless.index.code_node import CodeNode
Expand Down Expand Up @@ -39,6 +40,10 @@ def count_parent_tokens(codeblock: CodeBlock) -> int:


class EpicSplitter(NodeParser):
language: str = Field(
default="python", description="Language of the code blocks to parse."
)

text_splitter: TextSplitter = Field(
description="Text splitter to use for splitting non code documents into nodes."
)
Expand Down Expand Up @@ -82,6 +87,7 @@ class EpicSplitter(NodeParser):

def __init__(
self,
language: str = "python",
chunk_size: int = 750,
min_chunk_size: int = 100,
max_chunk_size: int = 1500,
Expand All @@ -106,6 +112,7 @@ def __init__(
# self._fallback_code_splitter = fallback_code_splitter

super().__init__(
language=language,
chunk_size=chunk_size,
chunk_overlap=0,
text_splitter=text_splitter or TokenTextSplitter(),
Expand Down Expand Up @@ -142,10 +149,10 @@ def _parse_nodes(
content = node.get_content()

try:
# TODO: Derive language from file extension
starttime = time.time_ns()

parser = PythonParser(index_callback=self.index_callback)
# TODO: Derive language from file extension
parser = create_parser(language=self.language, index_callback=self.index_callback)
codeblock = parser.parse(content, file_path=file_path)

parse_time = time.time_ns() - starttime
Expand Down
Loading

0 comments on commit 28b2242

Please sign in to comment.