Skip to content

Commit

Permalink
Merge pull request #212 from potpie-ai/xml
Browse files (browse the repository at this point in the history)
Parsing support for repositories that contain only XML/Markdown files
  • Loading branch information
dhirenmathur authored Dec 16, 2024
2 parents e889a4f + 11ea1d9 commit d820204
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 2 deletions.
9 changes: 9 additions & 0 deletions app/modules/parsing/graph_construction/parsing_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ def open_text_file(file_path):
"html",
"css",
"sh",
"md",
"mdx",
"xsq",
]
if ext in exclude_extensions:
return False
Expand Down Expand Up @@ -218,6 +221,8 @@ def detect_repo_language(repo_dir):
"ruby": 0,
"rust": 0,
"typescript": 0,
"markdown": 0,
"xml": 0,
"other": 0,
}
total_chars = 0
Expand Down Expand Up @@ -266,6 +271,10 @@ def detect_repo_language(repo_dir):
lang_count["rust"] += 1
elif ext in [".ts", ".tsx"]:
lang_count["typescript"] += 1
elif ext in [".md", ".mdx"]:
lang_count["markdown"] += 1
elif ext in [".xml", ".xsq"]:
lang_count["xml"] += 1
else:
lang_count["other"] += 1
except (
Expand Down
5 changes: 4 additions & 1 deletion app/modules/parsing/graph_construction/parsing_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,10 @@ async def parse_directory(
language = self.parse_helper.detect_repo_language(extracted_dir)
else:
languages = repo.get_languages()
language = max(languages, key=languages.get).lower()
if languages:
language = max(languages, key=languages.get).lower()
else:
language = self.parse_helper.detect_repo_language(extracted_dir)

await self.analyze_directory(
extracted_dir, project_id, user_id, self.db, language, user_email
Expand Down
2 changes: 1 addition & 1 deletion app/modules/parsing/knowledge_graph/inference_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def replace_match(match):

if node_tokens > max_tokens:
logger.warning(
f"Node {node['node_id']} has exceeded the max_tokens limit. Skipping..."
f"Node {node['node_id']} - {node_tokens} tokens, has exceeded the max_tokens limit. Skipping..."
)
continue

Expand Down

0 comments on commit d820204

Please sign in to comment.