Skip to content

Commit

Permalink
Merge pull request #1306 from N1ghtmarecus/refactor/regex-malicious-k…
Browse files Browse the repository at this point in the history
…eywords

Improve query safety by using a regex for malicious keyword detection, and fix false positives by excluding benign phrases (e.g. "through osmosis") from detection.
  • Loading branch information
ArslanSaleem authored Aug 13, 2024
2 parents 1db8dd0 + 9737c68 commit e15281b
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 16 deletions.
22 changes: 6 additions & 16 deletions pandasai/agent/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import os
import re
import uuid
from typing import List, Optional, Union

Expand Down Expand Up @@ -234,22 +235,11 @@ def call_llm_with_prompt(self, prompt: BasePrompt):
retry_count += 1

def check_malicious_keywords_in_query(self, query):
    """
    Report whether the query mentions a keyword treated as dangerous.

    The keywords are the module names ``os`` and ``io``, the ``chr``
    builtin and ``b64decode``. They are matched as whole words, so
    ordinary words that merely contain those letters ("through osmosis",
    "the ionosphere", "df.osmosis") are not flagged.

    Args:
        query (str): Query text to inspect.

    Returns:
        bool: True if a dangerous keyword is found, False otherwise.
    """
    dangerous_pattern = re.compile(
        # Whole-word match. Dots, quotes and spaces are non-word characters,
        # so this also covers forms such as ".os", "'io'" and '"os"' without
        # the unanchored "\.os" / "\.io" alternatives, which matched inside
        # longer words like ".osmosis".
        r"\b(?:os|io|chr|b64decode)\b"
        # Fallbacks kept for "chr" glued to neighbouring characters,
        # e.g. "unichr(" or "(chr".
        r"|chr[() ]|\(chr"
    )
    return bool(dangerous_pattern.search(query))

def chat(self, query: str, output_type: Optional[str] = None):
"""
Expand Down
35 changes: 35 additions & 0 deletions tests/unit_tests/agent/test_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,3 +675,38 @@ def test_(self, sample_df, config):
The query contains references to io or os modules or b64decode method which can be used to execute or access system resources in unsafe ways.
"""
)

def test_query_detection(self, sample_df, config):
    """Check that chat() rejects flagged queries and answers benign ones normally."""
    # Exact reply expected from chat() when a query is rejected.
    rejection_reply = (
        "Unfortunately, I was not able to get your answers, because of the following error:\n\n"
        "The query contains references to io or os modules or b64decode method which can be used to execute or access system resources in unsafe ways.\n"
    )
    failure_prefix = "Unfortunately, I was not able to get your answers"

    # Queries that must be rejected.
    flagged_queries = (
        "import os",
        "import io",
        "chr(97)",
        "base64.b64decode",
        "file = open('file.txt', 'os')",
        "os.system('rm -rf /')",
        "io.open('file.txt', 'w')",
    )

    # Queries that only resemble the keywords and must not be rejected.
    benign_queries = (
        "print('Hello world')",
        "through osmosis",
        "the ionosphere",
        "the capital of Norway is Oslo",
    )

    agent = Agent(sample_df, config, memory_size=10)

    for text in flagged_queries:
        reply = agent.chat(text)
        assert reply == rejection_reply

    for text in benign_queries:
        reply = agent.chat(text)
        assert failure_prefix not in reply

0 comments on commit e15281b

Please sign in to comment.