Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore[Security]: restrict libs to allow specific functionalities #1429

Merged
merged 5 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 22 additions & 9 deletions pandasai/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,21 +85,34 @@

# List of Python packages that are whitelisted for import in generated code
WHITELISTED_LIBRARIES = [
"sklearn",
"statsmodels",
"seaborn",
"plotly",
"ggplot",
"matplotlib",
"numpy",
"datetime",
"json",
"io",
"base64",
"scipy",
"streamlit",
"modin",
"scikit-learn",
"pandas",
]

# List of restricted libs
RESTRICTED_LIBS = [
"os", # OS-level operations (file handling, environment variables)
"sys", # System-level access
"subprocess", # Run system commands
"shutil", # File operations, including delete
"multiprocessing", # Spawn new processes
"threading", # Thread-level operations
"socket", # Network connections
"http", # HTTP requests
"ftplib", # FTP connections
"paramiko", # SSH operations
"tempfile", # Create temporary files
"pathlib", # Filesystem path handling
"resource", # Access resource usage limits (system-related)
"ssl", # SSL socket connections
"pickle", # Unsafe object serialization
"ctypes", # C-level interaction with memory
"psutil", # System and process utilities
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about io?

]

PANDASBI_SETUP_MESSAGE = (
Expand Down
34 changes: 27 additions & 7 deletions pandasai/helpers/optional.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,16 @@
import warnings
from typing import TYPE_CHECKING, List

import matplotlib.pyplot as plt
import numpy as np
from pandas.util.version import Version

import pandasai.pandas as pd
from pandasai.constants import WHITELISTED_BUILTINS
from pandasai.safe_libs.restricted_base64 import RestrictedBase64
from pandasai.safe_libs.restricted_datetime import RestrictedDatetime
from pandasai.safe_libs.restricted_json import RestrictedJson
from pandasai.safe_libs.restricted_matplotlib import RestrictedMatplotlib
from pandasai.safe_libs.restricted_numpy import RestrictedNumpy
from pandasai.safe_libs.restricted_pandas import RestrictedPandas
from pandasai.safe_libs.restricted_seaborn import RestrictedSeaborn

if TYPE_CHECKING:
import types
Expand Down Expand Up @@ -54,10 +58,7 @@

Returns (dict): A dictionary of environment variables
"""
return {
"pd": pd,
"plt": plt,
"np": np,
env = {
**{
lib["alias"]: (
getattr(import_dependency(lib["module"]), lib["name"])
Expand All @@ -73,6 +74,25 @@
},
}

env["pd"] = RestrictedPandas()
env["plt"] = RestrictedMatplotlib()
env["np"] = RestrictedNumpy()

for lib in additional_deps:
if lib["name"] == "seaborn":
env["sns"] = RestrictedSeaborn()

Check warning on line 83 in pandasai/helpers/optional.py

View check run for this annotation

Codecov / codecov/patch

pandasai/helpers/optional.py#L83

Added line #L83 was not covered by tests

if lib["name"] == "datetime":
env["datetime"] = RestrictedDatetime()

Check warning on line 86 in pandasai/helpers/optional.py

View check run for this annotation

Codecov / codecov/patch

pandasai/helpers/optional.py#L86

Added line #L86 was not covered by tests

if lib["name"] == "json":
env["json"] = RestrictedJson()

Check warning on line 89 in pandasai/helpers/optional.py

View check run for this annotation

Codecov / codecov/patch

pandasai/helpers/optional.py#L89

Added line #L89 was not covered by tests

if lib["name"] == "base64":
env["base64"] = RestrictedBase64()

Check warning on line 92 in pandasai/helpers/optional.py

View check run for this annotation

Codecov / codecov/patch

pandasai/helpers/optional.py#L92

Added line #L92 was not covered by tests

return env


def import_dependency(
name: str,
Expand Down
63 changes: 60 additions & 3 deletions pandasai/pipelines/chat/code_cleaning.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from ...connectors import BaseConnector
from ...connectors.sql import SQLConnector
from ...constants import WHITELISTED_BUILTINS, WHITELISTED_LIBRARIES
from ...constants import RESTRICTED_LIBS, WHITELISTED_LIBRARIES
from ...exceptions import (
BadImportError,
ExecuteSQLQueryNotUsed,
Expand Down Expand Up @@ -161,6 +161,58 @@
return code_to_run

def _is_malicious_code(self, code) -> bool:
tree = ast.parse(code)

# Check for private attributes and access of restricted libs
def check_restricted_access(node):
"""Check if the node accesses restricted modules or private attributes."""
if isinstance(node, ast.Attribute):
attr_chain = []
while isinstance(node, ast.Attribute):
if node.attr.startswith("_"):
raise MaliciousQueryError(
f"Access to private attribute '{node.attr}' is not allowed."
)
attr_chain.insert(0, node.attr)
node = node.value
if isinstance(node, ast.Name):
attr_chain.insert(0, node.id)
if any(module in RESTRICTED_LIBS for module in attr_chain):
raise MaliciousQueryError(

Check warning on line 181 in pandasai/pipelines/chat/code_cleaning.py

View check run for this annotation

Codecov / codecov/patch

pandasai/pipelines/chat/code_cleaning.py#L181

Added line #L181 was not covered by tests
f"Restricted access detected in attribute chain: {'.'.join(attr_chain)}"
)

elif isinstance(node, ast.Subscript) and isinstance(
node.value, ast.Attribute
):
check_restricted_access(node.value)

for node in ast.walk(tree):
# Check 'import ...' statements
if isinstance(node, ast.Import):
for alias in node.names:
sub_module_names = alias.name.split(".")
if any(module in RESTRICTED_LIBS for module in sub_module_names):
raise MaliciousQueryError(
f"Restricted library import detected: {alias.name}"
)

# Check 'from ... import ...' statements
elif isinstance(node, ast.ImportFrom):
sub_module_names = node.module.split(".")
if any(module in RESTRICTED_LIBS for module in sub_module_names):
raise MaliciousQueryError(

Check warning on line 204 in pandasai/pipelines/chat/code_cleaning.py

View check run for this annotation

Codecov / codecov/patch

pandasai/pipelines/chat/code_cleaning.py#L204

Added line #L204 was not covered by tests
f"Restricted library import detected: {node.module}"
)
if any(alias.name in RESTRICTED_LIBS for alias in node.names):
raise MaliciousQueryError(
"Restricted library import detected in 'from ... import ...'"
)

# Check attribute access for restricted libraries
elif isinstance(node, (ast.Attribute, ast.Subscript)):
check_restricted_access(node)

dangerous_modules = [
" os",
" io",
Expand All @@ -176,6 +228,7 @@
"(chr",
"b64decode",
]

return any(
re.search(r"\b" + re.escape(module) + r"\b", code)
for module in dangerous_modules
Expand Down Expand Up @@ -584,5 +637,9 @@
)
return

if library not in WHITELISTED_BUILTINS:
raise BadImportError(library)
if library not in WHITELISTED_LIBRARIES:
raise BadImportError(
f"The library '{library}' is not in the list of whitelisted libraries. "
"To learn how to whitelist custom dependencies, visit: "
"https://docs.pandas-ai.com/custom-whitelisted-dependencies#custom-whitelisted-dependencies"
)
27 changes: 27 additions & 0 deletions pandasai/safe_libs/base_restricted_module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
class BaseRestrictedModule:
    """Shared helpers for facades that expose a limited view of a module.

    Subclasses use ``_wrap_function`` / ``_wrap_class`` to put a guard in
    front of the callables they hand out. The guard rejects string arguments
    that name one of the modules in ``_BLOCKED_MODULE_NAMES``.
    """

    # Module names that must not appear as a standalone word in any string
    # argument passed to a guarded callable.
    _BLOCKED_MODULE_NAMES = ("io", "os", "subprocess", "sys", "importlib")

    def _wrap_function(self, func):
        """Return a callable that validates string arguments, then calls ``func``.

        Args:
            func: The callable to guard.

        Returns:
            A wrapper accepting the same positional and keyword arguments as
            ``func``. It raises ``SecurityError`` when any string argument
            contains a blocked module name as a whole word (case-insensitive)
            and otherwise returns ``func(*args, **kwargs)``.
        """
        # Function-scope import: this module has no import section of its own.
        import re

        # Match blocked names only as whole words. A plain substring test
        # would reject harmless values such as "position", "region" or
        # "cost" merely because they contain "io" or "os".
        blocked = re.compile(
            r"\b(?:"
            + "|".join(re.escape(name) for name in self._BLOCKED_MODULE_NAMES)
            + r")\b",
            re.IGNORECASE,
        )

        def wrapper(*args, **kwargs):
            # Check for any suspicious arguments that might be used for importing
            for arg in args + tuple(kwargs.values()):
                if isinstance(arg, str) and blocked.search(arg):
                    raise SecurityError(
                        f"Potential security risk: '{arg}' is not allowed"
                    )
            return func(*args, **kwargs)

        return wrapper

    def _wrap_class(self, cls):
        """Return a subclass of ``cls`` whose callable attributes are guarded.

        Args:
            cls: The class to derive the guarded subclass from.

        Returns:
            A subclass of ``cls``. Attribute access on its *instances* returns
            callables wrapped by ``_wrap_function``; non-callable attributes
            are returned unchanged.
        """
        # Capture the guard factory from this module object. Inside
        # ``__getattribute__`` the name ``self`` refers to the wrapped
        # instance, which does not provide ``_wrap_function``.
        guard = self._wrap_function

        class WrappedClass(cls):
            def __getattribute__(self, name):
                attr = super().__getattribute__(name)
                return guard(attr) if callable(attr) else attr

        return WrappedClass


class SecurityError(Exception):
    """Raised when a guarded callable receives a disallowed string argument."""
21 changes: 21 additions & 0 deletions pandasai/safe_libs/restricted_base64.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import base64

from .base_restricted_module import BaseRestrictedModule


class RestrictedBase64(BaseRestrictedModule):
    """Facade over :mod:`base64` that exposes only an allow-list of functions.

    Each allowed function is bound to the instance behind the guard produced
    by ``BaseRestrictedModule._wrap_function``. Any other attribute name
    raises ``AttributeError``.
    """

    def __init__(self):
        self.allowed_functions = [
            "b64encode",  # Safe function to encode data into base64
            "b64decode",  # Safe function to decode base64 encoded data
        ]

        # Bind the allowed functions to the object
        for func in self.allowed_functions:
            if hasattr(base64, func):
                setattr(self, func, self._wrap_function(getattr(base64, func)))

    def __getattr__(self, name):
        """Handle lookups for names that were not bound in ``__init__``.

        Raises:
            AttributeError: If ``name`` is not in the allow-list, or if
                :mod:`base64` does not define it.
        """
        # Read the allow-list straight from the instance dict. Using
        # ``self.allowed_functions`` here would re-enter ``__getattr__`` and
        # recurse without bound on an instance whose ``__init__`` has not run
        # (for example one created through ``__new__`` while being copied).
        allowed = self.__dict__.get("allowed_functions", ())
        if name not in allowed:
            raise AttributeError(f"'{name}' is not allowed in RestrictedBase64")
        # Apply the same guard as the attributes bound in ``__init__``.
        return self._wrap_function(getattr(base64, name))

Check warning on line 21 in pandasai/safe_libs/restricted_base64.py

View check run for this annotation

Codecov / codecov/patch

pandasai/safe_libs/restricted_base64.py#L19-L21

Added lines #L19 - L21 were not covered by tests
64 changes: 64 additions & 0 deletions pandasai/safe_libs/restricted_datetime.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import datetime

from .base_restricted_module import BaseRestrictedModule


class RestrictedDatetime(BaseRestrictedModule):
    """Facade over :mod:`datetime` that exposes only an allow-list of names.

    Names from the allow-list that the :mod:`datetime` module itself defines
    are bound to the instance. Any name outside the allow-list raises
    ``AttributeError``.
    """

    def __init__(self):
        self.allowed_attributes = [
            # Classes
            "date",
            "time",
            "datetime",
            "timedelta",
            "tzinfo",
            "timezone",
            # Constants
            "MINYEAR",
            "MAXYEAR",
            # Time zone constants
            "UTC",
            # Functions
            "now",
            "utcnow",
            "today",
            "fromtimestamp",
            "utcfromtimestamp",
            "fromordinal",
            "combine",
            "strptime",
            # Date operations
            "weekday",
            "isoweekday",
            "isocalendar",
            "isoformat",
            "ctime",
            "strftime",
            "year",
            "month",
            "day",
            "hour",
            "minute",
            "second",
            "microsecond",
            # Time operations
            "replace",
            "tzname",
            "dst",
            "utcoffset",
            # Comparison methods
            "min",
            "max",
        ]

        # Only names the module actually defines are bound; the remaining
        # entries are skipped by the ``hasattr`` check.
        for attr in self.allowed_attributes:
            if not hasattr(datetime, attr):
                continue
            value = getattr(datetime, attr)
            # Guard plain callables only. Classes must stay classes so that
            # their constructors' classmethods (``datetime.now()``,
            # ``date.today()``, ...) remain reachable, and constants such as
            # ``MINYEAR`` must stay values rather than become functions.
            if callable(value) and not isinstance(value, type):
                value = self._wrap_function(value)
            setattr(self, attr, value)

    def __getattr__(self, name):
        """Handle lookups for names that were not bound in ``__init__``.

        Raises:
            AttributeError: If ``name`` is not in the allow-list, or if the
                :mod:`datetime` module does not define it.
        """
        # Read the allow-list straight from the instance dict. Using
        # ``self.allowed_attributes`` here would re-enter ``__getattr__`` and
        # recurse without bound on an instance whose ``__init__`` has not run
        # (for example one created through ``__new__`` while being copied).
        allowed = self.__dict__.get("allowed_attributes", ())
        if name not in allowed:
            raise AttributeError(f"'{name}' is not allowed in RestrictedDatetime")

        return getattr(datetime, name)

Check warning on line 64 in pandasai/safe_libs/restricted_datetime.py

View check run for this annotation

Codecov / codecov/patch

pandasai/safe_libs/restricted_datetime.py#L64

Added line #L64 was not covered by tests
23 changes: 23 additions & 0 deletions pandasai/safe_libs/restricted_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import json

from .base_restricted_module import BaseRestrictedModule


class RestrictedJson(BaseRestrictedModule):
    """Facade over :mod:`json` that exposes only an allow-list of functions.

    Each allowed function is bound to the instance behind the guard produced
    by ``BaseRestrictedModule._wrap_function``. Any other attribute name
    raises ``AttributeError``.
    """

    def __init__(self):
        self.allowed_functions = [
            "load",
            "loads",
            "dump",
            "dumps",
        ]

        # Bind the allowed functions to the object
        for func in self.allowed_functions:
            if hasattr(json, func):
                setattr(self, func, self._wrap_function(getattr(json, func)))

    def __getattr__(self, name):
        """Handle lookups for names that were not bound in ``__init__``.

        Raises:
            AttributeError: If ``name`` is not in the allow-list, or if
                :mod:`json` does not define it.
        """
        # Read the allow-list straight from the instance dict. Using
        # ``self.allowed_functions`` here would re-enter ``__getattr__`` and
        # recurse without bound on an instance whose ``__init__`` has not run
        # (for example one created through ``__new__`` while being copied).
        allowed = self.__dict__.get("allowed_functions", ())
        if name not in allowed:
            raise AttributeError(f"'{name}' is not allowed in RestrictedJson")
        # Apply the same guard as the attributes bound in ``__init__``.
        return self._wrap_function(getattr(json, name))

Check warning on line 23 in pandasai/safe_libs/restricted_json.py

View check run for this annotation

Codecov / codecov/patch

pandasai/safe_libs/restricted_json.py#L21-L23

Added lines #L21 - L23 were not covered by tests
Loading
Loading