From aa7fdac0ae49edf52aacdf2872de51004230ffe7 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Mon, 23 Oct 2023 17:19:25 -0400 Subject: [PATCH 01/15] feat: multicorn2 (Postgres FDW) backend --- .gitignore | 2 + docs/install.rst | 17 +- examples/postgres.py | 31 ++ postgres/Dockerfile | 27 ++ postgres/docker-compose.yml | 20 ++ postgres/entrypoint.sh | 21 ++ postgres/init.sql | 1 + setup.cfg | 4 + src/shillelagh/backends/apsw/db.py | 5 +- src/shillelagh/backends/apsw/dialects/base.py | 15 +- src/shillelagh/backends/multicorn/__init__.py | 0 src/shillelagh/backends/multicorn/db.py | 289 ++++++++++++++++++ .../backends/multicorn/dialects/__init__.py | 0 .../backends/multicorn/dialects/base.py | 105 +++++++ 14 files changed, 524 insertions(+), 13 deletions(-) create mode 100644 examples/postgres.py create mode 100644 postgres/Dockerfile create mode 100644 postgres/docker-compose.yml create mode 100755 postgres/entrypoint.sh create mode 100644 postgres/init.sql create mode 100644 src/shillelagh/backends/multicorn/__init__.py create mode 100644 src/shillelagh/backends/multicorn/db.py create mode 100644 src/shillelagh/backends/multicorn/dialects/__init__.py create mode 100644 src/shillelagh/backends/multicorn/dialects/base.py diff --git a/.gitignore b/.gitignore index 22e91ac2..64569892 100644 --- a/.gitignore +++ b/.gitignore @@ -105,3 +105,5 @@ ENV/ *.sqlite *.db *.swp + +multicorn2 diff --git a/docs/install.rst b/docs/install.rst index d4d49d8e..5265bc03 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -14,17 +14,18 @@ You also need to install optional dependencies, depending on the adapter you wan .. 
code-block:: bash - $ pip install 'shillelagh[console]' # to use the CLI - $ pip install 'shillelagh[githubapi]' # for GitHub - $ pip install 'shillelagh[gsheetsapi]' # for GSheets - $ pip install 'shillelagh[htmltableapi]' # for HTML tables - $ pip install 'shillelagh[pandasmemory]' # for Pandas in memory - $ pip install 'shillelagh[s3selectapi]' # for S3 files - $ pip install 'shillelagh[systemapi]' # for CPU information + $ pip install 'shillelagh[console]' # to use the CLI + $ pip install 'shillelagh[genericjsonapi]' # for Generic JSON + $ pip install 'shillelagh[genericxmlapi]' # for Generic XML + $ pip install 'shillelagh[githubapi]' # for GitHub + $ pip install 'shillelagh[gsheetsapi]' # for GSheets + $ pip install 'shillelagh[htmltableapi]' # for HTML tables + $ pip install 'shillelagh[pandasmemory]' # for Pandas in memory + $ pip install 'shillelagh[s3selectapi]' # for S3 files + $ pip install 'shillelagh[systemapi]' # for CPU information Alternatively, you can install everything with: .. code-block:: bash $ pip install 'shillelagh[all]' -~ diff --git a/examples/postgres.py b/examples/postgres.py new file mode 100644 index 00000000..26e8ae01 --- /dev/null +++ b/examples/postgres.py @@ -0,0 +1,31 @@ +""" +Simple multicorn2 test. + +Multicorn2 is an extension for PostgreSQL that allows you to create foreign data wrappers +in Python. To use it, you need to install on the machine running Postgres the extension, +the multicorn2 package (not on (PyPI), and the shillelagh package. + +If you want to play with it Shillelagh has a `docker-compose.yml` file that will run +Postgres with the extenion and the Python packages. Just run: + + $ cd postgres/ + $ docker-compose up --build -d + +Then you can run this script. 
+""" + +from sqlalchemy import create_engine + +# the backend uses psycopg2 under the hood, so any valid connection string for it will +# work; just replace the scheme with `shillelagh+multicorn2` +engine = create_engine( + "shillelagh+multicorn2://shillelagh:shillelagh123@localhost:12345/shillelagh" +) +connection = engine.connect() + +SQL = ( + 'SELECT * FROM "https://docs.google.com/spreadsheets/d/' + '1LcWZMsdCl92g7nA-D6qGRqg1T5TiHyuKJUY1u9XAnsk/edit#gid=0"' +) +for row in connection.execute(SQL): + print(row) diff --git a/postgres/Dockerfile b/postgres/Dockerfile new file mode 100644 index 00000000..cf836fbf --- /dev/null +++ b/postgres/Dockerfile @@ -0,0 +1,27 @@ +# Use the official Postgres image as a base +FROM postgres:13 + +# Use root for package installation +USER root + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + build-essential \ + git \ + postgresql-server-dev-13 \ + python3 \ + python3-dev \ + python3-setuptools \ + python3-pip \ + python3-venv \ + wget + +# Download, build, and install multicorn2 +RUN wget https://github.com/pgsql-io/multicorn2/archive/refs/tags/v2.5.tar.gz && \ + tar -xvf v2.5.tar.gz && \ + cd multicorn2-2.5 && \ + make && \ + make install + +# Switch back to the default postgres user +USER postgres diff --git a/postgres/docker-compose.yml b/postgres/docker-compose.yml new file mode 100644 index 00000000..89eeb956 --- /dev/null +++ b/postgres/docker-compose.yml @@ -0,0 +1,20 @@ +version: '3.8' + +services: + postgres: + build: . 
+ environment: + POSTGRES_PASSWORD: shillelagh123 + POSTGRES_USER: shillelagh + POSTGRES_DB: shillelagh + volumes: + - db_data:/var/lib/postgresql/data + - ./init.sql:/docker-entrypoint-initdb.d/init.sql:ro + - ./entrypoint.sh:/usr/local/bin/custom_entrypoint.sh + - ..:/src + ports: + - "12345:5432" + entrypoint: /usr/local/bin/custom_entrypoint.sh + +volumes: + db_data: diff --git a/postgres/entrypoint.sh b/postgres/entrypoint.sh new file mode 100755 index 00000000..ad4cc37e --- /dev/null +++ b/postgres/entrypoint.sh @@ -0,0 +1,21 @@ +#!/bin/bash +set -e # If any command fails, stop the script + +# create virtualenv +cd +python3 -m venv venv +source venv/bin/activate + +# install shillelagh +cd /src +pip3 install -e '.[all]' + +# install multicorn2 +rm -rf multicorn2 +git clone https://github.com/pgsql-io/multicorn2.git +cd multicorn2 +git checkout v2.5 +pip3 install . + +# call the original entrypoint +exec docker-entrypoint.sh postgres diff --git a/postgres/init.sql b/postgres/init.sql new file mode 100644 index 00000000..6844895f --- /dev/null +++ b/postgres/init.sql @@ -0,0 +1 @@ +CREATE EXTENSION IF NOT EXISTS multicorn; diff --git a/setup.cfg b/setup.cfg index e276358f..ddb1e2e0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -115,6 +115,7 @@ all = prison>=0.2.1 prompt_toolkit>=3 psutil>=5.8.0 + psycopg2-binary>=2.9.9 pyarrow>=14.0.1 pygments>=2.8 python-graphql-client>=0.4.3 @@ -153,6 +154,8 @@ htmltableapi = beautifulsoup4>=4.11.1 html5lib>=1.1 pandas>=1.2.2 +multicorn = + psycopg2-binary>=2.9.9 pandasmemory = pandas>=1.2.2 s3selectapi = @@ -184,6 +187,7 @@ sqlalchemy.dialects = shillelagh.apsw = shillelagh.backends.apsw.dialects.base:APSWDialect shillelagh.safe = shillelagh.backends.apsw.dialects.safe:APSWSafeDialect gsheets = shillelagh.backends.apsw.dialects.gsheets:APSWGSheetsDialect + shillelagh.multicorn2 = shillelagh.backends.multicorn.dialects.base:Multicorn2Dialect console_scripts = shillelagh = shillelagh.console:main # For example: diff --git 
a/src/shillelagh/backends/apsw/db.py b/src/shillelagh/backends/apsw/db.py index ab7b8901..be9cd007 100644 --- a/src/shillelagh/backends/apsw/db.py +++ b/src/shillelagh/backends/apsw/db.py @@ -286,9 +286,10 @@ def _drop_table_uri(self, operation: str) -> Optional[str]: operation = "\n".join( line for line in operation.split("\n") if not line.strip().startswith("--") ) + schema = re.escape(self.schema) regexp = re.compile( - rf"^\s*DROP\s+TABLE\s+(IF\s+EXISTS\s+)?" - rf'({self.schema}\.)?(?P(.*?)|(".*?"))\s*;?\s*$', + r"^\s*DROP\s+TABLE\s+(IF\s+EXISTS\s+)?" + rf'({schema}\.)?(?P(.*?)|(".*?"))\s*;?\s*$', re.IGNORECASE, ) if match := regexp.match(operation): diff --git a/src/shillelagh/backends/apsw/dialects/base.py b/src/shillelagh/backends/apsw/dialects/base.py index 73d15b60..5fbcf5e0 100644 --- a/src/shillelagh/backends/apsw/dialects/base.py +++ b/src/shillelagh/backends/apsw/dialects/base.py @@ -1,5 +1,8 @@ +""" +A SQLALchemy dialect. +""" + # pylint: disable=protected-access, abstract-method -"""A SQLALchemy dialect.""" from typing import Any, Dict, List, Optional, Tuple, cast @@ -102,7 +105,6 @@ def has_table( # pylint: disable=unused-argument connection: _ConnectionFairy, table_name: str, schema: Optional[str] = None, - info_cache: Optional[Dict[Any, Any]] = None, **kwargs: Any, ) -> bool: """ @@ -111,7 +113,14 @@ def has_table( # pylint: disable=unused-argument try: get_adapter_for_table_name(connection, table_name) except ProgrammingError: - return False + return bool( + super().has_table( + connection, + table_name, + schema, + **kwargs, + ), + ) return True # needed for SQLAlchemy diff --git a/src/shillelagh/backends/multicorn/__init__.py b/src/shillelagh/backends/multicorn/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/shillelagh/backends/multicorn/db.py b/src/shillelagh/backends/multicorn/db.py new file mode 100644 index 00000000..65737f9b --- /dev/null +++ b/src/shillelagh/backends/multicorn/db.py @@ -0,0 +1,289 @@ +# pylint: 
disable=invalid-name, c-extension-no-member, unused-import +""" +A DB API 2.0 wrapper. +""" +import logging +import re +from typing import ( + Any, + Dict, + Iterator, + List, + Optional, + Tuple, + Type, + TypedDict, + Union, + cast, +) +from uuid import uuid4 + +import psycopg2 +from multicorn import ForeignDataWrapper, Qual, SortKey +from psycopg2 import extensions + +from shillelagh.adapters.base import Adapter +from shillelagh.adapters.registry import registry +from shillelagh.exceptions import ( # nopycln: import; pylint: disable=redefined-builtin + DatabaseError, + DataError, + Error, + IntegrityError, + InterfaceError, + InternalError, + NotSupportedError, + OperationalError, + ProgrammingError, + Warning, +) +from shillelagh.lib import ( + combine_args_kwargs, + deserialize, + escape_identifier, + find_adapter, + serialize, +) +from shillelagh.types import ( + BINARY, + DATETIME, + NUMBER, + ROWID, + STRING, + Binary, + Date, + DateFromTicks, + Time, + TimeFromTicks, + Timestamp, + TimestampFromTicks, +) +from shillelagh.typing import Row + +apilevel = "2.0" +threadsafety = 2 +paramstyle = "pyformat" + +NO_SUCH_TABLE = re.compile('relation "(.*?)" does not exist') +DEFAULT_SCHEMA = "main" + +_logger = logging.getLogger(__name__) + + +class Cursor(extensions.cursor): # pylint: disable=too-few-public-methods + """ + A cursor that registers FDWs. + """ + + def __init__( + self, + *args: Any, + adapters: Dict[str, Type[Adapter]], + adapter_kwargs: Dict[str, Dict[str, Any]], + schema: str, + **kwargs: Any, + ): + super().__init__(*args, **kwargs) + self._adapters = list(adapters.values()) + self._adapter_map = {v: k for k, v in adapters.items()} + self._adapter_kwargs = adapter_kwargs + self.schema = schema + + def execute( + self, + operation: str, + parameters: Optional[Tuple[Any, ...]] = None, + ) -> Union["Cursor", extensions.cursor]: + """ + Execute a query, automatically registering FDWs if necessary. 
+ """ + while True: + savepoint = uuid4() + super().execute(f'SAVEPOINT "{savepoint}"') + + try: + return cast(extensions.cursor, super().execute(operation, parameters)) + except psycopg2.errors.UndefinedTable as ex: # pylint: disable=no-member + message = ex.args[0] + match = NO_SUCH_TABLE.match(message) + if not match: + raise ProgrammingError(message) from ex + + # Postgres truncates the table name in the error message, so we need to + # find it in the original query + fragment = match.group(1) + uri = self._get_table_uri(fragment, operation) + if not uri: + raise ProgrammingError("Could not determine table name") from ex + + super().execute(f'ROLLBACK TO SAVEPOINT "{savepoint}"') + self._create_table(uri) + + if uri := self._drop_table_uri(operation): + adapter, args, kwargs = find_adapter( + uri, + self._adapter_kwargs, + self._adapters, + ) + instance = adapter(*args, **kwargs) + instance.drop_table() + + return self + + def _get_table_uri(self, fragment: str, operation: str) -> Optional[str]: + """ + Extract the table name from a query. + """ + schema = re.escape(self.schema) + fragment = re.escape(fragment) + regexp = re.compile( + rf'\b(FROM|INTO)\s+({schema}\.)?(?P"{fragment}.*?")', + re.IGNORECASE, + ) + if match := regexp.search(operation): + return match.groupdict()["uri"].strip('"') + + return None + + def _drop_table_uri(self, operation: str) -> Optional[str]: + """ + Build a ``DROP TABLE`` regexp. + """ + schema = re.escape(self.schema) + regexp = re.compile( + r"^\s*DROP\s+TABLE\s+(IF\s+EXISTS\s+)?" + rf'({schema}\.)?(?P(.*?)|(".*?"))\s*;?\s*$', + re.IGNORECASE, + ) + if match := regexp.match(operation): + return match.groupdict()["uri"].strip('"') + + return None + + def _create_table(self, uri: str) -> None: + """ + Register a FDW. 
+ """ + adapter, args, kwargs = find_adapter(uri, self._adapter_kwargs, self._adapters) + formatted_args = serialize(combine_args_kwargs(adapter, *args, **kwargs)) + entrypoint = self._adapter_map[adapter] + + table_name = escape_identifier(uri) + + columns = adapter(*args, **kwargs).get_columns() + if not columns: + raise ProgrammingError(f"Virtual table {table_name} has no columns") + + quoted_columns = {k.replace('"', '""'): v for k, v in columns.items()} + formatted_columns = ", ".join( + f'"{k}" {v.type}' for (k, v) in quoted_columns.items() + ) + + super().execute( + """ +CREATE SERVER shillelagh foreign data wrapper multicorn options ( + wrapper 'shillelagh.backends.multicorn.db.MulticornForeignDataWrapper' +); + """, + ) + super().execute( + f""" +CREATE FOREIGN TABLE "{table_name}" ( + {formatted_columns} +) server shillelagh options ( + adapter '{entrypoint}', + args '{formatted_args}' +); + """, + ) + + +class CursorFactory: # pylint: disable=too-few-public-methods + """ + Custom cursor factory. + + This returns a custom cursor that will auto register FDWs for the user. + """ + + def __init__( + self, + adapters: Dict[str, Type[Adapter]], + adapter_kwargs: Dict[str, Dict[str, Any]], + schema: str, + ): + self.schema = schema + self._adapters = adapters + self._adapter_kwargs = adapter_kwargs + + def __call__(self, *args, **kwargs) -> Cursor: + """ + Create a new cursor. + """ + return Cursor( + *args, + adapters=self._adapters, + adapter_kwargs=self._adapter_kwargs, + schema=self.schema, + **kwargs, + ) + + +class OptionsType(TypedDict): + """ + Type for OPTIONS. + """ + + adapter: str + args: str + + +class MulticornForeignDataWrapper(ForeignDataWrapper): + """ + A FDW that dispatches queries to adapters. 
+ """ + + def __init__(self, options: OptionsType, columns: Dict[str, str]): + super().__init__(options, columns) + + deserialized_args = deserialize(options["args"]) + self.adapter = registry.load(options["adapter"])(*deserialized_args) + + def execute( + self, + quals: List[Qual], + columns: List[str], + sortkeys: Optional[List[SortKey]] = None, + ) -> Iterator[Row]: + # XXX implement quals/columns + return self.adapter.get_rows({}, []) + + # XXX implement write API + + +def connect( # pylint: disable=too-many-arguments + adapters: Optional[List[str]] = None, + adapter_kwargs: Optional[Dict[str, Dict[str, Any]]] = None, + safe: bool = False, + schema: str = DEFAULT_SCHEMA, + **psycopg2_connection_kwargs: Any, +) -> extensions.connection: + """ + Constructor for creating a connection to the database. + """ + adapter_kwargs = adapter_kwargs or {} + enabled_adapters = registry.load_all(adapters, safe) + + # replace entry point names with class names + mapping = { + name: adapter.__name__.lower() for name, adapter in enabled_adapters.items() + } + adapter_kwargs = {mapping[k]: v for k, v in adapter_kwargs.items() if k in mapping} + + cursor_factory = CursorFactory( + enabled_adapters, + adapter_kwargs, + schema, + ) + return psycopg2.connect( + cursor_factory=cursor_factory, + **psycopg2_connection_kwargs, + ) diff --git a/src/shillelagh/backends/multicorn/dialects/__init__.py b/src/shillelagh/backends/multicorn/dialects/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/shillelagh/backends/multicorn/dialects/base.py b/src/shillelagh/backends/multicorn/dialects/base.py new file mode 100644 index 00000000..5e654d42 --- /dev/null +++ b/src/shillelagh/backends/multicorn/dialects/base.py @@ -0,0 +1,105 @@ +""" +A SQLAlchemy dialect based on psycopg2 and multicorn2. 
+""" + +# pylint: disable=protected-access, abstract-method + +from typing import Any, Dict, List, Optional, Tuple, cast + +from psycopg2 import extensions +from sqlalchemy.dialects.postgresql.psycopg2 import PGDialect_psycopg2 +from sqlalchemy.engine.url import URL +from sqlalchemy.pool.base import _ConnectionFairy + +from shillelagh.adapters.base import Adapter +from shillelagh.backends.multicorn import db +from shillelagh.exceptions import ProgrammingError +from shillelagh.lib import find_adapter + + +class Multicorn2Dialect(PGDialect_psycopg2): + + """ + A SQLAlchemy dialect for Shillelagh based on psycopg2 and multicorn2. + """ + + name = "shillelagh" + driver = "multicorn2" + + supports_statement_cache = True + + @classmethod + def dbapi(cls): + """ + Return the DB API module. + """ + return db + + import_dbapi = dbapi + + def __init__( + self, + adapters: Optional[List[str]] = None, + adapter_kwargs: Optional[Dict[str, Dict[str, Any]]] = None, + safe: bool = False, + **kwargs: Any, + ): + super().__init__(**kwargs) + self._adapters = adapters + self._adapter_kwargs = adapter_kwargs or {} + self._safe = safe + + def create_connect_args( + self, + url: URL, + ) -> Tuple[List[Any], Dict[str, Any]]: + args, kwargs = super().create_connect_args(url) + kwargs.update( + { + "adapters": self._adapters, + "adapter_kwargs": self._adapter_kwargs, + }, + ) + return args, kwargs + + def has_table( + self, + connection: _ConnectionFairy, + table_name: str, + schema: Optional[str] = None, + **kwargs: Any, + ) -> bool: + """ + Return true if a given table exists. + """ + try: + get_adapter_for_table_name(connection, table_name) + except ProgrammingError: + return bool( + super().has_table( + connection, + table_name, + schema, + **kwargs, + ), + ) + return True + + +def get_adapter_for_table_name( + connection: _ConnectionFairy, + table_name: str, +) -> Adapter: + """ + Return an adapter associated with a connection. 
+ + This function instantiates the adapter responsible for a given table name, + using the connection to properly pass any adapter kwargs. + """ + raw_connection = cast(extensions.connection, connection.engine.raw_connection()) + adapter, args, kwargs = find_adapter( + table_name, + raw_connection._adapter_kwargs, + raw_connection._adapters, + ) + return adapter(*args, **kwargs) From b0fdbf41953e4de55926f73f9d72e4bb0aa868dc Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Mon, 23 Oct 2023 23:33:27 -0400 Subject: [PATCH 02/15] Adding tests --- examples/postgres.py | 2 +- src/shillelagh/backends/multicorn/db.py | 8 ++++---- tests/backends/apsw/db_test.py | 3 ++- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/examples/postgres.py b/examples/postgres.py index 26e8ae01..7afdd343 100644 --- a/examples/postgres.py +++ b/examples/postgres.py @@ -19,7 +19,7 @@ # the backend uses psycopg2 under the hood, so any valid connection string for it will # work; just replace the scheme with `shillelagh+multicorn2` engine = create_engine( - "shillelagh+multicorn2://shillelagh:shillelagh123@localhost:12345/shillelagh" + "shillelagh+multicorn2://shillelagh:shillelagh123@localhost:12345/shillelagh", ) connection = engine.connect() diff --git a/src/shillelagh/backends/multicorn/db.py b/src/shillelagh/backends/multicorn/db.py index 65737f9b..edae751e 100644 --- a/src/shillelagh/backends/multicorn/db.py +++ b/src/shillelagh/backends/multicorn/db.py @@ -83,6 +83,7 @@ def __init__( **kwargs: Any, ): super().__init__(*args, **kwargs) + self._adapters = list(adapters.values()) self._adapter_map = {v: k for k, v in adapters.items()} self._adapter_kwargs = adapter_kwargs @@ -236,7 +237,9 @@ class OptionsType(TypedDict): args: str -class MulticornForeignDataWrapper(ForeignDataWrapper): +class MulticornForeignDataWrapper( + ForeignDataWrapper, +): # pylint: disable=abstract-method """ A FDW that dispatches queries to adapters. 
""" @@ -253,11 +256,8 @@ def execute( columns: List[str], sortkeys: Optional[List[SortKey]] = None, ) -> Iterator[Row]: - # XXX implement quals/columns return self.adapter.get_rows({}, []) - # XXX implement write API - def connect( # pylint: disable=too-many-arguments adapters: Optional[List[str]] = None, diff --git a/tests/backends/apsw/db_test.py b/tests/backends/apsw/db_test.py index 662bc14c..206c0bad 100644 --- a/tests/backends/apsw/db_test.py +++ b/tests/backends/apsw/db_test.py @@ -1,8 +1,9 @@ -# pylint: disable=protected-access, c-extension-no-member, too-few-public-methods """ Tests for shillelagh.backends.apsw.db. """ +# pylint: disable=protected-access, c-extension-no-member, too-few-public-methods + import datetime from typing import Any, List, Tuple from unittest import mock From 21192379e81bd03306eede95179aa97f787213d1 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Mon, 23 Oct 2023 23:33:39 -0400 Subject: [PATCH 03/15] Adding tests --- CHANGELOG.rst | 6 +- examples/postgres.py | 2 +- src/shillelagh/backends/apsw/dialects/base.py | 2 +- src/shillelagh/backends/multicorn/db.py | 64 ++--- .../backends/multicorn/dialects/base.py | 14 +- tests/backends/apsw/dialects/base_test.py | 8 + tests/backends/multicorn/__init__.py | 0 tests/backends/multicorn/db_test.py | 244 ++++++++++++++++++ tests/backends/multicorn/dialects/__init__.py | 0 .../backends/multicorn/dialects/base_test.py | 83 ++++++ 10 files changed, 366 insertions(+), 57 deletions(-) create mode 100644 tests/backends/multicorn/__init__.py create mode 100644 tests/backends/multicorn/db_test.py create mode 100644 tests/backends/multicorn/dialects/__init__.py create mode 100644 tests/backends/multicorn/dialects/base_test.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d8362541..f0bdabbb 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,8 +2,10 @@ Changelog ========= -Next -==== +Next (1.3.0) +============ + +- New Postgres backend based on multicorn2 (#397) Version 1.2.28 - 2024-09-11 
=========================== diff --git a/examples/postgres.py b/examples/postgres.py index 7afdd343..b46a4599 100644 --- a/examples/postgres.py +++ b/examples/postgres.py @@ -6,7 +6,7 @@ the multicorn2 package (not on (PyPI), and the shillelagh package. If you want to play with it Shillelagh has a `docker-compose.yml` file that will run -Postgres with the extenion and the Python packages. Just run: +Postgres with the extension and the Python packages. Just run: $ cd postgres/ $ docker-compose up --build -d diff --git a/src/shillelagh/backends/apsw/dialects/base.py b/src/shillelagh/backends/apsw/dialects/base.py index 5fbcf5e0..56302020 100644 --- a/src/shillelagh/backends/apsw/dialects/base.py +++ b/src/shillelagh/backends/apsw/dialects/base.py @@ -118,7 +118,7 @@ def has_table( # pylint: disable=unused-argument connection, table_name, schema, - **kwargs, + **kwargs, # pylint: disable=unused-argument ), ) return True diff --git a/src/shillelagh/backends/multicorn/db.py b/src/shillelagh/backends/multicorn/db.py index edae751e..35009179 100644 --- a/src/shillelagh/backends/multicorn/db.py +++ b/src/shillelagh/backends/multicorn/db.py @@ -4,18 +4,7 @@ """ import logging import re -from typing import ( - Any, - Dict, - Iterator, - List, - Optional, - Tuple, - Type, - TypedDict, - Union, - cast, -) +from typing import Any, Dict, List, Optional, Tuple, Type, Union, cast from uuid import uuid4 import psycopg2 @@ -97,12 +86,16 @@ def execute( """ Execute a query, automatically registering FDWs if necessary. 
""" + # which cursor should be returned + cursor: Union["Cursor", extensions.cursor] = self + while True: savepoint = uuid4() super().execute(f'SAVEPOINT "{savepoint}"') try: - return cast(extensions.cursor, super().execute(operation, parameters)) + cursor = cast(extensions.cursor, super().execute(operation, parameters)) + break except psycopg2.errors.UndefinedTable as ex: # pylint: disable=no-member message = ex.args[0] match = NO_SUCH_TABLE.match(message) @@ -128,7 +121,7 @@ def execute( instance = adapter(*args, **kwargs) instance.drop_table() - return self + return cursor def _get_table_uri(self, fragment: str, operation: str) -> Optional[str]: """ @@ -182,7 +175,7 @@ def _create_table(self, uri: str) -> None: super().execute( """ CREATE SERVER shillelagh foreign data wrapper multicorn options ( - wrapper 'shillelagh.backends.multicorn.db.MulticornForeignDataWrapper' + wrapper 'shillelagh.backends.multicorn.fdw.MulticornForeignDataWrapper' ); """, ) @@ -228,49 +221,24 @@ def __call__(self, *args, **kwargs) -> Cursor: ) -class OptionsType(TypedDict): - """ - Type for OPTIONS. - """ - - adapter: str - args: str - - -class MulticornForeignDataWrapper( - ForeignDataWrapper, -): # pylint: disable=abstract-method - """ - A FDW that dispatches queries to adapters. 
- """ - - def __init__(self, options: OptionsType, columns: Dict[str, str]): - super().__init__(options, columns) - - deserialized_args = deserialize(options["args"]) - self.adapter = registry.load(options["adapter"])(*deserialized_args) - - def execute( - self, - quals: List[Qual], - columns: List[str], - sortkeys: Optional[List[SortKey]] = None, - ) -> Iterator[Row]: - return self.adapter.get_rows({}, []) - - def connect( # pylint: disable=too-many-arguments adapters: Optional[List[str]] = None, adapter_kwargs: Optional[Dict[str, Dict[str, Any]]] = None, - safe: bool = False, schema: str = DEFAULT_SCHEMA, **psycopg2_connection_kwargs: Any, ) -> extensions.connection: """ Constructor for creating a connection to the database. + + Only safe adapters can be loaded. If no adapters are specified, all safe adapters are + loaded. """ adapter_kwargs = adapter_kwargs or {} - enabled_adapters = registry.load_all(adapters, safe) + enabled_adapters = { + name: adapter + for name, adapter in registry.load_all(adapters, safe=False).items() + if adapter.safe + } # replace entry point names with class names mapping = { diff --git a/src/shillelagh/backends/multicorn/dialects/base.py b/src/shillelagh/backends/multicorn/dialects/base.py index 5e654d42..6b6829bd 100644 --- a/src/shillelagh/backends/multicorn/dialects/base.py +++ b/src/shillelagh/backends/multicorn/dialects/base.py @@ -35,19 +35,22 @@ def dbapi(cls): """ return db - import_dbapi = dbapi + @classmethod + def import_dbapi(cls): + """ + New version of the ``dbapi`` method. + """ + return db def __init__( self, adapters: Optional[List[str]] = None, adapter_kwargs: Optional[Dict[str, Dict[str, Any]]] = None, - safe: bool = False, **kwargs: Any, ): super().__init__(**kwargs) self._adapters = adapters self._adapter_kwargs = adapter_kwargs or {} - self._safe = safe def create_connect_args( self, @@ -97,9 +100,10 @@ def get_adapter_for_table_name( using the connection to properly pass any adapter kwargs. 
""" raw_connection = cast(extensions.connection, connection.engine.raw_connection()) + cursor = raw_connection.cursor() adapter, args, kwargs = find_adapter( table_name, - raw_connection._adapter_kwargs, - raw_connection._adapters, + cursor._adapter_kwargs, + cursor._adapters, ) return adapter(*args, **kwargs) diff --git a/tests/backends/apsw/dialects/base_test.py b/tests/backends/apsw/dialects/base_test.py index a578c2fd..37586d0a 100644 --- a/tests/backends/apsw/dialects/base_test.py +++ b/tests/backends/apsw/dialects/base_test.py @@ -8,12 +8,20 @@ from sqlalchemy import MetaData, Table, create_engine, func, inspect, select from shillelagh.adapters.registry import AdapterLoader +from shillelagh.backends.apsw import db from shillelagh.backends.apsw.dialects.base import APSWDialect from shillelagh.exceptions import ProgrammingError from ....fakes import FakeAdapter +def test_dbapi() -> None: + """ + Test the ``dbapi`` and ``import_dbapi`` methods. + """ + assert APSWDialect.dbapi() == APSWDialect.import_dbapi() == db + + def test_create_engine(registry: AdapterLoader) -> None: """ Test ``create_engine``. diff --git a/tests/backends/multicorn/__init__.py b/tests/backends/multicorn/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/backends/multicorn/db_test.py b/tests/backends/multicorn/db_test.py new file mode 100644 index 00000000..b2f7d2d4 --- /dev/null +++ b/tests/backends/multicorn/db_test.py @@ -0,0 +1,244 @@ +""" +Tests for the Multicorn2 DB API 2.0 wrapper. 
+""" + +# pylint: disable=invalid-name, redefined-outer-name, no-member, redefined-builtin + +import psycopg2 +import pytest +from pytest_mock import MockerFixture + +from shillelagh.adapters.registry import AdapterLoader +from shillelagh.backends.multicorn.db import Cursor, CursorFactory, connect +from shillelagh.exceptions import ProgrammingError + +from ...fakes import FakeAdapter + + +def test_connect(mocker: MockerFixture, registry: AdapterLoader) -> None: + """ + Test the ``connect`` function. + """ + psycopg2 = mocker.patch("shillelagh.backends.multicorn.db.psycopg2") + CursorFactory = mocker.patch("shillelagh.backends.multicorn.db.CursorFactory") + + registry.add("dummy", FakeAdapter) + + connect( + ["dummy"], + username="username", + password="password", + host="host", + port=1234, + database="database", + ) + psycopg2.connect.assert_called_with( + cursor_factory=CursorFactory( + {"dummy": FakeAdapter}, + {}, + "main", + ), + username="username", + password="password", + host="host", + port=1234, + database="database", + ) + + +def test_cursor_factory(mocker: MockerFixture) -> None: + """ + Test the ``CursorFactory`` class. + """ + Cursor = mocker.patch("shillelagh.backends.multicorn.db.Cursor") + + cursor_factory = CursorFactory( + {"dummy": FakeAdapter}, + {}, + "main", + ) + assert cursor_factory( + username="username", + password="password", + host="host", + port=1234, + database="database", + ) == Cursor( + adapters=["dummy"], + adapter_kwargs={}, + schema="main", + username="username", + password="password", + host="host", + port=1234, + database="database", + ) + + +def test_cursor(mocker: MockerFixture) -> None: + """ + Test the ``Cursor`` class. 
+ """ + mocker.patch("shillelagh.backends.multicorn.db.uuid4", return_value="uuid") + super = mocker.patch("shillelagh.backends.multicorn.db.super", create=True) + execute = mocker.MagicMock(name="execute") + super.return_value.execute = execute + connection = mocker.MagicMock() + + cursor = Cursor( + connection, + adapters={"dummy": FakeAdapter}, + adapter_kwargs={}, + schema="main", + ) + + cursor.execute("SELECT 1") + execute.assert_has_calls( + [ + mocker.call('SAVEPOINT "uuid"'), + mocker.call("SELECT 1", None), + ], + ) + + execute.reset_mock() + execute.side_effect = [ + True, # SAVEPOINT + psycopg2.errors.UndefinedTable('relation "dummy://" does not exist'), + True, # ROLLBACK + True, # CREATE SERVER + True, # CREATE FOREIGN TABLE + True, # SAVEPOINT + True, # successful query + ] + + cursor.execute('SELECT * FROM "dummy://"') + execute.assert_has_calls( + [ + mocker.call('SAVEPOINT "uuid"'), + mocker.call('SELECT * FROM "dummy://"', None), + mocker.call('ROLLBACK TO SAVEPOINT "uuid"'), + mocker.call( + """ +CREATE SERVER shillelagh foreign data wrapper multicorn options ( + wrapper 'shillelagh.backends.multicorn.fdw.MulticornForeignDataWrapper' +); + """, + ), + mocker.call( + """ +CREATE FOREIGN TABLE "dummy://" ( + "age" REAL, "name" TEXT, "pets" INTEGER +) server shillelagh options ( + adapter \'dummy\', + args \'qQA=\' +); + """, + ), + mocker.call('SAVEPOINT "uuid"'), + mocker.call('SELECT * FROM "dummy://"', None), + ], + ) + + +def test_cursor_no_table_match(mocker: MockerFixture) -> None: + """ + Test an edge case where ``UndefinedTable`` is raised with a different message. 
+ """ + super = mocker.patch("shillelagh.backends.multicorn.db.super", create=True) + execute = mocker.MagicMock(name="execute") + super.return_value.execute = execute + connection = mocker.MagicMock() + + execute.side_effect = [ + True, # SAVEPOINT + psycopg2.errors.UndefinedTable("An unexpected error occurred"), + ] + + cursor = Cursor( + connection, + adapters={"dummy": FakeAdapter}, + adapter_kwargs={}, + schema="main", + ) + + with pytest.raises(ProgrammingError) as excinfo: + cursor.execute('SELECT * FROM "dummy://"') + assert str(excinfo.value) == "An unexpected error occurred" + + +def test_cursor_no_table_name(mocker: MockerFixture) -> None: + """ + Test an edge case where we can't determine the table name from the exception. + """ + super = mocker.patch("shillelagh.backends.multicorn.db.super", create=True) + execute = mocker.MagicMock(name="execute") + super.return_value.execute = execute + connection = mocker.MagicMock() + + execute.side_effect = [ + True, # SAVEPOINT + psycopg2.errors.UndefinedTable('relation "invalid://" does not exist'), + ] + + cursor = Cursor( + connection, + adapters={"dummy": FakeAdapter}, + adapter_kwargs={}, + schema="main", + ) + + with pytest.raises(ProgrammingError) as excinfo: + cursor.execute('SELECT * FROM "dummy://"') + assert str(excinfo.value) == "Could not determine table name" + + +def test_drop_table(mocker: MockerFixture) -> None: + """ + Test the ``drop_table`` method. 
+ """ + super = mocker.patch("shillelagh.backends.multicorn.db.super", create=True) + execute = mocker.MagicMock(name="execute") + super.return_value.execute = execute + adapter = mocker.MagicMock(name="adapter") + mocker.patch( + "shillelagh.backends.multicorn.db.find_adapter", + return_value=(adapter, ["one"], {"two": 2}), + ) + connection = mocker.MagicMock() + + cursor = Cursor( + connection, + adapters={"dummy": FakeAdapter}, + adapter_kwargs={}, + schema="main", + ) + + cursor.execute('DROP TABLE "dummy://"') + adapter.assert_called_with("one", two=2) + adapter().drop_table.assert_called() + + +def test_table_without_columns(mocker: MockerFixture) -> None: + """ + Test an edge case where a virtual table has no columns. + """ + super = mocker.patch("shillelagh.backends.multicorn.db.super", create=True) + execute = mocker.MagicMock(name="execute") + super.return_value.execute = execute + adapter = mocker.MagicMock(name="adapter") + adapter().get_columns.return_value = [] + mocker.patch( + "shillelagh.backends.multicorn.db.find_adapter", + return_value=(adapter, ["one"], {"two": 2}), + ) + connection = mocker.MagicMock() + + cursor = Cursor( + connection, + adapters={"dummy": adapter}, + adapter_kwargs={}, + schema="main", + ) + with pytest.raises(ProgrammingError) as excinfo: + cursor._create_table("dummy://") # pylint: disable=protected-access + assert str(excinfo.value) == "Virtual table dummy:// has no columns" diff --git a/tests/backends/multicorn/dialects/__init__.py b/tests/backends/multicorn/dialects/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/backends/multicorn/dialects/base_test.py b/tests/backends/multicorn/dialects/base_test.py new file mode 100644 index 00000000..86e32b96 --- /dev/null +++ b/tests/backends/multicorn/dialects/base_test.py @@ -0,0 +1,83 @@ +""" +Tests for the multicorn dialect. 
+""" + +from pytest_mock import MockerFixture +from sqlalchemy.engine.url import make_url + +from shillelagh.backends.multicorn import db +from shillelagh.backends.multicorn.db import Cursor +from shillelagh.backends.multicorn.dialects.base import ( + Multicorn2Dialect, + get_adapter_for_table_name, +) +from shillelagh.exceptions import ProgrammingError + +from ....fakes import FakeAdapter + + +def test_dbapi() -> None: + """ + Test the ``dbapi`` and ``import_dbapi`` methods. + """ + assert Multicorn2Dialect.dbapi() == Multicorn2Dialect.import_dbapi() == db + + +def test_create_connect_args() -> None: + """ + Test ``create_connect_args``. + """ + dialect = Multicorn2Dialect(["dummy"], {}) + assert dialect.create_connect_args( + make_url( + "shillelagh+multicorn2://shillelagh:shillelagh123@localhost:12345/shillelagh", + ), + ) == ( + [], + { + "adapter_kwargs": {}, + "adapters": ["dummy"], + "user": "shillelagh", + "password": "shillelagh123", + "host": "localhost", + "port": 12345, + "database": "shillelagh", + }, + ) + + +def test_has_table(mocker: MockerFixture) -> None: + """ + Test ``has_table``. + """ + super = mocker.patch( # pylint: disable=redefined-builtin + "shillelagh.backends.multicorn.dialects.base.super", + create=True, + ) + has_table = mocker.MagicMock(name="has_table", return_value=False) + super.return_value.has_table = has_table + mocker.patch( + "shillelagh.backends.multicorn.dialects.base.get_adapter_for_table_name", + side_effect=[True, ProgrammingError('No adapter for table "dummy://".')], + ) + connection = mocker.MagicMock() + + dialect = Multicorn2Dialect(["dummy"], {}) + assert dialect.has_table(connection, "dummy://") is True + assert dialect.has_table(connection, "my_table") is False + + +def test_get_adapter_for_table_name(mocker: MockerFixture) -> None: + """ + Test the ``get_adapter_for_table_name`` function. 
+ """ + mocker.patch("shillelagh.backends.multicorn.db.super", create=True) + connection = mocker.MagicMock() + connection.engine.raw_connection().cursor.return_value = Cursor( + connection, + adapters={"dummy": FakeAdapter}, + adapter_kwargs={}, + schema="main", + ) + + assert isinstance(get_adapter_for_table_name(connection, "dummy://"), FakeAdapter) From eed8d937ca17a047bbae796884a00dc725dd79b0 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Tue, 24 Oct 2023 11:43:55 -0400 Subject: [PATCH 04/15] Optimizing SELECT --- src/shillelagh/backends/apsw/vt.py | 23 +--- src/shillelagh/backends/multicorn/fdw.py | 136 +++++++++++++++++++++++ src/shillelagh/lib.py | 20 ++++ tests/backends/multicorn/fdw_test.py | 129 +++++++++++++++++++++ 4 files changed, 287 insertions(+), 21 deletions(-) create mode 100644 src/shillelagh/backends/multicorn/fdw.py create mode 100644 tests/backends/multicorn/fdw_test.py diff --git a/src/shillelagh/backends/apsw/vt.py b/src/shillelagh/backends/apsw/vt.py index 771f1c39..217b9def 100644 --- a/src/shillelagh/backends/apsw/vt.py +++ b/src/shillelagh/backends/apsw/vt.py @@ -42,8 +42,8 @@ StringDuration, StringInteger, ) -from shillelagh.filters import Filter, Operator -from shillelagh.lib import best_index_object_available, deserialize +from shillelagh.filters import Operator +from shillelagh.lib import best_index_object_available, deserialize, get_bounds from shillelagh.typing import ( Constraint, Index, @@ -245,25 +245,6 @@ def get_order( ] -def get_bounds( - columns: Dict[str, Field], - all_bounds: DefaultDict[str, Set[Tuple[Operator, Any]]], -) -> Dict[str, Filter]: - """ - Combine all filters that apply to each column. 
- """ - bounds: Dict[str, Filter] = {} - for column_name, operations in all_bounds.items(): - column_type = columns[column_name] - operators = {operation[0] for operation in operations} - for class_ in column_type.filters: - if all(operator in class_.operators for operator in operators): - bounds[column_name] = class_.build(operations) - break - - return bounds - - class VTModule: # pylint: disable=too-few-public-methods """ A module used to create SQLite virtual tables. diff --git a/src/shillelagh/backends/multicorn/fdw.py b/src/shillelagh/backends/multicorn/fdw.py new file mode 100644 index 00000000..17ab8cef --- /dev/null +++ b/src/shillelagh/backends/multicorn/fdw.py @@ -0,0 +1,136 @@ +""" +An FDW. +""" + +from collections import defaultdict +from typing import ( + Any, + DefaultDict, + Dict, + Iterator, + List, + Optional, + Set, + Tuple, + TypedDict, +) + +from multicorn import ForeignDataWrapper, Qual, SortKey + +from shillelagh.adapters.registry import registry +from shillelagh.fields import Order +from shillelagh.filters import Operator +from shillelagh.lib import deserialize, get_bounds +from shillelagh.typing import RequestedOrder, Row + +operator_map = { + "=": Operator.EQ, + ">": Operator.GT, + "<": Operator.LT, + ">=": Operator.GE, + "<=": Operator.LE, +} + + +def get_all_bounds(quals: List[Qual]) -> DefaultDict[str, Set[Tuple[Operator, Any]]]: + """ + Convert list of ``Qual`` into a set of operators for each column. + """ + all_bounds: DefaultDict[str, Set[Tuple[Operator, Any]]] = defaultdict(set) + for qual in quals: + if operator := operator_map.get(qual.operator): + all_bounds[qual.field_name].add((operator, qual.value)) + + return all_bounds + + +class OptionsType(TypedDict): + """ + Type for OPTIONS. + """ + + adapter: str + args: str + + +class MulticornForeignDataWrapper(ForeignDataWrapper): + """ + A FDW that dispatches queries to adapters. 
+ """ + + def __init__(self, options: OptionsType, columns: Dict[str, str]): + super().__init__(options, columns) + + deserialized_args = deserialize(options["args"]) + self.adapter = registry.load(options["adapter"])(*deserialized_args) + self.columns = self.adapter.get_columns() + + def execute( + self, + quals: List[Qual], + columns: List[str], + sortkeys: Optional[List[SortKey]] = None, + ) -> Iterator[Row]: + """ + Execute a query. + """ + all_bounds = get_all_bounds(quals) + bounds = get_bounds(self.columns, all_bounds) + + order: List[Tuple[str, RequestedOrder]] = [ + (key.attname, Order.DESCENDING if key.is_reversed else Order.ASCENDING) + for key in sortkeys or [] + ] + + kwargs = ( + {"requested_columns": columns} + if self.adapter.supports_requested_columns + else {} + ) + + return self.adapter.get_rows(bounds, order, **kwargs) + + def can_sort(self, sortkeys: List[SortKey]) -> List[SortKey]: + """ + Return a list of sorts the adapter can perform. + """ + + def is_sortable(key: SortKey) -> bool: + """ + Return if a given sort key can be enforced by the adapter. 
+ """ + if key.attname not in self.columns: + return False + + order = self.columns[key.attname].order + return ( + order == Order.ANY + or (order == Order.ASCENDING and not key.is_reversed) + or (order == Order.DESCENDING and key.is_reversed) + ) + + return [key for key in sortkeys if is_sortable(key)] + + def insert(self, values): + pass + + def delete(self, oldvalues): + pass + + def update(self, oldvalues, newvalues): + pass + + @property + def rowid_column(self): + return "rowid" + + @classmethod + def import_schema( # pylint: disable=too-many-arguments + cls, + schema, + srv_options, + options, + restriction_type, + restricts, + ): + return [] diff --git a/src/shillelagh/lib.py b/src/shillelagh/lib.py index 409efc37..d35a0dbf 100644 --- a/src/shillelagh/lib.py +++ b/src/shillelagh/lib.py @@ -11,6 +11,7 @@ from typing import ( Any, Callable, + DefaultDict, Dict, Iterator, List, @@ -641,3 +642,22 @@ def get_session( session.headers.update(request_headers) return session + + +def get_bounds( + columns: Dict[str, Field], + all_bounds: DefaultDict[str, Set[Tuple[Operator, Any]]], +) -> Dict[str, Filter]: + """ + Combine all filters that apply to each column. + """ + bounds: Dict[str, Filter] = {} + for column_name, operations in all_bounds.items(): + column_type = columns[column_name] + operators = {operation[0] for operation in operations} + for class_ in column_type.filters: + if all(operator in class_.operators for operator in operators): + bounds[column_name] = class_.build(operations) + break + + return bounds diff --git a/tests/backends/multicorn/fdw_test.py b/tests/backends/multicorn/fdw_test.py new file mode 100644 index 00000000..439ff88d --- /dev/null +++ b/tests/backends/multicorn/fdw_test.py @@ -0,0 +1,129 @@ +""" +Tests for the Multicorn2 FDW. 
+""" + +# pylint: disable=invalid-name, redefined-outer-name, no-member, redefined-builtin + +from collections import defaultdict + +from multicorn import Qual, SortKey +from pytest_mock import MockerFixture + +from shillelagh.adapters.registry import AdapterLoader +from shillelagh.backends.multicorn.fdw import ( + MulticornForeignDataWrapper, + get_all_bounds, +) +from shillelagh.filters import Operator + +from ...fakes import FakeAdapter + + +def test_fdw(mocker: MockerFixture, registry: AdapterLoader) -> None: + """ + Test the ``MulticornForeignDataWrapper`` class. + """ + mocker.patch("shillelagh.backends.multicorn.fdw.registry", registry) + + registry.add("dummy", FakeAdapter) + + wrapper = MulticornForeignDataWrapper( + {"adapter": "dummy", "args": "qQA="}, + {}, + ) + + assert list(wrapper.execute([], ["rowid", "name", "age", "pets"])) == [ + {"rowid": 0, "name": "Alice", "age": 20, "pets": 0}, + {"rowid": 1, "name": "Bob", "age": 23, "pets": 3}, + ] + + assert list( + wrapper.execute( + [Qual("age", ">", 21)], + ["rowid", "name", "age", "pets"], + [], + ), + ) == [ + {"rowid": 1, "name": "Bob", "age": 23, "pets": 3}, + ] + + assert list( + wrapper.execute( + [], + ["rowid", "name", "age", "pets"], + [ + SortKey( + attname="age", + attnum=2, + is_reversed=True, + nulls_first=True, + collate=None, + ), + ], + ), + ) == [ + {"rowid": 1, "name": "Bob", "age": 23, "pets": 3}, + {"rowid": 0, "name": "Alice", "age": 20, "pets": 0}, + ] + + +def test_get_all_bounds() -> None: + """ + Test ``get_all_bounds``. 
+ """ + quals = [ + Qual("column1", "=", 3), + Qual("column2", "LIKE", "test%"), + Qual("column3", ">", 10), + ] + + assert get_all_bounds([]) == defaultdict(set) + assert get_all_bounds([quals[0]]) == {"column1": {(Operator.EQ, 3)}} + assert get_all_bounds(quals) == { + "column1": {(Operator.EQ, 3)}, + "column3": {(Operator.GT, 10)}, + } + assert get_all_bounds([Qual("column4", "unsupported_operator", 1)]) == defaultdict( + set, + ) + + +def test_can_sort(mocker: MockerFixture, registry: AdapterLoader) -> None: + """ + Test the ``can_sort`` method. + """ + mocker.patch("shillelagh.backends.multicorn.fdw.registry", registry) + + registry.add("dummy", FakeAdapter) + + wrapper = MulticornForeignDataWrapper( + {"adapter": "dummy", "args": "qQA="}, + {}, + ) + assert wrapper.can_sort([]) == [] + assert wrapper.can_sort( + [ + SortKey( + attname="age", + attnum=2, + is_reversed=True, + nulls_first=True, + collate=None, + ), + SortKey( + attname="foobar", + attnum=1, + is_reversed=True, + nulls_first=True, + collate=None, + ), + ], + ) == [ + SortKey( + attname="age", + attnum=2, + is_reversed=True, + nulls_first=True, + collate=None, + ), + ] From a902a11ad7bca806c85649bd977e61104cea8cc1 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Tue, 24 Oct 2023 17:10:56 -0400 Subject: [PATCH 05/15] Fix tests --- .github/workflows/python-package-daily.yml | 6 ++++++ .github/workflows/python-package.yml | 6 ++++++ requirements/base.txt | 7 +++++++ requirements/test.txt | 2 ++ setup.cfg | 1 + 5 files changed, 22 insertions(+) diff --git a/.github/workflows/python-package-daily.yml b/.github/workflows/python-package-daily.yml index bd279d75..10a66f99 100644 --- a/.github/workflows/python-package-daily.yml +++ b/.github/workflows/python-package-daily.yml @@ -30,6 +30,12 @@ jobs: run: | python -m pip install --upgrade pip setuptools python -m pip install -e '.[testing]' + - name: Install multicorn2 + run: | + git clone https://github.com/pgsql-io/multicorn2.git + cd multicorn2 + 
git checkout v2.5 + pip install . - name: Test with pytest run: | pytest --cov-fail-under=100 --cov=src/shillelagh -vv tests/ --doctest-modules src/shillelagh --without-integration --without-slow-integration diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 4f008d91..cb3e95e2 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -31,6 +31,12 @@ jobs: run: | python -m pip install --upgrade pip setuptools python -m pip install -r requirements/test.txt + - name: Install multicorn2 + run: | + git clone https://github.com/pgsql-io/multicorn2.git + cd multicorn2 + git checkout v2.5 + pip install . - name: Test with pytest run: | pre-commit run --all-files diff --git a/requirements/base.txt b/requirements/base.txt index d537e320..943dff0b 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -19,6 +19,8 @@ certifi==2022.6.15 # via requests charset-normalizer==2.1.0 # via requests +exceptiongroup==1.1.3 + # via cattrs greenlet==2.0.2 # via # shillelagh @@ -45,9 +47,14 @@ sqlalchemy==1.4.39 # via shillelagh typing-extensions==4.3.0 # via shillelagh + # via + # cattrs + # shillelagh url-normalize==1.4.3 # via requests-cache urllib3==1.26.10 # via # requests # requests-cache +zipp==3.15.0 + # via importlib-metadata diff --git a/requirements/test.txt b/requirements/test.txt index 5a71df88..d39efdf2 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -137,6 +137,8 @@ psutil==5.9.1 # via shillelagh pyarrow==16.0.0 # via shillelagh +psycopg2-binary==2.9.9 + # via shillelagh pyasn1==0.4.8 # via # pyasn1-modules diff --git a/setup.cfg b/setup.cfg index ddb1e2e0..44f793ae 100644 --- a/setup.cfg +++ b/setup.cfg @@ -87,6 +87,7 @@ testing = prison>=0.2.1 prompt_toolkit>=3 psutil>=5.8.0 + psycopg2-binary>=2.9.9 pyarrow>=14.0.1 pyfakefs>=4.3.3 pygments>=2.8 From b4a60192dd94c16e00014e0e17781e27d1b985c7 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Tue, 24 Oct 2023 
18:33:23 -0400 Subject: [PATCH 06/15] Write API --- src/shillelagh/backends/multicorn/fdw.py | 27 ++++++---- tests/backends/multicorn/fdw_test.py | 67 ++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 11 deletions(-) diff --git a/src/shillelagh/backends/multicorn/fdw.py b/src/shillelagh/backends/multicorn/fdw.py index 17ab8cef..53dbbcc6 100644 --- a/src/shillelagh/backends/multicorn/fdw.py +++ b/src/shillelagh/backends/multicorn/fdw.py @@ -111,14 +111,19 @@ def is_sortable(key: SortKey) -> bool: return [key for key in sortkeys if is_sortable(key)] - def insert(self, values): - pass + def insert(self, values: Row) -> Row: + rowid = self.adapter.insert_row(values) + values["rowid"] = rowid + return values - def delete(self, oldvalues): - pass + def delete(self, oldvalues: Row) -> None: + rowid = oldvalues["rowid"] + self.adapter.delete_row(rowid) - def update(self, oldvalues, newvalues): - pass + def update(self, oldvalues: Row, newvalues: Row) -> Row: + rowid = newvalues["rowid"] + self.adapter.update_row(rowid, newvalues) + return newvalues @property def rowid_column(self): @@ -127,10 +132,10 @@ def rowid_column(self): @classmethod def import_schema( # pylint: disable=too-many-arguments cls, - schema, - srv_options, - options, - restriction_type, - restricts, + schema: str, + srv_options: Dict[str, str], + options: Dict[str, str], + restriction_type: Optional[str], + restricts: List[str], ): return [] diff --git a/tests/backends/multicorn/fdw_test.py b/tests/backends/multicorn/fdw_test.py index 439ff88d..aa8a6d6c 100644 --- a/tests/backends/multicorn/fdw_test.py +++ b/tests/backends/multicorn/fdw_test.py @@ -27,10 +27,14 @@ def test_fdw(mocker: MockerFixture, registry: AdapterLoader) -> None: registry.add("dummy", FakeAdapter) + assert ( + MulticornForeignDataWrapper.import_schema("schema", {}, {}, "limit", []) == [] + ) wrapper = MulticornForeignDataWrapper( {"adapter": "dummy", "args": "qQA="}, {}, ) + assert wrapper.rowid_column == "rowid" assert 
list(wrapper.execute([], ["rowid", "name", "age", "pets"])) == [ {"rowid": 0, "name": "Alice", "age": 20, "pets": 0}, @@ -127,3 +131,66 @@ def test_can_sort(mocker: MockerFixture, registry: AdapterLoader) -> None: collate=None, ), ] + + +def test_insert(mocker: MockerFixture, registry: AdapterLoader) -> None: + """ + Test the ``insert`` method. + """ + mocker.patch("shillelagh.backends.multicorn.fdw.registry", registry) + + registry.add("dummy", FakeAdapter) + + wrapper = MulticornForeignDataWrapper( + {"adapter": "dummy", "args": "qQA="}, + {}, + ) + + wrapper.insert({"rowid": 2, "name": "Charlie", "age": 6, "pets": 1}) + assert list(wrapper.execute([], ["rowid", "name", "age", "pets"])) == [ + {"rowid": 0, "name": "Alice", "age": 20, "pets": 0}, + {"rowid": 1, "name": "Bob", "age": 23, "pets": 3}, + {"rowid": 2, "name": "Charlie", "age": 6, "pets": 1}, + ] + + +def test_delete(mocker: MockerFixture, registry: AdapterLoader) -> None: + """ + Test the ``delete`` method. + """ + mocker.patch("shillelagh.backends.multicorn.fdw.registry", registry) + + registry.add("dummy", FakeAdapter) + + wrapper = MulticornForeignDataWrapper( + {"adapter": "dummy", "args": "qQA="}, + {}, + ) + + wrapper.delete({"rowid": 1, "name": "Bob", "age": 23, "pets": 3}) + assert list(wrapper.execute([], ["rowid", "name", "age", "pets"])) == [ + {"rowid": 0, "name": "Alice", "age": 20, "pets": 0}, + ] + + +def test_update(mocker: MockerFixture, registry: AdapterLoader) -> None: + """ + Test the ``update`` method. 
+ """ + mocker.patch("shillelagh.backends.multicorn.fdw.registry", registry) + + registry.add("dummy", FakeAdapter) + + wrapper = MulticornForeignDataWrapper( + {"adapter": "dummy", "args": "qQA="}, + {}, + ) + + wrapper.update( + {"rowid": 0, "name": "Alice", "age": 20, "pets": 0}, + {"rowid": 0, "name": "Alice", "age": 20, "pets": 1}, + ) + assert list(wrapper.execute([], ["rowid", "name", "age", "pets"])) == [ + {"rowid": 1, "name": "Bob", "age": 23, "pets": 3}, + {"rowid": 0, "name": "Alice", "age": 20, "pets": 1}, + ] From 705485125e6b90fed93aa81d337eee03973e333c Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Tue, 24 Oct 2023 18:44:46 -0400 Subject: [PATCH 07/15] Query cost --- .github/workflows/python-integration.yml | 6 ++++++ src/shillelagh/backends/multicorn/fdw.py | 20 ++++++++++++++++++++ tests/backends/multicorn/fdw_test.py | 16 ++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/.github/workflows/python-integration.yml b/.github/workflows/python-integration.yml index 8f048852..4c51f586 100644 --- a/.github/workflows/python-integration.yml +++ b/.github/workflows/python-integration.yml @@ -31,6 +31,12 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements/test.txt + - name: Install multicorn2 + run: | + git clone https://github.com/pgsql-io/multicorn2.git + cd multicorn2 + git checkout v2.5 + pip install . - name: Test with pytest env: SHILLELAGH_ADAPTER_KWARGS: ${{ secrets.SHILLELAGH_ADAPTER_KWARGS }} diff --git a/src/shillelagh/backends/multicorn/fdw.py b/src/shillelagh/backends/multicorn/fdw.py index 53dbbcc6..def86730 100644 --- a/src/shillelagh/backends/multicorn/fdw.py +++ b/src/shillelagh/backends/multicorn/fdw.py @@ -129,6 +129,26 @@ def update(self, oldvalues: Row, newvalues: Row) -> Row: def rowid_column(self): return "rowid" + def get_rel_size(self, quals: List[Qual], columns: List[str]) -> Tuple[int, int]: + """ + Estimate query cost. 
+ """ + all_bounds = get_all_bounds(quals) + filtered_columns = [ + (column, operator[0]) + for column, operators in all_bounds.items() + for operator in operators + ] + + # the adapter returns an arbitrary cost that takes in consideration filtering and + # sorting; let's use that as an approximation for rows + rows = int(self.adapter.get_cost(filtered_columns, [])) + + # same assumption as the parent class + row_width = len(columns) * 100 + + return (rows, row_width) + @classmethod def import_schema( # pylint: disable=too-many-arguments cls, diff --git a/tests/backends/multicorn/fdw_test.py b/tests/backends/multicorn/fdw_test.py index aa8a6d6c..f4518664 100644 --- a/tests/backends/multicorn/fdw_test.py +++ b/tests/backends/multicorn/fdw_test.py @@ -194,3 +194,19 @@ def test_update(mocker: MockerFixture, registry: AdapterLoader) -> None: {"rowid": 1, "name": "Bob", "age": 23, "pets": 3}, {"rowid": 0, "name": "Alice", "age": 20, "pets": 1}, ] + + +def test_get_rel_Size(mocker: MockerFixture, registry: AdapterLoader) -> None: + """ + Test the ``get_rel_size`` method. 
+ """ + mocker.patch("shillelagh.backends.multicorn.fdw.registry", registry) + + registry.add("dummy", FakeAdapter) + + wrapper = MulticornForeignDataWrapper( + {"adapter": "dummy", "args": "qQA="}, + {}, + ) + + assert wrapper.get_rel_size([Qual("age", ">", 21)], ["name", "age"]) == (666, 200) From a612642e7541dfa9b9eccad9ffc2ddd3e15721d2 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Tue, 24 Oct 2023 19:35:52 -0400 Subject: [PATCH 08/15] Add docs --- README.rst | 19 +++++++++++++++++++ docs/index.rst | 1 + docs/postgres.rst | 23 +++++++++++++++++++++++ src/shillelagh/backends/multicorn/db.py | 2 ++ tests/backends/multicorn/db_test.py | 2 ++ 5 files changed, 47 insertions(+) create mode 100644 docs/postgres.rst diff --git a/README.rst b/README.rst index d1a6893c..ca786b6f 100644 --- a/README.rst +++ b/README.rst @@ -52,6 +52,25 @@ And a command-line utility: $ shillelagh sql> SELECT * FROM a_table +There is also an [experimental backend](https://shillelagh.readthedocs.io/en/latest/postgres.html) that uses Postgres with the [Multicorn2](http://multicorn2.org/) extension: + +.. code-block:: python + + from shillelagh.backends.multicorn.db import connect + + connection = connect( + username="username", + password="password", + host="localhost", + port=5432, + database="examples", + ) + +.. code-block:: python + + from sqlalchemy import create_engine + engine = create_engine("shillelagh+multicorn2://username:password@localhost:5432/examples") + Why SQL? ======== diff --git a/docs/index.rst b/docs/index.rst index a46ebe53..3466f4cb 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -10,6 +10,7 @@ Contents Usage Adapters Creating a new adapter + Postgres backend License Authors Changelog diff --git a/docs/postgres.rst b/docs/postgres.rst new file mode 100644 index 00000000..673df90e --- /dev/null +++ b/docs/postgres.rst @@ -0,0 +1,23 @@ +.. 
_postgres: + +================ +Postgres backend +================ + +Since version 1.3 Shillelagh ships with an experimental backend that uses Postgres instead of SQLite. The backend implements a custom `psycopg2 <https://pypi.org/project/psycopg2/>`__ cursor that automatically registers a foreign data wrapper (FDW) whenever a supported table is accessed. It's based on the `multicorn2 <http://multicorn2.org/>`__ extension and Python package. + +To use the backend you need to: + +1. Install the `Multicorn2 <http://multicorn2.org/>`__ extension. +2. Install the multicorn2 Python package in the machine running Postgres. Note that this is not the "multicorn" package available on PyPI. You need to download the source and install it manually. +3. Install Shillelagh in the machine running Postgres. + +Note that you need to install Python packages in a way that they are available to the process running Postgres. You can either install them globally, or install them in a virtual environment and have it activated in the process that starts Postgres. + +The ``postgres/`` directory has a Docker configuration that can be used to test the backend, or as a basis for installation. To run it, enter the directory and execute: + +.. code-block:: bash + + docker-compose up --build -d + +You should then be able to run the example script in ``examples/postgres.py`` to test that everything works. 
diff --git a/src/shillelagh/backends/multicorn/db.py b/src/shillelagh/backends/multicorn/db.py index 35009179..d85e01aa 100644 --- a/src/shillelagh/backends/multicorn/db.py +++ b/src/shillelagh/backends/multicorn/db.py @@ -222,6 +222,7 @@ def __call__(self, *args, **kwargs) -> Cursor: def connect( # pylint: disable=too-many-arguments + dsn: Optional[str] = None, adapters: Optional[List[str]] = None, adapter_kwargs: Optional[Dict[str, Dict[str, Any]]] = None, schema: str = DEFAULT_SCHEMA, @@ -252,6 +253,7 @@ def connect( # pylint: disable=too-many-arguments schema, ) return psycopg2.connect( + dsn, cursor_factory=cursor_factory, **psycopg2_connection_kwargs, ) diff --git a/tests/backends/multicorn/db_test.py b/tests/backends/multicorn/db_test.py index b2f7d2d4..ea08c425 100644 --- a/tests/backends/multicorn/db_test.py +++ b/tests/backends/multicorn/db_test.py @@ -25,6 +25,7 @@ def test_connect(mocker: MockerFixture, registry: AdapterLoader) -> None: registry.add("dummy", FakeAdapter) connect( + None, ["dummy"], username="username", password="password", @@ -33,6 +34,7 @@ def test_connect(mocker: MockerFixture, registry: AdapterLoader) -> None: database="database", ) psycopg2.connect.assert_called_with( + None, cursor_factory=CursorFactory( {"dummy": FakeAdapter}, {}, From c719211d7eeaaf61e8934a497beb5443ad2b4f27 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Tue, 24 Oct 2023 21:34:08 -0400 Subject: [PATCH 09/15] Add integration test --- .github/workflows/python-integration.yml | 14 +++++ README.rst | 2 +- .../adapters/api/gsheets/integration_test.py | 57 +++++++++++++++++++ tests/backends/multicorn/db_test.py | 8 +-- 4 files changed, 76 insertions(+), 5 deletions(-) diff --git a/.github/workflows/python-integration.yml b/.github/workflows/python-integration.yml index 4c51f586..e5a52acc 100644 --- a/.github/workflows/python-integration.yml +++ b/.github/workflows/python-integration.yml @@ -37,8 +37,22 @@ jobs: cd multicorn2 git checkout v2.5 pip install . 
+ - name: Start the Postgres service + run: | + docker-compose -f postgres/docker-compose.yml up --build -d + - name: Check if Postgres is ready + run: | + docker run --network container:postgres_postgres_1 postgres_postgres pg_isready \ + -h postgres \ + -p 5432 \ + -U shillelagh \ + --timeout=30 - name: Test with pytest env: SHILLELAGH_ADAPTER_KWARGS: ${{ secrets.SHILLELAGH_ADAPTER_KWARGS }} run: | pytest --cov-fail-under=100 --cov=src/shillelagh -vv tests/ --doctest-modules src/shillelagh --with-integration --with-slow-integration + - name: Stop the Postgres service + if: always() + run: | + docker-compose -f postgres/docker-compose.yml down diff --git a/README.rst b/README.rst index ca786b6f..68883d98 100644 --- a/README.rst +++ b/README.rst @@ -59,7 +59,7 @@ There is also an [experimental backend](https://shillelagh.readthedocs.io/en/lat from shillelagh.backends.multicorn.db import connect connection = connect( - username="username", + user="username", password="password", host="localhost", port=5432, diff --git a/tests/adapters/api/gsheets/integration_test.py b/tests/adapters/api/gsheets/integration_test.py index aa20de0c..5948d9ff 100644 --- a/tests/adapters/api/gsheets/integration_test.py +++ b/tests/adapters/api/gsheets/integration_test.py @@ -15,6 +15,7 @@ from shillelagh.adapters.api.gsheets.types import SyncMode from shillelagh.backends.apsw.db import connect +from shillelagh.backends.multicorn.db import connect as connect_multicorn @pytest.mark.skip("Credentials no longer valid") @@ -727,3 +728,59 @@ def test_weird_symbols(adapter_kwargs: Dict[str, Any]) -> None: assert cursor.fetchall() == [(1.0, "a", 45.0), (2.0, "b", 1999.0)] assert cursor.description is not None assert [column[0] for column in cursor.description] == ['foo"', '"bar', 'a"b'] + + +@pytest.mark.slow_integration_test +def test_public_sheet_apsw() -> None: + """ + Test reading values from a public sheet with APSW. 
+ """ + table = ( + '"https://docs.google.com/spreadsheets/d/' + '1LcWZMsdCl92g7nA-D6qGRqg1T5TiHyuKJUY1u9XAnsk/edit#gid=0"' + ) + + connection = connect(":memory:") + cursor = connection.cursor() + sql = f"SELECT * FROM {table}" + cursor.execute(sql) + assert cursor.fetchall() == [ + ("BR", 2), + ("BR", 4), + ("ZA", 7), + ("CR", 11), + ("CR", 11), + ("FR", 100), + ("AR", 42), + ] + + +@pytest.mark.slow_integration_test +def test_public_sheet_multicorn() -> None: + """ + Test reading values from a public sheet with Multicorn2. + """ + table = ( + '"https://docs.google.com/spreadsheets/d/' + '1LcWZMsdCl92g7nA-D6qGRqg1T5TiHyuKJUY1u9XAnsk/edit#gid=0"' + ) + + connection = connect_multicorn( + user="shillelagh", + password="shillelagh123", + host="localhost", + port=12345, + database="shillelagh", + ) + cursor = connection.cursor() + sql = f"SELECT * FROM {table}" + cursor.execute(sql) + assert cursor.fetchall() == [ + ("BR", 2), + ("BR", 4), + ("ZA", 7), + ("CR", 11), + ("CR", 11), + ("FR", 100), + ("AR", 42), + ] diff --git a/tests/backends/multicorn/db_test.py b/tests/backends/multicorn/db_test.py index ea08c425..56e3e29b 100644 --- a/tests/backends/multicorn/db_test.py +++ b/tests/backends/multicorn/db_test.py @@ -27,7 +27,7 @@ def test_connect(mocker: MockerFixture, registry: AdapterLoader) -> None: connect( None, ["dummy"], - username="username", + user="username", password="password", host="host", port=1234, @@ -40,7 +40,7 @@ def test_connect(mocker: MockerFixture, registry: AdapterLoader) -> None: {}, "main", ), - username="username", + user="username", password="password", host="host", port=1234, @@ -60,7 +60,7 @@ def test_cursor_factory(mocker: MockerFixture) -> None: "main", ) assert cursor_factory( - username="username", + user="username", password="password", host="host", port=1234, @@ -69,7 +69,7 @@ def test_cursor_factory(mocker: MockerFixture) -> None: adapters=["dummy"], adapter_kwargs={}, schema="main", - username="username", + user="username", 
password="password", host="host", port=1234, From cb9f7c150ebac2f8f17ebdaa64dee20bd6e18031 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Tue, 24 Oct 2023 22:12:48 -0400 Subject: [PATCH 10/15] Different strategy --- .github/workflows/python-integration.yml | 16 ++++++++++------ postgres/Dockerfile | 2 +- postgres/docker-compose.yml | 2 +- postgres/entrypoint.sh | 3 ++- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/.github/workflows/python-integration.yml b/.github/workflows/python-integration.yml index e5a52acc..3f3f085a 100644 --- a/.github/workflows/python-integration.yml +++ b/.github/workflows/python-integration.yml @@ -40,13 +40,16 @@ jobs: - name: Start the Postgres service run: | docker-compose -f postgres/docker-compose.yml up --build -d - - name: Check if Postgres is ready + - name: List running containers run: | - docker run --network container:postgres_postgres_1 postgres_postgres pg_isready \ - -h postgres \ - -p 5432 \ - -U shillelagh \ - --timeout=30 + docker ps + - name: Check Postgres logs + run: | + sleep 5 + docker logs postgres_postgres_1 + - name: Wait for Postgres to become available + run: | + until docker run --network container:postgres_postgres_1 postgres_postgres pg_isready -h postgres -p 5432 -U shillelagh --timeout=90; do sleep 10; done - name: Test with pytest env: SHILLELAGH_ADAPTER_KWARGS: ${{ secrets.SHILLELAGH_ADAPTER_KWARGS }} @@ -55,4 +58,5 @@ jobs: - name: Stop the Postgres service if: always() run: | + docker logs postgres_postgres_1 docker-compose -f postgres/docker-compose.yml down diff --git a/postgres/Dockerfile b/postgres/Dockerfile index cf836fbf..12331396 100644 --- a/postgres/Dockerfile +++ b/postgres/Dockerfile @@ -11,7 +11,7 @@ RUN apt-get update && apt-get install -y \ postgresql-server-dev-13 \ python3 \ python3-dev \ - python3-setuptools \ + #python3-setuptools \ python3-pip \ python3-venv \ wget diff --git a/postgres/docker-compose.yml b/postgres/docker-compose.yml index 89eeb956..4ab5e677 
100644 --- a/postgres/docker-compose.yml +++ b/postgres/docker-compose.yml @@ -13,7 +13,7 @@ services: - ./entrypoint.sh:/usr/local/bin/custom_entrypoint.sh - ..:/src ports: - - "12345:5432" + - "5432:5432" entrypoint: /usr/local/bin/custom_entrypoint.sh volumes: diff --git a/postgres/entrypoint.sh b/postgres/entrypoint.sh index ad4cc37e..063b9f60 100755 --- a/postgres/entrypoint.sh +++ b/postgres/entrypoint.sh @@ -5,10 +5,11 @@ set -e # If any command fails, stop the script cd python3 -m venv venv source venv/bin/activate +pip3 install 'setuptools>=61' wheel # install shillelagh cd /src -pip3 install -e '.[all]' +pip3 install -v -e '.[all]' # install multicorn2 rm -rf multicorn2 From 38642c81f0a4b548a81cb535418554e007b2e0bc Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Fri, 27 Oct 2023 14:56:34 -0400 Subject: [PATCH 11/15] Another approach --- .github/workflows/python-integration.yml | 13 ------------- .github/workflows/python-package-daily.yml | 2 +- .github/workflows/python-package.yml | 2 +- postgres/Dockerfile | 9 ++++++--- postgres/docker-compose.yml | 2 -- requirements/test.txt | 2 ++ setup.cfg | 5 +++++ 7 files changed, 15 insertions(+), 20 deletions(-) diff --git a/.github/workflows/python-integration.yml b/.github/workflows/python-integration.yml index 3f3f085a..f2c8381d 100644 --- a/.github/workflows/python-integration.yml +++ b/.github/workflows/python-integration.yml @@ -31,22 +31,9 @@ jobs: run: | python -m pip install --upgrade pip pip install -r requirements/test.txt - - name: Install multicorn2 - run: | - git clone https://github.com/pgsql-io/multicorn2.git - cd multicorn2 - git checkout v2.5 - pip install . 
- name: Start the Postgres service run: | docker-compose -f postgres/docker-compose.yml up --build -d - - name: List running containers - run: | - docker ps - - name: Check Postgres logs - run: | - sleep 5 - docker logs postgres_postgres_1 - name: Wait for Postgres to become available run: | until docker run --network container:postgres_postgres_1 postgres_postgres pg_isready -h postgres -p 5432 -U shillelagh --timeout=90; do sleep 10; done diff --git a/.github/workflows/python-package-daily.yml b/.github/workflows/python-package-daily.yml index 10a66f99..a94a13a8 100644 --- a/.github/workflows/python-package-daily.yml +++ b/.github/workflows/python-package-daily.yml @@ -35,7 +35,7 @@ jobs: git clone https://github.com/pgsql-io/multicorn2.git cd multicorn2 git checkout v2.5 - pip install . + pip install -e '.[testing]' - name: Test with pytest run: | pytest --cov-fail-under=100 --cov=src/shillelagh -vv tests/ --doctest-modules src/shillelagh --without-integration --without-slow-integration diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index cb3e95e2..7e866a4d 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -36,7 +36,7 @@ jobs: git clone https://github.com/pgsql-io/multicorn2.git cd multicorn2 git checkout v2.5 - pip install . + pip install -r requirements/test.txt - name: Test with pytest run: | pre-commit run --all-files diff --git a/postgres/Dockerfile b/postgres/Dockerfile index 12331396..5d45189b 100644 --- a/postgres/Dockerfile +++ b/postgres/Dockerfile @@ -1,19 +1,19 @@ # Use the official Postgres image as a base FROM postgres:13 +WORKDIR /code +COPY . 
/code + # Use root for package installation USER root # Install system dependencies RUN apt-get update && apt-get install -y \ build-essential \ - git \ postgresql-server-dev-13 \ python3 \ python3-dev \ - #python3-setuptools \ python3-pip \ - python3-venv \ wget # Download, build, and install multicorn2 @@ -23,5 +23,8 @@ RUN wget https://github.com/pgsql-io/multicorn2/archive/refs/tags/v2.5.tar.gz && make && \ make install + +RUN pip install -e '.[all]' + # Switch back to the default postgres user USER postgres diff --git a/postgres/docker-compose.yml b/postgres/docker-compose.yml index 4ab5e677..6b2e1e15 100644 --- a/postgres/docker-compose.yml +++ b/postgres/docker-compose.yml @@ -10,11 +10,9 @@ services: volumes: - db_data:/var/lib/postgresql/data - ./init.sql:/docker-entrypoint-initdb.d/init.sql:ro - - ./entrypoint.sh:/usr/local/bin/custom_entrypoint.sh - ..:/src ports: - "5432:5432" - entrypoint: /usr/local/bin/custom_entrypoint.sh volumes: db_data: diff --git a/requirements/test.txt b/requirements/test.txt index d39efdf2..9e136f6e 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -95,6 +95,8 @@ lazy-object-proxy==1.7.1 # via astroid mccabe==0.7.0 # via pylint +multicorn @ git+https://github.com/pgsql-io/multicorn2.git@v2.5 + # via shillelagh multidict==6.0.2 # via # aiohttp diff --git a/setup.cfg b/setup.cfg index 44f793ae..00329003 100644 --- a/setup.cfg +++ b/setup.cfg @@ -80,6 +80,8 @@ testing = google-auth>=1.23.0 holidays>=0.23 html5lib>=1.1 + jsonpath-python>=1.0.5 + multicorn @ git+https://github.com/pgsql-io/multicorn2.git@v2.5 pandas>=1.2.2 pip-tools>=6.4.0 pre-commit>=2.13.0 @@ -112,6 +114,8 @@ all = google-auth>=1.23.0 holidays>=0.23 html5lib>=1.1 + jsonpath-python>=1.0.5 + multicorn @ git+https://github.com/pgsql-io/multicorn2.git@v2.5 pandas>=1.2.2 prison>=0.2.1 prompt_toolkit>=3 @@ -156,6 +160,7 @@ htmltableapi = html5lib>=1.1 pandas>=1.2.2 multicorn = + multicorn @ git+https://github.com/pgsql-io/multicorn2.git@v2.5 
psycopg2-binary>=2.9.9 pandasmemory = pandas>=1.2.2 From 811de6dccbf55f9aecad3d6d8ee193b632719667 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Tue, 24 Sep 2024 13:16:22 -0400 Subject: [PATCH 12/15] Rebase --- .github/workflows/python-integration.yml | 4 +-- .github/workflows/python-package-daily.yml | 6 ---- .github/workflows/python-package.yml | 6 ---- docs/postgres.rst | 2 +- examples/postgres.py | 2 +- src/shillelagh/backends/multicorn/db.py | 28 ++++++++++++++++--- .../backends/multicorn/dialects/base.py | 1 - 7 files changed, 28 insertions(+), 21 deletions(-) diff --git a/.github/workflows/python-integration.yml b/.github/workflows/python-integration.yml index f2c8381d..ff95ddda 100644 --- a/.github/workflows/python-integration.yml +++ b/.github/workflows/python-integration.yml @@ -33,7 +33,7 @@ jobs: pip install -r requirements/test.txt - name: Start the Postgres service run: | - docker-compose -f postgres/docker-compose.yml up --build -d + docker compose -f postgres/docker-compose.yml up --build -d - name: Wait for Postgres to become available run: | until docker run --network container:postgres_postgres_1 postgres_postgres pg_isready -h postgres -p 5432 -U shillelagh --timeout=90; do sleep 10; done @@ -46,4 +46,4 @@ jobs: if: always() run: | docker logs postgres_postgres_1 - docker-compose -f postgres/docker-compose.yml down + docker compose -f postgres/docker-compose.yml down diff --git a/.github/workflows/python-package-daily.yml b/.github/workflows/python-package-daily.yml index a94a13a8..bd279d75 100644 --- a/.github/workflows/python-package-daily.yml +++ b/.github/workflows/python-package-daily.yml @@ -30,12 +30,6 @@ jobs: run: | python -m pip install --upgrade pip setuptools python -m pip install -e '.[testing]' - - name: Install multicorn2 - run: | - git clone https://github.com/pgsql-io/multicorn2.git - cd multicorn2 - git checkout v2.5 - pip install -e '.[testing]' - name: Test with pytest run: | pytest --cov-fail-under=100 
--cov=src/shillelagh -vv tests/ --doctest-modules src/shillelagh --without-integration --without-slow-integration diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 7e866a4d..4f008d91 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -31,12 +31,6 @@ jobs: run: | python -m pip install --upgrade pip setuptools python -m pip install -r requirements/test.txt - - name: Install multicorn2 - run: | - git clone https://github.com/pgsql-io/multicorn2.git - cd multicorn2 - git checkout v2.5 - pip install -r requirements/test.txt - name: Test with pytest run: | pre-commit run --all-files diff --git a/docs/postgres.rst b/docs/postgres.rst index 673df90e..c72880ae 100644 --- a/docs/postgres.rst +++ b/docs/postgres.rst @@ -18,6 +18,6 @@ The ``postgres/`` directory has a Docker configuration that can be used to test .. code-block:: bash - docker-compose up --build -d + docker compose up --build -d You should then be able to run the example script in `examples/postgres.py`_ to test that everything works. diff --git a/examples/postgres.py b/examples/postgres.py index b46a4599..de61b65c 100644 --- a/examples/postgres.py +++ b/examples/postgres.py @@ -9,7 +9,7 @@ Postgres with the extension and the Python packages. Just run: $ cd postgres/ - $ docker-compose up --build -d + $ docker compose up --build -d Then you can run this script. """ diff --git a/src/shillelagh/backends/multicorn/db.py b/src/shillelagh/backends/multicorn/db.py index d85e01aa..df826292 100644 --- a/src/shillelagh/backends/multicorn/db.py +++ b/src/shillelagh/backends/multicorn/db.py @@ -2,13 +2,13 @@ """ A DB API 2.0 wrapper. 
""" + import logging import re from typing import Any, Dict, List, Optional, Tuple, Type, Union, cast from uuid import uuid4 import psycopg2 -from multicorn import ForeignDataWrapper, Qual, SortKey from psycopg2 import extensions from shillelagh.adapters.base import Adapter @@ -20,14 +20,12 @@ IntegrityError, InterfaceError, InternalError, - NotSupportedError, OperationalError, ProgrammingError, Warning, ) from shillelagh.lib import ( combine_args_kwargs, - deserialize, escape_identifier, find_adapter, serialize, @@ -46,7 +44,29 @@ Timestamp, TimestampFromTicks, ) -from shillelagh.typing import Row + +__all__ = [ + "DatabaseError", + "DataError", + "Error", + "IntegrityError", + "InterfaceError", + "InternalError", + "OperationalError", + "BINARY", + "DATETIME", + "NUMBER", + "ROWID", + "STRING", + "Binary", + "Date", + "DateFromTicks", + "Time", + "TimeFromTicks", + "Timestamp", + "TimestampFromTicks", + "Warning", +] apilevel = "2.0" threadsafety = 2 diff --git a/src/shillelagh/backends/multicorn/dialects/base.py b/src/shillelagh/backends/multicorn/dialects/base.py index 6b6829bd..e3ea9872 100644 --- a/src/shillelagh/backends/multicorn/dialects/base.py +++ b/src/shillelagh/backends/multicorn/dialects/base.py @@ -18,7 +18,6 @@ class Multicorn2Dialect(PGDialect_psycopg2): - """ A SQLAlchemy dialect for Shillelagh based on psycopg2 and multicorn2. """ From f96ed4efb1e6f087dccf78c2442da92ccf1ea6a6 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Tue, 24 Sep 2024 15:17:06 -0400 Subject: [PATCH 13/15] Fix docker --- docs/install.rst | 17 ++++++++--------- docs/postgres.rst | 4 ++-- examples/postgres.py | 2 +- postgres/Dockerfile | 10 +++++++++- postgres/docker-compose.yml | 5 +++-- 5 files changed, 23 insertions(+), 15 deletions(-) diff --git a/docs/install.rst b/docs/install.rst index 5265bc03..d4d49d8e 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -14,18 +14,17 @@ You also need to install optional dependencies, depending on the adapter you wan .. 
code-block:: bash - $ pip install 'shillelagh[console]' # to use the CLI - $ pip install 'shillelagh[genericjsonapi]' # for Generic JSON - $ pip install 'shillelagh[genericxmlapi]' # for Generic XML - $ pip install 'shillelagh[githubapi]' # for GitHub - $ pip install 'shillelagh[gsheetsapi]' # for GSheets - $ pip install 'shillelagh[htmltableapi]' # for HTML tables - $ pip install 'shillelagh[pandasmemory]' # for Pandas in memory - $ pip install 'shillelagh[s3selectapi]' # for S3 files - $ pip install 'shillelagh[systemapi]' # for CPU information + $ pip install 'shillelagh[console]' # to use the CLI + $ pip install 'shillelagh[githubapi]' # for GitHub + $ pip install 'shillelagh[gsheetsapi]' # for GSheets + $ pip install 'shillelagh[htmltableapi]' # for HTML tables + $ pip install 'shillelagh[pandasmemory]' # for Pandas in memory + $ pip install 'shillelagh[s3selectapi]' # for S3 files + $ pip install 'shillelagh[systemapi]' # for CPU information Alternatively, you can install everything with: .. code-block:: bash $ pip install 'shillelagh[all]' +~ diff --git a/docs/postgres.rst b/docs/postgres.rst index c72880ae..d76d57eb 100644 --- a/docs/postgres.rst +++ b/docs/postgres.rst @@ -14,10 +14,10 @@ To use the backend you need to: Note that you need to install Python packages in a way that they are available to the process running Postgres. You can either install them globally, or install them in a virtual environment and have it activated in the process that starts Postgres. -The ``postgres/`` directory has a Docker configuration that can be used to test the backend, or as a basis for installation. To run it, enter the directory and execute: +The ``postgres/`` directory has a Docker configuration that can be used to test the backend, or as a basis for installation. To run it, execute: .. 
code-block:: bash - docker compose up --build -d + docker compose -f postgres/docker-compose.yml up You should then be able to run the example script in `examples/postgres.py`_ to test that everything works. diff --git a/examples/postgres.py b/examples/postgres.py index de61b65c..534b5484 100644 --- a/examples/postgres.py +++ b/examples/postgres.py @@ -19,7 +19,7 @@ # the backend uses psycopg2 under the hood, so any valid connection string for it will # work; just replace the scheme with `shillelagh+multicorn2` engine = create_engine( - "shillelagh+multicorn2://shillelagh:shillelagh123@localhost:12345/shillelagh", + "shillelagh+multicorn2://shillelagh:shillelagh123@localhost:5432/shillelagh", ) connection = engine.connect() diff --git a/postgres/Dockerfile b/postgres/Dockerfile index 5d45189b..267c7786 100644 --- a/postgres/Dockerfile +++ b/postgres/Dockerfile @@ -10,10 +10,12 @@ USER root # Install system dependencies RUN apt-get update && apt-get install -y \ build-essential \ + git \ postgresql-server-dev-13 \ python3 \ python3-dev \ python3-pip \ + python3-venv \ wget # Download, build, and install multicorn2 @@ -24,7 +26,13 @@ RUN wget https://github.com/pgsql-io/multicorn2/archive/refs/tags/v2.5.tar.gz && make install -RUN pip install -e '.[all]' +# Create a virtual environment and install dependencies +RUN python3 -m venv /code/venv && \ + /code/venv/bin/pip install --upgrade pip && \ + /code/venv/bin/pip install -e '.[all]' + +# Set environment variable for PostgreSQL to use the virtual environment +ENV PATH="/code/venv/bin:$PATH" # Switch back to the default postgres user USER postgres diff --git a/postgres/docker-compose.yml b/postgres/docker-compose.yml index 6b2e1e15..5d905388 100644 --- a/postgres/docker-compose.yml +++ b/postgres/docker-compose.yml @@ -2,7 +2,9 @@ version: '3.8' services: postgres: - build: . + build: + context: .. 
+ dockerfile: postgres/Dockerfile environment: POSTGRES_PASSWORD: shillelagh123 POSTGRES_USER: shillelagh @@ -10,7 +12,6 @@ services: volumes: - db_data:/var/lib/postgresql/data - ./init.sql:/docker-entrypoint-initdb.d/init.sql:ro - - ..:/src ports: - "5432:5432" From bbd8cc8f3969b360f10e15da87a43da1e54a1147 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Tue, 24 Sep 2024 15:25:25 -0400 Subject: [PATCH 14/15] Remove entrypoint --- .github/workflows/python-integration.yml | 4 ++-- postgres/entrypoint.sh | 22 ------------------- .../adapters/api/gsheets/integration_test.py | 2 +- 3 files changed, 3 insertions(+), 25 deletions(-) delete mode 100755 postgres/entrypoint.sh diff --git a/.github/workflows/python-integration.yml b/.github/workflows/python-integration.yml index ff95ddda..37a68d8a 100644 --- a/.github/workflows/python-integration.yml +++ b/.github/workflows/python-integration.yml @@ -36,7 +36,7 @@ jobs: docker compose -f postgres/docker-compose.yml up --build -d - name: Wait for Postgres to become available run: | - until docker run --network container:postgres_postgres_1 postgres_postgres pg_isready -h postgres -p 5432 -U shillelagh --timeout=90; do sleep 10; done + until docker run --network container:postgres-postgres-1 postgres-postgres pg_isready -h postgres -p 5432 -U shillelagh --timeout=90; do sleep 10; done - name: Test with pytest env: SHILLELAGH_ADAPTER_KWARGS: ${{ secrets.SHILLELAGH_ADAPTER_KWARGS }} @@ -45,5 +45,5 @@ jobs: - name: Stop the Postgres service if: always() run: | - docker logs postgres_postgres_1 + docker logs postgres-postgres-1 docker compose -f postgres/docker-compose.yml down diff --git a/postgres/entrypoint.sh b/postgres/entrypoint.sh deleted file mode 100755 index 063b9f60..00000000 --- a/postgres/entrypoint.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -set -e # If any command fails, stop the script - -# create virtualenv -cd -python3 -m venv venv -source venv/bin/activate -pip3 install 'setuptools>=61' wheel - -# 
install shillelagh -cd /src -pip3 install -v -e '.[all]' - -# install multicorn2 -rm -rf multicorn2 -git clone https://github.com/pgsql-io/multicorn2.git -cd multicorn2 -git checkout v2.5 -pip3 install . - -# call the original entrypoint -exec docker-entrypoint.sh postgres diff --git a/tests/adapters/api/gsheets/integration_test.py b/tests/adapters/api/gsheets/integration_test.py index 5948d9ff..4653293d 100644 --- a/tests/adapters/api/gsheets/integration_test.py +++ b/tests/adapters/api/gsheets/integration_test.py @@ -769,7 +769,7 @@ def test_public_sheet_multicorn() -> None: user="shillelagh", password="shillelagh123", host="localhost", - port=12345, + port=5432, database="shillelagh", ) cursor = connection.cursor() From d554f9b68ca8af468b352165ba1a3cd3406617b7 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Tue, 24 Sep 2024 16:38:35 -0400 Subject: [PATCH 15/15] Fix tests --- tests/backends/multicorn/db_test.py | 10 ---------- tests/backends/multicorn/dialects/base_test.py | 1 - 2 files changed, 11 deletions(-) diff --git a/tests/backends/multicorn/db_test.py b/tests/backends/multicorn/db_test.py index 56e3e29b..41f989cc 100644 --- a/tests/backends/multicorn/db_test.py +++ b/tests/backends/multicorn/db_test.py @@ -85,10 +85,8 @@ def test_cursor(mocker: MockerFixture) -> None: super = mocker.patch("shillelagh.backends.multicorn.db.super", create=True) execute = mocker.MagicMock(name="execute") super.return_value.execute = execute - connection = mocker.MagicMock() cursor = Cursor( - connection, adapters={"dummy": FakeAdapter}, adapter_kwargs={}, schema="main", @@ -149,7 +147,6 @@ def test_cursor_no_table_match(mocker: MockerFixture) -> None: super = mocker.patch("shillelagh.backends.multicorn.db.super", create=True) execute = mocker.MagicMock(name="execute") super.return_value.execute = execute - connection = mocker.MagicMock() execute.side_effect = [ True, # SAVEPOINT @@ -157,7 +154,6 @@ def test_cursor_no_table_match(mocker: MockerFixture) -> None: ] cursor = 
Cursor( - connection, adapters={"dummy": FakeAdapter}, adapter_kwargs={}, schema="main", @@ -175,7 +171,6 @@ def test_cursor_no_table_name(mocker: MockerFixture) -> None: super = mocker.patch("shillelagh.backends.multicorn.db.super", create=True) execute = mocker.MagicMock(name="execute") super.return_value.execute = execute - connection = mocker.MagicMock() execute.side_effect = [ True, # SAVEPOINT @@ -183,7 +178,6 @@ def test_cursor_no_table_name(mocker: MockerFixture) -> None: ] cursor = Cursor( - connection, adapters={"dummy": FakeAdapter}, adapter_kwargs={}, schema="main", @@ -206,10 +200,8 @@ def test_drop_table(mocker: MockerFixture) -> None: "shillelagh.backends.multicorn.db.find_adapter", return_value=(adapter, ["one"], {"two": 2}), ) - connection = mocker.MagicMock() cursor = Cursor( - connection, adapters={"dummy": FakeAdapter}, adapter_kwargs={}, schema="main", @@ -233,10 +225,8 @@ def test_table_without_columns(mocker: MockerFixture) -> None: "shillelagh.backends.multicorn.db.find_adapter", return_value=(adapter, ["one"], {"two": 2}), ) - connection = mocker.MagicMock() cursor = Cursor( - connection, adapters={"dummy": adapter}, adapter_kwargs={}, schema="main", diff --git a/tests/backends/multicorn/dialects/base_test.py b/tests/backends/multicorn/dialects/base_test.py index 86e32b96..1d5610fe 100644 --- a/tests/backends/multicorn/dialects/base_test.py +++ b/tests/backends/multicorn/dialects/base_test.py @@ -74,7 +74,6 @@ def test_get_adapter_for_table_name(mocker: MockerFixture) -> None: mocker.patch("shillelagh.backends.multicorn.db.super", create=True) connection = mocker.MagicMock() connection.engine.raw_connection().cursor.return_value = Cursor( - connection, adapters={"dummy": FakeAdapter}, adapter_kwargs={}, schema="main",