Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Control number of rows used for peeking at data #5

Merged
merged 4 commits into from
Mar 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 23 additions & 8 deletions airtabledb/adapter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict, Iterator, List, Optional, Tuple
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple

from pyairtable import Table
from shillelagh.adapters.base import Adapter
Expand All @@ -19,7 +19,8 @@ def __init__(
table: str,
base_id: str,
api_key: str,
base_metadata: BaseMetadata,
base_metadata: Optional[BaseMetadata],
peek_rows: Optional[int],
):
super().__init__()

Expand All @@ -28,7 +29,7 @@ def __init__(

self._table_api = Table(api_key, base_id, table)

fields: List[str]
fields: Iterable[str]
if self.base_metadata is not None:
# TODO(cancan101): Better error handling here
# We search by name here.
Expand All @@ -41,12 +42,26 @@ def __init__(
columns_metadata = table_metadata["columns"]
fields = [col["name"] for col in columns_metadata]
self.strict_col = True
# Attempts introspection by looking at data.
# This is super not reliable
# as Airtable removes the key if the value is empty.
else:
# This introspects the first row in the table.
# This is super not reliable
# as Airtable removes the key if the value is empty.
# We should probably look at more than one entry.
fields = self._table_api.first()["fields"]
# This introspects just the first row in the table.
if peek_rows is None or peek_rows == 1:
fields = self._table_api.first()["fields"].keys()
# Or peek at specified number of rows
else:
# We have an explicit type check here as the Airtable API
# just ignores the value if it isn't valid.
if not isinstance(peek_rows, int):
raise TypeError(
f"peek_rows should be an int. Got: {type(peek_rows)}"
)

fields = set()
for row in self._table_api.all(max_records=peek_rows):
fields |= row["fields"].keys()

self.strict_col = False

# TODO(cancan101): parse out types
Expand Down
9 changes: 8 additions & 1 deletion airtabledb/dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,21 @@ def create_connect_args(
if url.password and self.airtable_api_key:
raise ValueError("Both password and airtable_api_key were provided")

_, url_host = extract_query_host(url)
url_query, url_host = extract_query_host(url)
peek_rows = None
if "peek_rows" in url_query:
peek_rows_raw = url_query["peek_rows"]
if not isinstance(peek_rows_raw, str):
peek_rows_raw = peek_rows_raw[-1]
peek_rows = int(peek_rows_raw)

# At some point we might have args
adapter_kwargs = {
ADAPTER_NAME: {
"api_key": self.airtable_api_key or url.password,
"base_id": url_host,
"base_metadata": self.base_metadata,
"peek_rows": peek_rows,
}
}

Expand Down
18 changes: 18 additions & 0 deletions tests/test_dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,21 @@ def test_extract_query_host_no_query():
query, host = extract_query_host(URL.create(drivername="drive", host="myhost"))
assert host == "myhost"
assert query == {}


def test_peek_rows_default():
    """With no ``peek_rows`` query parameter, the adapter kwarg is ``None``."""
    url = make_url("airtable://foo")
    dialect = APSWAirtableDialect()
    _args, connect_kwargs = dialect.create_connect_args(url)
    adapter_kwargs = _get_adapter_kwargs(connect_kwargs)
    assert adapter_kwargs["peek_rows"] is None


def test_peek_rows_single():
    """A single ``peek_rows`` query value is passed to the adapter as an int."""
    url = make_url("airtable://foo?peek_rows=12")
    dialect = APSWAirtableDialect()
    _args, connect_kwargs = dialect.create_connect_args(url)
    adapter_kwargs = _get_adapter_kwargs(connect_kwargs)
    assert adapter_kwargs["peek_rows"] == 12


def test_peek_rows_dupe():
    """When ``peek_rows`` is repeated in the query string, the last value wins."""
    url = make_url("airtable://foo?peek_rows=12&peek_rows=13")
    dialect = APSWAirtableDialect()
    _args, connect_kwargs = dialect.create_connect_args(url)
    adapter_kwargs = _get_adapter_kwargs(connect_kwargs)
    assert adapter_kwargs["peek_rows"] == 13