Skip to content

Commit

Permalink
Attempt to guess the Airtable type for Fields (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
cancan101 authored Mar 24, 2022
1 parent 145b56e commit 160fd37
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 9 deletions.
53 changes: 44 additions & 9 deletions airtabledb/adapter.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from collections import defaultdict
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple

from pyairtable import Table
from shillelagh.adapters.base import Adapter
from shillelagh.fields import Field, Filter, String
from shillelagh.fields import Boolean, Field, Filter, Float, String
from shillelagh.typing import RequestedOrder

from .fields import MaybeListString
Expand All @@ -11,6 +12,32 @@
# -----------------------------------------------------------------------------


def guess_field(values: List[Any]) -> Field:
    """Pick a ``Field`` for a column from a sample of its values.

    The decision is driven purely by the set of Python types found in
    ``values``:

    * only ``str``                  -> ``String``
    * only ``float`` or only ``int`` -> ``Float``
    * a mix of ``float`` and ``int`` -> ``Float``
    * only ``bool``                 -> ``Boolean``
    * anything else (lists, numbers mixed with dicts, other mixes,
      or no values at all)          -> ``MaybeListString``
    """
    seen = {type(value) for value in values}

    if len(seen) != 1:
        if seen == {float, int}:
            return Float()
        # TODO(cancan101) check the dict + make a Field for this
        # Numbers mixed with a dict land here together with every other
        # heterogeneous sample (and the empty sample).
        return MaybeListString()

    (only,) = seen
    if only is str:
        return String()
    if only is float or only is int:
        # Ints are widened to Float: this seems safest as there are cases
        # where we get floats and ints for the same column.
        return Float()
    if only is bool:
        return Boolean()

    # TODO(cancan101): do more work + make a Field for lists
    # Lists, and any other single type not handled above, use the fallback.
    return MaybeListString()


class AirtableAdapter(Adapter):
safe = True

Expand All @@ -30,6 +57,7 @@ def __init__(
self._table_api = Table(api_key, base_id, table)

fields: Iterable[str]
columns: Dict[str, Field]
if self.base_metadata is not None:
# TODO(cancan101): Better error handling here
# We search by name here.
Expand All @@ -42,13 +70,18 @@ def __init__(
columns_metadata = table_metadata["columns"]
fields = [col["name"] for col in columns_metadata]
self.strict_col = True

columns = dict({k: MaybeListString() for k in fields}, id=String())

# Attempt introspection by looking at the data.
# This is not reliable, as Airtable removes
# the key from a record if the value is empty.
else:
# This introspects just the first row in the table.
if peek_rows is None or peek_rows == 1:
fields = self._table_api.first()["fields"].keys()
field_values = {
k: [v] for k, v in self._table_api.first()["fields"].items()
}
# Or peek at specified number of rows
else:
# We have an explicit type check here as the Airtable API
Expand All @@ -58,20 +91,22 @@ def __init__(
f"peek_rows should be an int. Got: {type(peek_rows)}"
)

fields = set()
field_values = defaultdict(list)
for row in self._table_api.all(max_records=peek_rows):
fields |= row["fields"].keys()
for k, v in row["fields"].items():
field_values[k].append(v)

self.strict_col = False

# TODO(cancan101): parse out types
self.columns: Dict[str, Field] = dict(
{k: MaybeListString() for k in fields}, id=String()
)
columns = dict(
{k: guess_field(v) for k, v in field_values.items()}, id=String()
)

self.columns = columns

@staticmethod
def supports(uri: str, fast: bool = True, **kwargs: Any) -> Optional[bool]:
    """Report whether this adapter can handle ``uri``.

    Currently always returns ``True``; ``uri``, ``fast`` and ``kwargs``
    are not inspected.
    """
    # NOTE(review): presumably part of the shillelagh ``Adapter`` interface;
    # confirm the expected semantics of ``fast`` against the base class.
    # TODO the slow path here could connect to the Airtable API
    return True

@staticmethod
Expand Down
26 changes: 26 additions & 0 deletions tests/test_adapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from shillelagh.fields import Boolean, Float, String

from airtabledb import fields
from airtabledb.adapter import guess_field


def test_guess_field():
    """Check the Field type that ``guess_field`` picks for sample values."""
    nan_marker = {"specialValue": "NaN"}

    expectations = [
        # Plain numbers, alone or mixed, are all treated as Float.
        ([1], Float),
        ([1.5], Float),
        ([1, 1.5], Float),
        ([True], Boolean),
        (["a"], String),
        # Numbers mixed with a special-value dict use the fallback.
        ([1, nan_marker], fields.MaybeListString),
        ([1.5, nan_marker], fields.MaybeListString),
        ([1.5, 1, nan_marker], fields.MaybeListString),
        # Not sure if this comes up in practice
        ([["a"], ["b"]], fields.MaybeListString),
        # Not sure if this comes up in practice
        (["a", 4], fields.MaybeListString),
    ]

    for sample, expected in expectations:
        assert type(guess_field(sample)) is expected

0 comments on commit 160fd37

Please sign in to comment.