diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index b4e8a966..2f4d39ec 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -5,6 +5,11 @@ Changelog
 Next
 ====
 
+Version 1.2.23 - 2024-07-31
+===========================
+
+- Support for GitHub stats (#463)
+
 Version 1.2.22 - 2024-07-30
 ===========================
 
diff --git a/src/shillelagh/adapters/api/github.py b/src/shillelagh/adapters/api/github.py
index f3087bc1..8354c81d 100644
--- a/src/shillelagh/adapters/api/github.py
+++ b/src/shillelagh/adapters/api/github.py
@@ -6,14 +6,15 @@
 import logging
 import urllib.parse
 from dataclasses import dataclass
-from typing import Any, Dict, Iterator, List, Optional, Tuple
+from datetime import datetime, timedelta, timezone
+from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, TypedDict
 
 import jsonpath
 import requests_cache
 
 from shillelagh.adapters.base import Adapter
 from shillelagh.exceptions import ProgrammingError
-from shillelagh.fields import Boolean, Field, Integer, String, StringDateTime
+from shillelagh.fields import Boolean, DateTime, Field, Integer, String, StringDateTime
 from shillelagh.filters import Equal, Filter
 from shillelagh.typing import RequestedOrder, Row
 
@@ -58,45 +59,111 @@ class Column:
     default: Optional[Filter] = None
 
 
+def participation_processor(payload: Dict[str, Any]) -> List[Dict[str, Any]]:
+    """
+    Process participation data.
+
+    https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#get-the-weekly-commit-count
+    """
+    today_utc_midnight = datetime.now(timezone.utc).replace(
+        hour=0,
+        minute=0,
+        second=0,
+        microsecond=0,
+    )
+    start = today_utc_midnight - timedelta(weeks=len(payload["all"]))
+
+    return [
+        {
+            "start_at": start + timedelta(weeks=i),
+            "end_at": start + timedelta(weeks=i + 1),
+            "all": all,
+            "owner": owner,
+        }
+        for i, (all, owner) in enumerate(zip(payload["all"], payload["owner"]))
+    ]
+
+
+class EndPointDefinition(TypedDict):
+    """
+    A definition for an endpoint.
+
+    This is used to define the columns and the path to the values in the JSON response.
+    It can also specify if the endpoint is paginated (most are) and a processor to
+    transform the payload.
+    """
+
+    columns: List[Column]
+    paginated: bool
+    processor: Optional[Callable[[Dict[str, Any]], List[Dict[str, Any]]]]
+
+
 # a mapping from the column name (eg, ``userid``) to the path in the JSON
 # response (``{"user": {"id": 42}}`` => ``user.id``) together with the field
-TABLES: Dict[str, Dict[str, List[Column]]] = {
+TABLES: Dict[str, Dict[str, EndPointDefinition]] = {
     "repos": {
-        "pulls": [
-            Column("url", "html_url", String()),
-            Column("id", "id", Integer()),
-            Column("number", "number", Integer(filters=[Equal])),
-            Column("state", "state", String(filters=[Equal]), Equal("all")),
-            Column("title", "title", String()),
-            Column("userid", "user.id", Integer()),
-            Column("username", "user.login", String()),
-            Column("draft", "draft", Boolean()),
-            Column("head", "head.ref", String(filters=[Equal])),  # head.label?
- Column("created_at", "created_at", StringDateTime()), - Column("updated_at", "updated_at", StringDateTime()), - Column("closed_at", "closed_at", StringDateTime()), - Column("merged_at", "merged_at", StringDateTime()), - ], - "issues": [ - Column("url", "html_url", String()), - Column("id", "id", Integer()), - Column("number", "number", Integer(filters=[Equal])), - Column("state", "state", String(filters=[Equal]), Equal("all")), - Column("title", "title", String()), - Column("userid", "user.id", Integer()), - Column("username", "user.login", String()), - Column("draft", "draft", Boolean()), - Column("locked", "locked", Boolean()), - Column("comments", "comments", Integer()), - Column("created_at", "created_at", StringDateTime()), - Column("updated_at", "updated_at", StringDateTime()), - Column("closed_at", "closed_at", StringDateTime()), - Column("body", "body", String()), - Column("author_association", "author_association", String()), - Column("labels", "labels[*].name", JSONString()), - Column("assignees", "assignees[*].login", JSONString()), - Column("reactions", "reactions", JSONString()), - ], + "pulls": { + "columns": [ + Column("url", "html_url", String()), + Column("id", "id", Integer()), + Column("number", "number", Integer(filters=[Equal])), + Column("state", "state", String(filters=[Equal]), Equal("all")), + Column("title", "title", String()), + Column("userid", "user.id", Integer()), + Column("username", "user.login", String()), + Column("draft", "draft", Boolean()), + Column("head", "head.ref", String(filters=[Equal])), # head.label? + Column("created_at", "created_at", StringDateTime()), + Column("updated_at", "updated_at", StringDateTime()), + Column("closed_at", "closed_at", StringDateTime()), + Column("merged_at", "merged_at", StringDateTime()), + ], + "paginated": True, + "processor": None, + }, + "issues": { + "columns": [ + Column("url", "html_url", String()), + Column("id", "id", Integer()), + Column("number", "number", Integer(filters=[Equal])), + Column("state", "state", String(filters=[Equal]), Equal("all")), + Column("title", "title", String()), + Column("userid", "user.id", Integer()), + Column("username", "user.login", String()), + Column("draft", "draft", Boolean()), + Column("locked", "locked", Boolean()), + Column("comments", "comments", Integer()), + Column("created_at", "created_at", StringDateTime()), + Column("updated_at", "updated_at", StringDateTime()), + Column("closed_at", "closed_at", StringDateTime()), + Column("body", "body", String()), + Column("author_association", "author_association", String()), + Column("labels", "labels[*].name", JSONString()), + Column("assignees", "assignees[*].login", JSONString()), + Column("reactions", "reactions", JSONString()), + ], + "paginated": True, + "processor": None, + }, + "stats/punch_card": { + "columns": [ + Column("dow", "$.[0]", Integer()), + Column("hour", "$.[1]", Integer()), + Column("commits", "$.[2]", Integer()), + ], + "paginated": True, + "processor": None, + }, + "stats/participation": { + "columns": [ + Column("start_at", "$.start_at", DateTime()), + Column("end_at", "$.end_at", DateTime()), + Column("all", "$.all", Integer()), + Column("owner", "$.owner", Integer()), + ], + "paginated": False, + "processor": participation_processor, + }, }, } @@ -115,11 +182,11 @@ class GitHubAPI(Adapter): def supports(uri: str, fast: bool = True, **kwargs: Any) -> Optional[bool]: parsed = urllib.parse.urlparse(uri) - if parsed.path.count("/") != 4: + if parsed.path.count("/") < 4: return False # pylint: 
disable=unused-variable - _, base, owner, repo, resource = parsed.path.rsplit("/", 4) + _, base, owner, repo, resource = parsed.path.split("/", 4) return ( parsed.netloc == "api.github.com" and base in TABLES @@ -129,7 +196,7 @@ def supports(uri: str, fast: bool = True, **kwargs: Any) -> Optional[bool]: @staticmethod def parse_uri(uri: str) -> Tuple[str, str, str, str]: parsed = urllib.parse.urlparse(uri) - _, base, owner, repo, resource = parsed.path.rsplit("/", 4) + _, base, owner, repo, resource = parsed.path.split("/", 4) return ( base, owner, @@ -162,7 +229,8 @@ def __init__( # pylint: disable=too-many-arguments def get_columns(self) -> Dict[str, Field]: return { - column.name: column.field for column in TABLES[self.base][self.resource] + column.name: column.field + for column in TABLES[self.base][self.resource]["columns"] } def get_data( @@ -172,7 +240,7 @@ def get_data( **kwargs: Any, ) -> Iterator[Row]: # apply default values - for column in TABLES[self.base][self.resource]: + for column in TABLES[self.base][self.resource]["columns"]: if column.default is not None and column.name not in bounds: bounds[column.name] = column.default @@ -204,7 +272,7 @@ def _get_single_resource( row = { column.name: get_value(column, payload) - for column in TABLES[self.base][self.resource] + for column in TABLES[self.base][self.resource]["columns"] } row["rowid"] = 0 _logger.debug(row) @@ -222,16 +290,23 @@ def _get_multiple_resources( headers["Authorization"] = f"Bearer {self.access_token}" url = f"https://api.github.com/{self.base}/{self.owner}/{self.repo}/{self.resource}" + config = TABLES[self.base][self.resource] # map filters in ``bounds`` to query params params = {name: filter_.value for name, filter_ in bounds.items()} # type: ignore - params["per_page"] = PAGE_SIZE page = 1 rowid = 0 while True: - _logger.info("GET %s (page %d)", url, page) - params["page"] = page + if config["paginated"]: + _logger.info("GET %s (page %d)", url, page) + params.update( + { + "per_page": PAGE_SIZE, + "page": page, + }, + ) + response = self._session.get(url, headers=headers, params=params) payload = response.json() @@ -241,16 +316,22 @@ def _get_multiple_resources( if not response.ok: raise ProgrammingError(payload["message"]) + if processor := config["processor"]: + payload = processor(payload) + for resource in payload: row = { column.name: get_value(column, resource) - for column in TABLES[self.base][self.resource] + for column in config["columns"] } row["rowid"] = rowid _logger.debug(row) yield row rowid += 1 + if not config["paginated"]: + break + page += 1 diff --git a/src/shillelagh/lib.py b/src/shillelagh/lib.py index 43ac5f4e..8e2c13ab 100644 --- a/src/shillelagh/lib.py +++ b/src/shillelagh/lib.py @@ -567,7 +567,12 @@ def find_adapter( for adapter in adapters: key = adapter.__name__.lower() kwargs = adapter_kwargs.get(key, {}) - supported: Optional[bool] = adapter.supports(uri, fast=True, **kwargs) + + try: + supported: Optional[bool] = adapter.supports(uri, fast=True, **kwargs) + except Exception: # pylint: disable=broad-except + supported = False + if supported: args = adapter.parse_uri(uri) return adapter, args, kwargs @@ -577,7 +582,13 @@ def find_adapter( for adapter in candidates: key = adapter.__name__.lower() kwargs = adapter_kwargs.get(key, {}) - if adapter.supports(uri, fast=False, **kwargs): + + try: + supported = adapter.supports(uri, fast=False, **kwargs) + except Exception: # pylint: disable=broad-except + supported = False + + if supported: args = adapter.parse_uri(uri) return adapter, 
args, kwargs @@ -611,9 +622,11 @@ def get_session( session = requests_cache.CachedSession( cache_name=cache_name, backend="sqlite", - expire_after=requests_cache.DO_NOT_CACHE - if expire_after == timedelta(seconds=-1) - else expire_after.total_seconds(), + expire_after=( + requests_cache.DO_NOT_CACHE + if expire_after == timedelta(seconds=-1) + else expire_after.total_seconds() + ), ) session.headers.update(request_headers) diff --git a/tests/adapters/api/generic_json_test.py b/tests/adapters/api/generic_json_test.py index a9ef0dd0..ab5bb6b1 100644 --- a/tests/adapters/api/generic_json_test.py +++ b/tests/adapters/api/generic_json_test.py @@ -291,11 +291,11 @@ def test_generic_json_array(requests_mock: Mocker) -> None: """ # for datassette and other probing adapters requests_mock.get( - "https://api.github.com/repos/apache/superset/-/versions.json", + "https://example.com/-/versions.json", status_code=404, ) - url = URL("https://api.github.com/repos/apache/superset/stats/punch_card") + url = URL("https://example.com/") requests_mock.head(str(url), headers={"content-type": "application/json"}) requests_mock.get( str(url), diff --git a/tests/adapters/api/github_test.py b/tests/adapters/api/github_test.py index 774530f2..43a5fee8 100644 --- a/tests/adapters/api/github_test.py +++ b/tests/adapters/api/github_test.py @@ -3,14 +3,15 @@ Tests for the Datasette adapter. """ -import datetime +from datetime import datetime, timezone import pytest +from freezegun import freeze_time from pytest_mock import MockerFixture from requests import Session from requests_mock.mocker import Mocker -from shillelagh.adapters.api.github import GitHubAPI +from shillelagh.adapters.api.github import GitHubAPI, participation_processor from shillelagh.backends.apsw.db import connect from shillelagh.exceptions import ProgrammingError from shillelagh.filters import Equal @@ -55,8 +56,8 @@ def test_github(mocker: MockerFixture, requests_mock: Mocker) -> None: "AAfghahi", False, "arash/datasetsAndReports", - datetime.datetime(2021, 9, 3, 15, 57, 37, tzinfo=datetime.timezone.utc), - datetime.datetime(2021, 9, 3, 15, 57, 39, tzinfo=datetime.timezone.utc), + datetime(2021, 9, 3, 15, 57, 37, tzinfo=timezone.utc), + datetime(2021, 9, 3, 15, 57, 39, tzinfo=timezone.utc), None, None, ), @@ -70,8 +71,8 @@ def test_github(mocker: MockerFixture, requests_mock: Mocker) -> None: "villebro", True, "villebro/remove-table-viz", - datetime.datetime(2021, 9, 3, 7, 52, 18, tzinfo=datetime.timezone.utc), - datetime.datetime(2021, 9, 3, 8, 48, 45, tzinfo=datetime.timezone.utc), + datetime(2021, 9, 3, 7, 52, 18, tzinfo=timezone.utc), + datetime(2021, 9, 3, 8, 48, 45, tzinfo=timezone.utc), None, None, ), @@ -85,8 +86,8 @@ def test_github(mocker: MockerFixture, requests_mock: Mocker) -> None: "dependabot[bot]", False, "dependabot/npm_and_yarn/superset-frontend/storybook-addon-jsx-7.3.13", - datetime.datetime(2021, 9, 2, 20, 51, 50, tzinfo=datetime.timezone.utc), - datetime.datetime(2021, 9, 2, 21, 22, 54, tzinfo=datetime.timezone.utc), + datetime(2021, 9, 2, 20, 51, 50, tzinfo=timezone.utc), + datetime(2021, 9, 2, 21, 22, 54, tzinfo=timezone.utc), None, None, ), @@ -100,8 +101,8 @@ def test_github(mocker: MockerFixture, requests_mock: Mocker) -> None: "amitmiran137", False, "version_export_ff_on", - datetime.datetime(2021, 9, 2, 16, 52, 34, tzinfo=datetime.timezone.utc), - datetime.datetime(2021, 9, 2, 18, 6, 27, tzinfo=datetime.timezone.utc), + datetime(2021, 9, 2, 16, 52, 34, tzinfo=timezone.utc), + datetime(2021, 9, 2, 18, 6, 27, 
tzinfo=timezone.utc), None, None, ), @@ -115,8 +116,8 @@ def test_github(mocker: MockerFixture, requests_mock: Mocker) -> None: "villebro", False, "villebro/libecpg", - datetime.datetime(2021, 9, 2, 12, 1, 2, tzinfo=datetime.timezone.utc), - datetime.datetime(2021, 9, 2, 12, 6, 50, tzinfo=datetime.timezone.utc), + datetime(2021, 9, 2, 12, 1, 2, tzinfo=timezone.utc), + datetime(2021, 9, 2, 12, 6, 50, tzinfo=timezone.utc), None, None, ), @@ -130,8 +131,8 @@ def test_github(mocker: MockerFixture, requests_mock: Mocker) -> None: "zhaoyongjie", True, "refactor_orderby", - datetime.datetime(2021, 9, 2, 9, 45, 40, tzinfo=datetime.timezone.utc), - datetime.datetime(2021, 9, 3, 10, 31, 4, tzinfo=datetime.timezone.utc), + datetime(2021, 9, 2, 9, 45, 40, tzinfo=timezone.utc), + datetime(2021, 9, 3, 10, 31, 4, tzinfo=timezone.utc), None, None, ), @@ -145,8 +146,8 @@ def test_github(mocker: MockerFixture, requests_mock: Mocker) -> None: "robdiciuccio", False, "rd/async-query-init-refactor", - datetime.datetime(2021, 9, 1, 19, 51, 51, tzinfo=datetime.timezone.utc), - datetime.datetime(2021, 9, 1, 22, 29, 46, tzinfo=datetime.timezone.utc), + datetime(2021, 9, 1, 19, 51, 51, tzinfo=timezone.utc), + datetime(2021, 9, 1, 22, 29, 46, tzinfo=timezone.utc), None, None, ), @@ -160,8 +161,8 @@ def test_github(mocker: MockerFixture, requests_mock: Mocker) -> None: "m-ajay", False, "feat/migration-add-type-to-native-filter", - datetime.datetime(2021, 9, 1, 16, 35, 50, tzinfo=datetime.timezone.utc), - datetime.datetime(2021, 9, 3, 17, 33, 42, tzinfo=datetime.timezone.utc), + datetime(2021, 9, 1, 16, 35, 50, tzinfo=timezone.utc), + datetime(2021, 9, 3, 17, 33, 42, tzinfo=timezone.utc), None, None, ), @@ -175,8 +176,8 @@ def test_github(mocker: MockerFixture, requests_mock: Mocker) -> None: "ofekisr", False, "refactor/sql_json_view4", - datetime.datetime(2021, 9, 1, 16, 33, 45, tzinfo=datetime.timezone.utc), - datetime.datetime(2021, 9, 1, 17, 6, 32, tzinfo=datetime.timezone.utc), + datetime(2021, 9, 1, 16, 33, 45, tzinfo=timezone.utc), + datetime(2021, 9, 1, 17, 6, 32, tzinfo=timezone.utc), None, None, ), @@ -190,8 +191,8 @@ def test_github(mocker: MockerFixture, requests_mock: Mocker) -> None: "kgabryje", False, "perf/dashboard-rerenders-4", - datetime.datetime(2021, 9, 1, 13, 41, 12, tzinfo=datetime.timezone.utc), - datetime.datetime(2021, 9, 2, 15, 39, 15, tzinfo=datetime.timezone.utc), + datetime(2021, 9, 1, 13, 41, 12, tzinfo=timezone.utc), + datetime(2021, 9, 2, 15, 39, 15, tzinfo=timezone.utc), None, None, ), @@ -230,8 +231,8 @@ def test_github_single_resource(mocker: MockerFixture, requests_mock: Mocker) -> "AAfghahi", False, "arash/datasetsAndReports", - datetime.datetime(2021, 9, 3, 15, 57, 37, tzinfo=datetime.timezone.utc), - datetime.datetime(2021, 9, 3, 15, 57, 39, tzinfo=datetime.timezone.utc), + datetime(2021, 9, 3, 15, 57, 37, tzinfo=timezone.utc), + datetime(2021, 9, 3, 15, 57, 39, tzinfo=timezone.utc), None, None, ), @@ -620,3 +621,109 @@ def test_github_json_field(mocker: MockerFixture, requests_mock: Mocker) -> None ('["size/XS", "hold:review-after-release"]',), ('["size/M", "review-checkpoint", "plugins"]',), ] + + +@freeze_time("2024-01-01T00:00:00Z") +def test_participation_processor() -> None: + """ + Test the ``participation_processor`` function. 
+ """ + payload = { + "all": [27, 34, 27, 31, 24], + "owner": [0, 0, 0, 0, 0], + } + processed = participation_processor(payload) + + assert processed == [ + { + "start_at": datetime(2023, 11, 27, 0, 0, tzinfo=timezone.utc), + "end_at": datetime(2023, 12, 4, 0, 0, tzinfo=timezone.utc), + "all": 27, + "owner": 0, + }, + { + "start_at": datetime(2023, 12, 4, 0, 0, tzinfo=timezone.utc), + "end_at": datetime(2023, 12, 11, 0, 0, tzinfo=timezone.utc), + "all": 34, + "owner": 0, + }, + { + "start_at": datetime(2023, 12, 11, 0, 0, tzinfo=timezone.utc), + "end_at": datetime(2023, 12, 18, 0, 0, tzinfo=timezone.utc), + "all": 27, + "owner": 0, + }, + { + "start_at": datetime(2023, 12, 18, 0, 0, tzinfo=timezone.utc), + "end_at": datetime(2023, 12, 25, 0, 0, tzinfo=timezone.utc), + "all": 31, + "owner": 0, + }, + { + "start_at": datetime(2023, 12, 25, 0, 0, tzinfo=timezone.utc), + "end_at": datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc), + "all": 24, + "owner": 0, + }, + ] + + +@freeze_time("2024-01-01T00:00:00Z") +def test_github_participation(mocker: MockerFixture, requests_mock: Mocker) -> None: + """ + Test a request to the participation stats. + """ + mocker.patch( + "shillelagh.adapters.api.github.requests_cache.CachedSession", + return_value=Session(), + ) + + url = "https://api.github.com/repos/apache/superset/stats/participation" + requests_mock.get( + url, + json={ + "all": [27, 34, 27, 31, 24], + "owner": [0, 0, 0, 0, 0], + }, + ) + + connection = connect(":memory:") + cursor = connection.cursor() + + sql = """ + SELECT * FROM + "https://api.github.com/repos/apache/superset/stats/participation" + """ + data = list(cursor.execute(sql)) + assert data == [ + ( + datetime(2023, 11, 27, 0, 0, tzinfo=timezone.utc), + datetime(2023, 12, 4, 0, 0, tzinfo=timezone.utc), + 27, + 0, + ), + ( + datetime(2023, 12, 4, 0, 0, tzinfo=timezone.utc), + datetime(2023, 12, 11, 0, 0, tzinfo=timezone.utc), + 34, + 0, + ), + ( + datetime(2023, 12, 11, 0, 0, tzinfo=timezone.utc), + datetime(2023, 12, 18, 0, 0, tzinfo=timezone.utc), + 27, + 0, + ), + ( + datetime(2023, 12, 18, 0, 0, tzinfo=timezone.utc), + datetime(2023, 12, 25, 0, 0, tzinfo=timezone.utc), + 31, + 0, + ), + ( + datetime(2023, 12, 25, 0, 0, tzinfo=timezone.utc), + datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc), + 24, + 0, + ), + ] diff --git a/tests/lib_test.py b/tests/lib_test.py index 55c7328c..b009b5d5 100644 --- a/tests/lib_test.py +++ b/tests/lib_test.py @@ -447,6 +447,12 @@ def test_find_adapter(mocker: MockerFixture) -> None: adapter2.supports.side_effect = [None, True] assert find_adapter(uri, adapter_kwargs, adapters) == (adapter2, ("2",), {}) + adapter1.supports.side_effect = ValueError("Not supported") + adapter2.supports.side_effect = [None, ValueError("Not supported")] + with pytest.raises(ProgrammingError) as excinfo: + find_adapter(uri, adapter_kwargs, adapters) + assert excinfo.value.args[0] == "Unsupported table: https://example.com/" + def test_is_null() -> None: """