Skip to content

Commit

Permalink
GitHub stats (#463)
Browse files Browse the repository at this point in the history
* WIP

* feat: GitHub stats endpoints
  • Loading branch information
betodealmeida authored Jul 31, 2024
1 parent 1f09be6 commit cdc79f4
Show file tree
Hide file tree
Showing 6 changed files with 291 additions and 79 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ Changelog
Next
====

Version 1.2.23 - 2024-07-31
===========================

- Support for GitHub stats (#463)

Version 1.2.22 - 2024-07-30
===========================

Expand Down
177 changes: 129 additions & 48 deletions src/shillelagh/adapters/api/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
import logging
import urllib.parse
from dataclasses import dataclass
from typing import Any, Dict, Iterator, List, Optional, Tuple
from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, TypedDict

import jsonpath
import requests_cache

from shillelagh.adapters.base import Adapter
from shillelagh.exceptions import ProgrammingError
from shillelagh.fields import Boolean, Field, Integer, String, StringDateTime
from shillelagh.fields import Boolean, DateTime, Field, Integer, String, StringDateTime
from shillelagh.filters import Equal, Filter
from shillelagh.typing import RequestedOrder, Row

Expand Down Expand Up @@ -58,45 +59,111 @@ class Column:
default: Optional[Filter] = None


def participation_processor(payload: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Process participation data.

    Turn the two parallel lists of weekly commit counts (``all`` and ``owner``)
    into one row per week. The weeks are laid out back to back, and the window
    spanning ``len(payload["all"])`` weeks ends at today's midnight in UTC.

    https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#get-the-weekly-commit-count

    :param payload: decoded JSON body with the ``all`` and ``owner`` lists.
    :return: a list of dictionaries with the keys ``start_at``, ``end_at``
        (timezone-aware datetimes), ``all`` and ``owner``; empty when the
        payload carries no counts.
    """
    # NOTE(review): GitHub documents that statistics endpoints may answer 202
    # while the data is being computed; presumably the body then lacks these
    # keys -- confirm. A missing or null list yields no rows instead of
    # raising ``KeyError``.
    all_counts = payload.get("all") or []
    owner_counts = payload.get("owner") or []

    today_utc_midnight = datetime.now(timezone.utc).replace(
        hour=0,
        minute=0,
        second=0,
        microsecond=0,
    )
    start = today_utc_midnight - timedelta(weeks=len(all_counts))

    # ``all_count``/``owner_count`` rather than ``all``/``owner`` so that the
    # builtin ``all`` is not shadowed inside the comprehension
    return [
        {
            "start_at": start + timedelta(weeks=i),
            "end_at": start + timedelta(weeks=i + 1),
            "all": all_count,
            "owner": owner_count,
        }
        for i, (all_count, owner_count) in enumerate(zip(all_counts, owner_counts))
    ]


class EndPointDefinition(TypedDict):
    """
    A definition for an endpoint.

    This is used to define the columns and the path to the values in the JSON response.
    It can also specify if the endpoint is paginated (most are) and a processor to
    transform the payload.
    """

    # columns exposed by the endpoint; each one pairs a column name with the
    # path of its value in the JSON response and the field used for it
    columns: List[Column]
    # whether the endpoint is requested page by page; when false a single
    # request is made
    paginated: bool
    # optional callable applied to the decoded JSON payload before rows are
    # extracted from it; ``None`` means the payload is used as returned
    processor: Optional[Callable[[Dict[str, Any]], List[Dict[str, Any]]]]


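# a mapping from the base (eg, ``repos``) and the resource (eg, ``pulls``) to the
# endpoint definition; each column in the definition maps a name (eg, ``userid``)
# to the path in the JSON response (``{"user": {"id": 42}}`` => ``user.id``)
# together with the field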
TABLES: Dict[str, Dict[str, List[Column]]] = {
TABLES: Dict[str, Dict[str, EndPointDefinition]] = {
"repos": {
"pulls": [
Column("url", "html_url", String()),
Column("id", "id", Integer()),
Column("number", "number", Integer(filters=[Equal])),
Column("state", "state", String(filters=[Equal]), Equal("all")),
Column("title", "title", String()),
Column("userid", "user.id", Integer()),
Column("username", "user.login", String()),
Column("draft", "draft", Boolean()),
Column("head", "head.ref", String(filters=[Equal])), # head.label?
Column("created_at", "created_at", StringDateTime()),
Column("updated_at", "updated_at", StringDateTime()),
Column("closed_at", "closed_at", StringDateTime()),
Column("merged_at", "merged_at", StringDateTime()),
],
"issues": [
Column("url", "html_url", String()),
Column("id", "id", Integer()),
Column("number", "number", Integer(filters=[Equal])),
Column("state", "state", String(filters=[Equal]), Equal("all")),
Column("title", "title", String()),
Column("userid", "user.id", Integer()),
Column("username", "user.login", String()),
Column("draft", "draft", Boolean()),
Column("locked", "locked", Boolean()),
Column("comments", "comments", Integer()),
Column("created_at", "created_at", StringDateTime()),
Column("updated_at", "updated_at", StringDateTime()),
Column("closed_at", "closed_at", StringDateTime()),
Column("body", "body", String()),
Column("author_association", "author_association", String()),
Column("labels", "labels[*].name", JSONString()),
Column("assignees", "assignees[*].login", JSONString()),
Column("reactions", "reactions", JSONString()),
],
"pulls": {
"columns": [
Column("url", "html_url", String()),
Column("id", "id", Integer()),
Column("number", "number", Integer(filters=[Equal])),
Column("state", "state", String(filters=[Equal]), Equal("all")),
Column("title", "title", String()),
Column("userid", "user.id", Integer()),
Column("username", "user.login", String()),
Column("draft", "draft", Boolean()),
Column("head", "head.ref", String(filters=[Equal])), # head.label?
Column("created_at", "created_at", StringDateTime()),
Column("updated_at", "updated_at", StringDateTime()),
Column("closed_at", "closed_at", StringDateTime()),
Column("merged_at", "merged_at", StringDateTime()),
],
"paginated": True,
"processor": None,
},
"issues": {
"columns": [
Column("url", "html_url", String()),
Column("id", "id", Integer()),
Column("number", "number", Integer(filters=[Equal])),
Column("state", "state", String(filters=[Equal]), Equal("all")),
Column("title", "title", String()),
Column("userid", "user.id", Integer()),
Column("username", "user.login", String()),
Column("draft", "draft", Boolean()),
Column("locked", "locked", Boolean()),
Column("comments", "comments", Integer()),
Column("created_at", "created_at", StringDateTime()),
Column("updated_at", "updated_at", StringDateTime()),
Column("closed_at", "closed_at", StringDateTime()),
Column("body", "body", String()),
Column("author_association", "author_association", String()),
Column("labels", "labels[*].name", JSONString()),
Column("assignees", "assignees[*].login", JSONString()),
Column("reactions", "reactions", JSONString()),
],
"paginated": True,
"processor": None,
},
"stats/punch_card": {
"columns": [
Column("dow", "$.[0]", Integer()),
Column("hour", "$.[1]", Integer()),
Column("commits", "$.[2]", Integer()),
],
"paginated": True,
"processor": None,
},
"stats/participation": {
"columns": [
Column("start_at", "$.start_at", DateTime()),
Column("end_at", "$.end_at", DateTime()),
Column("all", "$.all", Integer()),
Column("owner", "$.owner", Integer()),
],
"paginated": False,
"processor": participation_processor,
},
},
}

Expand All @@ -115,11 +182,11 @@ class GitHubAPI(Adapter):
def supports(uri: str, fast: bool = True, **kwargs: Any) -> Optional[bool]:
parsed = urllib.parse.urlparse(uri)

if parsed.path.count("/") != 4:
if parsed.path.count("/") < 4:
return False

# pylint: disable=unused-variable
_, base, owner, repo, resource = parsed.path.rsplit("/", 4)
_, base, owner, repo, resource = parsed.path.split("/", 4)
return (
parsed.netloc == "api.github.com"
and base in TABLES
Expand All @@ -129,7 +196,7 @@ def supports(uri: str, fast: bool = True, **kwargs: Any) -> Optional[bool]:
@staticmethod
def parse_uri(uri: str) -> Tuple[str, str, str, str]:
parsed = urllib.parse.urlparse(uri)
_, base, owner, repo, resource = parsed.path.rsplit("/", 4)
_, base, owner, repo, resource = parsed.path.split("/", 4)
return (
base,
owner,
Expand Down Expand Up @@ -162,7 +229,8 @@ def __init__( # pylint: disable=too-many-arguments

def get_columns(self) -> Dict[str, Field]:
return {
column.name: column.field for column in TABLES[self.base][self.resource]
column.name: column.field
for column in TABLES[self.base][self.resource]["columns"]
}

def get_data(
Expand All @@ -172,7 +240,7 @@ def get_data(
**kwargs: Any,
) -> Iterator[Row]:
# apply default values
for column in TABLES[self.base][self.resource]:
for column in TABLES[self.base][self.resource]["columns"]:
if column.default is not None and column.name not in bounds:
bounds[column.name] = column.default

Expand Down Expand Up @@ -204,7 +272,7 @@ def _get_single_resource(

row = {
column.name: get_value(column, payload)
for column in TABLES[self.base][self.resource]
for column in TABLES[self.base][self.resource]["columns"]
}
row["rowid"] = 0
_logger.debug(row)
Expand All @@ -222,16 +290,23 @@ def _get_multiple_resources(
headers["Authorization"] = f"Bearer {self.access_token}"

url = f"https://api.github.com/{self.base}/{self.owner}/{self.repo}/{self.resource}"
config = TABLES[self.base][self.resource]

# map filters in ``bounds`` to query params
params = {name: filter_.value for name, filter_ in bounds.items()} # type: ignore
params["per_page"] = PAGE_SIZE

page = 1
rowid = 0
while True:
_logger.info("GET %s (page %d)", url, page)
params["page"] = page
if config["paginated"]:
_logger.info("GET %s (page %d)", url, page)
params.update(
{
"per_page": PAGE_SIZE,
"page": page,
},
)

response = self._session.get(url, headers=headers, params=params)

payload = response.json()
Expand All @@ -241,16 +316,22 @@ def _get_multiple_resources(
if not response.ok:
raise ProgrammingError(payload["message"])

if processor := config["processor"]:
payload = processor(payload)

for resource in payload:
row = {
column.name: get_value(column, resource)
for column in TABLES[self.base][self.resource]
for column in config["columns"]
}
row["rowid"] = rowid
_logger.debug(row)
yield row
rowid += 1

if not config["paginated"]:
break

page += 1


Expand Down
23 changes: 18 additions & 5 deletions src/shillelagh/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,12 @@ def find_adapter(
for adapter in adapters:
key = adapter.__name__.lower()
kwargs = adapter_kwargs.get(key, {})
supported: Optional[bool] = adapter.supports(uri, fast=True, **kwargs)

try:
supported: Optional[bool] = adapter.supports(uri, fast=True, **kwargs)
except Exception: # pylint: disable=broad-except
supported = False

if supported:
args = adapter.parse_uri(uri)
return adapter, args, kwargs
Expand All @@ -577,7 +582,13 @@ def find_adapter(
for adapter in candidates:
key = adapter.__name__.lower()
kwargs = adapter_kwargs.get(key, {})
if adapter.supports(uri, fast=False, **kwargs):

try:
supported = adapter.supports(uri, fast=False, **kwargs)
except Exception: # pylint: disable=broad-except
supported = False

if supported:
args = adapter.parse_uri(uri)
return adapter, args, kwargs

Expand Down Expand Up @@ -611,9 +622,11 @@ def get_session(
session = requests_cache.CachedSession(
cache_name=cache_name,
backend="sqlite",
expire_after=requests_cache.DO_NOT_CACHE
if expire_after == timedelta(seconds=-1)
else expire_after.total_seconds(),
expire_after=(
requests_cache.DO_NOT_CACHE
if expire_after == timedelta(seconds=-1)
else expire_after.total_seconds()
),
)
session.headers.update(request_headers)

Expand Down
4 changes: 2 additions & 2 deletions tests/adapters/api/generic_json_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,11 +291,11 @@ def test_generic_json_array(requests_mock: Mocker) -> None:
"""
# for datasette and other probing adapters
requests_mock.get(
"https://api.github.com/repos/apache/superset/-/versions.json",
"https://example.com/-/versions.json",
status_code=404,
)

url = URL("https://api.github.com/repos/apache/superset/stats/punch_card")
url = URL("https://example.com/")
requests_mock.head(str(url), headers={"content-type": "application/json"})
requests_mock.get(
str(url),
Expand Down
Loading

0 comments on commit cdc79f4

Please sign in to comment.