Skip to content

Commit

Permalink
Merge pull request #71 from mcdonnnj/improvement/configure_github_timeouts
Browse files Browse the repository at this point in the history

Add the ability to configure GitHub session timeouts
  • Loading branch information
IanLee1521 authored Mar 21, 2023
2 parents aabc24f + f01f11e commit e4918c0
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 8 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ The basic structure is:
"token": null, // Private token for accessing this GitHub instance
"public_only": true, // Only inventory public repositories

"connect_timeout": 4, // The timeout in seconds for connecting to the server
"read_timeout": 10, // The timeout in seconds to wait for a response from the server

"orgs": [ ... ], // List of organizations to inventory
"repos": [ ... ], // List of single repositories to inventory
"exclude": [ ... ] // List of organizations / repositories to exclude from inventory
Expand Down
10 changes: 9 additions & 1 deletion scraper/code_gov/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,22 @@ def process_config(config):
if config.get("github_gov_orgs", False):
github_instances.append({"url": "https://github.com", "orgs": gov_orgs()})
for instance in github_instances:
timeouts = {}
url = instance.get("url", "https://github.com")
orgs = instance.get("orgs", [])
repos = instance.get("repos", [])
public_only = instance.get("public_only", True)
excluded = instance.get("exclude", [])
token = instance.get("token", None)
connect_timeout = instance.get("connect_timeout", None)
read_timeout = instance.get("read_timeout", None)

gh_session = github.connect(url, token)
if connect_timeout is not None:
timeouts["default_connect_timeout"] = connect_timeout
if read_timeout is not None:
timeouts["default_read_timeout"] = read_timeout

gh_session = github.connect(url, token, timeouts)

for repo in github.query_repos(gh_session, orgs, repos, public_only):
if repo.owner.login in excluded or repo.full_name in excluded:
Expand Down
24 changes: 17 additions & 7 deletions scraper/github/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def gov_orgs():
return list(us_gov_github_orgs)


def create_session(token=None):
def create_session(token=None, timeouts=None):
"""
Create a github3.py session connected to GitHub.com
Expand All @@ -45,23 +45,30 @@ def create_session(token=None):
if token is None:
token = os.environ.get("GITHUB_API_TOKEN", None)

gh_session = github3.login(token=token)
if timeouts is None:
timeouts = {}

custom_session = github3.session.GitHubSession(**timeouts)
gh_session = github3.GitHub(token=token, session=custom_session)

if gh_session is None:
raise RuntimeError("Invalid or missing GITHUB_API_TOKEN")

return gh_session


def create_enterprise_session(url, token=None):
def create_enterprise_session(url, token=None, timeouts=None):
"""
Create a github3.py session for a GitHub Enterprise instance
If token is not provided, will attempt to use the GITHUB_API_TOKEN
environment variable if present.
"""
if timeouts is None:
timeouts = {}

gh_session = github3.enterprise_login(url=url, token=token)
custom_session = github3.session.GitHubSession(**timeouts)
gh_session = github3.GitHubEnterprise(url=url, token=token, session=custom_session)

if gh_session is None:
msg = "Unable to connect to GitHub Enterprise (%s) with provided token."
Expand Down Expand Up @@ -105,16 +112,19 @@ def _check_api_limits(gh_session, api_required=250):
return


def connect(url="https://github.com", token=None):
def connect(url="https://github.com", token=None, timeouts=None):
"""
Create a GitHub session for making requests
"""

if timeouts is None:
timeouts = {}

gh_session = None
if url == "https://github.com":
gh_session = create_session(token)
gh_session = create_session(token, timeouts)
else:
gh_session = create_enterprise_session(url, token)
gh_session = create_enterprise_session(url, token, timeouts)

if gh_session is None:
msg = "Unable to connect to (%s) with provided token."
Expand Down

0 comments on commit e4918c0

Please sign in to comment.