Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GitHub Headers #54

Merged
merged 2 commits on
Jul 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions scraper/code_gov/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def from_github3(klass, repository, labor_hours=True):

project["downloadURL"] = repository.downloads_url

project["languages"] = [l for l, _ in repository.languages()]
project["languages"] = [lang for lang, _ in repository.languages()]

# project['partners'] = []

Expand Down Expand Up @@ -337,7 +337,7 @@ def from_gitlab(klass, repository, labor_hours=True, fetch_languages=False):
archive_suffix = "/projects/%s/repository/archive" % repository.get_id()
project["downloadURL"] = api_url + archive_suffix

# project['languages'] = [l for l, _ in repository.languages()]
# project['languages'] = [lang for lang, _ in repository.languages()]

if fetch_languages:
project["languages"] = [*repository.languages()]
Expand Down
56 changes: 45 additions & 11 deletions scraper/github/queryManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def _readGQL(self, filePath, verbose=False):
self.__query = query_in
return query_in

def queryGitHubFromFile(self, filePath, gitvars={}, verbosity=0, **kwargs):
def queryGitHubFromFile(self, filePath, gitvars=None, verbosity=0, **kwargs):
"""Submit a GitHub GraphQL query from a file.

Can only be used with GraphQL queries.
Expand All @@ -156,7 +156,7 @@ def queryGitHubFromFile(self, filePath, gitvars={}, verbosity=0, **kwargs):
.. _GitHub GraphQL Explorer:
https://developer.github.com/v4/explorer/
gitvars (Optional[Dict]): All query variables.
Defaults to empty.
Defaults to None.
GraphQL Only.
verbosity (Optional[int]): Changes output verbosity levels.
If < 0, all extra printouts are suppressed.
Expand All @@ -169,6 +169,9 @@ def queryGitHubFromFile(self, filePath, gitvars={}, verbosity=0, **kwargs):
Dict: A JSON style dictionary.

"""
if not gitvars:
gitvars = {}

gitquery = self._readGQL(filePath, verbose=(verbosity >= 0))
return self.queryGitHub(
gitquery, gitvars=gitvars, verbosity=verbosity, **kwargs
Expand All @@ -177,14 +180,15 @@ def queryGitHubFromFile(self, filePath, gitvars={}, verbosity=0, **kwargs):
def queryGitHub(
self,
gitquery,
gitvars={},
gitvars=None,
verbosity=0,
paginate=False,
cursorVar=None,
keysToList=[],
keysToList=None,
rest=False,
requestCount=0,
pageNum=0,
headers=None,
):
"""Submit a GitHub query.

Expand All @@ -194,7 +198,7 @@ def queryGitHub(
query: 'query { viewer { login } }'
endpoint: '/user'
gitvars (Optional[Dict]): All query variables.
Defaults to empty.
Defaults to None.
GraphQL Only.
verbosity (Optional[int]): Changes output verbosity levels.
If < 0, all extra printouts are suppressed.
Expand All @@ -208,7 +212,7 @@ def queryGitHub(
GraphQL Only.
keysToList (Optional[List[str]]): Ordered list of keys needed to
retrieve the list in the query results to be extended by
pagination. Defaults to empty.
pagination. Defaults to None.
Example:
['data', 'viewer', 'repositories', 'nodes']
GraphQL Only.
Expand All @@ -217,11 +221,20 @@ def queryGitHub(
requestCount (Optional[int]): Counter for repeated requests.
pageNum (Optional[int]): Counter for pagination.
For user readable log messages only, does not affect data.
headers (Optional[Dict]): Additional headers.
Defaults to None.

Returns:
Dict: A JSON style dictionary.

"""
if not gitvars:
gitvars = {}
if not keysToList:
keysToList = []
if not headers:
headers = {}

requestCount += 1
pageNum = 0 if pageNum < 0 else pageNum # no negative page numbers
pageNum += 1
Expand All @@ -232,7 +245,11 @@ def queryGitHub(
(verbosity >= 0), "Sending %s query..." % ("REST" if rest else "GraphQL")
)
response = self._submitQuery(
gitquery, gitvars=gitvars, verbose=(verbosity > 0), rest=rest
gitquery,
gitvars=gitvars,
verbose=(verbosity > 0),
rest=rest,
headers=headers,
)
_vPrint((verbosity >= 0), "Checking response...")
_vPrint((verbosity >= 0), "HTTP/1.1 " + response["headDict"]["Status"])
Expand Down Expand Up @@ -263,6 +280,7 @@ def queryGitHub(
rest=rest,
requestCount=(requestCount - 1),
pageNum=pageNum,
headers=headers,
)
except KeyError:
# Handles error cases that don't return X-RateLimit data
Expand Down Expand Up @@ -295,6 +313,7 @@ def queryGitHub(
rest=rest,
requestCount=requestCount,
pageNum=pageNum,
headers=headers,
)
# Check for server error responses
if statusNum == 502 or statusNum == 503:
Expand Down Expand Up @@ -323,6 +342,7 @@ def queryGitHub(
rest=rest,
requestCount=requestCount,
pageNum=pageNum,
headers=headers,
)
# Check for other error responses
if statusNum >= 400 or statusNum == 204:
Expand Down Expand Up @@ -366,6 +386,7 @@ def queryGitHub(
rest=rest,
requestCount=requestCount,
pageNum=pageNum,
headers=headers,
)
else:
raise RuntimeError(
Expand All @@ -389,6 +410,7 @@ def queryGitHub(
rest=rest,
requestCount=0,
pageNum=pageNum,
headers=headers,
)
outObj.extend(nextObj)
elif not rest:
Expand All @@ -415,6 +437,7 @@ def queryGitHub(
rest=rest,
requestCount=0,
pageNum=pageNum,
headers=headers,
)
newPage = nextObj
for key in keysToList[0:-1]:
Expand All @@ -424,7 +447,9 @@ def queryGitHub(

return outObj

def _submitQuery(self, gitquery, gitvars={}, verbose=False, rest=False):
def _submitQuery(
self, gitquery, gitvars=None, verbose=False, rest=False, headers=None
):
"""Send a curl request to GitHub.

Args:
Expand All @@ -433,11 +458,13 @@ def _submitQuery(self, gitquery, gitvars={}, verbose=False, rest=False):
query: 'query { viewer { login } }'
endpoint: '/user'
gitvars (Optional[Dict]): All query variables.
Defaults to empty.
Defaults to None.
verbose (Optional[bool]): If False, stderr prints will be
suppressed. Defaults to False.
rest (Optional[bool]): If True, uses the REST API instead
of GraphQL. Defaults to False.
headers (Optional[Dict]): Additional headers.
Defaults to None.

Returns:
{
Expand All @@ -448,17 +475,24 @@ def _submitQuery(self, gitquery, gitvars={}, verbose=False, rest=False):
}

"""
if not gitvars:
gitvars = {}
if not headers:
headers = {}

authhead = {"Authorization": "bearer " + self.__githubApiToken}
if not rest:
gitqueryJSON = json.dumps(
{"query": gitquery, "variables": json.dumps(gitvars)}
)
fullResponse = requests.post(
"https://api.github.com/graphql", data=gitqueryJSON, headers=authhead
"https://api.github.com/graphql",
data=gitqueryJSON,
headers={**authhead, **headers},
)
else:
fullResponse = requests.get(
"https://api.github.com" + gitquery, headers=authhead
"https://api.github.com" + gitquery, headers={**authhead, **headers}
)
_vPrint(
verbose,
Expand Down