From 9636e3c34d027b02ffb8b54aea5e2fd5217e3c49 Mon Sep 17 00:00:00 2001 From: David Alber Date: Sat, 24 Mar 2018 21:20:54 -0700 Subject: [PATCH] Using commit search to identify new contributors The current approach to checking for new contributors does not work in repositories with more than 500 contributors because the contributors list API endpoint only associates the first 500 committer emails with GitHub usernames. See #41 for more explanation. This commit modifies the new contributor check to use the commit search API endpoint. A drawback of this approach is that (at least currently) that endpoint does not provide useful information for fork repositories, which is why the new contributor check has a condition on whether the repository is a fork. --- highfive/newpr.py | 35 +++++++++--------- highfive/tests/test_newpr.py | 69 ++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 17 deletions(-) diff --git a/highfive/newpr.py b/highfive/newpr.py index 42e41b38..30cc3f8d 100755 --- a/highfive/newpr.py +++ b/highfive/newpr.py @@ -23,6 +23,7 @@ user_collabo_url = "https://api.github.com/repos/%s/%s/collaborators/%s" issue_url = "https://api.github.com/repos/%s/%s/issues/%s" issue_labels_url = "https://api.github.com/repos/%s/%s/issues/%s/labels" +commit_search_url = "https://api.github.com/search/commits?q=repo:%s/%s+author:%s" welcome_with_reviewer = '@%s (or someone else)' welcome_without_reviewer = "@nrc (NB. this repo may be misconfigured)" @@ -165,24 +166,24 @@ def parse_header_links(value): return links -def is_new_contributor(username, owner, repo, user, token, config): - if 'contributors' in config and username in config['contributors']: +def is_new_contributor(username, owner, repo, user, token, payload): + # If this is a fork, we do not treat anyone as a new user. This is + # because the API endpoint called in this function indicates all + # users in repository forks have zero commits. + if payload['repository']['fork']: return False - # iterate through the pages to try and find the contributor - url = contributors_url % (owner, repo) - while True: - stats_raw = api_req("GET", url, None, user, token) - stats = json.loads(stats_raw['body']) - links = parse_header_links(stats_raw['header'].get('Link')) - - for contributor in stats: - if contributor['login'] == username: - return False - - if not links or 'next' not in links: - return True - url = links['next'] + try: + result = api_req( + 'GET', commit_search_url % (owner, repo, username), None, user, token, + 'application/vnd.github.cloak-preview' + ) + return json.loads(result['body'])['total_count'] > 0 + except urllib2.HTTPError, e: + if e.code == 422: + return False + else: + raise e # If the user specified a reviewer, return the username, otherwise returns None. def find_reviewer(msg): @@ -374,7 +375,7 @@ def new_pr(payload, user, token): set_assignee(reviewer, owner, repo, issue, user, token, author, to_mention) - if is_new_contributor(author, owner, repo, user, token, config): + if is_new_contributor(author, owner, repo, user, token, payload): post_comment(welcome_msg(reviewer, config), owner, repo, issue, user, token) elif post_msg: post_comment(review_msg(reviewer, author), owner, repo, issue, user, token) diff --git a/highfive/tests/test_newpr.py b/highfive/tests/test_newpr.py index c65278d0..c7396c57 100644 --- a/highfive/tests/test_newpr.py +++ b/highfive/tests/test_newpr.py @@ -576,6 +576,75 @@ def test_no_assignee(self): self.mocks['client'].send_then_quit.assert_not_called() self.mocks['post_comment'].assert_not_called() +class TestIsNewContributor(TestNewPR): + @classmethod + def setUpClass(cls): + cls.username = 'commitUser' + cls.owner = 'repo-owner' + cls.repo = 'repo-name' + cls.user = 'integrationUser' + cls.token = 'credential' + + def setUp(self): + super(TestIsNewContributor, self).setUp() + self.payload = {'repository': {'fork': False}} + self.patchers = { + 'api_req': mock.patch('highfive.newpr.api_req'), + } + self.mocks = {k: v.start() for k,v in self.patchers.iteritems()} + + def tearDown(self): + super(TestIsNewContributor, self).tearDown() + + for patcher in self.patchers.itervalues(): + patcher.stop() + + def is_new_contributor(self): + return newpr.is_new_contributor( + self.username, self.owner, self.repo, self.user, self.token, + self.payload + ) + + def api_return(self, total_count): + return { + 'body': json.dumps({'total_count': total_count}), + 'header': {}, + } + + def assert_api_req_call(self): + self.mocks['api_req'].assert_called_once_with( + 'GET', + 'https://api.github.com/search/commits?q=repo:%s/%s+author:%s' % ( + self.owner, self.repo, self.username + ), None, self.user, self.token, + 'application/vnd.github.cloak-preview' + ) + + def test_is_new_contributor_fork(self): + self.payload['repository']['fork'] = True + self.assertFalse(self.is_new_contributor()) + self.mocks['api_req'].assert_not_called() + + def test_is_new_contributor_has_commits(self): + self.mocks['api_req'].return_value = self.api_return(5) + self.assertTrue(self.is_new_contributor()) + self.assert_api_req_call() + + def test_is_new_contributor_no_commits(self): + self.mocks['api_req'].return_value = self.api_return(0) + self.assertFalse(self.is_new_contributor()) + self.assert_api_req_call() + + def test_is_new_contributor_nonexistent_user(self): + self.mocks['api_req'].side_effect = HTTPError(None, 422, None, None, None) + self.assertFalse(self.is_new_contributor()) + self.assert_api_req_call() + + def test_is_new_contributor_error(self): + self.mocks['api_req'].side_effect = HTTPError(None, 403, None, None, None) + self.assertRaises(HTTPError, self.is_new_contributor) + self.assert_api_req_call() + class TestPostWarnings(TestNewPR): @classmethod def setUpClass(cls):