forked from sdgilley/learn-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpr-report.py
134 lines (115 loc) · 5.34 KB
/
pr-report.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
'''
This script shows the files deleted or modified in a PR in azureml-examples
If any of these files are referenced azure-docs-pr,
the corresponding file (labeled referenced_from_file) is also shown.
To run this script, first run find_snippets.py to create the snippets.csv file.
Then run from command line:
python pr-report.py <PR number>
To decide if the PR is safe to merge:
* If any deleted cell in a MODIFIED file is referenced in azure-docs-pr, PR is not ready to merge
* If any DELETED file is referenced, PR is not ready to merge.
'''
import pandas as pd
import sys
import gh_auth as a
import utilities as h
# read arguments from command line - pr and optionally, whether to authenticate
import argparse
parser = argparse.ArgumentParser(description='Process a PR number.') # Create the parser
# Add the arguments
parser.add_argument('pr', type=int, help='The PR number you are interested in.')
args = parser.parse_args() # Parse the arguments
pr = args.pr
# fix truncation?
pd.set_option('display.max_colwidth', 500)
# form the URL for the GitHub API
url = f"https://api.github.com/repos/Azure/azureml-examples/pulls/{pr}/files?per_page=100"
print(f"\n============================== PR: {pr} ==============================")
print(f"https://github.com/Azure/azureml-examples/pull/{pr}/files\n")
prfiles = a.get_auth_response(url)
repo = a.connect_repo("Azure/azureml-examples")
if 'message' in prfiles:
print("Error occurred. Check the PR number and try again.")
print(prfiles)
sys.exit()
else:
deleted_files = [file['filename'] for file in prfiles if file['status'] == 'removed']
modified_files = [(file['filename'], file['blob_url']) for file in prfiles if file['status'] == 'modified']
added_files = [file['filename'] for file in prfiles if file['status'] == 'added']
snippets = h.read_snippets() # read the snippets file
# Process the files:
print(f"ADDED FILES: {len(added_files)}\n") # just for info about the PR
modified = len(modified_files)
deleted = len(deleted_files)
print(f"MODIFIED: {modified}")
data =[] # create an empty list to hold data for modified files that are referenced
nb_mods = [] # create an empty list to hold data for modified notebooks
if modified > 0:
for file, blob_url in modified_files:
if (snippets['ref_file'] == file).any():
snippet_match = snippets.loc[snippets['ref_file'] == file, 'from_file']
# Check if there are deleted nb named cells or code comments
nb, adds, deletes, blob_url = h.find_changes(file, prfiles, blob_url)
# print (nb, adds, deletes)
if nb:
nb_mods.append(blob_url)
# print("added to nb_mods: ", file)
deleted_cells = [value for value in deletes if value not in adds]
if deleted_cells:
cell_type = "Notebook" if nb else "Code"
for cell in deleted_cells:
# Append the data to the list
data.append({'Modified File': file,
'Referenced In': snippet_match.to_string(index=False),
'Cell Type': cell_type,
'Cell': cell})
# print(f"*** {cell}")
if data == []:
print("None of the modified files have deleted cells or code snippets.\n")
else:
# Group the data by 'Modified File' and 'Referenced In'
grouped_data = {}
for item in data:
key = (item['Modified File'], item['Referenced In'])
if key not in grouped_data:
grouped_data[key] = []
grouped_data[key].append(item['Cell'])
# Print the grouped data
for (modified_file, referenced_in), cells in grouped_data.items():
print(f"Modified File: {modified_file} \n Referenced in:")
refs = referenced_in.split('\n')
for ref in refs:
print(f" https://github.com/MicrosoftDocs/azure-docs-pr/edit/main/articles/machine-learning/{ref.strip()}")
print(f" {cell_type} cells deleted: {len(cells)}")
for cell in cells:
print(f" * {cell}")
# compare the sha to this same file in branch "temp-fix"
h.compare_branches(repo, file, "main", "temp-fix")
print()
# also print all the modified notebooks
if nb_mods:
print("MODIFIED NOTEBOOKS\nFollow each link to ensure notebooks are valid:")
nb_mods = list(set(nb_mods)) # remove duplicates
for file in nb_mods:
print(f"* {file}\n")
print(f"DELETED: {deleted}")
if deleted > 0:
found = 0
for file in deleted_files:
if (snippets['ref_file'] == file).any():
snippet_match = snippets.loc[snippets['ref_file'] == file, 'from_file']
print(f"DELETED FILE: {file} \n Referenced in:")
refs = snippet_match.to_string(index=False).split('\n')
for ref in refs:
print(f"* https://github.com/MicrosoftDocs/azure-docs-pr/edit/main/articles/machine-learning/{ref.strip()}")
# print(snippet_match.to_string(index=False))
h.compare_branches(repo, file, "main", "temp-fix")
found = +1
if found == 0:
print("None of the deleted files are referenced in azure-docs-pr.\n")
print(f"\n============================== PR: {pr} ==============================\n")
## test PRs:
# 3081 - no problems
# 2890 - deletes files
# 2888 - deletes ids in a file
# 3113 - deletes a cell in a notebook