From a7e853e0da5757db324a89c14a201c34f88672e4 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 11 Feb 2024 18:27:28 -0500 Subject: [PATCH 1/2] Make issue instead of PR comment --- asv_watcher/__init__.py | 4 + asv_watcher/_core/watcher.py | 54 +++++++- scripts/app.py | 261 +++++++++++++++++++++++++++++++---- 3 files changed, 287 insertions(+), 32 deletions(-) diff --git a/asv_watcher/__init__.py b/asv_watcher/__init__.py index ce7437d..72589a8 100644 --- a/asv_watcher/__init__.py +++ b/asv_watcher/__init__.py @@ -1,8 +1,12 @@ from __future__ import annotations +import pandas as pd + from asv_watcher._core.detector import RollingDetector from asv_watcher._core.watcher import Watcher +pd.options.mode.copy_on_write = True + __all__ = ["RollingDetector", "Watcher"] diff --git a/asv_watcher/_core/watcher.py b/asv_watcher/_core/watcher.py index c90ecb4..8c89d72 100644 --- a/asv_watcher/_core/watcher.py +++ b/asv_watcher/_core/watcher.py @@ -28,8 +28,7 @@ def commit_range(self, git_hash): prev_git_hash = time_series.shift(1)[ time_series.git_hash == git_hash ].git_hash.iloc[0] - base_url = "https://github.com/pandas-dev/pandas/compare/" - url = f"{base_url}{prev_git_hash}...{git_hash}" + url = f"{prev_git_hash}...{git_hash}" return url def get_regressions(self, git_hash: str) -> list[tuple[str, str]]: @@ -81,7 +80,56 @@ def generate_report(self, git_hash: str) -> str: "\n\n" ) - result += self.commit_range(git_hash) + base_url = "https://github.com/pandas-dev/pandas/compare/" + result += base_url + self.commit_range(git_hash) result += "\n" return result + + def generate_report_v2(self, git_hash: str, pr: str, authors: str) -> str: + regressions = self.get_regressions(git_hash) + for_report: dict[str, list[str]] = {} + for regression in regressions: + for_report[regression[0]] = for_report.get(regression[0], []) + [ + regression[1] + ] + + result = "" + result += ( + f"PR #{pr} may have induced a performance regression. " + "If it was a necessary behavior change, this may have been " + "expected and everything is okay." + "\n\n" + "Please check the links below. If any ASVs are parameterized, " + "the combinations of parameters that a regression has been detected " + "for appear as subbullets." + "\n\n" + ) + + for benchmark, param_combos in for_report.items(): + base_url = "https://asv-runner.github.io/asv-collection/pandas/#" + url = f"{base_url}{benchmark}" + result += f" - [{benchmark}]({url})\n" + for params in param_combos: + if params == "": + continue + params_list = [param for param in params.split("; ")] + params_suffix = "?p-" + "&p-".join(params_list) + url = f"{base_url}{benchmark}{params_suffix}" + url = urllib.parse.quote(url, safe="/:?=&#") + result += f" - [{params}]({url})\n" + result += "\n" + + result += ( + "Subsequent benchmarks may have skipped some commits. The link" + " below lists the commits that are" + " between the two benchmark runs where the regression was identified." + "\n\n" + ) + + base_url = "https://github.com/pandas-dev/pandas/compare/" + result += base_url + self.commit_range(git_hash) + result += "\n\n" + result += "cc @" + ", @".join(authors.split(", ")) + "\n" + + return result diff --git a/scripts/app.py b/scripts/app.py index 3b44158..1c71517 100644 --- a/scripts/app.py +++ b/scripts/app.py @@ -1,10 +1,13 @@ +import json +import re +import subprocess import time from pathlib import Path import dash import pandas as pd import plotly.graph_objects as go -from dash import Dash, Input, Output, dash_table, dcc, html +from dash import Dash, Input, Output, State, dash_table, dcc, html from plotly.subplots import make_subplots from asv_watcher import Watcher @@ -28,6 +31,31 @@ # Initialize the app app = Dash(__name__) + +def execute(cmd): + response = subprocess.run(cmd, shell=True, capture_output=True) + if response.stderr.decode() != "": + raise ValueError(response.stderr.decode()) + return response.stdout.decode() + + +# TODO: Because calling the GH CLI from Juptyer seems to always have color... +def escape_ansi(line): + ansi_escape = re.compile(r"(?:\x1B[@-_]|[\x80-\x9F])[0-?]*[ -/]*[@-~]") + return ansi_escape.sub("", line) + + +response = execute( + "cd /home/richard/dev/pandas/ && NO_COLOR=1 gh label list --limit 200 --json name" +) +labels = [e["name"] for e in json.loads(escape_ansi(response))] + +response = execute( + "cd /home/richard/dev/pandas/ " + "&& gh api repos/:owner/:repo/milestones --jq '.[].title'" +) +milestones = [e for e in response.split("\n") if e != ""] + # App layout app.layout = html.Div( [ @@ -52,16 +80,54 @@ html.Div( [ html.P( - "GitHub comment", style={"display": "inline", "margin-left": "30px"} + "Suspect Commits", + style={"display": "inline", "margin-left": "30px"}, ), - dcc.Clipboard( - id="copy_github_comment", - title="copy", - style={ - "display": "inline", - "fontSize": 20, - "verticalAlign": "top", - }, + dash_table.DataTable( + id="pr_table", + data=pd.DataFrame().to_dict("records"), + page_current=0, + page_size=10, + sort_action="custom", + sort_mode="single", + columns=[ + {"id": "Authors", "name": "Authors"}, + {"id": "PR", "name": "PR", "presentation": "markdown"}, + ], + style_table={"width": "60%"}, + ), + html.Div( + dcc.Input( + id="issue_title", + type="text", + size="80", + ), + ), + html.Div( + dcc.Textarea( + id="issue_body", + value="", + style={"width": "60%", "height": 200}, + ), + ), + html.Div( + dcc.Dropdown( + id="issue_labels", + options=labels, + multi=True, + ), + ), + html.Div( + dcc.Dropdown( + id="issue_milestone", + options=milestones, + multi=False, + ), + ), + html.P( + id="gh_cli_cmd", + children="", + style={"display": "none"}, ), html.Div( children=[ @@ -79,8 +145,19 @@ "align": "center", }, ), + html.Div( + html.Button( + "Create GitHub Issue", + id="issue_generate", + n_clicks=0, + ), + ), ] ), + dcc.ConfirmDialog( + id="issue_confirm", + message="Are you sure you want to create an issue?", + ), ] ) @@ -109,19 +186,137 @@ def update_table(sort_by): @app.callback( - Output("copy_github_comment", "content"), Output("github_comment", "children"), + Input("issue_body", "value"), +) +def update_copy_github_comment(issue_body): + return issue_body + + +@app.callback( + Output("pr_table", "data"), Input("summary", "active_cell"), Input("summary", "derived_viewport_data"), ) -def update_copy_github_comment(active_cell, derived_viewport_data): +def update_pr_table(active_cell, derived_viewport_data): if active_cell: git_hash = derived_viewport_data[active_cell["row"]]["git_hash"] - result = watcher.generate_report(git_hash) - return result, result + commit_range = watcher.commit_range(git_hash) + response = execute( + f"cd /home/richard/dev/pandas" + f" && git rev-list --ancestry-path {commit_range}" + ) + commits = [e for e in response.split("\n") if e != ""] + + data = [] + for commit in commits: + response = execute( + f"cd /home/richard/dev/pandas" + f" && gh pr list" + f' --search "{commit}"' + f" --state merged" + f" --json title,number,author" + ) + titles = [e["title"] for e in json.loads(escape_ansi(response))] + numbers = [e["number"] for e in json.loads(escape_ansi(response))] + authors = [e["author"] for e in json.loads(escape_ansi(response))] + authors = [e for e in authors if not e["is_bot"]] + authors = [e["login"] for e in authors] + assert len(titles) == 1 and len(numbers) == 1 + repo_url = "https://github.com/pandas-dev/pandas" + data.append( + { + "Authors": ", ".join(authors), + "PR": f"[{titles[0]}]({repo_url}/pull/{numbers[0]})", + } + ) + return pd.DataFrame(data).to_dict("records") + return pd.DataFrame().to_dict("records") + + +@app.callback( + Output("issue_title", "value"), + Output("issue_body", "value"), + Output("issue_labels", "value"), + Input("pr_table", "active_cell"), + State("pr_table", "derived_viewport_data"), + State("summary", "active_cell"), + State("summary", "derived_viewport_data"), + prevent_initial_call=True, +) +def update_issue_values(pr_cell, pr_table, summary_cell, summary_table): + if pr_cell and summary_cell: + git_hash = summary_table[summary_cell["row"]]["git_hash"] + + pr_link = pr_table[pr_cell["row"]]["PR"] + idx = pr_link.rfind("/pull/") + pr_number = pr_link[idx + len("/pull/") : -1] + + authors = pr_table[pr_cell["row"]]["Authors"] + + title = f"Potential regression induced by PR #{pr_number}" + body = watcher.generate_report_v2(git_hash, pr_number, authors) + return title, body, ["Performance", "Regression"] return "", "" +@app.callback( + Output("gh_cli_cmd", "children"), + Input("issue_title", "value"), + Input("issue_body", "value"), + Input("issue_labels", "value"), + Input("issue_milestone", "value"), + prevent_initial_call=True, +) +def update_gh_cli_cmd(issue_title, issue_body, issue_labels, issue_milestone): + if not all(label in labels for label in issue_labels): + return "" + if issue_milestone not in milestones: + return "" + issue_labels = ",".join(issue_labels) + assert '"' not in issue_title, issue_title + assert '"' not in issue_body, issue_body + assert '"' not in issue_labels, issue_labels + assert '"' not in issue_milestone, issue_milestone + result = ( + f"gh issue create" + rf' --title "{issue_title}"' + rf' --body "{issue_body}"' + rf' --label "{issue_labels}"' + rf' --milestone "{issue_milestone}"' + ) + return result + + +@app.callback( + Output("gh_cli_cmd", "children", allow_duplicate=True), + Input("issue_confirm", "submit_n_clicks"), + State("gh_cli_cmd", "children"), + prevent_initial_call=True, + allow_duplicate=True, +) +def generate_issue(submit_n_clicks, gh_cli_cmd): + if gh_cli_cmd == "": + return gh_cli_cmd + execute(f"cd /home/richard/dev/pandas && {repr(gh_cli_cmd)}") + # Hack because dash doesn't seem to let you have no output... + return gh_cli_cmd + + +@app.callback( + Output("issue_confirm", "message"), + Output("issue_confirm", "displayed"), + Input("issue_generate", "n_clicks"), + State("gh_cli_cmd", "children"), + prevent_initial_call=True, +) +def display_confirm(n_clicks, gh_cli_cmd): + if gh_cli_cmd == "": + return "", False + message = f"Are you sure you want to create the following issue?\n\n{gh_cli_cmd}" + return message, True + + @app.callback( Output("commit_table", "data"), Input("summary", "active_cell"), @@ -147,9 +342,9 @@ def update_commit_table(active_cell, derived_viewport_data, sort_by): ascending=sort_by[0]["direction"] == "asc", ) - return result[["name", "params", "pct_change", "abs_change", "time"]].to_dict( - "records" - ) + return result[ + ["name", "params", "pct_change", "abs_change", "time", "revision"] + ].to_dict("records") @app.callback( @@ -167,34 +362,42 @@ def update_plot(active_cell, derived_viewport_data): name = derived_viewport_data[active_cell["row"]]["name"] params = derived_viewport_data[active_cell["row"]]["params"] - plot_data = benchmarks.loc[(name, params)][ - [ - "date", - "time_value", - "established_best", - "established_worst", - "is_regression", - ] - ].rename(columns={"time_value": "time"}) + columns = [ + "revision", + "date", + "time", + "established_best", + "established_worst", + "is_regression", + ] + plot_data = ( + benchmarks.loc[(name, params)] + .drop(columns="time") + .rename(columns={"time_value": "time"}) + .reset_index()[columns] + ) fig = make_subplots(specs=[[{"secondary_y": True}]]) for column in plot_data: - if column in ["date", "is_regression"]: + if column in ["date", "is_regression", "revision"]: continue fig.add_trace( go.Scatter( - x=plot_data.eval("revision"), + x=plot_data.index, y=plot_data[column], name=column, ), ) plot_data = plot_data[plot_data.is_regression] + fig.add_trace( go.Scatter( - x=plot_data.eval("revision"), + x=plot_data.index, y=plot_data["time"], mode="markers", name="Regressions", + hovertext=plot_data["date"].astype(str).tolist(), + hoverinfo="text", ) ) fig.update_traces( From c47681514b2ca3d4e8936f15eb6689f7ea02f7b4 Mon Sep 17 00:00:00 2001 From: richard Date: Wed, 28 Feb 2024 21:37:28 -0500 Subject: [PATCH 2/2] More refinements --- asv_watcher/_core/watcher.py | 6 +- scripts/app.py | 202 +++++++++++++++++++++++++---------- 2 files changed, 150 insertions(+), 58 deletions(-) diff --git a/asv_watcher/_core/watcher.py b/asv_watcher/_core/watcher.py index 8c89d72..b95bc28 100644 --- a/asv_watcher/_core/watcher.py +++ b/asv_watcher/_core/watcher.py @@ -109,7 +109,7 @@ def generate_report_v2(self, git_hash: str, pr: str, authors: str) -> str: for benchmark, param_combos in for_report.items(): base_url = "https://asv-runner.github.io/asv-collection/pandas/#" url = f"{base_url}{benchmark}" - result += f" - [{benchmark}]({url})\n" + result += f" - [ ] [{benchmark}]({url})\n" for params in param_combos: if params == "": continue @@ -117,7 +117,7 @@ def generate_report_v2(self, git_hash: str, pr: str, authors: str) -> str: params_suffix = "?p-" + "&p-".join(params_list) url = f"{base_url}{benchmark}{params_suffix}" url = urllib.parse.quote(url, safe="/:?=&#") - result += f" - [{params}]({url})\n" + result += f" - [ ] [{params}]({url})\n" result += "\n" result += ( @@ -128,7 +128,7 @@ def generate_report_v2(self, git_hash: str, pr: str, authors: str) -> str: ) base_url = "https://github.com/pandas-dev/pandas/compare/" - result += base_url + self.commit_range(git_hash) + result += f"[Commit Range]({base_url + self.commit_range(git_hash)})" result += "\n\n" result += "cc @" + ", @".join(authors.split(", ")) + "\n" diff --git a/scripts/app.py b/scripts/app.py index 1c71517..4364d06 100644 --- a/scripts/app.py +++ b/scripts/app.py @@ -39,6 +39,17 @@ def execute(cmd): return response.stdout.decode() +# Exclude any hashes that already have an issue +needle = "Potential regression induced by commit " +cmd = f'gh search issues "{needle}"' +result = execute(cmd) +flagged_hashes = list() +for e in result.split("\t"): + if e.startswith(needle): + flagged_hashes.append(e[len(needle) :]) +summary = summary[~summary.git_hash.str[:7].isin(flagged_hashes)] + + # TODO: Because calling the GH CLI from Juptyer seems to always have color... def escape_ansi(line): ansi_escape = re.compile(r"(?:\x1B[@-_]|[\x80-\x9F])[0-?]*[ -/]*[@-~]") @@ -81,48 +92,108 @@ def escape_ansi(line): [ html.P( "Suspect Commits", - style={"display": "inline", "margin-left": "30px"}, - ), - dash_table.DataTable( - id="pr_table", - data=pd.DataFrame().to_dict("records"), - page_current=0, - page_size=10, - sort_action="custom", - sort_mode="single", - columns=[ - {"id": "Authors", "name": "Authors"}, - {"id": "PR", "name": "PR", "presentation": "markdown"}, - ], - style_table={"width": "60%"}, + style={ + "display": "inline", + "margin-left": "30px", + "font-size": "20px", + }, ), html.Div( - dcc.Input( - id="issue_title", - type="text", - size="80", + dash_table.DataTable( + id="pr_table", + data=pd.DataFrame().to_dict("records"), + page_current=0, + page_size=10, + sort_action="custom", + sort_mode="single", + columns=[ + {"id": "Authors", "name": "Authors"}, + {"id": "PR", "name": "PR", "presentation": "markdown"}, + ], + style_table={"width": "60%", "margin": "auto"}, ), + style={ + "width": "100%", + "margin-left": "auto", + "margin-right": "auto", + "margin-bottom": "30px", + }, ), html.Div( - dcc.Textarea( - id="issue_body", - value="", - style={"width": "60%", "height": 200}, - ), + [ + html.P( + "Title:", + style={ + "width": "50px", + "margin-right": "10px", + "display": "inline", + }, + ), + dcc.Input( + id="issue_title", + type="text", + size="80", + style={"display": "inline"}, + ), + ], + style={"margin-bottom": "30px"}, ), html.Div( - dcc.Dropdown( - id="issue_labels", - options=labels, - multi=True, - ), + [ + html.P( + "Body:", + style={ + "width": "50px", + "margin-right": "10px", + "display": "inline", + "vertical-align": "top", + }, + ), + dcc.Textarea( + id="issue_body", + value="", + style={"width": "60%", "height": 200}, + ), + ], + style={"margin-bottom": "30px"}, ), html.Div( - dcc.Dropdown( - id="issue_milestone", - options=milestones, - multi=False, - ), + [ + html.P( + "Labels:", + style={ + "width": "50px", + "margin-right": "10px", + "display": "inline", + }, + ), + dcc.Dropdown( + id="issue_labels", + options=labels, + multi=True, + style={"width": "900px"}, + ), + ], + style={"margin-bottom": "30px"}, + ), + html.Div( + [ + html.P( + "Milestone:", + style={ + "width": "50px", + "margin-right": "10px", + "display": "inline", + }, + ), + dcc.Dropdown( + id="issue_milestone", + options=milestones, + multi=False, + style={"width": "150px"}, + ), + ], + style={"margin-bottom": "30px"}, ), html.P( id="gh_cli_cmd", @@ -131,27 +202,46 @@ def escape_ansi(line): ), html.Div( children=[ - dcc.Markdown( - id="github_comment", - link_target="_blank", - ) + html.Div( + children=[ + dcc.Markdown( + id="github_comment", + link_target="_blank", + ) + ], + style={ + "border": "2px black solid", + "border-radius": "25px", + "padding": "0px 30px 0px 30px", + "width": "50%", + "float": "left", + "display": "flex", + "align-items": "center", + "justify-content": "center", + }, + ), + html.Div( + html.Button( + "Create GitHub Issue", + id="issue_generate", + n_clicks=0, + ), + style={ + "width": "50%", + "float": "right", + "display": "flex", + "align-items": "center", + "justify-content": "center", + }, + ), ], style={ - "border": "2px black solid", - "border-radius": "25px", - "padding": "0px 30px 0px 30px", - "margin": "auto", - "width": "fit-content", - "align": "center", + "display": "flex", + "width": "80%", + "align-items": "center", + "justify-content": "center", }, ), - html.Div( - html.Button( - "Create GitHub Issue", - id="issue_generate", - n_clicks=0, - ), - ), ] ), dcc.ConfirmDialog( @@ -213,7 +303,7 @@ def update_pr_table(active_cell, derived_viewport_data): response = execute( f"cd /home/richard/dev/pandas" f" && gh pr list" - f' --search "{commit}"' + f' --search "{commit[:7]}"' f" --state merged" f" --json title,number,author" ) @@ -222,7 +312,8 @@ def update_pr_table(active_cell, derived_viewport_data): authors = [e["author"] for e in json.loads(escape_ansi(response))] authors = [e for e in authors if not e["is_bot"]] authors = [e["login"] for e in authors] - assert len(titles) == 1 and len(numbers) == 1 + assert len(titles) == 1, titles + assert len(numbers) == 1, numbers repo_url = "https://github.com/pandas-dev/pandas" data.append( { @@ -254,10 +345,10 @@ def update_issue_values(pr_cell, pr_table, summary_cell, summary_table): authors = pr_table[pr_cell["row"]]["Authors"] - title = f"Potential regression induced by PR #{pr_number}" + title = f"Potential regression induced by commit {git_hash[:7]}" body = watcher.generate_report_v2(git_hash, pr_number, authors) return title, body, ["Performance", "Regression"] - return "", "" + return "", "", ["Performance", "Regression"] @app.callback( @@ -298,7 +389,7 @@ def update_gh_cli_cmd(issue_title, issue_body, issue_labels, issue_milestone): def generate_issue(submit_n_clicks, gh_cli_cmd): if gh_cli_cmd == "": return gh_cli_cmd - execute(f"cd /home/richard/dev/pandas && {repr(gh_cli_cmd)}") + execute(f"cd /home/richard/dev/pandas && {gh_cli_cmd}") # Hack because dash doesn't seem to let you have no output... return gh_cli_cmd @@ -386,6 +477,7 @@ def update_plot(active_cell, derived_viewport_data): x=plot_data.index, y=plot_data[column], name=column, + visible=column in ["time"], ), ) plot_data = plot_data[plot_data.is_regression]