Skip to content

Commit

Permalink
labeler: switch to using git diff for labeling (#4052)
Browse files Browse the repository at this point in the history
* add label calculation script

* add additional ignore files

* add debug mode

* handle spaces in filenames safely

* fix compat, simplify

* set correct default

* fix shebang

* add test for new labeler script

* set env vars, remove jq dep

* include history for merge check

* simplify

* Simplify PR size labeling workflow

- Replace pr-size-labeler with custom labeler script
- Remove redundant labeler_v2 workflow file
- Enhance labeler script to clean up existing size labels before adding new ones

* test label failure

* remove test failure

* Update diff tool to show number of changes per file

- Switched from `--shortstat` to `--numstat` for more detailed file change output

* Refactor ignore arguments in git diff script

- Consolidated exclude arguments into IGNORE_ARGS for clarity

* Enforce stricter PR labeler checks

- Added FAIL_IF_XL environment variable for labeler script execution
  • Loading branch information
mastercactapus authored Sep 4, 2024
1 parent c82aa1a commit 1d33de1
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 34 deletions.
42 changes: 8 additions & 34 deletions .github/workflows/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,38 +10,12 @@ jobs:
# NOTE(review): this is a rendered diff of .github/workflows/labeler.yml in which
# the +/- markers and the indentation were lost. The diff header reports
# 8 additions and 34 deletions, which matches reading the pr-size-labeler step
# and its inputs as removed, and the checkout + "Run labeler script" steps as
# added. Confirm against the real file before relying on this.
if: github.event.pull_request.draft == false
name: Label the PR size
steps:
# Presumably removed: the third-party size-labeler action (the commit message
# says it is replaced by a custom labeler script).
- uses: mastercactapus/pr-size-labeler@eb6a25f40d6a3327581aca35839b60d419e8019a # https://github.com/CodelyTV/pr-size-labeler/pull/61, updated with latest from main
# Presumably added: check out the repository so the labeler script can run.
- uses: actions/checkout@v4
with:
# Presumably removed: inputs of the old pr-size-labeler step, down to the end
# of the files_to_ignore list.
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
xs_label: 'size/xs'
xs_max_size: '15'
s_label: 'size/s'
s_max_size: '100'
m_label: 'size/m'
m_max_size: '250'
l_label: 'size/l'
l_max_size: '500'
xl_label: 'size/xl'
fail_if_xl: 'true'
ignore_deleted: 'true'
message_if_xl: >
This PR exceeds the recommended size of 500 lines.
Please make sure you are NOT addressing multiple issues with one PR.
Note this PR might be rejected due to its size.
files_to_ignore: |
"*.lock"
"graphql2/generated.go"
"graphql2/maplimit.go"
"graphql2/mapconfig.go"
"graphql2/models_gen.go"
"pkg/sysapi/*.pb.go"
"swo/swodb/*.go"
"web/src/*.d.ts"
"Makefile.binaries.mk"
"gadb/*.go"
"migrate/schema.sql"
"go.mod"
"go.sum"
".yarn/releases/*"
"devtools/pgdump-lite/pgd/*"
"timezone/*.txt"
# Presumably added, and belonging to the checkout step's `with:` block. The
# commit message ("include history for merge check") suggests the history is
# needed for the script's merge-base diff.
fetch-depth: 0
# Runs devtools/scripts/github-set-pr-label.sh, which reads PR_NUMBER and calls
# git-diff-label-calc.sh; that script reads FAIL_IF_XL.
- name: Run labeler script
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ github.event.number }}
FAIL_IF_XL: '1'
run: ./devtools/scripts/github-set-pr-label.sh
4 changes: 4 additions & 0 deletions .labelignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Patterns of files to leave out of the PR size calculation.
# devtools/scripts/git-diff-label-calc.sh passes each non-empty line that does
# not start with '#' to `git ls-files` and excludes every file it returns from
# the diff it measures.
*.lock
go.mod
go.sum
.yarn/releases/*
112 changes: 112 additions & 0 deletions devtools/scripts/git-diff-label-calc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/usr/bin/env bash
set -e

# Prints a size label derived from the diff between the current branch and the
# main branch.

# Environment overrides; an unset or empty value falls back to its default.
#   MAIN_BRANCH  branch (under origin/) the diff is taken against
#   FAIL_IF_XL   set to 1 to exit non-zero when the label is the XL one
: "${MAIN_BRANCH:=master}"
: "${FAIL_IF_XL:=0}"

# Fixed settings.
# File listing the patterns of files to leave out of the size count.
IGNORE_PATTERN_FILE=.labelignore

# Size tiers: a diff of at most *_MAX changed lines gets the matching label;
# anything above L_MAX gets XL_LABEL.
XS_LABEL=size/xs XS_MAX=20
S_LABEL=size/s S_MAX=100
M_LABEL=size/m M_MAX=250
L_LABEL=size/l L_MAX=500
XL_LABEL=size/xl

# Debug output is off unless DEBUG is already set in the environment or
# --debug is given as the first argument.
DEBUG=${DEBUG:-0}
case "$1" in
  --debug) DEBUG=1 ;;
esac

# debug [MESSAGE]
#
# Writes MESSAGE (or an empty line when omitted) to stderr, but only while
# debug mode is on, i.e. DEBUG is exactly "1". stdout is never touched, so it
# stays reserved for the script's result.
#
# The check is a string comparison, matching the one used around `git diff`:
# an unset or non-numeric DEBUG (e.g. "true") simply means "off" instead of
# making `[` print an "integer expression expected" error on every call.
# printf is used rather than echo so messages such as "-n" are printed verbatim.
debug() {
  if [ "$DEBUG" = "1" ]; then
    printf '%s\n' "${1-}" >&2
  fi
}

# Pathspecs of the form `:(exclude)<file>` that are appended to the diff command.
IGNORE_ARGS=()

# Exclude generated files. `git grep` lists every tracked file mentioning a
# "generated by ... DO NOT EDIT" header; only files carrying DO NOT EDIT on
# their very first line are excluded, so code that merely contains the header
# text (e.g. the generators themselves) is still counted.
while IFS= read -r file; do
  if head -n 1 "$file" | grep -q "DO NOT EDIT"; then
    debug "IGNORE: generated file: $file"
    IGNORE_ARGS+=(":(exclude)$file")
  fi
done < <(git grep -l -e 'generated by .* DO NOT EDIT')

# Exclude every tracked file matched by a pattern from the ignore file.
# `|| [ -n "$pattern" ]` keeps the final line when the file has no trailing
# newline (read returns non-zero there but still fills the variable).
while IFS= read -r pattern || [ -n "$pattern" ]; do
  # Tolerate CRLF line endings.
  pattern=${pattern%$'\r'}

  # Skip empty lines and comments.
  case "$pattern" in
    '' | \#*) continue ;;
  esac

  # Resolve the pattern to concrete files; `--` keeps a pattern that starts
  # with a dash from being parsed as an option.
  while IFS= read -r file; do
    debug "IGNORE: pattern '$pattern': $file"
    IGNORE_ARGS+=(":(exclude)$file")
  done < <(git ls-files -- "$pattern")
# A missing ignore file is intentionally not an error: cat's failure is
# silenced and the loop just sees no input.
done < <(cat "$IGNORE_PATTERN_FILE" 2>/dev/null)

# Measure what merging into the main branch would change. Flags, in order:
#   --ignore-blank-lines  skip changes that only involve blank lines
#   --numstat             print only per-file added/deleted counts
#   -w                    ignore whitespace
#   --diff-filter=d       leave deleted files out
#   --minimal             produce the smallest diff possible
#   --merge-base          diff from the common ancestor with origin/$MAIN_BRANCH
#   "${IGNORE_ARGS[@]}"   the `:(exclude)file` pathspecs to ignore
# In debug mode the command is traced with `set -x` inside the substitution.
OUTPUT=$(
  [ "$DEBUG" != "1" ] || set -x
  git diff --ignore-blank-lines --numstat -w --diff-filter=d --minimal \
    --merge-base "origin/$MAIN_BRANCH" -- "${IGNORE_ARGS[@]}"
)

# Debug dump of the raw per-file stats, framed by blank lines.
for debug_line in "" "DIFF FILE STATS" "===============" "$OUTPUT" ""; do
  debug "$debug_line"
done

# Total changed lines: added ($1) plus deleted ($2) over every numstat row.
sum=$(awk '{ total += $1 + $2 } END { print total }' <<<"$OUTPUT")

debug "TOTAL: $sum"

# Pick the label of the first (smallest) tier whose limit the total fits in;
# when it fits none of them the XL label stays selected.
LABEL=$XL_LABEL
for tier in "$XS_MAX:$XS_LABEL" "$S_MAX:$S_LABEL" "$M_MAX:$M_LABEL" "$L_MAX:$L_LABEL"; do
  # Each tier is "<max>:<label>".
  if [ "$sum" -le "${tier%%:*}" ]; then
    LABEL=${tier#*:}
    break
  fi
done

debug "LABEL: $LABEL"

# When FAIL_IF_XL is 1 and the diff landed in the XL tier, abort with status 1
# instead of printing the label.
# The explanation goes to stderr: stdout carries the label and is captured by
# callers using $(...), so a message written there would never be shown.
if [ "${FAIL_IF_XL:-0}" -eq 1 ] && [ "$LABEL" = "$XL_LABEL" ]; then
  {
    echo ""
    echo "ERROR: This PR exceeds the maximum size of $L_MAX lines."
    echo "Please make sure you are NOT addressing multiple issues with one PR."
  } >&2
  exit 1
fi

# The label is the script's only stdout output.
echo "$LABEL"
19 changes: 19 additions & 0 deletions devtools/scripts/github-set-pr-label.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
set -e

# PR_NUMBER (environment) identifies the pull request that the gh commands
# below operate on. Refuse to run without it.
# The variable must be expanded here: testing the literal string "PR_NUMBER"
# is never empty, so the check could never fire.
if [ -z "$PR_NUMBER" ]; then
  echo "PR_NUMBER is not set" >&2
  exit 1
fi

# Compute the size label. Only the helper's stdout is captured; set -e aborts
# this script if the helper exits non-zero.
LABEL=$(./devtools/scripts/git-diff-label-calc.sh --debug)

# Fetch the PR's current size/* labels, one per line. Doing this as a plain
# assignment (rather than inside a `for ... in $(...)` list) lets set -e stop
# the script when `gh pr view` fails, and avoids word-splitting/globbing of
# the label names.
existing_labels=$(gh pr view "$PR_NUMBER" --json labels --jq '.labels[] | select(.name | startswith("size/")) | .name')

# Remove any existing size/* labels other than the one being applied.
# The list is read on fd 3 so gh cannot consume it through stdin.
while IFS= read -r label <&3; do
  if [ -z "$label" ] || [ "$label" = "$LABEL" ]; then
    continue # Nothing to remove, or it is the label we want to keep
  fi
  gh pr edit "$PR_NUMBER" --remove-label "$label"
done 3<<<"$existing_labels"

gh pr edit "$PR_NUMBER" --add-label "$LABEL"

0 comments on commit 1d33de1

Please sign in to comment.