diff --git a/analysis/generate_paper.sh b/analysis/generate_paper.sh
index d20b5361..dcfb5c8c 100755
--- a/analysis/generate_paper.sh
+++ b/analysis/generate_paper.sh
@@ -47,7 +47,7 @@ main() {
     #
    # Untangling performance
     #
-    python src/python/main/analysis/print_median_performance.py --d4j "$D4J_SCORE_FILE" --lltc4j "$LLTC4J_SCORE_FILE" > "${PAPER_REPOSITORY}/tables/tool-performance.tex" 2> "${PAPER_REPOSITORY}/lib/tool-performance.tex"
+    python src/python/main/analysis/print_performance.py --d4j "$D4J_SCORE_FILE" --lltc4j "$LLTC4J_SCORE_FILE" --aggregator 'mean' > "${PAPER_REPOSITORY}/tables/tool-performance.tex" 2> "${PAPER_REPOSITORY}/lib/tool-performance.tex"

     #
     # Untangling statistics
diff --git a/src/python/main/analysis/print_median_performance.py b/src/python/main/analysis/print_performance.py
similarity index 89%
rename from src/python/main/analysis/print_median_performance.py
rename to src/python/main/analysis/print_performance.py
index 8dd2683a..f4c6550e 100755
--- a/src/python/main/analysis/print_median_performance.py
+++ b/src/python/main/analysis/print_performance.py
@@ -19,7 +19,7 @@
 PRECISION = 2


-def main(d4j_file:str, lltc4j_file:str):
+def main(d4j_file: str, lltc4j_file: str, aggregator: str):
     """
     Implementation of the script's logic. See the script's documentation for details.

@@ -31,18 +31,17 @@ def main(d4j_file:str, lltc4j_file:str):
     df_scores = load_dataframes(d4j_file, lltc4j_file, names=["Defects4J", "LLTC4J"])

     # calculate performance
-    performance_operator='median'
     df_performance = df_scores.groupby(["dataset"]).agg(
         {
-            "smartcommit_rand_index": performance_operator,
-            "flexeme_rand_index": performance_operator,
-            "filename_rand_index": performance_operator,
+            "smartcommit_rand_index": aggregator,
+            "flexeme_rand_index": aggregator,
+            "filename_rand_index": aggregator,
         }
     )
     df_performance = clean_labels(df_performance)

     # print performance in latex format
-    print_performance_commands(df_performance, performance_operator)
+    print_performance_commands(df_performance, aggregator)
     print_performance_table(df_performance)

 def print_performance_table(dataframe: pd.DataFrame):
@@ -135,5 +134,12 @@ def clean_labels(dataframe: pd.DataFrame):
         metavar="LLTC4J_SCORE_FILE",
     )

+    parser.add_argument(
+        "--aggregator",
+        help="The aggregation operation used to summarize the performance scores, e.g., 'median' or 'mean'.",
+        required=True,
+        metavar="AGGREGATOR",
+    )
+
     args = parser.parse_args()
-    main(args.d4j, args.lltc4j)
+    main(args.d4j, args.lltc4j, args.aggregator)
diff --git a/src/python/test/analysis/test_print_median_performance.py b/src/python/test/analysis/test_print_performance.py
similarity index 57%
rename from src/python/test/analysis/test_print_median_performance.py
rename to src/python/test/analysis/test_print_performance.py
index f2917949..3860fc74 100644
--- a/src/python/test/analysis/test_print_median_performance.py
+++ b/src/python/test/analysis/test_print_performance.py
@@ -1,9 +1,9 @@
 """
-Tests for print_median_performance.py
+Tests for print_performance.py
 """

 import pytest
-import src.python.main.analysis.print_median_performance as print_median_performance
+import src.python.main.analysis.print_performance as print_performance


 @pytest.fixture
@@ -35,12 +35,11 @@ def sample_lltc4j_scores(tmpdir):

     return str(file)

-
 def test_calculate_performance(sample_d4j_scores, sample_lltc4j_scores, capfd):
     """
     Tests that the performance metrics are calculated correctly.
""" - print_median_performance.main(sample_d4j_scores, sample_lltc4j_scores) + print_median_performance.main(sample_d4j_scores, sample_lltc4j_scores, "median") captured = capfd.readouterr() @@ -68,3 +67,36 @@ def test_calculate_performance(sample_d4j_scores, sample_lltc4j_scores, capfd): assert captured.out == expected_standard_output assert captured.err == expected_error_output + +def test_calculate_performance(sample_d4j_scores, sample_lltc4j_scores, capfd): + """ + Tests that the performance metrics are calculated correctly. + """ + print_median_performance.main(sample_d4j_scores, sample_lltc4j_scores, "mean") + + captured = capfd.readouterr() + + expected_standard_output = ( + "\\begin{tabular}{lrrr}\n" + "\\toprule\n" + " & Flexeme & SmartCommit & File-based \\\\\n" + "Dataset & & & \\\\\n" + "\\midrule\n" + "Defects4J & 0.65 & \\bfseries 0.75 & 0.55 \\\\\n" + "LLTC4J & 0.40 & \\bfseries 0.68 & 0.52 \\\\\n" + "\\bottomrule\n" + "\\end{tabular}\n" + "\n" + ) + + expected_error_output = ( + "\\newcommand\\defectsfjFlexemeMean{0.65\\xspace}\n" + "\\newcommand\\defectsfjSmartcommitMean{0.75\\xspace}\n" + "\\newcommand\\defectsfjFilebasedMean{0.55\\xspace}\n" + "\\newcommand\\lltcfjFlexemeMean{0.4\\xspace}\n" + "\\newcommand\\lltcfjSmartcommitMean{0.68\\xspace}\n" + "\\newcommand\\lltcfjFilebasedMean{0.52\\xspace}\n" + ) + + assert captured.out == expected_standard_output + assert captured.err == expected_error_output