Skip to content

Commit

Permalink
Move aggregation operator to cli argument
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomsch committed Dec 9, 2023
1 parent 2aaa1fe commit fd42341
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 12 deletions.
2 changes: 1 addition & 1 deletion analysis/generate_paper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ main() {
#
# Untangling performance
#
python src/python/main/analysis/print_median_performance.py --d4j "$D4J_SCORE_FILE" --lltc4j "$LLTC4J_SCORE_FILE" > "${PAPER_REPOSITORY}/tables/tool-performance.tex" 2> "${PAPER_REPOSITORY}/lib/tool-performance.tex"
python src/python/main/analysis/print_performance.py --d4j "$D4J_SCORE_FILE" --lltc4j "$LLTC4J_SCORE_FILE" --aggregator 'mean' > "${PAPER_REPOSITORY}/tables/tool-performance.tex" 2> "${PAPER_REPOSITORY}/lib/tool-performance.tex"

#
# Untangling statistics
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
PRECISION = 2


def main(d4j_file:str, lltc4j_file:str):
def main(d4j_file:str, lltc4j_file:str, aggregator:str):
"""
Implementation of the script's logic. See the script's documentation for details.
Expand All @@ -31,18 +31,17 @@ def main(d4j_file:str, lltc4j_file:str):
df_scores = load_dataframes(d4j_file, lltc4j_file, names=["Defects4J", "LLTC4J"])

# calculate performance
performance_operator='median'
df_performance = df_scores.groupby(["dataset"]).agg(
{
"smartcommit_rand_index": performance_operator,
"flexeme_rand_index": performance_operator,
"filename_rand_index": performance_operator,
"smartcommit_rand_index": aggregator,
"flexeme_rand_index": aggregator,
"filename_rand_index": aggregator,
}
)
df_performance = clean_labels(df_performance)

# print performance in latex format
print_performance_commands(df_performance, performance_operator)
print_performance_commands(df_performance, aggregator)
print_performance_table(df_performance)

def print_performance_table(dataframe: pd.DataFrame):
Expand Down Expand Up @@ -135,5 +134,12 @@ def clean_labels(dataframe: pd.DataFrame):
metavar="LLTC4J_SCORE_FILE",
)

# Required option naming the aggregation operation; its value is forwarded
# unchanged to main() as the `aggregator` argument.
parser.add_argument(
"--aggregator",
help="The aggregator operation used to calculate the performance. e.g., median, mean",
required=True,
metavar="AGGREGATOR",
)

args = parser.parse_args()
main(args.d4j, args.lltc4j)
main(args.d4j, args.lltc4j, args.aggregator)
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""
Tests for print_median_performance.py
Tests for print_performance.py
"""
import pytest

import src.python.main.analysis.print_median_performance as print_median_performance
import src.python.main.analysis.print_performance as print_median_performance


@pytest.fixture
Expand Down Expand Up @@ -35,12 +35,11 @@ def sample_lltc4j_scores(tmpdir):
return str(file)



def test_calculate_performance(sample_d4j_scores, sample_lltc4j_scores, capfd):
"""
Tests that the performance metrics are calculated correctly.
"""
print_median_performance.main(sample_d4j_scores, sample_lltc4j_scores)
print_median_performance.main(sample_d4j_scores, sample_lltc4j_scores, "median")

captured = capfd.readouterr()

Expand Down Expand Up @@ -68,3 +67,36 @@ def test_calculate_performance(sample_d4j_scores, sample_lltc4j_scores, capfd):

assert captured.out == expected_standard_output
assert captured.err == expected_error_output

def test_calculate_performance_mean(sample_d4j_scores, sample_lltc4j_scores, capfd):
    """
    Tests that the performance metrics are calculated correctly when the
    aggregator is the mean.

    The LaTeX table is expected on standard output and the LaTeX commands are
    expected on standard error.
    """
    # This test needs its own name: a second module-level function called
    # test_calculate_performance would rebind the name used by the median test
    # defined earlier in this module, and pytest would then collect only one
    # of the two tests.
    print_median_performance.main(sample_d4j_scores, sample_lltc4j_scores, "mean")

    captured = capfd.readouterr()

    expected_standard_output = (
        "\\begin{tabular}{lrrr}\n"
        "\\toprule\n"
        " & Flexeme & SmartCommit & File-based \\\\\n"
        "Dataset & & & \\\\\n"
        "\\midrule\n"
        "Defects4J & 0.65 & \\bfseries 0.75 & 0.55 \\\\\n"
        "LLTC4J & 0.40 & \\bfseries 0.68 & 0.52 \\\\\n"
        "\\bottomrule\n"
        "\\end{tabular}\n"
        "\n"
    )

    expected_error_output = (
        "\\newcommand\\defectsfjFlexemeMean{0.65\\xspace}\n"
        "\\newcommand\\defectsfjSmartcommitMean{0.75\\xspace}\n"
        "\\newcommand\\defectsfjFilebasedMean{0.55\\xspace}\n"
        "\\newcommand\\lltcfjFlexemeMean{0.4\\xspace}\n"
        "\\newcommand\\lltcfjSmartcommitMean{0.68\\xspace}\n"
        "\\newcommand\\lltcfjFilebasedMean{0.52\\xspace}\n"
    )

    assert captured.out == expected_standard_output
    assert captured.err == expected_error_output

0 comments on commit fd42341

Please sign in to comment.