From 9004282e49ab62c36eaa4fa03b66bd2c3823f26b Mon Sep 17 00:00:00 2001 From: Thomas Schweizer Date: Wed, 6 Dec 2023 17:59:57 -0800 Subject: [PATCH 1/2] Tool performance residuals and clean up --- analysis/generate_paper.sh | 2 +- .../statistical_analysis_untangling_tool.R | 48 +++++++++++-------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/analysis/generate_paper.sh b/analysis/generate_paper.sh index 4b830592..14bca36a 100755 --- a/analysis/generate_paper.sh +++ b/analysis/generate_paper.sh @@ -59,7 +59,7 @@ main() { # RQ1 # Rscript analysis/paper/performance_distribution.R "${TMP_DIR}/decomposition_scores.csv" "${PAPER_REPOSITORY}/figures/rq1-performance-distribution.pdf" - Rscript analysis/paper/statistical_analysis_untangling_tool.R "${TMP_DIR}/decomposition_scores.csv" "${PAPER_REPOSITORY}/data/rq1.txt" + Rscript analysis/paper/statistical_analysis_untangling_tool.R "${TMP_DIR}/decomposition_scores.csv" "${PAPER_REPOSITORY}/data/" Rscript analysis/paper/compare_models.R "${TMP_DIR}/decomposition_scores.csv" "${PAPER_REPOSITORY}/tables/model-comparison.tex" > "${PAPER_REPOSITORY}"/data/model-comparison.txt # diff --git a/analysis/paper/statistical_analysis_untangling_tool.R b/analysis/paper/statistical_analysis_untangling_tool.R index ccc510e0..a7ab7d39 100755 --- a/analysis/paper/statistical_analysis_untangling_tool.R +++ b/analysis/paper/statistical_analysis_untangling_tool.R @@ -1,15 +1,29 @@ #!/usr/bin/env Rscript # -# Generates statistical analysis for the untangling tools with p-value, Cohen's d, R^2, and model residuals normality test results. +# Run and export the statistical analysis for the untangling tools with p-value, Cohen's d, R^2, and model residuals normality test results. +# The statistical analysis results are saved in two files: +# - `outputDir/untangling_tool_performance_statistical_analysis.txt` contains the summary of the model, ANOVA, Shapiro-Wilk normality test, and Cohen's d. 
+# - `outputDir/untangling_tool_performance_residuals.pdf` contains the residual plots of the model. # # Arguments: -# - 1: The `decomposition_scores.csv` file generated by `decompose.sh`. -# - 1: The path to the file where the results will be saved. +# - 1: The untangling performance file `decomposition_scores.csv` containing the performance of the untangling tools for a dataset. +# - 2: The path to the directory where the analysis results will be saved. # # Output: # The results are saved as text data. The output file contains # the output of the summary() and cohen.d() functions. +args = commandArgs(trailingOnly=TRUE) + +if (length(args)!=2) { + stop("Please provide the untangling performance file and the path where to store the results. Example: 'statistical_analysis_untangling_tool.R tool_performance.csv analysis/'", call.=FALSE) +} +untanglingPerformanceFile = args[1] +outputDir= args[2] + +residuals_plot_file = paste(outputDir, "untangling_tool_performance_residuals.pdf", sep="/") +untangling_tool_performance_statistical_analysis_file = paste(outputDir, "untangling_tool_performance_statistical_analysis.txt", sep="/") + library(librarian) library(tidyverse) library(car) @@ -21,38 +35,30 @@ library(flexplot) library(rsq) shelf(broom) -args = commandArgs(trailingOnly=TRUE) - -if (length(args)!=2) { - stop("Please provide an input file and output file.", call.=FALSE) -} -inputFile = args[1] -outputFile = args[2] - -data <- read.csv(inputFile, header = FALSE, col.names = c('Project', 'BugID', 'SmartCommit', 'Flexeme', 'FileUntangling')) +data <- read.csv(untanglingPerformanceFile, header = FALSE, col.names = c('Project', 'BugID', 'SmartCommit', 'Flexeme', 'FileUntangling')) data <- subset(data, select = -c(FileUntangling)) data$BugID <- as_factor(data$BugID) -# Convert to long format -data_long = pivot_longer(data, cols = 3:4, names_to = 'Tool', values_to = 'Performance') +# Convert to long format and select only SmartCommit and Flexeme to compare. 
+data_long = pivot_longer(data, cols = c('SmartCommit', 'Flexeme'), names_to = 'Tool', values_to = 'Performance') # The summary can be interpreted as follows: - # Intercept row shows whether the baseline treatment (whichever is first) is significantly different from 0. # The second row, containing the other treatment, shows whether the other treatment is significantly # different from the intercept. - model <- lm(Performance ~ Tool, data=data_long) + # Residuals # It is recommended to look at the residuals to check for normality rather than apply a statistical test. +pdf(residuals_plot_file) visualize(model, "residuals") +dev.off() -sink(outputFile) +# Analysis results +sink(untangling_tool_performance_statistical_analysis_file) summary(model) -#estimates(model_simple) - -tidy(model) - +anova(model) +shapiro.test(residuals(model)) cohen.d(data_long$Performance[data_long$Tool == "SmartCommit"], data_long$Performance[data_long$Tool == "Flexeme"]) sink() From b5b0effdd2dc972ca11694439f609708281417ff Mon Sep 17 00:00:00 2001 From: Thomas Schweizer Date: Wed, 6 Dec 2023 18:01:32 -0800 Subject: [PATCH 2/2] Move to source directory --- analysis/generate_paper.sh | 2 +- .../paper => src/r/main}/statistical_analysis_untangling_tool.R | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename {analysis/paper => src/r/main}/statistical_analysis_untangling_tool.R (100%) diff --git a/analysis/generate_paper.sh b/analysis/generate_paper.sh index 14bca36a..51a55870 100755 --- a/analysis/generate_paper.sh +++ b/analysis/generate_paper.sh @@ -59,7 +59,7 @@ main() { # RQ1 # Rscript analysis/paper/performance_distribution.R "${TMP_DIR}/decomposition_scores.csv" "${PAPER_REPOSITORY}/figures/rq1-performance-distribution.pdf" - Rscript analysis/paper/statistical_analysis_untangling_tool.R "${TMP_DIR}/decomposition_scores.csv" "${PAPER_REPOSITORY}/data/" + Rscript src/r/main/statistical_analysis_untangling_tool.R "${TMP_DIR}/decomposition_scores.csv" "${PAPER_REPOSITORY}/data/" Rscript 
analysis/paper/compare_models.R "${TMP_DIR}/decomposition_scores.csv" "${PAPER_REPOSITORY}/tables/model-comparison.tex" > "${PAPER_REPOSITORY}"/data/model-comparison.txt # diff --git a/analysis/paper/statistical_analysis_untangling_tool.R b/src/r/main/statistical_analysis_untangling_tool.R similarity index 100% rename from analysis/paper/statistical_analysis_untangling_tool.R rename to src/r/main/statistical_analysis_untangling_tool.R