Skip to content

Commit

Permalink
Add File untangling to Figure 3, Table 3, and Table 4
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomsch committed Oct 17, 2023
1 parent 3d0d4e4 commit 40d4804
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 12 deletions.
7 changes: 3 additions & 4 deletions analysis/paper/compare_models.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,10 @@ outputFile = args[2]
# We use R to fit the linear mixed models because Python doesn't support LMMs with two crossed random effects.
#
# NOTE(review): this listing looks like a diff rendered without +/- markers, so
# consecutive near-duplicate statements are presumably the before/after versions
# of one line -- confirm against the repository before running it top to bottom.
#
# The input CSV has no header row (header = FALSE); the five columns are named
# here in file order.
data <- read.csv(inputFile, header = FALSE, col.names = c('Project', 'BugID', 'SmartCommit', 'Flexeme', 'FileUntangling'))
# Drops the FileUntangling column from the data frame.
data <- subset(data, select = -c(FileUntangling))
# Make BugID categorical before it is used as a grouping term in the model.
data$BugID <- as_factor(data$BugID)

# Convert to long format: the selected tool columns become rows, with the
# column name stored in 'Tool' and its value stored in 'Performance'.
# Selects the tool columns by position (3rd and 4th column).
data_long = pivot_longer(data, cols = 3:4, names_to = 'Tool', values_to = 'Performance')
# Selects the three tool columns by name; all_of() errors if any is missing.
data_long = pivot_longer(data, cols = all_of(c("SmartCommit", "Flexeme", "FileUntangling")), names_to = 'Tool', values_to = 'Performance')

# Mixed model: fixed effect of Tool, random intercepts for Project and BugID.
model_mixed <- lmer(Performance ~ Tool + (1|Project) + (1|BugID), data=data_long)
Expand All @@ -50,10 +49,10 @@ rsq(model_simple, adj=TRUE) # Adjusted R^2

# Create Coefficients dataframe: one row per model (mixed, simple), one column
# per fixed-effect coefficient.
coeffs = rbind(summary(model_mixed)$coefficients[, "Estimate"], summary(model_simple)$coefficients[, "Estimate"])
colnames(coeffs) <- c('Flexeme', 'SmartCommit')
# Label the columns in the order the coefficient table reports them. 'Tool' is
# a character column, so R turns it into a factor with sorted levels
# (FileUntangling, Flexeme, SmartCommit): the first coefficient is the
# intercept, i.e. the reference tool FileUntangling, followed by the Flexeme
# and SmartCommit contrasts.
# NOTE(review): the two-name assignment just above is presumably the pre-commit
# version of this line in the diff -- confirm.
colnames(coeffs) <- c('FileUntangling', 'Flexeme', 'SmartCommit')
# Create p-values dataframe, same layout as coeffs.
p_vals = rbind(summary(model_mixed)$coefficients[, "Pr(>|t|)"], summary(model_simple)$coefficients[, "Pr(>|t|)"])
colnames(p_vals) <- c('Flexeme', 'SmartCommit')
# Same coefficient order as for coeffs, so estimates and p-values stay aligned.
colnames(p_vals) <- c('FileUntangling', 'Flexeme', 'SmartCommit')

# Put estimates and p-values side by side, then prepend the column labels as a
# first row.
tool_stats <- cbind(coeffs, p_vals)
tool_stats <- rbind(colnames(tool_stats),tool_stats)
Expand Down
7 changes: 3 additions & 4 deletions analysis/paper/performance_distribution.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,11 @@ inputFile = args[1]
outputFile = args[2]


# NOTE(review): this listing looks like a diff rendered without +/- markers, so
# consecutive near-duplicate statements are presumably the before/after versions
# of one line -- confirm against the repository before running it top to bottom.
#
# The input CSV has no header row (header = FALSE); the five columns are named
# here in file order.
data <- read.csv(inputFile, header = FALSE, col.names = c('Project', 'BugID', 'SmartCommit', 'Flexeme', 'FileUntangling'))
# Drops the FileUntangling column from the data frame.
data <- subset(data, select = -c(FileUntangling))
data$BugID <- as_factor(data$BugID)
# Same file read with the second column named 'Commit' and the last one
# 'File_Untangling'.
# NOTE(review): compare_models.R names that last column 'FileUntangling'; the
# name chosen here ends up as a value of 'Tool' below -- confirm the spelling
# difference is intended.
data <- read.csv(inputFile, header = FALSE, col.names = c('Project', 'Commit', 'SmartCommit', 'Flexeme', 'File_Untangling'))
# Make the commit identifier categorical.
data$Commit <- as_factor(data$Commit)

# Convert to long format: the selected tool columns become rows, with the
# column name stored in 'Tool' and its value stored in 'Performance'.
# Selects the tool columns by position (3rd and 4th column).
data_long = pivot_longer(data, cols = 3:4, names_to = 'Tool', values_to = 'Performance')
# Selects the three tool columns by name.
data_long = pivot_longer(data, cols = c("SmartCommit", "Flexeme", "File_Untangling"), names_to = 'Tool', values_to = 'Performance')

# Open a PDF graphics device on outputFile and plot Performance against Tool.
pdf(outputFile)
flexplot(Performance ~ Tool, data = data_long)
Expand Down
14 changes: 10 additions & 4 deletions analysis/paper/statistical_analysis_commit_metrics.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,20 +95,26 @@ metrics.data <- read.csv(metrics.path)
# NOTE(review): this listing looks like a diff rendered without +/- markers, so
# consecutive near-duplicate statements are presumably the before/after versions
# of one line -- confirm against the repository before running it top to bottom.
#
# Names of every metric column, i.e. all columns except the 'project' and 'vid'
# key columns.
metrics.names <- colnames(metrics.data %>% select(-c('project', 'vid')))

# Join performance with metrics: keep every performance row and match it on
# project plus bug_id (performance side) = vid (metrics side).
# This version also drops the 'file_untangling' column after the join.
performance.metrics <- left_join(performance.data, metrics.data, by = c('project' = 'project', 'bug_id' = 'vid')) %>% select(-c('file_untangling'))
# This version keeps every column, including 'file_untangling'.
performance.metrics <- left_join(performance.data, metrics.data, by = c('project' = 'project', 'bug_id' = 'vid'))

# Global: stack the per-tool score columns into rows ('Tool' holds the source
# column name, 'performance' the score) and summarise across all tools.
performance.metrics.long = pivot_longer(performance.metrics, cols = c('smartcommit_rand_index', 'flexeme_rand_index'), names_to = 'Tool', values_to = 'performance')
performance.metrics.long = pivot_longer(performance.metrics, cols = c('smartcommit_rand_index', 'flexeme_rand_index', 'file_untangling'), names_to = 'Tool', values_to = 'performance')
summarise_model_all_variables(performance.metrics.long, outputPath, "impact_metrics_all.txt")

# SmartCommit All: drop the other tools' score columns and expose SmartCommit's
# score under the common name 'performance'.
performance.metrics.smartcommit <- select(performance.metrics, -c('flexeme_rand_index')) %>% rename(performance = smartcommit_rand_index)
performance.metrics.smartcommit <- select(performance.metrics, -c('flexeme_rand_index', 'file_untangling')) %>% rename(performance = smartcommit_rand_index)
summarise_model_all_variables(performance.metrics.smartcommit, outputPath, "impact_metrics_smartcommit_all.txt")

# Flexeme All: drop the other tools' score columns and expose Flexeme's score
# under the common name 'performance'.
performance.metrics.flexeme <- select(performance.metrics, -c('smartcommit_rand_index')) %>% rename(performance = flexeme_rand_index)
performance.metrics.flexeme <- select(performance.metrics, -c('smartcommit_rand_index', 'file_untangling')) %>% rename(performance = flexeme_rand_index)
summarise_model_all_variables(performance.metrics.flexeme, outputPath, "impact_metrics_flexeme_all.txt")

# File Untangling ALL: drop the other tools' score columns and expose the
# file-untangling score under the common name 'performance'.
performance.metrics.file <- select(performance.metrics, -c('smartcommit_rand_index', 'flexeme_rand_index')) %>% rename(performance = file_untangling)
summarise_model_all_variables(performance.metrics.file, outputPath, "impact_metrics_file_all.txt")

# For each metric, do model analysis and print graph in pdf.
# Each call takes one per-tool data frame (score column named 'performance'),
# the metric column names, a tool label, and the output directory.
generate_pairwise_analysis(performance.metrics.smartcommit, metrics.names, "smartcommit", outputPath)
generate_pairwise_analysis(performance.metrics.flexeme, metrics.names, "flexeme", outputPath)
# File untangling has exactly one per-tool frame, performance.metrics.file,
# built in the "File Untangling ALL" step; no '.file.smartcommit' or
# '.file.flexeme' variant is ever assigned, so referencing those would stop the
# script with "object not found". The "file" label matches the
# "impact_metrics_file_all.txt" summary name.
generate_pairwise_analysis(performance.metrics.file, metrics.names, "file", outputPath)

0 comments on commit 40d4804

Please sign in to comment.