-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Update compare_columns function and tests
- Loading branch information
1 parent
e1894c2
commit aaec8c0
Showing
5 changed files
with
204 additions
and
100 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,86 +1,43 @@ | ||
# tests/testthat/test-compare_columns.R | ||
|
||
test_that("compare_columns identifies differences correctly", { | ||
# Basic test with different columns | ||
df1 <- data.frame(a = 1, b = 2, c = 3) | ||
df2 <- data.frame(b = 2, c = 3, d = 4) | ||
result <- compare_columns(df1, df2) | ||
|
||
expect_equal(result$unique_to_df1, "a") | ||
expect_equal(result$unique_to_df2, "d") | ||
|
||
# Test with identical dataframes | ||
df3 <- data.frame(x = 1, y = 2) | ||
df4 <- data.frame(x = 3, y = 4) | ||
result2 <- compare_columns(df3, df4) | ||
|
||
expect_equal(length(result2$unique_to_df1), 0) | ||
expect_equal(length(result2$unique_to_df2), 0) | ||
|
||
# Test with completely different columns | ||
df5 <- data.frame(a = 1, b = 2) | ||
df6 <- data.frame(c = 3, d = 4) | ||
result3 <- compare_columns(df5, df6) | ||
|
||
expect_equal(result3$unique_to_df1, c("a", "b")) | ||
expect_equal(result3$unique_to_df2, c("c", "d")) | ||
test_that("compare_columns handles basic comparison correctly", { | ||
df1 <- data.frame(a = 1:3, b = 2:4, c = 3:5) | ||
df2 <- data.frame(b = 2:4, c = 3:5, d = 4:6) | ||
|
||
result <- compare_columns(df1, df2, "DF1", "DF2") | ||
|
||
# Test core functionality | ||
expect_equal(result$summary_data$unique_cols$DF1, "a") | ||
expect_equal(result$summary_data$unique_cols$DF2, "d") | ||
expect_equal(result$summary_data$mutual_cols, c("b", "c")) | ||
expect_equal(result$summary_data$total_cols$DF1, 3) | ||
expect_equal(result$summary_data$total_cols$DF2, 3) | ||
}) | ||
|
||
test_that("compare_columns handles custom names correctly", { | ||
df1 <- data.frame(a = 1, b = 2) | ||
df2 <- data.frame(b = 2, c = 3) | ||
test_that("compare_columns validates input correctly", { | ||
df1 <- data.frame(a = 1:3) | ||
not_df <- list(a = 1:3) | ||
|
||
result <- compare_columns(df1, df2, "first", "second") | ||
|
||
# Test custom naming | ||
expect_equal(names(result), c("unique_to_first", "unique_to_second")) | ||
expect_equal(result$unique_to_first, "a") | ||
expect_equal(result$unique_to_second, "c") | ||
# Test input validation | ||
expect_error(compare_columns(df1, not_df), "Both inputs must be data frames") | ||
expect_error(compare_columns(df1, df1, 1, "df2"), "Data frame names must be character") | ||
}) | ||
|
||
test_that("compare_columns handles empty dataframes", { | ||
df1 <- data.frame() | ||
df2 <- data.frame(a = 1) | ||
|
||
result1 <- compare_columns(df1, df2) | ||
expect_equal(length(result1$unique_to_df1), 0) | ||
expect_equal(result1$unique_to_df2, "a") | ||
df1 <- data.frame(a = numeric(0)) | ||
df2 <- data.frame(b = numeric(0)) | ||
|
||
result2 <- compare_columns(df2, df1) | ||
expect_equal(result2$unique_to_df1, "a") | ||
expect_equal(length(result2$unique_to_df2), 0) | ||
}) | ||
|
||
test_that("compare_columns validates inputs correctly", { | ||
df1 <- data.frame(a = 1) | ||
not_df <- list(a = 1) | ||
|
||
# Test invalid dataframe inputs | ||
expect_error(compare_columns(not_df, df1), "Both inputs must be data frames or tibbles") | ||
expect_error(compare_columns(df1, not_df), "Both inputs must be data frames or tibbles") | ||
|
||
# Test invalid name inputs | ||
expect_error(compare_columns(df1, df1, 1, "second"), "Data frame names must be character strings") | ||
expect_error(compare_columns(df1, df1, "first", TRUE), "Data frame names must be character strings") | ||
}) | ||
|
||
test_that("compare_columns works with tibbles", { | ||
skip_if_not_installed("tibble") | ||
library(tibble) | ||
|
||
tbl1 <- tibble(a = 1, b = 2) | ||
tbl2 <- tibble(b = 2, c = 3) | ||
result <- compare_columns(df1, df2) | ||
|
||
result <- compare_columns(tbl1, tbl2) | ||
expect_equal(result$unique_to_df1, "a") | ||
expect_equal(result$unique_to_df2, "c") | ||
expect_equal(result$summary_data$row_counts$df1, 0) | ||
expect_equal(result$summary_data$row_counts$df2, 0) | ||
expect_equal(result$summary_data$unique_cols$df1, "a") | ||
}) | ||
|
||
test_that("compare_columns preserves column order", { | ||
df1 <- data.frame(c = 1, a = 2, b = 3) | ||
df2 <- data.frame(d = 1, b = 2, e = 3) | ||
test_that("compare_columns identifies duplicate rows correctly", { | ||
df1 <- data.frame(a = c(1,1,2), b = c(2,2,3)) | ||
df2 <- data.frame(b = c(2,2,3), c = c(3,3,4)) | ||
|
||
result <- compare_columns(df1, df2) | ||
expect_equal(result$unique_to_df1, c("c", "a")) | ||
expect_equal(result$unique_to_df2, c("d", "e")) | ||
|
||
expect_equal(result$summary_data$unique_rows$df1, 2) | ||
expect_equal(result$summary_data$unique_rows$df2, 2) | ||
}) |