Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix get_age() denominator calculations once & for all #31

Merged
merged 5 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 19 additions & 9 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -165,24 +165,34 @@ stale_package_check = function(con) {

# Accurately calculate fractional age, quickly
## R CMD check appeasement
cycle_type = rem = int_yrs = i.start = start = end = NULL
cycle_type = rem = int_yrs = i.start = start = end = n_days NULL

cycle_types = data.table(

# Used to pick elements of 'age_within_quadrennium' based
# where March 1 can fall for a Unix date modulo 1461.
mar1_cycle_types = data.table(
start = c(0L, 59L, 424L, 790L, 1155L),
end = c(58L, 423L, 789L, 1154L, 1460L),
val = c(3L, 2L, 1L, 4L, 3L),
key = c('start', 'end')
)

extra_part_mapping = list(
data.table(start = c(0L, 366L, 731L, 1096L), end=c(365L, 730L, 1095L, 1461L)),
# These are # of days with in a life's quadrennial partitioning. If your age in
# days modulo 1461 d satisfies start[1L]<=d<=end[1L], you haven't reached
# one year within the current quadrennium (int_yrs==0). The exact boundaries
# vary based on whether you were born before/after March 1 and when the next
# leap year will happen.
age_within_quadrennium = list(
data.table(start = c(0L, 366L, 731L, 1096L), end=c(365L, 730L, 1095L, 1460L)),
data.table(start = c(0L, 365L, 731L, 1096L), end=c(364L, 730L, 1095L, 1460L)),
data.table(start = c(0L, 365L, 730L, 1096L), end=c(364L, 729L, 1095L, 1460L)),
data.table(start = c(0L, 365L, 730L, 1095L), end=c(364L, 729L, 1094L, 1460L))
)
for (DT in extra_part_mapping) DT[, `:=`(int_yrs=0:3, n_days=end + 1L - start)]
for (DT in extra_part_mapping) setkeyv(DT, c('start', 'end'))
for (DT in age_within_quadrennium) DT[, `:=`(int_yrs=0:3, n_days=end + 1L - start)]
for (DT in age_within_quadrennium) setkeyv(DT, c('start', 'end'))

# Approach: split life into 4-year intervals (quadrennia): each of those has 1461 days.
# within the most recent quadrennium, calculate full + partial years.
get_age <- function(birthdays, ref_dates) {
bday <- unclass(birthdays)
ref <- unclass(ref_dates)
Expand All @@ -192,12 +202,12 @@ get_age <- function(birthdays, ref_dates) {
)
x[ , 'cycle_type' := {
bdr <- bday %% 1461L
overlaps = foverlaps(data.table(start = bdr, end = bdr), cycle_types)
overlaps = foverlaps(data.table(start = bdr, end = bdr), mar1_cycle_types)
overlaps$val
}]
x[ , by = cycle_type, 'extra' := {
overlaps = foverlaps(data.table(start = rem, end = rem), extra_part_mapping[[.BY$cycle_type]])
overlaps[ , int_yrs + (i.start - start) / (end + 1L - start)]
overlaps = foverlaps(data.table(start = rem, end = rem), age_within_quadrennium[[.BY$cycle_type]])
overlaps[ , int_yrs + (i.start - start) / n_days]
}]
4L * ((ref - bday) %/% 1461L) + x$extra
}
Expand Down
47 changes: 24 additions & 23 deletions tests/testthat/test-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,28 +84,28 @@ test_that('quick date utils work', {
})

test_that('get_age works', {
test_df = data.frame(
birth_date = as.Date(c(
"1978-12-30", "1978-12-31", "1979-01-01",
"1962-12-30", "1962-12-31", "1963-01-01",
"2000-06-16", "2000-06-17", "2000-06-18",
"2007-03-18", "2007-03-19", "2007-03-20",
"1968-02-29", "1968-02-29", "1968-02-29",
"2024-12-22", "2025-03-01", "2026-03-01",
NULL
)),
given_date = as.Date(c(
"2015-12-31", "2015-12-31", "2015-12-31",
"2015-12-31", "2015-12-31", "2015-12-31",
"2050-06-17", "2050-06-17", "2050-06-17",
"2008-03-19", "2008-03-19", "2008-03-19",
"2015-02-28", "2015-03-01", "2015-03-02",
"2031-12-23", "2028-12-22", "2029-03-02",
NULL
))
)
birth_date = as.Date(c(
"1978-12-30", "1978-12-31", "1979-01-01",
"1962-12-30", "1962-12-31", "1963-01-01",
"2000-06-16", "2000-06-17", "2000-06-18",
"2007-03-18", "2007-03-19", "2007-03-20",
"1968-02-29", "1968-02-29", "1968-02-29",
"2024-12-22", "2025-03-01", "2026-03-01",
"2027-03-01",
NULL
))
given_date = as.Date(c(
"2015-12-31", "2015-12-31", "2015-12-31",
"2015-12-31", "2015-12-31", "2015-12-31",
"2050-06-17", "2050-06-17", "2050-06-17",
"2008-03-19", "2008-03-19", "2008-03-19",
"2015-02-28", "2015-03-01", "2015-03-02",
"2031-12-23", "2028-12-22", "2029-03-02",
"2030-03-02",
NULL
))
expect_identical(
with(test_df, get_age(birth_date, given_date)), c(
get_age(birth_date, given_date), c(
37.0 + 1.0/366.0, # will be 366 days until 2016-12-31
37.0,
37.0 - 1.0/365.0,
Expand All @@ -124,11 +124,12 @@ test_that('get_age works', {

47.0 - 1.0/365.0, # my judgment: birthday occurs on 3/1 for 2/29 babies, so 364/365 the way there
47.0,
47.0 + 1.0/366.0,
47.0 + 1.0/365.0, # 365 days until 2016-02-29, not 366

7.0 + 1.0/366.0, # 366 days until 2032-12-22, not 367 (#23)
3.0 + 296.0/365.0, # 365 days until 2029-03-01, not 366 (#26)
3.0 + 1.0/365.0, # 365 days until 2030-03-01, not 365 (#28)
3.0 + 1.0/365.0, # 365 days until 2030-03-01, not 366 (#28)
3.0 + 1.0/365.0, # 365 days until 2031-03-02, not 366 (#30)
NULL
)
)
Expand Down
Loading