Skip to content

Commit

Permalink
Put data in a subdirectory (#192)
Browse files Browse the repository at this point in the history
  • Loading branch information
damonbayer authored Dec 6, 2024
1 parent 799b1bf commit 10c8776
Show file tree
Hide file tree
Showing 12 changed files with 200 additions and 201 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -399,5 +399,5 @@ private_data/*
.vscode/settings.json

# Test data exceptions to the general data exclusion
!pipelines/tests/covid-19_r_2024-01-29_f_2023-11-01_t_2024-01-29/model_runs/TD/data.csv
!pipelines/tests/covid-19_r_2024-01-29_f_2023-11-01_t_2024-01-29/model_runs/TD/eval_data.tsv
!pipelines/tests/covid-19_r_2024-01-29_f_2023-11-01_t_2024-01-29/model_runs/TD/data/data.tsv
!pipelines/tests/covid-19_r_2024-01-29_f_2023-11-01_t_2024-01-29/model_runs/TD/data/eval_data.tsv
6 changes: 3 additions & 3 deletions hewr/R/process_state_forecast.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
process_state_forecast <- function(model_run_dir, save = TRUE) {
disease_name_nssp <- parse_model_run_dir_path(model_run_dir)$disease

train_data_path <- fs::path(model_run_dir, "data", ext = "csv")
train_dat <- readr::read_csv(train_data_path, show_col_types = FALSE)
train_data_path <- fs::path(model_run_dir, "data", "data", ext = "tsv")
train_dat <- readr::read_tsv(train_data_path, show_col_types = FALSE)

eval_data_path <- fs::path(model_run_dir, "eval_data", ext = "tsv")
eval_data_path <- fs::path(model_run_dir, "data", "eval_data", ext = "tsv")
eval_dat <- readr::read_tsv(eval_data_path, show_col_types = FALSE) |>
dplyr::mutate(data_type = "eval")

Expand Down
2 changes: 1 addition & 1 deletion pipelines/build_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


def build_model_from_dir(model_dir):
data_path = model_dir / "data_for_model_fit.json"
data_path = model_dir / "data" / "data_for_model_fit.json"
prior_path = model_dir / "priors.py"

with open(
Expand Down
2 changes: 1 addition & 1 deletion pipelines/forecast_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ def main(
first_training_date=first_training_date,
last_training_date=last_training_date,
latest_comprehensive_path=eval_data_path,
output_data_dir=model_run_dir,
output_data_dir=Path(model_run_dir, "data"),
last_eval_date=report_date + timedelta(days=n_forecast_days),
)

Expand Down
8 changes: 4 additions & 4 deletions pipelines/generate_epiweekly.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ purrr::walk(script_packages, \(pkg) {
#' @return None. The function writes the epiweekly data to a file named
#' `epiweekly_<dataname>` (same extension as `dataname`) in the `data`
#' subdirectory of `model_run_dir`.
convert_daily_to_epiweekly <- function(
model_run_dir, dataname = "data.csv",
model_run_dir, dataname = "data.tsv",
strict = TRUE, day_of_week = 7) {
ext <- path_ext(dataname)
data_basename <- path_ext_remove(dataname)
Expand All @@ -42,7 +42,7 @@ convert_daily_to_epiweekly <- function(
delim <- if (ext == "csv") "," else "\t"
message(glue::glue("Generating epi-weekly data {model_run_dir}..."))

data_path <- path(model_run_dir, dataname)
data_path <- path(model_run_dir, "data", dataname)

daily_data <- read_delim(
data_path,
Expand Down Expand Up @@ -73,7 +73,7 @@ convert_daily_to_epiweekly <- function(
# epiweek end date determines data_type classification

output_file <- path(
model_run_dir,
model_run_dir, "data",
glue::glue("epiweekly_{data_basename}"),
ext = ext
)
Expand All @@ -82,7 +82,7 @@ convert_daily_to_epiweekly <- function(
}

main <- function(model_run_dir) {
convert_daily_to_epiweekly(model_run_dir, dataname = "data.csv")
convert_daily_to_epiweekly(model_run_dir, dataname = "data.tsv")
convert_daily_to_epiweekly(model_run_dir, dataname = "eval_data.tsv")
}

Expand Down
12 changes: 5 additions & 7 deletions pipelines/prep_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,16 +332,14 @@ def process_and_save_state(
"state_pop": state_pop,
"right_truncation_offset": right_truncation_offset,
}

os.makedirs(model_run_dir, exist_ok=True)
data_dir = Path(model_run_dir, "data")
os.makedirs(data_dir, exist_ok=True)

if logger is not None:
logger.info(f"Saving {state_abb} to {model_run_dir}")
data_to_save.write_csv(Path(model_run_dir, "data.csv"))
logger.info(f"Saving {state_abb} to {data_dir}")
data_to_save.write_csv(Path(data_dir, "data.tsv"), separator="\t")

with open(
Path(model_run_dir, "data_for_model_fit.json"), "w"
) as json_file:
with open(Path(data_dir, "data_for_model_fit.json"), "w") as json_file:
json.dump(data_for_model_fit, json_file)

return None
1 change: 1 addition & 0 deletions pipelines/score_forecast.R
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ read_and_score_location <- function(model_run_dir,
)

truth_path <- fs::path(model_run_dir,
"data",
eval_data_filename,
ext = eval_data_file_ext
)
Expand Down

This file was deleted.

Loading

0 comments on commit 10c8776

Please sign in to comment.