-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
23 changed files
with
729 additions
and
482 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,35 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
S3method(post_process,accident_data) | ||
S3method(print,jpaccidents_config) | ||
S3method(summary,jpaccidents_config) | ||
export(convert_deg) | ||
export(detect_dataset_name) | ||
export(download_accident_data) | ||
export(filter_data_by_tag) | ||
export(extract_schema_columns) | ||
export(get_config) | ||
export(get_default_config_path) | ||
export(load_config) | ||
export(lookup_column_names) | ||
export(lookup_dataset_name) | ||
export(post_process_highway) | ||
export(post_process_main) | ||
export(post_process_sub) | ||
export(read_accident_data) | ||
export(read_accidents_data) | ||
export(reset_config) | ||
export(select_post_processor) | ||
importFrom(cli,cli_abort) | ||
importFrom(cli,cli_alert_danger) | ||
importFrom(cli,cli_alert_success) | ||
importFrom(cli,cli_alert_warning) | ||
importFrom(curl,multi_download) | ||
importFrom(dplyr,bind_rows) | ||
importFrom(lubridate,make_datetime) | ||
importFrom(readr,cols) | ||
importFrom(readr,locale) | ||
importFrom(readr,read_csv) | ||
importFrom(sf,st_as_sf) | ||
importFrom(stats,setNames) | ||
importFrom(tidyr,ends_with) | ||
importFrom(tidyr,pivot_longer) | ||
importFrom(utils,download.file) | ||
importFrom(utils,read.csv) | ||
importFrom(yaml,read_yaml) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#' Identify dataset names from file paths | ||
#' | ||
#' This function identifies dataset names based on the file paths and | ||
#' configuration patterns. | ||
#' | ||
#' Invalid files are replaced with `NA`, and a warning is issued. | ||
#' | ||
#' @param files A character string specifying the file paths. | ||
#' @param quiet A logical indicating whether to suppress warnings. | ||
#' @return A character vector of dataset names | ||
#' @export | ||
#' @examples | ||
#' detect_dataset_name(c("example/honhyo_2022.csv", "example/honhyo_2023.csv")) | ||
detect_dataset_name <- function(files, quiet = FALSE) { | ||
# Get configuration | ||
config <- get_config() | ||
|
||
# Determine dataset name based on file pattern | ||
patterns <- sapply(config$file_types, `[[`, "pattern") | ||
file_type_names <- names(config$file_types) | ||
|
||
# Helper function: Match file path to a pattern | ||
detect_file_type <- function(file) { | ||
matches <- sapply(patterns, grepl, logical(1), x = file) | ||
if (any(matches)) { | ||
return(file_type_names[which(matches)[1]]) # Return the first match | ||
} | ||
NA_character_ | ||
} | ||
|
||
# Apply detection logic to all files | ||
dataset_names <- vapply( | ||
files, | ||
detect_file_type, | ||
character(1), | ||
USE.NAMES = FALSE | ||
) | ||
|
||
# Warn about files with no matching dataset | ||
invalid_files <- files[is.na(dataset_names)] | ||
if (0 < length(invalid_files) && !quiet) { | ||
cli_alert_warning("Unknown dataset for file(s): {invalid_files}. These have been replaced with NA") | ||
} | ||
|
||
return(dataset_names) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
#' Filter data by schema | ||
#' | ||
#' This function filters a data frame based on a specified schema, which | ||
#' defines the columns to keep. It also handles special case where data | ||
#' needs to be reshaped to long format before filtering. | ||
#' | ||
#' @param data An accident data object. | ||
#' @param schema_name A character string specifying the schema to use for | ||
#' filtering. | ||
#' @return A data frame containing only the columns defined in the schema, or | ||
#' `NULL`. | ||
#' @export | ||
#' @examples | ||
#' \dontrun{ | ||
#' persons_info <- extract_schema_columns(accident_data, "persons_info") | ||
#' } | ||
extract_schema_columns <- function(data, schema_name) { | ||
# Load configuration settings containing to access schema definitions | ||
config <- get_config() | ||
schema <- config$schemas[[schema_name]] | ||
|
||
# Validate schema existence | ||
if (is.null(schema)) { | ||
cli_alert_danger("The schema does not exist: {schema_name}") | ||
return(NULL) | ||
} | ||
|
||
# Retrieve dataset name and validate against schema's supported files | ||
dataset_name <- attr(data, "dataset_name") | ||
source_files <- schema$source_files | ||
|
||
if (!dataset_name %in% source_files) { | ||
cli_alert_warning("This schema does not support the current dataset: dataset_name") | ||
return(NULL) | ||
} | ||
|
||
# Handle specific transformation requirements for certain schemas | ||
if (dataset_name == "main_data" && schema_name == "persons_info") { | ||
data <- pivot_longer( | ||
data, | ||
cols = ends_with(c("_a", "_b")), | ||
names_to = c(".value", "sub_id"), | ||
names_pattern = "^(.+)[_]([a|b])$" | ||
) | ||
} | ||
|
||
# Extract columns based on schema's primary key and data columns | ||
schema_columns <- c(schema$primary_key, schema$data_columns) | ||
selected_columns <- names(data) %in% schema_columns | ||
filtered_data <- data[, selected_columns, drop = FALSE] | ||
|
||
# Attach schema name to the resulting data | ||
attr(filtered_data, "schema_name") <- schema_name | ||
|
||
return(filtered_data) | ||
} |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.