diff --git a/.github/workflows/check-links.yml b/.github/workflows/check-links.yml deleted file mode 100644 index 21e50fa5..00000000 --- a/.github/workflows/check-links.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: Check URLs πŸ”— - -on: - push: - branches: - - main - pull_request: - branches: - - main - -jobs: - links: - name: Validate Links πŸ•ΈοΈ - runs-on: ubuntu-latest - if: > - !contains(github.event.commits[0].message, '[skip links]') - steps: - - uses: actions/checkout@v3 - - - name: Check URLs in docs πŸ“‘ - uses: lycheeverse/lychee-action@v1.5.1 - with: - fail: true - jobSummary: true - format: markdown - output: links-results.md - args: >- - --exclude-private - --exclude "https://github.com.*.git|lycheeverse.*" - --verbose - --no-progress - **/*.md - **/*.html - **/*.Rmd - **/*.yaml - **/*.yml - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/check-standard.yaml b/.github/workflows/check-standard.yaml index 05c1b71c..b15acc93 100644 --- a/.github/workflows/check-standard.yaml +++ b/.github/workflows/check-standard.yaml @@ -1,21 +1,34 @@ +--- # Workflow derived from https://github.com/r-lib/actions/tree/master/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help name: R-CMD-check πŸ“¦ on: + # 'push' events are triggered when commits + # are pushed to one of these branches push: branches: - main + tags: + - "v*" + # 'pull_request' events are triggered when PRs are + # created against one of these target branches. pull_request: + types: + - opened + - synchronize + - reopened + - ready_for_review branches: - main + # 'workflow_dispatch' gives you the ability + # to run this workflow on demand, anytime + workflow_dispatch: jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - + check: name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - + runs-on: ${{ matrix.config.os }} strategy: fail-fast: false matrix: @@ -25,24 +38,29 @@ jobs: - { os: ubuntu-latest, r: "devel", http-user-agent: "release" } - { os: ubuntu-latest, r: "release" } - { os: ubuntu-latest, r: "oldrel-1" } - env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes - + if: > + !contains(github.event.commits[0].message, '[skip checks]') steps: - - uses: actions/checkout@v3 + - name: Checkout repository πŸ›Ž + uses: actions/checkout@v4 - - uses: r-lib/actions/setup-pandoc@v2 + - name: Install Pandoc + uses: r-lib/actions/setup-pandoc@v2 - - uses: r-lib/actions/setup-r@v2 + - name: Setup R πŸ“Š + uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true - - uses: r-lib/actions/setup-r-dependencies@v2 + - name: Install R package dependencies πŸ“¦ + uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: rcmdcheck + extra-packages: any::rcmdcheck - - uses: r-lib/actions/check-r-package@v2 + - name: Run R CMD check 🎯 + uses: r-lib/actions/check-r-package@v2 diff --git a/.github/workflows/common.yaml b/.github/workflows/common.yaml new file mode 100644 index 00000000..15c84c01 --- /dev/null +++ b/.github/workflows/common.yaml @@ -0,0 +1,113 @@ +--- +name: xportr CI/CD Workflows + +on: + # 'push' events are triggered when commits + # are pushed to one of these branches + push: + branches: + - main + tags: + - "v*" + # 'pull_request' events are triggered when PRs are + # created against one of these target branches. + pull_request: + types: + - opened + - synchronize + - reopened + - ready_for_review + branches: + - main + # 'workflow_dispatch' gives you the ability + # to run this workflow on demand, anytime + workflow_dispatch: + +concurrency: + group: common-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + R_VERSION: "release" + +jobs: + # Get R version from environmental variable + # and use it in downstream jobs + get_r_version: + name: Get R version + runs-on: ubuntu-latest + outputs: + r-version: ${{ steps.get_r_version.outputs.R_VERSION }} + steps: + - name: Get R Version for Downstream Container Jobs + id: get_r_version + run: echo "R_VERSION=$R_VERSION" >> $GITHUB_OUTPUT + shell: bash + + # Test code coverage of R Package + coverage: + name: Code Coverage + uses: pharmaverse/admiralci/.github/workflows/code-coverage.yml@main + if: > + github.event_name != 'release' + needs: get_r_version + with: + r-version: "${{ needs.get_r_version.outputs.r-version }}" + # Whether to skip code coverage badge creation + # Setting to 'false' will require you to create + # an orphan branch called 'badges' in your repository + skip-coverage-badges: true + + # Ensure that styling guidelines are followed + style: + name: Code Style + uses: pharmaverse/admiralci/.github/workflows/style.yml@main + if: github.event_name == 'pull_request' + needs: get_r_version + with: + r-version: "${{ needs.get_r_version.outputs.r-version }}" + + # Ensure there are no broken URLs in the package documentation + links: + name: Links + uses: pharmaverse/admiralci/.github/workflows/links.yml@main + if: github.event_name == 'pull_request' + + # Build the website and deploy to `gh-pages` branch + site: + name: Documentation + uses: pharmaverse/admiralci/.github/workflows/pkgdown.yml@main + if: github.event_name == 'push' || startsWith(github.ref, 'refs/tags/v') + needs: get_r_version + with: + r-version: "release" + skip-multiversion-docs: true + secrets: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + # Ensure there are no linter errors in the package + linter: + name: Lint + uses: pharmaverse/admiralci/.github/workflows/lintr.yml@main + if: github.event_name == 'pull_request' + needs: get_r_version + with: + r-version: "${{ needs.get_r_version.outputs.r-version }}" + + # Ensure there are no spelling errors in the package + spellcheck: + name: Spelling + uses: pharmaverse/admiralci/.github/workflows/spellcheck.yml@main + if: github.event_name == 'pull_request' + needs: get_r_version + with: + r-version: "${{ needs.get_r_version.outputs.r-version }}" + + # Bumps development version of the package + vbump: + name: Version Bump πŸ€œπŸ€› + if: github.event_name == 'push' + uses: insightsengineering/r.pkg.template/.github/workflows/version-bump.yaml@main + secrets: + REPO_GITHUB_TOKEN: ${{ secrets.REPO_GITHUB_TOKEN }} + diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml deleted file mode 100644 index 85e6a1b3..00000000 --- a/.github/workflows/lint.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples -# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help -name: Check Lint 🧹 - -on: - push: - branches: - - main - pull_request: - branches: - - main - -jobs: - lint: - runs-on: ubuntu-latest - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - steps: - - uses: actions/checkout@v3 - - - uses: r-lib/actions/setup-r@v2 - with: - use-public-rspm: true - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - extra-packages: any::lintr, local::. - needs: lint - - - name: Lint - run: lintr::lint_package() - shell: Rscript {0} - env: - LINTR_ERROR_ON_LINT: true diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml deleted file mode 100644 index 967eb46d..00000000 --- a/.github/workflows/pkgdown.yaml +++ /dev/null @@ -1,48 +0,0 @@ -name: Deploy pkgdown site πŸ“œ - -on: - push: - branches: - - main - -jobs: - pkgdown: - runs-on: macOS-latest - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - steps: - - uses: actions/checkout@v3 - - - uses: r-lib/actions/setup-r@v2 - - - uses: r-lib/actions/setup-pandoc@v1 - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Restore R package cache - uses: actions/cache@v2 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - - name: Install dependencies - run: | - remotes::install_deps(dependencies = TRUE) - install.packages("pkgdown", type = "binary") - remotes::install_github("warnes/SASxport", ref = "master") - shell: Rscript {0} - - - name: Install package - run: R CMD INSTALL . - - - name: Deploy package - run: | - git config --local user.email "actions@github.com" - git config --local user.name "GitHub Actions" - Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml deleted file mode 100644 index 5c7adb19..00000000 --- a/.github/workflows/spellcheck.yml +++ /dev/null @@ -1,63 +0,0 @@ -name: Check Spelling πŸ†Ž - -on: - workflow_dispatch: - push: - branches: - - main - pull_request: - branches: - - main - -concurrency: - group: spelling-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - roxygen: - name: Spellcheck πŸ”  - runs-on: ubuntu-20.04 - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - if: > - !contains(github.event.commits[0].message, '[skip spellcheck]') - && github.event.pull_request.draft == false - steps: - - name: Checkout repo πŸ›Ž - uses: actions/checkout@v3 - with: - persist-credentials: false - fetch-depth: 0 - - - name: Setup R πŸ“Š - uses: r-lib/actions/setup-r@v2 - with: - r-version: 4.1.3 - - - uses: actions/cache@v2 - if: startsWith(runner.os, 'Linux') - with: - path: ~/.local/share/renv - key: ${{ runner.os }}-renv-${{ hashFiles('**/renv.lock') }} - restore-keys: | - ${{ runner.os }}-renv- - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Install system dependencies - if: runner.os == 'Linux' - run: | - while read -r cmd - do - eval sudo $cmd - done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - - - name: Run Spellcheck πŸ‘Ÿ - uses: insightsengineering/r-spellcheck-action@v3 - with: - exclude: data/*,**/*.Rd,**/*.md,*.md - additional_options: "" diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml deleted file mode 100644 index 410da4b5..00000000 --- a/.github/workflows/style.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: Check Style 🎨 - -on: - push: - branches: - - main - pull_request: - branches: - - main - -concurrency: - group: style-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - style: - name: Check code style πŸ§‘β€πŸŽ¨ - runs-on: ubuntu-latest - if: > - !contains(github.event.commits[0].message, '[skip stylecheck]') - && github.event.pull_request.draft == false - - steps: - - uses: actions/checkout@v3 - with: - path: ${{ github.event.repository.name }} - fetch-depth: 0 - - - uses: r-lib/actions/setup-r@v2 - with: - use-public-rspm: true - - - name: Install styler πŸ–ŒοΈ - run: install.packages(c("styler", "knitr", "roxygen2"), repos = "https://cloud.r-project.org") - shell: Rscript {0} - - - name: Run styler πŸ–ΌοΈ - run: | - detect <- styler::style_pkg(dry = "on") - if (TRUE %in% detect$changed) { - problems <- subset(detect$file, detect$changed == T) - cat(paste("Styling errors found in", length(problems), "files\n")) - cat("Please run `styler::style_pkg()` to fix the style\n") - quit(status = 1) - } - shell: Rscript {0} - working-directory: ${{ github.event.repository.name }} diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml deleted file mode 100644 index 43ae648d..00000000 --- a/.github/workflows/test-coverage.yaml +++ /dev/null @@ -1,47 +0,0 @@ -name: Check Test Coverage πŸ§ͺ - -on: - push: - branches: - - main - pull_request: - branches: - - main - -jobs: - test-coverage: - runs-on: macOS-latest - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - steps: - - uses: actions/checkout@v3 - - - uses: r-lib/actions/setup-r@v2 - - - uses: r-lib/actions/setup-pandoc@v1 - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - - - name: Restore R package cache - uses: actions/cache@v2 - with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - - name: Install dependencies - run: | - install.packages(c("remotes")) - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("covr") - remotes::install_github("warnes/SASxport", ref = "master") - shell: Rscript {0} - - - name: Test coverage - run: covr::codecov() - shell: Rscript {0} diff --git a/.github/workflows/vbump.yaml b/.github/workflows/vbump.yaml deleted file mode 100644 index a2091019..00000000 --- a/.github/workflows/vbump.yaml +++ /dev/null @@ -1,14 +0,0 @@ -name: Version Bump ⬆️ - -on: - push: - branches: - - main - -jobs: - vbump: - name: Version Bump πŸ€œπŸ€› - if: github.event_name == 'push' - uses: insightsengineering/r.pkg.template/.github/workflows/version-bump.yaml@main - secrets: - REPO_GITHUB_TOKEN: ${{ secrets.REPO_GITHUB_TOKEN }} diff --git a/.lintr b/.lintr index 80754030..775d1428 100644 --- a/.lintr +++ b/.lintr @@ -1,8 +1,13 @@ linters: linters_with_defaults( line_length_linter(120), object_usage_linter = NULL, - object_name_linter = NULL, - trailing_whitespace_linter(allow_empty_lines = TRUE, allow_in_strings = TRUE) + object_name_linter = object_name_linter( + styles = c("snake_case", "symbols"), + regexes = c( + xportr_attr = "^_xportr\\.[a-z_]+_$", # Attribute names used in xportr + ADaM = "^AD[A-Z]{2,3}$" # Supports CDISC ADaM standard for non-sponsored datasets + ) + ) ) encoding: "UTF-8" exclusions: list() diff --git a/DESCRIPTION b/DESCRIPTION index c737bdc8..d94f40be 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,18 +1,17 @@ Package: xportr Title: Utilities to Output CDISC SDTM/ADaM XPT Files -Version: 0.3.1.9017 +Version: 0.3.1.9028 Authors@R: c( person("Eli", "Miller", , "Eli.Miller@AtorusResearch.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2127-9456")), - person("Vignesh ", "Thanikachalam", role = "aut"), person("Ben", "Straub", role = "aut"), - person("Ross", "Didenko", role = "aut"), person("Zelos", "Zhu", role = "aut"), person("Ethan", "Brockmann", role = "aut"), person("Vedha", "Viyash", role = "aut"), person("Andre", "Verissimo", role = "aut"), person("Sophie", "Shapcott", role = "aut"), person("Celine", "Piraux", role = "aut"), + person("Kangjie", "Zhang", role = "aut"), person("Adrian", "Chan", role = "aut"), person("Sadchla", "Mascary", role = "aut"), person("Atorus/GSK JPT", role = "cph") @@ -20,7 +19,8 @@ Authors@R: c( Description: Tools to build CDISC compliant data sets and check for CDISC compliance. License: MIT + file LICENSE -URL: https://github.com/atorus-research/xportr +URL: https://atorus-research.github.io/xportr/, + https://github.com/atorus-research/xportr BugReports: https://github.com/atorus-research/xportr/issues Depends: R (>= 3.5) @@ -30,33 +30,25 @@ Imports: dplyr (>= 1.0.2), glue (>= 1.4.2), haven (>= 2.5.0), - janitor, lifecycle, magrittr, purrr (>= 0.3.4), readr, rlang (>= 0.4.10), stringr (>= 1.4.0), - tidyselect, - tm + tidyselect Suggests: - admiral, - devtools, DT, knitr, labelled, - lintr, metacore, readxl, rmarkdown, - spelling, testthat (>= 3.0.0), - usethis, withr VignetteBuilder: knitr Config/testthat/edition: 3 Encoding: UTF-8 -LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 diff --git a/NAMESPACE b/NAMESPACE index 19e4f108..30a076c0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,11 +8,10 @@ export(xportr_length) export(xportr_metadata) export(xportr_options) export(xportr_order) +export(xportr_split) export(xportr_type) export(xportr_write) export(xpt_validate) -import(haven) -import(rlang) importFrom(checkmate,assert) importFrom(checkmate,assert_character) importFrom(checkmate,assert_choice) @@ -32,8 +31,10 @@ importFrom(cli,cli_alert_success) importFrom(cli,cli_div) importFrom(cli,cli_h2) importFrom(cli,cli_text) +importFrom(cli,cli_warn) importFrom(dplyr,across) importFrom(dplyr,arrange) +importFrom(dplyr,as_tibble) importFrom(dplyr,bind_cols) importFrom(dplyr,case_when) importFrom(dplyr,distinct) @@ -53,7 +54,7 @@ importFrom(dplyr,ungroup) importFrom(glue,glue) importFrom(glue,glue_collapse) importFrom(graphics,stem) -importFrom(janitor,make_clean_names) +importFrom(haven,write_xpt) importFrom(lifecycle,deprecated) importFrom(magrittr,"%>%") importFrom(magrittr,extract2) @@ -65,6 +66,15 @@ importFrom(purrr,map_dbl) importFrom(purrr,pluck) importFrom(purrr,walk) importFrom(readr,parse_number) +importFrom(rlang,"%||%") +importFrom(rlang,":=") +importFrom(rlang,.data) +importFrom(rlang,abort) +importFrom(rlang,inform) +importFrom(rlang,local_options) +importFrom(rlang,sym) +importFrom(rlang,warn) +importFrom(rlang,with_options) importFrom(stringr,str_detect) importFrom(stringr,str_extract) importFrom(stringr,str_replace) @@ -72,7 +82,6 @@ importFrom(stringr,str_replace_all) importFrom(tidyselect,all_of) importFrom(tidyselect,any_of) importFrom(tidyselect,where) -importFrom(tm,stemDocument) importFrom(utils,capture.output) importFrom(utils,packageVersion) importFrom(utils,str) diff --git a/NEWS.md b/NEWS.md index 8e16bec5..44bd475a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,9 +6,14 @@ * All `xportr` functions now have `verbose = NULL` as the default (#151) +* Remove unused packages from Suggests (#221) + * `xportr_write()` now accepts `metadata` argument which can be used to set the dataset label to stay consistent with the other `xportr_*` functions. It is noteworthy that the dataset label set using the `xportr_df_label()` function will be retained during the `xportr_write()`. + * Exporting a new dataset `dataset_spec` that contains the Dataset Specification for ADSL. (#179) + * Added a check for character variable lengths up to 200 bytes in `xpt_validate()`(#91, #189). + * File name check is moved to strict_checks condition to allow underscores in the file name. Underscores are allowed in xpt but not per FDA requirements. (#126) * It is now possible to get and set the xportr options using the helper function `xportr_options()` (#130) @@ -16,6 +21,7 @@ * Added `xportr.character_metadata_types` and `xportr.numeric_metadata_types` to list the metadata types that are character or numeric. Updated `xportr.character_types` and `xportr.numeric_types` to list only the R types that are character and the R types that are numeric. This ensures that all R types, including dates, are now managed by xportr_type. If the R type differs from the metadata type, the variable is coerced (#161).. * Adds argument assertions to public functions using `{checkmate}` (#175) +* `xportr_split()` is a new function that allows users to split a dataset into multiple output files based on a variable. (#183) * `xportr_metadata()` can set `verbose` for a whole pipeline, i.e. setting `verbose` in `xportr_metadata()` will populate to all `xportr` functions. (#151) @@ -25,6 +31,13 @@ * New argument in `xportr_length()` allows selection between the length from metadata, as previously done, or from the calculated maximum length per variable when `length_source` is set to β€œdata” (#91) +* Series of basic checks added to the `xportr_format()` function to ensure format lengths, prefixes are accurate for the variable type. Also to ensure that any numeric date/datetime/time variables have a format. (#164) + +* Make `xportr_type()` drop factor levels when coercing variables + +* `xportr_length()` assigns the maximum length value instead of 200 for a character variable when the length is missing in the metadata (#207) + + ## Deprecation and Breaking Changes * The `domain` argument for xportr functions will no longer be dynamically @@ -32,10 +45,12 @@ determined by the name of the data frame passed as the .df argument. This was done to make the use of xportr functions more explicit. (#182) * The `label` argument from the `xportr_write()` function is deprecated in favor of the `metadata` argument. (#179) - * The `metacore` argument, which was renamed to `metadata` in the following six xportr functions: (`xportr_df_label()`, `xportr_format()`, `xportr_label()`, `xportr_length()`, `xportr_order()`, and `xportr_type()`) in version `0.3.0` with a soft deprecation warning, has now been hard deprecated. Please update your code to use the new `metadata` argument in place of `metacore`. * `SASlength` and `SAStype` were removed since they did not have an impact on `xpt_validate` or any other functions (#132) +* Removes `admiral` from suggested dependencies (#237) +* `adsl` data object is now called `adsl_xportr` (#237) +* Data objects are no longer lazy loaded, which means that when needed the user must call `data("name_of_object")` first (#237) ## Documentation @@ -45,17 +60,17 @@ done to make the use of xportr functions more explicit. (#182) * Removed non-user facing function documentation (#192) -# xportr 0.3.1 +## Miscellaneous -## New Features and Bug Fixes +* Tests use `{withr}` to create temporary files that are automatically deleted (#219) -* Make `xportr_type()` drop factor levels when coercing variables +# xportr 0.3.2 -## Documentation +* Removed unused packages, `{tm}` and `{janitor}` from Imports (#241) -* Set up Development version of Website (#187) +# xportr 0.3.1 -## Deprecation and Breaking Changes +* Fixed issues around code coverage (#170) and `lintr` (#176) # xportr 0.3.0 @@ -70,7 +85,7 @@ done to make the use of xportr functions more explicit. (#182) * Added function `xportr_metadata()` to explicitly set metadata at the start of a pipeline (#44) * Metadata order columns are now coerced to numeric by default in `xportr_order()` to prevent character sorting (#149) * Message is shown on `xportr_*` functions when the metadata being used has multiple variables with the same name in the same domain (#128) -* Fixed an issue with `xport_type()` where `DT`, `DTM` variables with a format specified in the metadata (e.g. date9., datetime20.) were being converted to numeric, which will cause a 10 year difference when reading it back by `read_xpt()`. SAS's uniform start date is 1960 whereas Linux's uniform start date is 1970 (#142). +* Fixed an issue with `xport_type()` where `DT`, `DTM` variables with a format specified in the metadata (e.g. `date9.`, `datetime20.`) were being converted to numeric, which will cause a 10 year difference when reading it back by `read_xpt()`. SAS's uniform start date is 1960 whereas Linux's uniform start date is 1970 (#142). * Fixed an issue with R's pipe `|>` that was causing functions to abort (#97) * Removed `<` and `>` as illegal characters in variable and dataset labels (#98) diff --git a/R/data.R b/R/data.R index 02267f2f..f993b070 100644 --- a/R/data.R +++ b/R/data.R @@ -2,62 +2,70 @@ #' #' An example dataset containing subject level data #' -#' @format ## `adsl` -#' A data frame with 254 rows and 48 columns: +#' @source Dataset created by `admiral::use_ad_template("adsl")` +#' @usage data("adsl_xportr") +#' +#' @format ## `adsl_xportr` +#' A data frame with 306 rows and 51 columns: #' \describe{ #' \item{STUDYID}{Study Identifier} #' \item{USUBJID}{Unique Subject Identifier} #' \item{SUBJID}{Subject Identifier for the Study} +#' \item{RFSTDTC}{Subject Reference Start Date/Time} +#' \item{RFENDTC}{Subject Reference End Date/Time} +#' \item{RFXSTDTC}{Date/Time of First Study Treatment} +#' \item{RFXENDTC}{Date/Time of Last Study Treatment} +#' \item{RFICDTC}{Date/Time of Informed Consent} +#' \item{RFPENDTC}{Date/Time of End of Participation} +#' \item{DTHDTC}{Date/Time of Death} +#' \item{DTHFL}{Subject Death Flag} #' \item{SITEID}{Study Site Identifier} -#' \item{SITEGR1}{Pooled Site Group 1} +#' \item{AGE}{Age} +#' \item{AGEU}{Age Units} +#' \item{SEX}{Sex} +#' \item{RACE}{Race} +#' \item{ETHNIC}{Ethnicity} +#' \item{ARMCD}{Planned Arm Code} #' \item{ARM}{Description of Planned Arm} +#' \item{ACTARMCD}{Actual Arm Code} +#' \item{ACTARM}{Description of Actual Arm} +#' \item{COUNTRY}{Country} +#' \item{DMDTC}{Date/Time of Collection} +#' \item{DMDY}{Study Day of Collection} #' \item{TRT01P}{Planned Treatment for Period 01} -#' \item{TRT01PN}{Planned Treatment for Period 01 (N)} #' \item{TRT01A}{Actual Treatment for Period 01} -#' \item{TRT01AN}{Actual Treatment for Period 01 (N)} +#' \item{TRTSDTM}{Datetime of First Exposure to Treatment} +#' \item{TRTSTMF}{Time of First Exposure Imputation Flag} +#' \item{TRTEDTM}{Datetime of Last Exposure to Treatment} +#' \item{TRTETMF}{Time of Last Exposure Imputation Flag} #' \item{TRTSDT}{Date of First Exposure to Treatment} #' \item{TRTEDT}{Date of Last Exposure to Treatment} -#' \item{TRTDUR}{Duration of Treatment (days)} -#' \item{AVGDD}{Avg Daily Dose (as planned)} -#' \item{CUMDOSE}{Cumulative Dose (as planned)} -#' \item{AGE}{Age} -#' \item{AGEGR1}{Pooled Age Group 1} -#' \item{AGEGR1N}{Pooled Age Group 1 (N)} -#' \item{AGEU}{Age Units} -#' \item{RACE}{Race} -#' \item{RACEN}{Race (N)} -#' \item{SEX}{Sex} -#' \item{ETHNIC}{Ethnicity} +#' \item{TRTDURD}{Total Treatment Duration (Days)} +#' \item{SCRFDT}{Screen Failure Date} +#' \item{EOSDT}{End of Study Date} +#' \item{EOSSTT}{End of Study Status} +#' \item{FRVDT}{Final Retrieval Visit Date} +#' \item{RANDDT}{Date of Randomization} +#' \item{DTHDT}{Date of Death} +#' \item{DTHDTF}{Date of Death Imputation Flag} +#' \item{DTHADY}{Relative Day of Death} +#' \item{LDDTHELD}{Elapsed Days from Last Dose to Death} +#' \item{LSTALVDT}{Date Last Known Alive} #' \item{SAFFL}{Safety Population Flag} -#' \item{ITTFL}{Intent-To-Treat Population Flag} -#' \item{EFFFL}{Efficacy Population Flag} -#' \item{COMP8FL}{Completers of Week 8 Population Flag} -#' \item{COMP16FL}{Completers of Week 16 Population Flag} -#' \item{COMP24FL}{Completers of Week 24 Population Flag} -#' \item{DISCONFL}{Did the Subject Discontinue the Study} -#' \item{DSRAEFL}{Discontinued due to AE} -#' \item{DTHFL}{Subject Died} -#' \item{BMIBL}{Baseline BMI (kg/m^2)} -#' \item{BMIBLGR1}{Pooled Baseline BMI Group 1} -#' \item{HEIGHTBL}{Baseline Height (cm)} -#' \item{WEIGHTBL}{Baseline Weight (kg)} -#' \item{EDUCLVL}{Years of Education} -#' \item{DISONSDT}{Date of Onset of Disease} -#' \item{DURDIS}{Duration of Disease (Months)} -#' \item{DURDSGR1}{Pooled Disease Duration Group 1} -#' \item{VISIT1DT}{Date of Visit 1} -#' \item{RFSTDTC}{Subject Reference Start Date/Time} -#' \item{RFENDTC}{Subject Reference End Date/Time} -#' \item{VISNUMEN}{End of Trt Visit (Vis 12 or Early Term.)} -#' \item{RFENDT}{Date of Discontinuation/Completion} -#' \item{DCDECOD}{Standardized Disposition Term} -#' \item{DCREASCD}{Reason for Discontinuation} -#' \item{MMSETOT}{MMSE Total} +#' \item{RACEGR1}{Pooled Race Group 1} +#' \item{AGEGR1}{Pooled Age Group 1} +#' \item{REGION1}{Geographic Region 1} +#' \item{LDDTHGR1}{Last Dose to Death - Days Elapsed Group 1} +#' \item{DTH30FL}{Death Within 30 Days of Last Trt Flag} +#' \item{DTHA30FL}{Death After 30 Days from Last Trt Flag} +#' \item{DTHB30FL}{Death Within 30 Days of First Trt Flag} #' } -"adsl" +"adsl_xportr" #' Example Dataset Variable Specification #' +#' @usage data("var_spec") +#' #' @format ## `var_spec` #' A data frame with 216 rows and 19 columns: #' \describe{ @@ -85,6 +93,7 @@ #' Example Dataset Specification #' +#' @usage data("dataset_spec") #' @format ## `dataset_spec` #' A data frame with 1 row and 9 columns: #' \describe{ diff --git a/R/format.R b/R/format.R index e59e1c08..f0abcca6 100644 --- a/R/format.R +++ b/R/format.R @@ -8,6 +8,64 @@ #' #' @return Data frame with `SASformat` attributes for each variable. #' +#' @section Format Checks: This function carries out a series of basic +#' checks to ensure the formats being applied make sense. +#' +#' Note, the 'type' of message that is generated will depend on the value +#' passed to the `verbose` argument: with 'stop' producing an error, 'warn' +#' producing a warning, or 'message' producing a message. A value of 'none' +#' will not output any messages. +#' +#' 1) If the variable has a suffix of `DT`, `DTM`, `TM` (indicating a +#' numeric date/time variable) then a message will be shown if there is +#' no format associated with it. +#' +#' 2) If a variable is character then a message will be shown if there is +#' no `$` prefix in the associated format. +#' +#' 3) If a variable is character then a message will be shown if the +#' associated format has greater than 31 characters (excluding the `$`). +#' +#' 4) If a variable is numeric then a message will be shown if there is a +#' `$` prefix in the associated format. +#' +#' 5) If a variable is numeric then a message will be shown if the +#' associated format has greater than 32 characters. +#' +#' 6) All formats will be checked against a list of formats considered +#' 'standard' as part of an ADaM dataset. Note, however, this list is not +#' exhaustive (it would not be feasible to check all the functions +#' within the scope of this package). If the format is not found in the +#' 'standard' list, then a message is created advising the user to +#' check. +#' +#' | \strong{Format Name} | \strong{w Values} | \strong{d Values} | +#' |----------------------|-------------------|--------------------| +#' | w.d | 1 - 32 | ., 0 - 31 | +#' | $w. | 1 - 200 | | +#' | DATEw. | ., 5 - 11 | | +#' | DATETIMEw. | 7 - 40 | | +#' | DDMMYYw. | ., 2 - 10 | | +#' | HHMM. | | | +#' | MMDDYYw. | ., 2 - 10 | | +#' | TIMEw. | ., 2 - 20 | | +#' | WEEKDATEw. | ., 3 - 37 | | +#' | YYMMDDw. | ., 2 - 10 | | +#' | B8601DAw. | ., 8 - 10 | | +#' | B8601DTw.d | ., 15 - 26 | ., 0 - 6 | +#' | B8601TM. | | | +#' | IS8601DA. | | | +#' | IS8601TM. | | | +#' | E8601DAw. | ., 10 | | +#' | E8601DNw. | ., 10 | | +#' | E8601DTw.d | ., 16 - 26 | ., 0 - 6 | +#' | E8601DXw. | ., 20 - 35 | | +#' | E8601LXw. | ., 20 - 35 | | +#' | E8601LZw. | ., 9 - 20 | | +#' | E8601TMw.d | ., 8 - 15 | ., 0 - 6 | +#' | E8601TXw. | ., 9 - 20 | | +#' | E8601TZw.d | ., 9 - 20 | ., 0 - 6 | +#' #' @section Metadata: The argument passed in the 'metadata' argument can either #' be a metacore object, or a data.frame containing the data listed below. If #' metacore is used, no changes to options are required. @@ -44,6 +102,7 @@ xportr_format <- function(.df, metadata = NULL, domain = NULL, + verbose = NULL, metacore = deprecated()) { if (!missing(metacore)) { lifecycle::deprecate_stop( @@ -60,11 +119,18 @@ xportr_format <- function(.df, metadata <- metadata %||% attr(.df, "_xportr.df_metadata_") + # Verbose should use an explicit verbose option first, then the value set in + # metadata, and finally fall back to the option value + verbose <- verbose %||% + attr(.df, "_xportr.df_verbose_") %||% + getOption("xportr.length_verbose", "none") + ## End of common section assert_data_frame(.df) assert_string(domain, null.ok = TRUE) assert_metadata(metadata) + assert_choice(verbose, choices = .internal_verbose_choices) domain_name <- getOption("xportr.domain_name") format_name <- getOption("xportr.format_name") @@ -74,7 +140,7 @@ xportr_format <- function(.df, if (domain_name %in% names(metadata) && !is.null(domain)) { metadata <- metadata %>% - dplyr::filter(!!sym(domain_name) == domain & !is.na(!!sym(format_name))) + filter(!!sym(domain_name) == domain & !is.na(!!sym(format_name))) } else { # Common check for multiple variables name check_multiple_var_specs(metadata, variable_name) @@ -90,13 +156,97 @@ xportr_format <- function(.df, names(format) <- filtered_metadata[[variable_name]] + # Returns modified .df + check_formats(.df, format, verbose) +} + +# Internal function to check formats +check_formats <- function(.df, format, verbose) { + # vector of expected formats for clinical trials (usually character or date/time) + # https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/leforinforref + # /n0p2fmevfgj470n17h4k9f27qjag.htm#n0wi06aq4kydlxn1uqc0p6eygu75 + + expected_formats <- .internal_format_list + + # w.d format for numeric variables + format_regex <- .internal_format_regex + for (i in seq_len(ncol(.df))) { - format_sas <- purrr::pluck(format, colnames(.df)[i]) - if (is.na(format_sas) || is.null(format_sas)) { - format_sas <- "" + format_sas <- pluck(format, colnames(.df)[i], .default = "") + format_sas[is.na(format_sas)] <- "" + + # series of checks for formats + + # check that any variables ending DT, DTM, TM have a format + if (identical(format_sas, "")) { + if (isTRUE(grepl("(DT|DTM|TM)$", colnames(.df)[i]))) { + message <- glue( + "(xportr::xportr_format) {encode_vars(colnames(.df)[i])} is expected to have a format but does not." + ) + xportr_logger(message, type = verbose) + } + } else { + # remaining checks to be carried out if a format exists + + # if the variable is character + if (is.character(.df[[i]])) { + # character variable formats should start with a $ + if (isFALSE(grepl("^\\$", format_sas))) { + message <- glue( + "(xportr::xportr_format)", + " {encode_vars(colnames(.df)[i])} is a character variable", + " and should have a `$` prefix." + ) + xportr_logger(message, type = verbose) + } + # character variable formats should have length <= 31 (excluding the $) + if (nchar(gsub("[.]$", "", format_sas)) > 32) { + message <- glue( + "(xportr::xportr_format)", + " Format for character variable {encode_vars(colnames(.df)[i])}", + " should have length <= 31 (excluding `$`)." + ) + xportr_logger(message, type = verbose) + } + } + + # if the variable is numeric + if (is.numeric(.df[[i]])) { + # numeric variables should not start with a $ + if (isTRUE(grepl("^\\$", format_sas))) { + message <- glue( + "(xportr::xportr_format)", + " {encode_vars(colnames(.df)[i])} is a numeric variable and", + " should not have a `$` prefix." + ) + xportr_logger(message, type = verbose) + } + # numeric variable formats should have length <= 32 + if (nchar(gsub("\\.$", "", format_sas)) > 32) { + message <- glue( + "(xportr::xportr_format)", + " Format for numeric variable {encode_vars(colnames(.df)[i])}", + " should have length <= 32." + ) + xportr_logger(message, type = verbose) + } + } + + # check if the format is either one of the expected formats or follows the regular expression for w.d format + if ( + isFALSE(format_sas %in% toupper(expected_formats)) && + isFALSE(str_detect(format_sas, pattern = format_regex)) + ) { + message <- glue( + "(xportr::xportr_format)", + " Check format {encode_vars(format_sas)} for variable {encode_vars(colnames(.df)[i])}", + " - is this correct?" + ) + xportr_logger(message, type = verbose) + } } + attr(.df[[i]], "format.sas") <- format_sas } - .df } diff --git a/R/length.R b/R/length.R index d87fe2b2..33a98394 100644 --- a/R/length.R +++ b/R/length.R @@ -2,18 +2,12 @@ #' #' Assigns the SAS length to a specified data frame, either from a metadata object #' or based on the calculated maximum data length. If a length isn't present for -#' a variable the length value is set to 200 for character columns, and 8 +#' a variable the length value is set to maximum data length for character columns, and 8 #' for non-character columns. This value is stored in the 'width' attribute of the column. #' #' @inheritParams xportr #' @param metadata A data frame containing variable level metadata. See #' 'Metadata' section for details. -#' @param domain Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset -#' the metadata object. If none is passed, then name of the dataset passed as -#' .df will be used. -#' @param verbose The action this function takes when an action is taken on the -#' dataset or function validation finds an issue. See 'Messaging' section for -#' details. Options are 'stop', 'warn', 'message', and 'none' #' @param length_source Choose the assigned length from either metadata or data. #' #' If `"metadata"` is specified, the assigned length is from the metadata length. @@ -118,51 +112,57 @@ xportr_length <- function(.df, check_multiple_var_specs(metadata, variable_name) } + # Get max length for missing length and when length_source == "data" + var_length_max <- variable_max_length(.df) + + length_data <- var_length_max[[variable_length]] + names(length_data) <- var_length_max[[variable_name]] + # Check any variables missed in metadata but present in input data --- miss_vars <- setdiff(names(.df), metadata[[variable_name]]) - length_log(miss_vars, verbose) - - if (length_source == "metadata") { + miss_length <- character(0L) + width_attr <- if (identical(length_source, "metadata")) { length_metadata <- metadata[[variable_length]] names(length_metadata) <- metadata[[variable_name]] - for (i in names(.df)) { - if (i %in% miss_vars) { - attr(.df[[i]], "width") <- impute_length(.df[[i]]) - } else { - attr(.df[[i]], "width") <- length_metadata[[i]] - } - } - } - - # Assign length from data - if (length_source == "data") { - var_length_max <- variable_max_length(.df) - - length_data <- var_length_max[[variable_length]] - names(length_data) <- var_length_max[[variable_name]] + # Check any variables with missing length in metadata + miss_length <- names(length_metadata[is.na(length_metadata)]) + + # Build `width` attribute + vapply( + names(.df), + function(i) { + if (i %in% miss_vars || is.na(length_metadata[[i]])) { + as.numeric(length_data[[i]]) + } else { + as.numeric(length_metadata[[i]]) + } + }, + numeric(1L) + ) + } else if (identical(length_source, "data")) { + length_msg <- left_join(var_length_max, metadata[, c(variable_name, variable_length)], by = variable_name) + length_msg <- length_msg %>% + mutate( + length_df = as.numeric(length_msg[[paste0(variable_length, ".x")]]), + length_meta = as.numeric(length_msg[[paste0(variable_length, ".y")]]) + ) %>% + filter(.data$length_df < .data$length_meta) %>% + select(any_of(c(variable_name, "length_df", "length_meta"))) - for (i in names(.df)) { - attr(.df[[i]], "width") <- length_data[[i]] - } + max_length_msg(length_msg, verbose) - length_msg <- left_join(var_length_max, metadata[, c(variable_name, variable_length)], by = variable_name) %>% - filter(length.x < length.y) + # Build `width` attribute + length_data[names(.df)] + } - max_length_msg(length_msg, verbose) + for (i in names(.df)) { + attr(.df[[i]], "width") <- width_attr[[i]] } + # Message for missing var and missing length + length_log(miss_vars, miss_length, verbose) .df } - -impute_length <- function(col) { - characterTypes <- getOption("xportr.character_types") - # first_class will collapse to character if it is the option - if (first_class(col) %in% "character") { - 200 - } else { - 8 - } -} diff --git a/R/messages.R b/R/messages.R index e85d6875..e7d49d64 100644 --- a/R/messages.R +++ b/R/messages.R @@ -102,17 +102,19 @@ type_log <- function(meta_ordered, type_mismatch_ind, verbose) { #' Utility for Lengths #' #' @param miss_vars Variables missing from metadata +#' @param miss_length Variables with missing length in metadata #' @param verbose Provides additional messaging for user #' #' @return Output to Console #' @noRd -length_log <- function(miss_vars, verbose) { +length_log <- function(miss_vars, miss_length, verbose) { assert_character(miss_vars) + assert_character(miss_length) assert_choice(verbose, choices = .internal_verbose_choices) - if (length(miss_vars) > 0) { + if (length(c(miss_vars, miss_length)) > 0) { cli_h2("Variable lengths missing from metadata.") - cli_alert_success("{ length(miss_vars) } lengths resolved") + cli_alert_success("{ length(c(miss_vars, miss_length)) } lengths resolved {encode_vars(c(miss_vars, miss_length))}") xportr_logger( glue( diff --git a/R/metadata.R b/R/metadata.R index e6060ece..92116eca 100644 --- a/R/metadata.R +++ b/R/metadata.R @@ -32,14 +32,12 @@ #' #' xportr_metadata(adlb, metadata, "test") #' -#' if (rlang::is_installed("magrittr")) { -#' library(magrittr) +#' library(magrittr) #' -#' adlb %>% -#' xportr_metadata(metadata, "test") %>% -#' xportr_type() %>% -#' xportr_order() -#' } +#' adlb %>% +#' xportr_metadata(metadata, "test") %>% +#' xportr_type() %>% +#' xportr_order() xportr_metadata <- function(.df, metadata = NULL, domain = NULL, diff --git a/R/split.R b/R/split.R new file mode 100644 index 00000000..74b84257 --- /dev/null +++ b/R/split.R @@ -0,0 +1,37 @@ +#' Split xpt file output +#' +#' Per the FDA Study Data Technical Conformance +#' Guide(https://www.fda.gov/media/88173/download) section 3.3.2, dataset files +#' sizes shouldn't exceed 5 GB. If datasets are large enough, they should be +#' split based on a variable. For example, laboratory readings in `ADLB` can be +#' split by `LBCAT` to split up hematology and chemistry data. +#' +#' This function will tell `xportr_write()` to split the data frame based on the +#' variable passed in `split_by`. When written, the file name will be prepended +#' with a number for uniqueness. These files should be noted in the Reviewer Guides per +#' CDISC guidance to note how you split your files. +#' +#' @inheritParams xportr_length +#' @param split_by A quoted variable that will be passed to `base::split()`. +#' +#' @return A data frame with an additional attribute added so `xportr_write()` +#' knows how to split the data frame. +#' +#' +#' @export +#' +#' @examples +#' data("adsl_xportr") +#' adsl <- adsl_xportr +#' +#' adlb <- data.frame( +#' USUBJID = c(1001, 1002, 1003), +#' LBCAT = c("HEMATOLOGY", "HEMATOLOGY", "CHEMISTRY") +#' ) +#' +#' adsl <- xportr_split(adsl, "LBCAT") +xportr_split <- function(.df, split_by = NULL) { + attr(.df, "_xportr.split_by_") <- split_by + + return(.df) +} diff --git a/R/support-test.R b/R/support-test.R index fa9cb048..29c71dc0 100644 --- a/R/support-test.R +++ b/R/support-test.R @@ -46,9 +46,11 @@ minimal_table <- function(n_rows = 3, cols = c("x", "y")) { size = n_rows, replace = TRUE ), - d = sample(Sys.Date() + c(1, -1, 10, -10), size = n_rows, replace = TRUE) + d = sample(Sys.Date() + c(1, -1, 10, -10), size = n_rows, replace = TRUE), + e = sample(c(1, 2), replace = TRUE, size = n_rows) ) %>% - select(all_of(cols)) + mutate(e = if_else(seq_along(.data$e) %% 2 == 0, NA, .data$e)) %>% + select(all_of(tolower(cols))) } #' Minimal metadata data frame mock for a ADaM dataset @@ -122,15 +124,19 @@ local_cli_theme <- function(.local_envir = parent.frame()) { `.alert-success` = list(before = NULL) ) - withr::local_options(list(cli.user_theme = cli_theme_tests), .local_envir = .local_envir) - withr::local_envvar(list(NO_COLOR = "yes"), .local_envir = .local_envir) - app <- cli::start_app(output = "message", .auto_close = FALSE) - withr::defer(cli::stop_app(app), envir = .local_envir) + # Use rlang::local_options instead of withr (Suggest package) + local_options(cli.user_theme = cli_theme_tests, .frame = .local_envir) + app <- cli::start_app(output = "message", .auto_close = FALSE, .envir = .local_envir) + + if (requireNamespace("withr", quietly = TRUE)) { + withr::local_envvar(NO_COLOR = "yes", .frame = .local_envir) + withr::defer(cli::stop_app(app), envir = .local_envir) + } } #' Test if multiple vars in spec will result in warning message #' @keywords internal -multiple_vars_in_spec_helper <- function(FUN) { +multiple_vars_in_spec_helper <- function(fun) { adsl <- minimal_table(30) metadata <- minimal_metadata( dataset = TRUE, @@ -147,18 +153,18 @@ multiple_vars_in_spec_helper <- function(FUN) { dplyr::bind_rows(metadata) %>% dplyr::rename(Dataset = "dataset") - withr::local_options(list(xportr.length_verbose = "message")) + local_options(xportr.length_verbose = "message") # Setup temporary options with active verbose and Remove empty lines in cli theme local_cli_theme() adsl %>% - FUN(metadata, "adsl") %>% + fun(metadata, "adsl") %>% testthat::expect_message("There are multiple specs for the same variable name") } #' Test if multiple vars in spec with appropriate #' @keywords internal -multiple_vars_in_spec_helper2 <- function(FUN) { +multiple_vars_in_spec_helper2 <- function(fun) { adsl <- minimal_table(30) metadata <- minimal_metadata( dataset = TRUE, @@ -175,12 +181,12 @@ multiple_vars_in_spec_helper2 <- function(FUN) { dplyr::bind_rows(metadata) %>% dplyr::rename(Dataset = "dataset") - withr::local_options(list(xportr.length_verbose = "message", xportr.domain_name = "Dataset")) + local_options(xportr.length_verbose = "message", xportr.domain_name = "Dataset") # Setup temporary options with active verbose and Remove empty lines in cli theme local_cli_theme() adsl %>% xportr_metadata(domain = "adsl") %>% - FUN(metadata, "adsl") %>% + fun(metadata, "adsl") %>% testthat::expect_no_message(message = "There are multiple specs for the same variable name") } diff --git a/R/type.R b/R/type.R index 10d5c97d..d02c3bfa 100644 --- a/R/type.R +++ b/R/type.R @@ -104,10 +104,10 @@ xportr_type <- function(.df, domain_name <- getOption("xportr.domain_name") variable_name <- getOption("xportr.variable_name") type_name <- getOption("xportr.type_name") - characterTypes <- c(getOption("xportr.character_types"), "_character") - characterMetadataTypes <- c(getOption("xportr.character_metadata_types"), "_character") - numericMetadataTypes <- c(getOption("xportr.numeric_metadata_types"), "_numeric") - numericTypes <- c(getOption("xportr.numeric_types"), "_numeric") + character_types <- c(getOption("xportr.character_types"), "_character") + character_metadata_types <- c(getOption("xportr.character_metadata_types"), "_character") + numeric_metadata_types <- c(getOption("xportr.numeric_metadata_types"), "_numeric") + numeric_types <- c(getOption("xportr.numeric_types"), "_numeric") if (inherits(metadata, "Metacore")) metadata <- metadata$var_spec @@ -134,15 +134,15 @@ xportr_type <- function(.df, mutate( # _character is used here as a mask of character, in case someone doesn't # want 'character' coerced to character - type.x = if_else(type.x %in% characterTypes, "_character", type.x), - type.x = if_else(type.x %in% numericTypes, + type.x = if_else(type.x %in% character_types, "_character", type.x), + type.x = if_else(type.x %in% numeric_types, "_numeric", type.x ), type.y = if_else(is.na(type.y), type.x, type.y), type.y = tolower(type.y), - type.y = if_else(type.y %in% characterMetadataTypes, "_character", type.y), - type.y = if_else(type.y %in% numericMetadataTypes, "_numeric", type.y) + type.y = if_else(type.y %in% character_metadata_types, "_character", type.y), + type.y = if_else(type.y %in% numeric_metadata_types, "_numeric", type.y) ) # It is possible that a variable exists in the table that isn't in the metadata @@ -166,7 +166,7 @@ xportr_type <- function(.df, orig_attributes <- attributes(.df[[i]]) orig_attributes$class <- NULL orig_attributes$levels <- NULL - if (correct_type[i] %in% characterTypes) { + if (correct_type[i] %in% character_types) { .df[[i]] <<- as.character(.df[[i]]) } else { .df[[i]] <<- as.numeric(.df[[i]]) diff --git a/R/utils-xportr.R b/R/utils-xportr.R index ca89ed74..c4c0658e 100644 --- a/R/utils-xportr.R +++ b/R/utils-xportr.R @@ -173,6 +173,66 @@ xpt_validate_var_names <- function(varnames, return(err_cnd) } +#' Internal list of formats to check +#' @noRd +.internal_format_list <- c( + NA, + "", + paste("$", 1:200, ".", sep = ""), + paste("date", 5:11, ".", sep = ""), + paste("time", 2:20, ".", sep = ""), + paste("datetime", 7:40, ".", sep = ""), + paste("yymmdd", 2:10, ".", sep = ""), + paste("mmddyy", 2:10, ".", sep = ""), + paste("ddmmyy", 2:10, ".", sep = ""), + "E8601DA.", + "E8601DA10.", + "E8601DN.", + "E8601DN10.", + "E8601TM.", + paste("E8601TM", 8:15, ".", sep = ""), + paste("E8601TM", 8:15, ".", sort(rep(0:6, 8)), sep = ""), + "E8601TZ.", + paste("E8601TZ", 9:20, ".", sep = ""), + paste("E8601TZ", 9:20, ".", sort(rep(0:6, 12)), sep = ""), + "E8601TX.", + paste("E8601TX", 9:20, ".", sep = ""), + "E8601DT.", + paste("E8601DT", 16:26, ".", sep = ""), + paste("E8601DT", 16:26, ".", sort(rep(0:6, 11)), sep = ""), + "E8601LX.", + paste("E8601LX", 20:35, ".", sep = ""), + "E8601LZ.", + paste("E8601LZ", 9:20, ".", sep = ""), + "E8601DX.", + paste("E8601DX", 20:35, ".", sep = ""), + "B8601DT.", + paste("B8601DT", 15:26, ".", sep = ""), + paste("B8601DT", 15:26, ".", sort(rep(0:6, 12)), sep = ""), + "IS8601DA.", + "B8601DA.", + paste("B8601DA", 8:10, ".", sep = ""), + "weekdate.", + paste("weekdate", 3:37, ".", sep = ""), + "mmddyy.", + "ddmmyy.", + "yymmdd.", + "date.", + "time.", + "hhmm.", + "IS8601TM.", + "E8601TM.", + "B8601TM." +) + +#' Internal regex for format w.d +#' @noRd +.internal_format_regex <- paste( + sep = "|", + "^([1-9]|[12][0-9]|3[0-2])\\.$", + "^([1-9]|[12][0-9]|3[0-2])\\.([1-9]|[12][0-9]|3[0-1])$" +) + #' Validate Dataset Can be Written to xpt #' #' Function used to validate dataframes before they are sent to @@ -222,57 +282,9 @@ xpt_validate <- function(data) { ## The usual expected formats in clinical trials: characters, dates # Formats: https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/leforinforref/n0zwce550r32van1fdd5yoixrk4d.htm - expected_formats <- c( - NA, - "", - paste("$", 1:200, ".", sep = ""), - paste("date", 5:11, ".", sep = ""), - paste("time", 2:20, ".", sep = ""), - paste("datetime", 7:40, ".", sep = ""), - paste("yymmdd", 2:10, ".", sep = ""), - paste("mmddyy", 2:10, ".", sep = ""), - paste("ddmmyy", 2:10, ".", sep = ""), - "E8601DA.", - "E8601DA10.", - "E8601DN.", - "E8601DN10.", - "E8601TM.", - paste0("E8601TM", 8:15, "."), - paste0("E8601TM", 8:15, ".", 0:6), - "E8601TZ.", - paste("E8601TZ", 9:20, "."), - paste("E8601TZ", 9:20, ".", 0:6), - "E8601TX.", - paste0("E8601TX", 9:20, "."), - "E8601DT.", - paste0("E8601DT", 16:26, "."), - paste0("E8601DT", 16:26, ".", 0:6), - "E8601LX.", - paste0("E8601LX", 20:35, "."), - "E8601LZ.", - paste0("E8601LZ", 9:20, "."), - "E8601DX.", - paste0("E8601DX", 20:35, "."), - "B8601DT.", - paste0("B8601DT", 15:26, "."), - paste0("B8601DT", 15:26, ".", 0:6), - "IS8601DA.", - "B8601DA.", - paste0("B8601DA", 8:10, "."), - "weekdate.", - paste0("weekdate", 3:37, "."), - "mmddyy.", - "ddmmyy.", - "yymmdd.", - "date.", - "time.", - "hhmm.", - "IS8601TM.", - "E8601TM.", - "B8601TM." - ) - format_regex <- "^([1-9]|[12][0-9]|3[0-2])\\.$|^([1-9]|[12][0-9]|3[0-2])\\.([1-9]|[12][0-9]|3[0-1])$" + expected_formats <- .internal_format_list + format_regex <- .internal_format_regex # 3.1 Invalid types is_valid <- toupper(formats) %in% toupper(expected_formats) | @@ -319,9 +331,9 @@ get_pipe_call <- function() { #' @return "character" or class of vector #' @noRd first_class <- function(x) { - characterTypes <- getOption("xportr.character_types") + character_types <- getOption("xportr.character_types") class_ <- tolower(class(x)[1]) - if (class_ %in% characterTypes) { + if (class_ %in% character_types) { "character" } else { class_ @@ -398,7 +410,7 @@ variable_max_length <- function(.df) { #' @param metadata A data frame or `Metacore` object containing variable level #' @inheritParams checkmate::check_logical #' metadata. -check_metadata <- function(metadata, include_fun_message, null.ok = FALSE) { +check_metadata <- function(metadata, include_fun_message, null.ok = FALSE) { # nolint: object_name. if (is.null(metadata) && null.ok) { return(TRUE) } @@ -426,9 +438,9 @@ check_metadata <- function(metadata, include_fun_message, null.ok = FALSE) { #' metadata. assert_metadata <- function(metadata, include_fun_message = TRUE, - null.ok = FALSE, + null.ok = FALSE, # nolint: object_name. add = NULL, - .var.name = vname(metadata)) { + .var.name = vname(metadata)) { # nolint: object_name. makeAssertion( metadata, check_metadata(metadata, include_fun_message, null.ok), @@ -440,3 +452,23 @@ assert_metadata <- function(metadata, #' Internal choices for verbose option #' @noRd .internal_verbose_choices <- c("none", "warn", "message", "stop") + +#' Internal function to check xpt file size +#' @noRd +check_xpt_size <- function(path) { + fs <- file.size(path) + + fs_string <- c( + "i" = paste0("xpt file size is: ", round(fs / 1e+9, 2)), " GB.", + "x" = paste0( + "XPT file sizes should not exceed 5G. It is", + " recommended you call `xportr_split` to split the file into smaller files." + ) + ) + + if (fs > 5e+9) { + cli_warn(fs_string, class = "xportr.xpt_size") # nocov + } + + invisible(NULL) +} diff --git a/R/write.R b/R/write.R index 7d55c05e..27a4884c 100644 --- a/R/write.R +++ b/R/write.R @@ -105,9 +105,28 @@ xportr_write <- function(.df, data <- as.data.frame(.df) tryCatch( - write_xpt(data, path = path, version = 5, name = name), + { + # If data is not split, data is just written out + if (is.null(attr(data, "_xportr.split_by_"))) { + write_xpt(data, path = path, version = 5, name = name) + check_xpt_size(path) + } else { + # If data is split, perform the split and get an index for the for loop + split_data <- split(data, data[[attr(data, "_xportr.split_by_")]]) + split_index <- unique(data[[attr(data, "_xportr.split_by_")]]) + paths <- get_split_path(path, seq_along(split_index)) + # Iterate on the unique values of the split + for (i in seq_along(split_index)) { + # Write out the split data, `get_split_path` will determine file name + write_xpt(split_data[[i]], + path = paths[i], version = 5, name = name + ) + check_xpt_size(paths[i]) + } + } + }, error = function(err) { - rlang::abort( + abort( paste0( "Error reported by haven::write_xpt, error was: \n", err @@ -118,3 +137,22 @@ xportr_write <- function(.df, invisible(data) } + +#' Figure out path for split data. +#' +#' Will append a number before the file extension to indicate the split. +#' +#' i.e. `adsl.xpt` will become `adsl1.xpt` and `adsl2.xpt` +#' +#' @param path Path variable specified by user +#' @param ind Index of split variable +#' +#' @noRd +get_split_path <- function(path, ind) { + paste0( + tools::file_path_sans_ext(path), + ind, + ".", + tools::file_ext(path) + ) +} diff --git a/R/xportr-package.R b/R/xportr-package.R index 00e6f528..ce107099 100644 --- a/R/xportr-package.R +++ b/R/xportr-package.R @@ -101,20 +101,21 @@ #' @keywords internal #' @aliases xportr-package #' -#' @import rlang haven +#' @importFrom lifecycle deprecated +#' @importFrom haven write_xpt +#' @importFrom rlang abort warn inform with_options local_options .data := sym +#' %||% #' @importFrom dplyr left_join bind_cols filter select rename rename_with n #' everything arrange group_by summarize mutate ungroup case_when distinct -#' tribble if_else across +#' tribble if_else across as_tibble #' @importFrom glue glue glue_collapse #' @importFrom cli cli_alert_info cli_h2 cli_alert_success cli_div cli_text -#' cli_alert_danger +#' cli_alert_danger cli_warn #' @importFrom tidyselect all_of any_of where #' @importFrom utils capture.output str tail packageVersion #' @importFrom stringr str_detect str_extract str_replace str_replace_all #' @importFrom readr parse_number #' @importFrom purrr map_chr map2_chr walk iwalk map map_dbl pluck -#' @importFrom janitor make_clean_names -#' @importFrom tm stemDocument #' @importFrom graphics stem #' @importFrom magrittr %>% extract2 #' @importFrom checkmate assert assert_character assert_choice assert_data_frame @@ -126,12 +127,11 @@ globalVariables(c( "abbr_parsed", "abbr_stem", "adj_orig", "adj_parsed", "col_pos", "dict_varname", "lower_original_varname", "my_minlength", "num_st_ind", "original_varname", "renamed_n", "renamed_var", "use_bundle", "viable_start", "type.x", "type.y", - "variable", "length.x", "lenght.y" + "variable", "length.x", "lenght.y", "e", "length_df", "length_meta" )) # The following block is used by usethis to automatically manage # roxygen namespace tags. Modify with care! ## usethis namespace: start -#' @importFrom lifecycle deprecated ## usethis namespace: end NULL diff --git a/R/xportr.R b/R/xportr.R index ed7c3ba1..d5a046cd 100644 --- a/R/xportr.R +++ b/R/xportr.R @@ -2,7 +2,7 @@ #' #' @param .df A data frame of CDISC standard. #' @param var_metadata A data frame containing variable level metadata -#' @param domain Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset +#' @param domain Appropriate CDISC dataset name, e.g. ADAE, DM. Used to subset #' the metadata object. If none is passed, then name of the dataset passed as #' .df will be used. #' @param verbose The action this function takes when an action is taken on the @@ -19,6 +19,9 @@ #' @export #' #' @examplesIf requireNamespace("magrittr") +#' data("adsl_xportr", "dataset_spec", "var_spec") +#' adsl <- adsl_xportr +#' #' library(magrittr) #' test_dir <- tempdir() #' diff --git a/README.Rmd b/README.Rmd index 8b71800d..e67d13ee 100644 --- a/README.Rmd +++ b/README.Rmd @@ -46,7 +46,7 @@ install.packages("xportr") ### Development version: ```{r, eval = FALSE} -devtools::install_github("https://github.com/atorus-research/xportr.git", ref = "devel") +install.packages("xportr", repos = c("https://pharmaverse.r-universe.dev", getOption("repos"))) ``` # What is xportr? @@ -104,33 +104,34 @@ To do this we will need to do the following: All of which can be done using a well-defined specification file and the `{xportr}` package! -First we will start with our `ADSL` dataset created in R. This example `ADSL` dataset is taken from the [`{admiral}`](https://pharmaverse.github.io/admiral/index.html) package. The script that generates this `ADSL` dataset can be created by using this command `admiral::use_ad_template("adsl")`. This `ADSL` dataset has 306 observations and 48 variables. +First we will start with our `ADSL` dataset created in R. +This example `ADSL` dataset contains 306 observations and 51 variables. ```{r, eval=TRUE, message=FALSE, warning=FALSE} library(dplyr) -library(admiral) library(xportr) -adsl <- admiral::admiral_adsl +data("adsl_xportr") +ADSL <- adsl_xportr ``` -We have created a dummy specification file called `ADaM_admiral_spec.xlsx` found in the `specs` folder of this package. You can use `system.file(paste0("specs/", "ADaM_admiral_spec.xlsx"), package = "xportr")` to access this file. +We have created a dummy specification file called `ADaM_spec.xlsx` found in the `specs` folder of this package. You can use `system.file(file.path("specs/", "ADaM_spec.xlsx"), package = "xportr")` to access this file. ```{r} -spec_path <- system.file(paste0("specs/", "ADaM_admiral_spec.xlsx"), package = "xportr") +spec_path <- system.file(file.path("specs", "ADaM_spec.xlsx"), package = "xportr") var_spec <- readxl::read_xlsx(spec_path, sheet = "Variables") %>% dplyr::rename(type = "Data Type") %>% - rlang::set_names(tolower) + dplyr::rename_with(tolower) dataset_spec <- readxl::read_xlsx(spec_path, sheet = "Datasets") %>% dplyr::rename(label = "Description") %>% - rlang::set_names(tolower) + dplyr::rename_with(tolower) ``` Each `xportr_` function has been written in a way to take in a part of the specification file and apply that piece to the dataset. Setting `verbose = "warn"` will send appropriate warning message to the console. We have suppressed the warning for the sake of brevity. ```{r, warning = FALSE, message=FALSE, eval=TRUE} -adsl %>% +ADSL %>% xportr_metadata(var_spec, "ADSL") %>% xportr_type(verbose = "warn") %>% xportr_length(verbose = "warn") %>% @@ -144,7 +145,7 @@ adsl %>% The `xportr_metadata()` function can reduce duplication by setting the variable specification and domain explicitly at the top of a pipeline. If you would like to use the `verbose` argument, you will need to set in each function call. ```{r, warning=FALSE, message=FALSE, eval=FALSE} -adsl %>% +ADSL %>% xportr_metadata(var_spec, "ADSL", verbose = "warn") %>% xportr_type() %>% xportr_length() %>% @@ -159,7 +160,7 @@ Furthermore, if you're calling all xportr functions at once with common metadata ```{r, warning=FALSE, message=FALSE, eval=FALSE} xportr( - .df = adsl, + .df = ADSL, var_metadata = var_spec, df_metadata = dataset_spec, domain = "ADSL", diff --git a/README.md b/README.md index e64b19ed..9277dc47 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ install.packages("xportr") ### Development version: ``` r -devtools::install_github("https://github.com/atorus-research/xportr.git", ref = "devel") +install.packages("xportr", repos = c("https://pharmaverse.r-universe.dev", getOption("repos"))) ``` # What is xportr? @@ -100,35 +100,30 @@ All of which can be done using a well-defined specification file and the `{xportr}` package! First we will start with our `ADSL` dataset created in R. This example -`ADSL` dataset is taken from the -[`{admiral}`](https://pharmaverse.github.io/admiral/index.html) package. -The script that generates this `ADSL` dataset can be created by using -this command `admiral::use_ad_template("adsl")`. This `ADSL` dataset has -306 observations and 48 variables. +`ADSL` dataset contains 306 observations and 51 variables. ``` r library(dplyr) -library(admiral) library(xportr) -adsl <- admiral::admiral_adsl +data("adsl_xportr") +ADSL <- adsl_xportr ``` -We have created a dummy specification file called -`ADaM_admiral_spec.xlsx` found in the `specs` folder of this package. -You can use -`system.file(paste0("specs/", "ADaM_admiral_spec.xlsx"), package = "xportr")` +We have created a dummy specification file called `ADaM_spec.xlsx` found +in the `specs` folder of this package. You can use +`system.file(file.path("specs/", "ADaM_spec.xlsx"), package = "xportr")` to access this file. ``` r -spec_path <- system.file(paste0("specs/", "ADaM_admiral_spec.xlsx"), package = "xportr") +spec_path <- system.file(file.path("specs", "ADaM_spec.xlsx"), package = "xportr") var_spec <- readxl::read_xlsx(spec_path, sheet = "Variables") %>% dplyr::rename(type = "Data Type") %>% - rlang::set_names(tolower) + dplyr::rename_with(tolower) dataset_spec <- readxl::read_xlsx(spec_path, sheet = "Datasets") %>% dplyr::rename(label = "Description") %>% - rlang::set_names(tolower) + dplyr::rename_with(tolower) ``` Each `xportr_` function has been written in a way to take in a part of @@ -137,7 +132,7 @@ the specification file and apply that piece to the dataset. Setting We have suppressed the warning for the sake of brevity. ``` r -adsl %>% +ADSL %>% xportr_metadata(var_spec, "ADSL") %>% xportr_type(verbose = "warn") %>% xportr_length(verbose = "warn") %>% @@ -154,7 +149,7 @@ If you would like to use the `verbose` argument, you will need to set in each function call. ``` r -adsl %>% +ADSL %>% xportr_metadata(var_spec, "ADSL", verbose = "warn") %>% xportr_type() %>% xportr_length() %>% @@ -170,7 +165,7 @@ metadata and verbosity, you can shorten it by simply using `xportr()`. ``` r xportr( - .df = adsl, + .df = ADSL, var_metadata = var_spec, df_metadata = dataset_spec, domain = "ADSL", diff --git a/_pkgdown.yml b/_pkgdown.yml index 6f8dc67c..2bdcf856 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -32,6 +32,7 @@ reference: - xportr_order - xportr_df_label - xportr_metadata + - xportr_split - xportr - title: xportr helper functions @@ -42,7 +43,7 @@ reference: - title: xportr example datasets and specification files - contents: - - adsl + - adsl_xportr - var_spec - dataset_spec diff --git a/data/adsl.rda b/data/adsl.rda deleted file mode 100644 index 21ae127f..00000000 Binary files a/data/adsl.rda and /dev/null differ diff --git a/data/adsl_xportr.rda b/data/adsl_xportr.rda new file mode 100644 index 00000000..ce36d188 Binary files /dev/null and b/data/adsl_xportr.rda differ diff --git a/dev/var_names_xportr.Rmd b/dev/var_names_xportr.Rmd index 7d0b6e5b..3dd9f286 100644 --- a/dev/var_names_xportr.Rmd +++ b/dev/var_names_xportr.Rmd @@ -34,16 +34,14 @@ The 6 main functions within `xportr` pacakge: * `xportr_write()` -The demo will make use of a small `ADSL` data set that is apart of the [`{admiral}`](https://pharmaverse.github.io/admiral/index.html) package. The script that generates this `ADSL` dataset can be created by using this command `admiral::use_ad_template("adsl")`. - -The `ADSL` has the following features: +The demo will make use of a small `ADSL` dataset available with the `xportr` package and has the following features: * 306 observations -* 48 variables +* 54 variables * Data types other than character and numeric * Missing labels on variables * Missing label for data set -* Order of varibles not following specification file +* Order of variables not following specification file * Formats missing
@@ -53,15 +51,15 @@ library(haven) library(dplyr) library(labelled) library(xportr) -library(admiral) -adsl <- adsl <- admiral::admiral_adsl +data("adsl_xportr") +ADSL <- adsl_xportr ```
```{r, echo = FALSE} -DT::datatable(adsl, options = list( +DT::datatable(ADSL, options = list( autoWidth = FALSE, scrollX = TRUE, pageLength = 5, lengthMenu = c(5, 10, 15, 20) )) @@ -74,16 +72,17 @@ DT::datatable(adsl, options = list(
-In order to make use of the functions within `xportr` you will need to create an R data frame that contains your specification file. You will most likely need to do some pre-processing of your spec sheets after loading in the spec files for them to work appropriately with the `xportr` functions. Please see our example spec sheets in `system.file(paste0("specs/", "ADaM_admiral_spec.xlsx"), package = "xportr")`. +In order to make use of the functions within `xportr` you will need to create an R data frame that contains your specification file. You will most likely need to do some pre-processing of your spec sheets after loading in the spec files for them to work appropriately with the `xportr` functions. Please see our example spec sheets in `system.file(file.path("specs", "ADaM_spec.xlsx"), package = "xportr")`.
```{r} var_spec <- readxl::read_xlsx( - system.file(paste0("specs/", "ADaM_admiral_spec.xlsx"), package = "xportr"), sheet = "Variables") %>% + system.file(file.path("specs", "ADaM_spec.xlsx"), package = "xportr"), + sheet = "Variables" +) %>% dplyr::rename(type = "Data Type") %>% - rlang::set_names(tolower) - + dplyr::rename_with(tolower) ```
@@ -110,7 +109,7 @@ DT::datatable(var_spec_view, options = list( In order to be compliant with transport v5 specifications an `xpt` file can only have two data types: character and numeric/dbl. Currently the `ADSL` data set has chr, dbl, time, factor and date. ```{r, eval = TRUE} -look_for(adsl, details = TRUE) +look_for(ADSL, details = TRUE) ```
@@ -120,7 +119,7 @@ Using `xport_type` and the supplied specification file, we can *coerce* the vari
```{r, echo = TRUE} -adsl_type <- xportr_type(adsl,var_spec, domain = "ADSL", verbose = "message") +adsl_type <- xportr_type(ADSL, var_spec, domain = "ADSL", verbose = "message") ```
@@ -138,9 +137,9 @@ Next we can apply the lengths from a variable level specification file to the da
```{r} -capture.output(str(adsl, give.head=TRUE)) %>% - as_tibble() %>% - head(n=7) +capture.output(str(ADSL, give.head = TRUE)) %>% + as_tibble() %>% + head(n = 7) ```
@@ -150,15 +149,15 @@ No lengths have been applied to the variables as seen in the printout for the fi
```{r} -adsl_length <- adsl %>% xportr_length(var_spec, "ADSL", "message") +adsl_length <- ADSL %>% xportr_length(var_spec, "ADSL", "message") ```
```{r} -capture.output(str(adsl_length, give.head=TRUE)) %>% - as_tibble() %>% - head(n=7) +capture.output(str(adsl_length, give.head = TRUE)) %>% + as_tibble() %>% + head(n = 7) ```
@@ -180,7 +179,7 @@ asdf Please observe that our `ADSL` dataset is missing many variable labels. Sometimes these labels can be lost while using R's function. However, A CDISC compliant data set needs to have each variable with a variable label. ```{r, eval = TRUE} -look_for(adsl, details = FALSE) +look_for(ADSL, details = FALSE) ```
@@ -190,7 +189,7 @@ Using the `xport_label` function we can take the specifications file and label a
```{r} -adsl_update <- adsl %>% xportr_label(var_spec, "ADSL", "message") +adsl_update <- ADSL %>% xportr_label(var_spec, "ADSL", "message") ``` ```{r} @@ -206,16 +205,16 @@ An appropriate data set label must be supplied as well. Currently, the `ADSL` d #### Option1 ```{r, eval = TRUE} -capture.output(str(adsl))[45] +capture.output(str(ADSL))[45] ``` ```{r, eval = FALSE} -adsl_df_lbl <- adsl %>% xportr_df_label(data_spec, "ADSL") -adsl %>% xportr_varnames("message") +adsl_df_lbl <- ADSL %>% xportr_df_label(data_spec, "ADSL") +ADSL %>% xportr_varnames("message") ``` ```{r, eval = FALSE} -attr(adsl, "label") +attr(ADSL, "label") ```
@@ -241,10 +240,8 @@ For strings containing variable names `xportr_tidy_rename()` was designed to ide ```{r eval=FALSE} renamed_var_spec <- var_spec %>% filter(dataset == "ADSL") %>% - mutate(tidy_variable = xportr_tidy_rename(variable) - ) %>% + mutate(tidy_variable = xportr_tidy_rename(variable)) %>% select(order, dataset, variable, tidy_variable, tidyselect::everything()) - ``` Note the above messages detail the rule(s) and variable names that were out of compliance, the number of renamed variables, and even the old and newly tidied names. The function uses a step-wise renaming algorithm to maintain the original variable names characteristics as much as possible. Below is the view of the the old variable names juxtaposed the new. @@ -252,7 +249,7 @@ Note the above messages detail the rule(s) and variable names that were out of c
```{r, echo = FALSE, eval = FALSE} -just_new_names <- +just_new_names <- renamed_var_spec %>% filter(variable != tidy_variable) @@ -264,16 +261,18 @@ DT::datatable(just_new_names, options = list( The function `xportr_varnames()` takes `xportr_tidy_rename` a step further to help users change the data.frame columns directly. It also is slightly less flexible from a customization perspective, attempting to follow the submission constraints precisely where as `xportr_tidy_rename` can be used for a wider breadth of renaming applications. Executing the code below, we create a new data.frame called `adsl_renamed`, but the variable names are already compliant. The `identical` function below shows us that nothing has changed. ```{r, eval = FALSE} -adsl_renamed <- adsl %>% xportr_varnames() -identical(adsl, adsl_renamed) +adsl_renamed <- ADSL %>% xportr_varnames() +identical(ADSL, adsl_renamed) ``` In the interest of showcasing `xportr_varnames` capabilities, we create a fictional data.frame `adxx` riddled with non-compliant variable names below. In fact, `adxx` only has one valid variable name: "STUDYID". Calling the function shows this data.frame's variables violate all four compliance rules and proceeds to rename all but "STUDYID". ```{r, eval = FALSE} -varnames <- c("", "STUDYID", "studyid", "subject id", "1c. ENT", "1b. Eyes", - "1d. Lungs", "1e. Heart", "year number", "1a. Skin_Desc") +varnames <- c( + "", "STUDYID", "studyid", "subject id", "1c. ENT", "1b. Eyes", + "1d. Lungs", "1e. Heart", "year number", "1a. Skin_Desc" +) adxx <- data.frame(matrix(0, ncol = 10, nrow = 3)) colnames(adxx) <- varnames @@ -287,10 +286,10 @@ Second, if your organization holds an ontology of controlled terms, you can then ```{r, eval = FALSE} my_dictionary <- data.frame(original_varname = "subject id", dict_varname = "SUBJID") xportr_varnames(adxx, - relo_2_end = FALSE, - letter_for_num_prefix = "x", - dict_dat = my_dictionary) # 'SUBJID' used - + relo_2_end = FALSE, + letter_for_num_prefix = "x", + dict_dat = my_dictionary +) # 'SUBJID' used ``` Please review the documentation using `?xportr_tidy_rename` OR `?xportr_varnames` to learn how the abbreviation algorithm works and to further customize the renaming of your variable names using a slurry of additional arguments. @@ -304,8 +303,7 @@ Please review the documentation using `?xportr_tidy_rename` OR `?xportr_varnames Finally, we arrive at exporting the R data frame object as a xpt file with the function `xportr_write`. The xpt file will be written directly to your current working directory. To make it more interesting, we have put together all five function with the magrittr pipe. A user can now coerce, apply length, variable labels, data set label and write out their final xpt file in one pipe! Appropriate warnings and messages will be supplied to a user for any potential issues before sending off to standard clinical data set validator application or data reviewers. ```{r, eval=FALSE} - -adsl %>% +ADSL %>% xportr_type(var_spec, "ADSL", "message") %>% xportr_length(var_spec, "ADSL", "message") %>% xportr_label(var_spec, "ADSL", "message") %>% @@ -318,7 +316,7 @@ Optionally, leave out xportr_varnames and instead use `tidy_varnames = TRUE` in ```{r, eval=FALSE} # No xportr_varnames()! -adsl %>% +ADSL %>% xportr_type(var_spec, "ADSL", "message") %>% xportr_length(var_spec, "ADSL", "message") %>% xportr_label(var_spec, "ADSL", "message") %>% diff --git a/inst/WORDLIST b/inst/WORDLIST index aff5d7c5..c0d8ae96 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,46 +1,79 @@ ADAE ADSL ADaM +adlb AE Atorus BMI CDISC -CDSIC Codelist Completers +DATETIMEw +DATEw +DAw DCREASCD +DDMMYYw DM +DNw +DTw +DXw +Didenko +fda GSK +HHMM JPT -Lifecycle +LXw +LZw +MMDDYYw MMSE ORCID PHUSE Pharma Repostiory +SAS's SASformat SDSP SDTM Standardisation +TIMEw +TMw TRTDUR +TXw +TZw +Thanikachalam Trt Vignesh Vis +WEEKDATEw XPT +YYMMDDw acrf adrg bootswatch chr -cli +datetime deliverables df +durationdatetime +incompletedatetime +intervaldatetime iso magrittr metacore +num +partialdate +partialdatetime +partialtime +posixct +posixt pre repo +sas sdrg validator validators xportr's xpt +https +lbcat +www diff --git a/inst/specs/ADaM_admiral_spec.xlsx b/inst/specs/ADaM_admiral_spec.xlsx deleted file mode 100644 index 0d98afb4..00000000 Binary files a/inst/specs/ADaM_admiral_spec.xlsx and /dev/null differ diff --git a/inst/specs/ADaM_spec.xlsx b/inst/specs/ADaM_spec.xlsx index f05d1b62..1edddf47 100644 Binary files a/inst/specs/ADaM_spec.xlsx and b/inst/specs/ADaM_spec.xlsx differ diff --git a/man/adsl.Rd b/man/adsl.Rd deleted file mode 100644 index cd943674..00000000 --- a/man/adsl.Rd +++ /dev/null @@ -1,69 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{adsl} -\alias{adsl} -\title{Analysis Dataset Subject Level} -\format{ -\subsection{\code{adsl}}{ - -A data frame with 254 rows and 48 columns: -\describe{ -\item{STUDYID}{Study Identifier} -\item{USUBJID}{Unique Subject Identifier} -\item{SUBJID}{Subject Identifier for the Study} -\item{SITEID}{Study Site Identifier} -\item{SITEGR1}{Pooled Site Group 1} -\item{ARM}{Description of Planned Arm} -\item{TRT01P}{Planned Treatment for Period 01} -\item{TRT01PN}{Planned Treatment for Period 01 (N)} -\item{TRT01A}{Actual Treatment for Period 01} -\item{TRT01AN}{Actual Treatment for Period 01 (N)} -\item{TRTSDT}{Date of First Exposure to Treatment} -\item{TRTEDT}{Date of Last Exposure to Treatment} -\item{TRTDUR}{Duration of Treatment (days)} -\item{AVGDD}{Avg Daily Dose (as planned)} -\item{CUMDOSE}{Cumulative Dose (as planned)} -\item{AGE}{Age} -\item{AGEGR1}{Pooled Age Group 1} -\item{AGEGR1N}{Pooled Age Group 1 (N)} -\item{AGEU}{Age Units} -\item{RACE}{Race} -\item{RACEN}{Race (N)} -\item{SEX}{Sex} -\item{ETHNIC}{Ethnicity} -\item{SAFFL}{Safety Population Flag} -\item{ITTFL}{Intent-To-Treat Population Flag} -\item{EFFFL}{Efficacy Population Flag} -\item{COMP8FL}{Completers of Week 8 Population Flag} -\item{COMP16FL}{Completers of Week 16 Population Flag} -\item{COMP24FL}{Completers of Week 24 Population Flag} -\item{DISCONFL}{Did the Subject Discontinue the Study} -\item{DSRAEFL}{Discontinued due to AE} -\item{DTHFL}{Subject Died} -\item{BMIBL}{Baseline BMI (kg/m^2)} -\item{BMIBLGR1}{Pooled Baseline BMI Group 1} -\item{HEIGHTBL}{Baseline Height (cm)} -\item{WEIGHTBL}{Baseline Weight (kg)} -\item{EDUCLVL}{Years of Education} -\item{DISONSDT}{Date of Onset of Disease} -\item{DURDIS}{Duration of Disease (Months)} -\item{DURDSGR1}{Pooled Disease Duration Group 1} -\item{VISIT1DT}{Date of Visit 1} -\item{RFSTDTC}{Subject Reference Start Date/Time} -\item{RFENDTC}{Subject Reference End Date/Time} -\item{VISNUMEN}{End of Trt Visit (Vis 12 or Early Term.)} -\item{RFENDT}{Date of Discontinuation/Completion} -\item{DCDECOD}{Standardized Disposition Term} -\item{DCREASCD}{Reason for Discontinuation} -\item{MMSETOT}{MMSE Total} -} -} -} -\usage{ -adsl -} -\description{ -An example dataset containing subject level data -} -\keyword{datasets} diff --git a/man/adsl_xportr.Rd b/man/adsl_xportr.Rd new file mode 100644 index 00000000..5bc8d642 --- /dev/null +++ b/man/adsl_xportr.Rd @@ -0,0 +1,75 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{adsl_xportr} +\alias{adsl_xportr} +\title{Analysis Dataset Subject Level} +\format{ +\subsection{\code{adsl_xportr}}{ + +A data frame with 306 rows and 51 columns: +\describe{ +\item{STUDYID}{Study Identifier} +\item{USUBJID}{Unique Subject Identifier} +\item{SUBJID}{Subject Identifier for the Study} +\item{RFSTDTC}{Subject Reference Start Date/Time} +\item{RFENDTC}{Subject Reference End Date/Time} +\item{RFXSTDTC}{Date/Time of First Study Treatment} +\item{RFXENDTC}{Date/Time of Last Study Treatment} +\item{RFICDTC}{Date/Time of Informed Consent} +\item{RFPENDTC}{Date/Time of End of Participation} +\item{DTHDTC}{Date/Time of Death} +\item{DTHFL}{Subject Death Flag} +\item{SITEID}{Study Site Identifier} +\item{AGE}{Age} +\item{AGEU}{Age Units} +\item{SEX}{Sex} +\item{RACE}{Race} +\item{ETHNIC}{Ethnicity} +\item{ARMCD}{Planned Arm Code} +\item{ARM}{Description of Planned Arm} +\item{ACTARMCD}{Actual Arm Code} +\item{ACTARM}{Description of Actual Arm} +\item{COUNTRY}{Country} +\item{DMDTC}{Date/Time of Collection} +\item{DMDY}{Study Day of Collection} +\item{TRT01P}{Planned Treatment for Period 01} +\item{TRT01A}{Actual Treatment for Period 01} +\item{TRTSDTM}{Datetime of First Exposure to Treatment} +\item{TRTSTMF}{Time of First Exposure Imputation Flag} +\item{TRTEDTM}{Datetime of Last Exposure to Treatment} +\item{TRTETMF}{Time of Last Exposure Imputation Flag} +\item{TRTSDT}{Date of First Exposure to Treatment} +\item{TRTEDT}{Date of Last Exposure to Treatment} +\item{TRTDURD}{Total Treatment Duration (Days)} +\item{SCRFDT}{Screen Failure Date} +\item{EOSDT}{End of Study Date} +\item{EOSSTT}{End of Study Status} +\item{FRVDT}{Final Retrieval Visit Date} +\item{RANDDT}{Date of Randomization} +\item{DTHDT}{Date of Death} +\item{DTHDTF}{Date of Death Imputation Flag} +\item{DTHADY}{Relative Day of Death} +\item{LDDTHELD}{Elapsed Days from Last Dose to Death} +\item{LSTALVDT}{Date Last Known Alive} +\item{SAFFL}{Safety Population Flag} +\item{RACEGR1}{Pooled Race Group 1} +\item{AGEGR1}{Pooled Age Group 1} +\item{REGION1}{Geographic Region 1} +\item{LDDTHGR1}{Last Dose to Death - Days Elapsed Group 1} +\item{DTH30FL}{Death Within 30 Days of Last Trt Flag} +\item{DTHA30FL}{Death After 30 Days from Last Trt Flag} +\item{DTHB30FL}{Death Within 30 Days of First Trt Flag} +} +} +} +\source{ +Dataset created by \code{admiral::use_ad_template("adsl")} +} +\usage{ +data("adsl_xportr") +} +\description{ +An example dataset containing subject level data +} +\keyword{datasets} diff --git a/man/dataset_spec.Rd b/man/dataset_spec.Rd index 6d581ab2..e89cf02d 100644 --- a/man/dataset_spec.Rd +++ b/man/dataset_spec.Rd @@ -22,7 +22,7 @@ A data frame with 1 row and 9 columns: } } \usage{ -dataset_spec +data("dataset_spec") } \description{ Example Dataset Specification diff --git a/man/metadata.Rd b/man/metadata.Rd index 30918a0c..cd286905 100644 --- a/man/metadata.Rd +++ b/man/metadata.Rd @@ -12,7 +12,7 @@ xportr_metadata(.df, metadata = NULL, domain = NULL, verbose = NULL) \item{metadata}{A data frame containing variable level metadata. See 'Metadata' section for details.} -\item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset +\item{domain}{Appropriate CDISC dataset name, e.g. ADAE, DM. Used to subset the metadata object. If none is passed, then name of the dataset passed as .df will be used.} @@ -49,12 +49,10 @@ adlb <- data.frame( xportr_metadata(adlb, metadata, "test") -if (rlang::is_installed("magrittr")) { - library(magrittr) +library(magrittr) - adlb \%>\% - xportr_metadata(metadata, "test") \%>\% - xportr_type() \%>\% - xportr_order() -} +adlb \%>\% + xportr_metadata(metadata, "test") \%>\% + xportr_type() \%>\% + xportr_order() } diff --git a/man/multiple_vars_in_spec_helper.Rd b/man/multiple_vars_in_spec_helper.Rd index d3cefce6..6553f5aa 100644 --- a/man/multiple_vars_in_spec_helper.Rd +++ b/man/multiple_vars_in_spec_helper.Rd @@ -4,7 +4,7 @@ \alias{multiple_vars_in_spec_helper} \title{Test if multiple vars in spec will result in warning message} \usage{ -multiple_vars_in_spec_helper(FUN) +multiple_vars_in_spec_helper(fun) } \description{ Test if multiple vars in spec will result in warning message diff --git a/man/multiple_vars_in_spec_helper2.Rd b/man/multiple_vars_in_spec_helper2.Rd index f3e09957..c9b6c471 100644 --- a/man/multiple_vars_in_spec_helper2.Rd +++ b/man/multiple_vars_in_spec_helper2.Rd @@ -4,7 +4,7 @@ \alias{multiple_vars_in_spec_helper2} \title{Test if multiple vars in spec with appropriate} \usage{ -multiple_vars_in_spec_helper2(FUN) +multiple_vars_in_spec_helper2(fun) } \description{ Test if multiple vars in spec with appropriate diff --git a/man/var_spec.Rd b/man/var_spec.Rd index 5460c33d..06aba9d9 100644 --- a/man/var_spec.Rd +++ b/man/var_spec.Rd @@ -32,7 +32,7 @@ A data frame with 216 rows and 19 columns: } } \usage{ -var_spec +data("var_spec") } \description{ Example Dataset Variable Specification diff --git a/man/xportr-package.Rd b/man/xportr-package.Rd index 8f4327ff..d077e53c 100644 --- a/man/xportr-package.Rd +++ b/man/xportr-package.Rd @@ -3,7 +3,6 @@ \docType{package} \name{xportr-package} \alias{xportr-package} -\alias{_PACKAGE} \title{The \code{xportr} package} \description{ \code{xportr} is designed to be a clinical workflow friendly method for outputting @@ -86,7 +85,8 @@ coerce R classes to numeric XPT types. Default: c("integer", "numeric", "num", " } \item{ xportr.numeric_types - The default character vector used to explicitly -coerce R classes to numeric XPT types. Default: c("integer", "float", "numeric", "posixct", "posixt", "time", "date") +coerce R classes to numeric XPT types. Default: c("integer", "float", +"numeric", "posixct", "posixt", "time", "date") } } } @@ -109,6 +109,7 @@ options update in the \code{.Rprofile.site} file in the R home directory.} \seealso{ Useful links: \itemize{ + \item \url{https://atorus-research.github.io/xportr/} \item \url{https://github.com/atorus-research/xportr} \item Report bugs at \url{https://github.com/atorus-research/xportr/issues} } @@ -119,15 +120,14 @@ Useful links: Authors: \itemize{ - \item Vignesh Thanikachalam \item Ben Straub - \item Ross Didenko \item Zelos Zhu \item Ethan Brockmann \item Vedha Viyash \item Andre Verissimo \item Sophie Shapcott \item Celine Piraux + \item Kangjie Zhang \item Adrian Chan \item Sadchla Mascary } diff --git a/man/xportr.Rd b/man/xportr.Rd index c810dae1..bc5dcf97 100644 --- a/man/xportr.Rd +++ b/man/xportr.Rd @@ -21,7 +21,7 @@ xportr( \item{df_metadata}{A data frame containing dataset level metadata.} -\item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset +\item{domain}{Appropriate CDISC dataset name, e.g. ADAE, DM. Used to subset the metadata object. If none is passed, then name of the dataset passed as .df will be used.} @@ -44,6 +44,9 @@ Wrapper to apply all core xportr functions and write xpt } \examples{ \dontshow{if (requireNamespace("magrittr")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +data("adsl_xportr", "dataset_spec", "var_spec") +adsl <- adsl_xportr + library(magrittr) test_dir <- tempdir() diff --git a/man/xportr_df_label.Rd b/man/xportr_df_label.Rd index 691de990..5f95d771 100644 --- a/man/xportr_df_label.Rd +++ b/man/xportr_df_label.Rd @@ -12,7 +12,7 @@ xportr_df_label(.df, metadata = NULL, domain = NULL, metacore = deprecated()) \item{metadata}{A data frame containing dataset. See 'Metadata' section for details.} -\item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset +\item{domain}{Appropriate CDISC dataset name, e.g. ADAE, DM. Used to subset the metadata object. If none is passed, then name of the dataset passed as .df will be used.} diff --git a/man/xportr_format.Rd b/man/xportr_format.Rd index e085a345..e45f66dc 100644 --- a/man/xportr_format.Rd +++ b/man/xportr_format.Rd @@ -4,7 +4,13 @@ \alias{xportr_format} \title{Assign SAS Format} \usage{ -xportr_format(.df, metadata = NULL, domain = NULL, metacore = deprecated()) +xportr_format( + .df, + metadata = NULL, + domain = NULL, + verbose = NULL, + metacore = deprecated() +) } \arguments{ \item{.df}{A data frame of CDISC standard.} @@ -12,10 +18,14 @@ xportr_format(.df, metadata = NULL, domain = NULL, metacore = deprecated()) \item{metadata}{A data frame containing variable level metadata. See 'Metadata' section for details.} -\item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset +\item{domain}{Appropriate CDISC dataset name, e.g. ADAE, DM. Used to subset the metadata object. If none is passed, then name of the dataset passed as .df will be used.} +\item{verbose}{The action this function takes when an action is taken on the +dataset or function validation finds an issue. See 'Messaging' section for +details. Options are 'stop', 'warn', 'message', and 'none'} + \item{metacore}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Previously used to pass metadata now renamed with \code{metadata}} } @@ -27,6 +37,61 @@ Assigns a SAS format from a variable level metadata to a given data frame. If no format is found for a given variable, it is set as an empty character vector. This is stored in the '\code{format.sas}' attribute. } +\section{Format Checks}{ + This function carries out a series of basic +checks to ensure the formats being applied make sense. + +Note, the 'type' of message that is generated will depend on the value +passed to the \code{verbose} argument: with 'stop' producing an error, 'warn' +producing a warning, or 'message' producing a message. A value of 'none' +will not output any messages. +\enumerate{ +\item If the variable has a suffix of \code{DT}, \code{DTM}, \code{TM} (indicating a +numeric date/time variable) then a message will be shown if there is +no format associated with it. +\item If a variable is character then a message will be shown if there is +no \code{$} prefix in the associated format. +\item If a variable is character then a message will be shown if the +associated format has greater than 31 characters (excluding the \code{$}). +\item If a variable is numeric then a message will be shown if there is a +\code{$} prefix in the associated format. +\item If a variable is numeric then a message will be shown if the +associated format has greater than 32 characters. +\item All formats will be checked against a list of formats considered +'standard' as part of an ADaM dataset. Note, however, this list is not +exhaustive (it would not be feasible to check all the functions +within the scope of this package). If the format is not found in the +'standard' list, then a message is created advising the user to +check. +}\tabular{lll}{ + \strong{Format Name} \tab \strong{w Values} \tab \strong{d Values} \cr + w.d \tab 1 - 32 \tab ., 0 - 31 \cr + $w. \tab 1 - 200 \tab \cr + DATEw. \tab ., 5 - 11 \tab \cr + DATETIMEw. \tab 7 - 40 \tab \cr + DDMMYYw. \tab ., 2 - 10 \tab \cr + HHMM. \tab \tab \cr + MMDDYYw. \tab ., 2 - 10 \tab \cr + TIMEw. \tab ., 2 - 20 \tab \cr + WEEKDATEw. \tab ., 3 - 37 \tab \cr + YYMMDDw. \tab ., 2 - 10 \tab \cr + B8601DAw. \tab ., 8 - 10 \tab \cr + B8601DTw.d \tab ., 15 - 26 \tab ., 0 - 6 \cr + B8601TM. \tab \tab \cr + IS8601DA. \tab \tab \cr + IS8601TM. \tab \tab \cr + E8601DAw. \tab ., 10 \tab \cr + E8601DNw. \tab ., 10 \tab \cr + E8601DTw.d \tab ., 16 - 26 \tab ., 0 - 6 \cr + E8601DXw. \tab ., 20 - 35 \tab \cr + E8601LXw. \tab ., 20 - 35 \tab \cr + E8601LZw. \tab ., 9 - 20 \tab \cr + E8601TMw.d \tab ., 8 - 15 \tab ., 0 - 6 \cr + E8601TXw. \tab ., 9 - 20 \tab \cr + E8601TZw.d \tab ., 9 - 20 \tab ., 0 - 6 \cr +} +} + \section{Metadata}{ The argument passed in the 'metadata' argument can either be a metacore object, or a data.frame containing the data listed below. If diff --git a/man/xportr_label.Rd b/man/xportr_label.Rd index eb03df81..a61e0583 100644 --- a/man/xportr_label.Rd +++ b/man/xportr_label.Rd @@ -18,7 +18,7 @@ xportr_label( \item{metadata}{A data frame containing variable level metadata. See 'Metadata' section for details.} -\item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset +\item{domain}{Appropriate CDISC dataset name, e.g. ADAE, DM. Used to subset the metadata object. If none is passed, then name of the dataset passed as .df will be used.} diff --git a/man/xportr_length.Rd b/man/xportr_length.Rd index 8d034eb8..c3180a71 100644 --- a/man/xportr_length.Rd +++ b/man/xportr_length.Rd @@ -19,7 +19,7 @@ xportr_length( \item{metadata}{A data frame containing variable level metadata. See 'Metadata' section for details.} -\item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset +\item{domain}{Appropriate CDISC dataset name, e.g. ADAE, DM. Used to subset the metadata object. If none is passed, then name of the dataset passed as .df will be used.} @@ -43,7 +43,7 @@ Data frame with SAS default length attributes for each variable. \description{ Assigns the SAS length to a specified data frame, either from a metadata object or based on the calculated maximum data length. If a length isn't present for -a variable the length value is set to 200 for character columns, and 8 +a variable the length value is set to maximum data length for character columns, and 8 for non-character columns. This value is stored in the 'width' attribute of the column. } \section{Messaging}{ diff --git a/man/xportr_order.Rd b/man/xportr_order.Rd index 26b87f42..03617d4f 100644 --- a/man/xportr_order.Rd +++ b/man/xportr_order.Rd @@ -18,7 +18,7 @@ xportr_order( \item{metadata}{A data frame containing variable level metadata. See 'Metadata' section for details.} -\item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset +\item{domain}{Appropriate CDISC dataset name, e.g. ADAE, DM. Used to subset the metadata object. If none is passed, then name of the dataset passed as .df will be used.} diff --git a/man/xportr_split.Rd b/man/xportr_split.Rd new file mode 100644 index 00000000..5c9da890 --- /dev/null +++ b/man/xportr_split.Rd @@ -0,0 +1,41 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/split.R +\name{xportr_split} +\alias{xportr_split} +\title{Split xpt file output} +\usage{ +xportr_split(.df, split_by = NULL) +} +\arguments{ +\item{.df}{A data frame of CDISC standard.} + +\item{split_by}{A quoted variable that will be passed to \code{base::split()}.} +} +\value{ +A data frame with an additional attribute added so \code{xportr_write()} +knows how to split the data frame. +} +\description{ +Per the FDA Study Data Technical Conformance +Guide(https://www.fda.gov/media/88173/download) section 3.3.2, dataset files +sizes shouldn't exceed 5 GB. If datasets are large enough, they should be +split based on a variable. For example, laboratory readings in \code{ADLB} can be +split by \code{LBCAT} to split up hematology and chemistry data. +} +\details{ +This function will tell \code{xportr_write()} to split the data frame based on the +variable passed in \code{split_by}. When written, the file name will be prepended +with a number for uniqueness. These files should be noted in the Reviewer Guides per +CDISC guidance to note how you split your files. +} +\examples{ +data("adsl_xportr") +adsl <- adsl_xportr + +adlb <- data.frame( + USUBJID = c(1001, 1002, 1003), + LBCAT = c("HEMATOLOGY", "HEMATOLOGY", "CHEMISTRY") +) + +adsl <- xportr_split(adsl, "LBCAT") +} diff --git a/man/xportr_type.Rd b/man/xportr_type.Rd index 736fe0c6..05489fcf 100644 --- a/man/xportr_type.Rd +++ b/man/xportr_type.Rd @@ -18,7 +18,7 @@ xportr_type( \item{metadata}{A data frame containing variable level metadata. See 'Metadata' section for details.} -\item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset +\item{domain}{Appropriate CDISC dataset name, e.g. ADAE, DM. Used to subset the metadata object. If none is passed, then name of the dataset passed as .df will be used.} diff --git a/man/xportr_write.Rd b/man/xportr_write.Rd index c6bd4a1d..bde66844 100644 --- a/man/xportr_write.Rd +++ b/man/xportr_write.Rd @@ -22,7 +22,7 @@ used as \code{xpt} name.} \item{metadata}{A data frame containing dataset. See 'Metadata' section for details.} -\item{domain}{Appropriate CDSIC dataset name, e.g. ADAE, DM. Used to subset +\item{domain}{Appropriate CDISC dataset name, e.g. ADAE, DM. Used to subset the metadata object. If none is passed, then name of the dataset passed as .df will be used.} diff --git a/tests/testthat/test-depreciation.R b/tests/testthat/test-depreciation.R index 2679ecc9..d1eb0cd2 100644 --- a/tests/testthat/test-depreciation.R +++ b/tests/testthat/test-depreciation.R @@ -1,5 +1,5 @@ test_that("xportr_df_label: deprecated metacore gives an error", { - withr::local_options(lifecycle_verbosity = "quiet") + local_options(lifecycle_verbosity = "quiet") df <- data.frame(x = "a", y = "b") df_meta <- data.frame(dataset = "df", label = "Label") @@ -7,7 +7,7 @@ test_that("xportr_df_label: deprecated metacore gives an error", { }) test_that("xportr_format: deprecated metacore gives an error", { - withr::local_options(lifecycle_verbosity = "quiet") + local_options(lifecycle_verbosity = "quiet") df <- data.frame(x = 1, y = 2) df_meta <- data.frame( dataset = "df", @@ -19,7 +19,7 @@ test_that("xportr_format: deprecated metacore gives an error", { }) test_that("xportr_label: using the deprecated metacore argument gives an error", { - withr::local_options(lifecycle_verbosity = "quiet") + local_options(lifecycle_verbosity = "quiet") df <- data.frame(x = "a", y = "b") df_meta <- data.frame(dataset = "df", variable = "x", label = "foo") @@ -28,7 +28,7 @@ test_that("xportr_label: using the deprecated metacore argument gives an error", }) test_that("xportr_length: using the deprecated metacore argument gives an error", { - withr::local_options(lifecycle_verbosity = "quiet") + local_options(lifecycle_verbosity = "quiet") df <- data.frame(x = "a", y = "b") df_meta <- data.frame( dataset = "df", @@ -41,7 +41,7 @@ test_that("xportr_length: using the deprecated metacore argument gives an error" }) test_that("xportr_order: using the deprecated metacore argument gives an error", { - withr::local_options(lifecycle_verbosity = "quiet") + local_options(lifecycle_verbosity = "quiet") df <- data.frame(c = 1:5, a = "a", d = 5:1, b = LETTERS[1:5]) df_meta <- data.frame( @@ -54,7 +54,7 @@ test_that("xportr_order: using the deprecated metacore argument gives an error", }) test_that("xportr_type: using the deprecated metacore argument gives an error", { - withr::local_options(lifecycle_verbosity = "quiet") + local_options(lifecycle_verbosity = "quiet") df <- data.frame( Subj = as.character(c(123, 456, 789, "", NA, NA_integer_)), Different = c("a", "b", "c", "", NA, NA_character_), diff --git a/tests/testthat/test-df_label.R b/tests/testthat/test-df_label.R index 2cbe1736..1a0cfdd8 100644 --- a/tests/testthat/test-df_label.R +++ b/tests/testthat/test-df_label.R @@ -1,11 +1,5 @@ test_that("xportr_df_label: error when metadata is not set", { - adsl <- data.frame( - USUBJID = c(1001, 1002, 1003), - SITEID = c(001, 002, 003), - AGE = c(63, 35, 27), - SEX = c("M", "F", "M") - ) - + adsl <- minimal_table() expect_error( xportr_df_label(adsl), diff --git a/tests/testthat/test-format.R b/tests/testthat/test-format.R index 63b4ff92..7769cd10 100644 --- a/tests/testthat/test-format.R +++ b/tests/testthat/test-format.R @@ -34,3 +34,255 @@ test_that("xportr_format: Works as expected with only one domain in metadata", { expect_silent(xportr_format(adsl, metadata)) }) + +test_that("xportr_format: Variable ending in DT should produce a warning if no format", { + adsl <- data.frame( + USUBJID = c(1001, 1002, 1003), + BRTHDT = c(1, 1, 2) + ) + + metadata <- data.frame( + dataset = c("adsl", "adsl"), + variable = c("USUBJID", "BRTHDT"), + format = c(NA, NA) + ) + + expect_warning( + xportr_format(adsl, metadata, verbose = "warn"), + regexp = "(xportr::xportr_format) `BRTHDT` is expected to have a format but does not.", + fixed = TRUE + ) +}) + +test_that("xportr_format: Variable ending in TM should produce an error if no format", { + adsl <- data.frame( + USUBJID = c(1001, 1002, 1003), + BRTHTM = c(1, 1, 2) + ) + + metadata <- data.frame( + dataset = c("adsl", "adsl"), + variable = c("USUBJID", "BRTHTM"), + format = c(NA, NA) + ) + + expect_error( + xportr_format(adsl, metadata, verbose = "stop"), + regexp = "(xportr::xportr_format) `BRTHTM` is expected to have a format but does not.", + fixed = TRUE + ) +}) + +test_that("xportr_format: Variable ending in DTM should produce a warning if no format", { + adsl <- data.frame( + USUBJID = c(1001, 1002, 1003), + BRTHDTM = c(1, 1, 2) + ) + + metadata <- data.frame( + dataset = c("adsl", "adsl"), + variable = c("USUBJID", "BRTHDTM"), + format = c(NA, NA) + ) + + expect_warning( + xportr_format(adsl, metadata, verbose = "warn"), + regexp = "(xportr::xportr_format) `BRTHDTM` is expected to have a format but does not.", + fixed = TRUE + ) +}) + +test_that( + "xportr_format: If a variable is character then an error should be produced if format does not start with `$`", + { + adsl <- data.frame( + USUBJID = c("1001", "1002", "1003"), + BRTHDT = c(1, 1, 2) + ) + + metadata <- data.frame( + dataset = c("adsl", "adsl"), + variable = c("USUBJID", "BRTHDT"), + format = c("4.", "DATE9.") + ) + + expect_error( + xportr_format(adsl, metadata, verbose = "stop"), + regexp = "(xportr::xportr_format) `USUBJID` is a character variable and should have a `$` prefix.", + fixed = TRUE + ) + } +) + +test_that("xportr_format: If a variable is character then a warning should be produced if format is > 32 in length", { + adsl <- data.frame( + USUBJID = c("1001", "1002", "1003"), + BRTHDT = c(1, 1, 2) + ) + + metadata <- data.frame( + dataset = c("adsl", "adsl"), + variable = c("USUBJID", "BRTHDT"), + format = c("$AVERYLONGFORMATNAMEWHICHISGREATERTHAN32.", "DATE9.") + ) + + res <- evaluate_promise(xportr_format(adsl, metadata, verbose = "warn")) + + expect_equal( + res$warnings, + c( + "(xportr::xportr_format) Format for character variable `USUBJID` should have length <= 31 (excluding `$`).", + paste0( + "(xportr::xportr_format) Check format ", + "`$AVERYLONGFORMATNAMEWHICHISGREATERTHAN32.` for variable ", + "`USUBJID` - is this correct?" + ) + ) + ) +}) + +test_that("xportr_format: If a variable is numeric then an error should be produced if a format starts with `$`", { + adsl <- data.frame( + USUBJID = c(1001, 1002, 1003), + BRTHDT = c(1, 1, 2) + ) + + metadata <- data.frame( + dataset = c("adsl", "adsl"), + variable = c("USUBJID", "BRTHDT"), + format = c("$4.", "DATE9.") + ) + + expect_error( + xportr_format(adsl, metadata, verbose = "stop"), + regexp = "(xportr::xportr_format) `USUBJID` is a numeric variable and should not have a `$` prefix.", + fixed = TRUE + ) +}) + +test_that("xportr_format: If a variable is numeric then a warning should be produced if format is > 32 in length", { + adsl <- data.frame( + USUBJID = c(1001, 1002, 1003), + BRTHDT = c(1, 1, 2) + ) + + metadata <- data.frame( + dataset = c("adsl", "adsl"), + variable = c("USUBJID", "BRTHDT"), + format = c("AVERYLONGFORMATNAMEWHICHISGREATERTHAN32.", "DATE9.") + ) + + res <- evaluate_promise(xportr_format(adsl, metadata, verbose = "warn")) + + expect_equal( + res$warnings, + c( + "(xportr::xportr_format) Format for numeric variable `USUBJID` should have length <= 32.", + paste0( + "(xportr::xportr_format) Check format ", + "`AVERYLONGFORMATNAMEWHICHISGREATERTHAN32.` for variable ", + "`USUBJID` - is this correct?" + ) + ) + ) +}) + +test_that( + "xportr_format: If a format is not one of the expected formats identified, then a message should be produced", + { + adsl <- data.frame( + USUBJID = c(1001, 1002, 1003), + BRTHDT = c(1, 1, 2) + ) + + metadata <- data.frame( + dataset = c("adsl", "adsl"), + variable = c("USUBJID", "BRTHDT"), + format = c("NOTASTDFMT.", "DATE9.") + ) + + expect_message( + xportr_format(adsl, metadata, verbose = "message"), + regexp = "(xportr::xportr_format) Check format `NOTASTDFMT.` for variable `USUBJID` - is this correct?", + fixed = TRUE + ) + } +) + +test_that( + "xportr_format: Check for case-sensitivity. Using lowercase should not affect anything", + { + adsl <- data.frame( + USUBJID = c(1001, 1002, 1003), + BRTHDT = c(1, 1, 2), + BRTHTM = c(2, 2, 5) + ) + + metadata <- data.frame( + dataset = c("adsl", "adsl", "adsl"), + variable = c("USUBJID", "BRTHDT", "BRTHTM"), + format = c(NA, "date9.", "time5.") + ) + + expect_silent(xportr_format(adsl, metadata)) + } +) + +test_that( + "xportr_format: Check for case-sensitivity. Using mixed case should not affect anything", + { + adsl <- data.frame( + USUBJID = c(1001, 1002, 1003), + BRTHDT = c(1, 1, 2), + BRTHTM = c(2, 2, 5) + ) + + metadata <- data.frame( + dataset = c("adsl", "adsl", "adsl"), + variable = c("USUBJID", "BRTHDT", "BRTHTM"), + format = c(NA, "daTe9.", "TimE5.") + ) + + expect_silent(xportr_format(adsl, metadata)) + } +) + +test_that( + "xportr_format: Check for case-sensitivity. Using a mixture of case should not affect anything", + { + adsl <- data.frame( + USUBJID = c(1001, 1002, 1003), + BRTHDT = c(1, 1, 2), + BRTHTM = c(2, 2, 5), + BRTHDTM = c(3, 5, 7) + ) + + metadata <- data.frame( + dataset = c("adsl", "adsl", "adsl", "adsl"), + variable = c("USUBJID", "BRTHDT", "BRTHTM", "BRTHDTM"), + format = c(NA, "DATE9.", "time5.", "DaTeTiMe16.") + ) + + expect_silent(xportr_format(adsl, metadata)) + } +) + +test_that( + "xportr_format: If 'verbose' option is 'none', then no messaging should appear", + { + adsl <- data.frame( + USUBJID = c(1001, 1002, 1003), + BRTHDT = c(1, 1, 2) + ) + + metadata <- data.frame( + dataset = c("adsl", "adsl"), + variable = c("USUBJID", "BRTHDT"), + format = c("NOTASTDFMT.", NA) + ) + + expect_silent( + xportr_format(adsl, metadata, verbose = "none") + ) + } +) diff --git a/tests/testthat/test-length.R b/tests/testthat/test-length.R index ffdc599a..12fce410 100644 --- a/tests/testthat/test-length.R +++ b/tests/testthat/test-length.R @@ -10,7 +10,7 @@ test_that("xportr_length: Accepts valid domain names in metadata object", { metadata <- minimal_metadata(dataset = TRUE, length = TRUE, var_names = colnames(adsl)) # Setup temporary options with active verbose - withr::local_options(list(xportr.length_verbose = "message")) + local_options(xportr.length_verbose = "message") # Test minimal call with valid data and without domain adsl %>% @@ -50,7 +50,7 @@ test_that("xportr_length: CDISC data frame is being piped after another xportr f ) # Setup temporary options with active verbose - withr::local_options(list(xportr.length_verbose = "message")) + local_options(xportr.length_verbose = "message") adsl %>% xportr_type(metadata, domain = "adsl", verbose = "message") %>% @@ -69,9 +69,9 @@ test_that("xportr_length: Impute character lengths based on class", { mutate(length = length - 1) # Setup temporary options with `verbose = "none"` - withr::local_options(list(xportr.length_verbose = "none")) + local_options(xportr.length_verbose = "none") # Define controlled `character_types` for this test - withr::local_options(list(xportr.character_types = c("character", "date"))) + local_options(xportr.character_types = c("character", "date")) # Remove empty lines in cli theme local_cli_theme() @@ -104,7 +104,7 @@ test_that("xportr_length: Throws message when variables not present in metadata" metadata <- minimal_metadata(dataset = TRUE, length = TRUE, var_names = c("x")) # Setup temporary options with `verbose = "message"` - withr::local_options(list(xportr.length_verbose = "message")) + local_options(xportr.length_verbose = "message") # Remove empty lines in cli theme local_cli_theme() @@ -163,17 +163,6 @@ test_that("xportr_length: Domain not in character format", { ) }) -test_that("xportr_length: Column length of known/unkown character types is 200/8 ", { - expect_equal(impute_length(123), 8) - expect_equal(impute_length(123L), 8) - expect_equal(impute_length("string"), 200) - expect_equal(impute_length(Sys.Date()), 8) - expect_equal(impute_length(Sys.time()), 8) - - withr::local_options(list(xportr.character_types = c("character", "date"))) - expect_equal(impute_length(Sys.time()), 8) -}) - test_that("xportr_length: error when metadata is not set", { adsl <- minimal_table(30) diff --git a/tests/testthat/test-messages.R b/tests/testthat/test-messages.R index 2055914e..1da3e004 100644 --- a/tests/testthat/test-messages.R +++ b/tests/testthat/test-messages.R @@ -22,9 +22,9 @@ test_that("length_log: Missing lengths messages are shown", { # Remove empty lines in cli theme local_cli_theme() - length_log(c("var1", "var2", "var3"), "message") %>% + length_log(c("var1", "var2", "var3"), c("var4"), "message") %>% expect_message("Variable lengths missing from metadata.") %>% - expect_message("lengths resolved") %>% + expect_message("lengths resolved `var1`.*`var2`.*`var3`.*`var4`") %>% expect_message("Problem with `var1`.*`var2`.*`var3`") }) diff --git a/tests/testthat/test-metadata.R b/tests/testthat/test-metadata.R index 29a9aab0..7a7d695d 100644 --- a/tests/testthat/test-metadata.R +++ b/tests/testthat/test-metadata.R @@ -513,12 +513,12 @@ test_that("xportr_length: Check if length gets imputed when a new variable is pa xportr_length(df, df_meta, domain = "df") ) - # 200 is the imputed length for character and 8 for other data types as in impute_length() - expect_equal(c(x = 1, y = 200, z = 8), map_dbl(df_with_width, attr, "width")) + # Max length is the imputed length for character and 8 for other data types + expect_equal(c(x = 1, y = 1, z = 8), map_dbl(df_with_width, attr, "width")) expect_equal(df_with_width, structure( list( x = structure("a", width = 1), - y = structure("b", width = 200), + y = structure("b", width = 1), z = structure(3, width = 8) ), row.names = c(NA, -1L), `_xportr.df_arg_` = "df", class = "data.frame" @@ -638,16 +638,16 @@ test_that("xportr_type: Variable types are coerced as expected and can raise mes # tests for `xportr_metadata()` basic functionality # start test_that("xportr_metadata: Check metadata interaction with other functions", { - skip_if_not_installed("admiral") - adsl <- admiral::admiral_adsl - - var_spec <- - readxl::read_xlsx( - system.file("specs", "ADaM_admiral_spec.xlsx", package = "xportr"), - sheet = "Variables" - ) %>% + data("adsl_xportr", envir = environment()) + adsl <- adsl_xportr + + skip_if_not_installed("readxl") + var_spec <- readxl::read_xlsx( + system.file("specs", "ADaM_spec.xlsx", package = "xportr"), + sheet = "Variables" + ) %>% dplyr::rename(type = "Data Type") %>% - rlang::set_names(tolower) + dplyr::rename_with(tolower) # Divert all messages to tempfile, instead of printing them # note: be aware as this should only be used in tests that don't track @@ -722,7 +722,9 @@ test_that("xportr_*: Domain is kept in between calls", { # Divert all messages to tempfile, instead of printing them # note: be aware as this should only be used in tests that don't track # messages - withr::local_message_sink(tempfile()) + if (requireNamespace("withr", quietly = TRUE)) { + withr::local_message_sink(withr::local_tempfile()) + } adsl <- minimal_table(30) @@ -757,6 +759,9 @@ test_that("xportr_*: Domain is kept in between calls", { # end test_that("`xportr_metadata()` results match traditional results", { + data("var_spec", "dataset_spec", "adsl_xportr", envir = environment()) + adsl <- adsl_xportr + if (require(magrittr, quietly = TRUE)) { skip_if_not_installed("withr") trad_path <- withr::local_file("adsltrad.xpt") diff --git a/tests/testthat/test-pkg-load.R b/tests/testthat/test-pkg-load.R index 82341de1..be913992 100644 --- a/tests/testthat/test-pkg-load.R +++ b/tests/testthat/test-pkg-load.R @@ -1,21 +1,21 @@ test_that(".onLoad: Unset options get initialised on package load with defaults", { skip_if(getOption("testthat_interactive")) - withr::with_options( - list(xportr.df_domain_name = NULL), + with_options( { expect_no_error(.onLoad()) expect_equal(getOption("xportr.df_domain_name"), "dataset") - } + }, + xportr.df_domain_name = NULL ) }) test_that(".onLoad: Initialised options are retained and not overwritten", { skip_if(getOption("testthat_interactive")) - withr::with_options( - list(xportr.df_domain_name = "custom_domain"), + with_options( { expect_no_error(.onLoad()) expect_equal(getOption("xportr.df_domain_name"), "custom_domain") - } + }, + xportr.df_domain_name = "custom_domain" ) }) diff --git a/tests/testthat/test-type.R b/tests/testthat/test-type.R index d1c7b58c..2bbe15de 100644 --- a/tests/testthat/test-type.R +++ b/tests/testthat/test-type.R @@ -107,7 +107,9 @@ test_that("xportr_type: Variables retain column attributes, besides class", { # Divert all messages to tempfile, instead of printing them # note: be aware as this should only be used in tests that don't track # messages - withr::local_message_sink(tempfile()) + if (requireNamespace("withr", quietly = TRUE)) { + withr::local_message_sink(withr::local_tempfile()) + } df_type_label <- adsl %>% xportr_metadata(domain = "adsl") %>% diff --git a/tests/testthat/test-write.R b/tests/testthat/test-write.R index 31837977..bb036cf0 100644 --- a/tests/testthat/test-write.R +++ b/tests/testthat/test-write.R @@ -1,23 +1,27 @@ -data_to_save <- dplyr::tibble(X = c(1, 2, NA), Y = c("a", "", "c"), Z = c(1, 2, 3)) +data_to_save <- function() { + minimal_table(cols = c("e", "b", "x")) %>% + rename_with(toupper) %>% + as_tibble() +} -test_that("xportr_write: exported data can be saved to a file", { - tmpdir <- tempdir() - tmp <- file.path(tmpdir, "xyz.xpt") +# Skip large file tests unless explicitly requested +test_large_files <- Sys.getenv("XPORTR.TEST_LARGE_FILES", FALSE) - on.exit(unlink(tmpdir)) +test_that("xportr_write: exported data can be saved to a file", { + skip_if_not_installed("withr") + tmp <- withr::local_file("xyz.xpt") + local_data <- data_to_save() - xportr_write(data_to_save, path = tmp) - expect_equal(read_xpt(tmp), data_to_save) + xportr_write(local_data, path = tmp) + expect_equal(read_xpt(tmp), local_data) }) test_that("xportr_write: exported data can still be saved to a file with a label", { - tmpdir <- tempdir() - tmp <- file.path(tmpdir, "xyz.xpt") - - on.exit(unlink(tmpdir)) + skip_if_not_installed("withr") + tmp <- withr::local_file("xyz.xpt") suppressWarnings( - xportr_write(data_to_save, + xportr_write(data_to_save(), path = tmp, label = "Lorem ipsum dolor sit amet", domain = "data_to_save" @@ -27,13 +31,11 @@ test_that("xportr_write: exported data can still be saved to a file with a label }) test_that("xportr_write: exported data can be saved to a file with a metadata", { - tmpdir <- tempdir() - tmp <- file.path(tmpdir, "xyz.xpt") - - on.exit(unlink(tmpdir)) + skip_if_not_installed("withr") + tmp <- withr::local_file("xyz.xpt") xportr_write( - data_to_save, + data_to_save(), path = tmp, domain = "data_to_save", metadata = data.frame( @@ -45,13 +47,11 @@ test_that("xportr_write: exported data can be saved to a file with a metadata", }) test_that("xportr_write: exported data can be saved to a file with a existing metadata", { - tmpdir <- tempdir() - tmp <- file.path(tmpdir, "xyz.xpt") - - on.exit(unlink(tmpdir)) + skip_if_not_installed("withr") + tmp <- withr::local_file("xyz.xpt") df <- xportr_df_label( - data_to_save, + data_to_save(), domain = "data_to_save", data.frame( dataset = "data_to_save", @@ -64,15 +64,11 @@ test_that("xportr_write: exported data can be saved to a file with a existing me }) test_that("xportr_write: expect error when invalid multibyte string is passed in label", { - tmpdir <- tempdir() - tmp <- file.path(tmpdir, "xyz.xpt") - - on.exit(unlink(tmpdir)) - + skip_if_not_installed("withr") expect_error( xportr_write( - data_to_save, - tmp, + data_to_save(), + withr::local_file("xyz.xpt"), metadata = data.frame( dataset = "data_to_save", label = "Lorizzle ipsizzle dolizzl\xe7 pizzle" @@ -82,129 +78,114 @@ test_that("xportr_write: expect error when invalid multibyte string is passed in }) test_that("xportr_write: expect error when file name is over 8 characters long", { - tmpdir <- tempdir() - tmp <- file.path(tmpdir, paste0(paste(letters[1:9], collapse = ""), ".xpt")) - - on.exit(unlink(tmpdir)) - - expect_error(xportr_write(data_to_save, tmp)) + skip_if_not_installed("withr") + expect_error( + xportr_write( + data_to_save(), + withr::local_file(paste0(paste(letters[1:9], collapse = ""), ".xpt")) + ), + "\\.df file name must be 8 characters or less\\." + ) }) test_that("xportr_write: expect error when file name contains non-ASCII symbols or special characters", { - tmpdir <- tempdir() - tmp <- file.path(tmpdir, ".xpt") - - on.exit(unlink(tmpdir)) - - expect_error(xportr_write(data_to_save, tmp, strict_checks = TRUE)) + skip_if_not_installed("withr") + expect_error( + xportr_write(data_to_save(), withr::local_file(".xpt"), strict_checks = TRUE), + "`\\.df` cannot contain any non-ASCII, symbol or underscore characters\\." + ) }) test_that("xportr_write: expect warning when file name contains underscore and strict_checks = FALSE", { - tmpdir <- tempdir() - tmp <- file.path(tmpdir, "test_.xpt") - - on.exit(unlink(tmpdir)) - - expect_warning(xportr_write(data_to_save, tmp, strict_checks = FALSE)) + skip_if_not_installed("withr") + expect_warning( + xportr_write(data_to_save(), withr::local_file("test_.xpt"), strict_checks = FALSE), + "`\\.df` cannot contain any non-ASCII, symbol or underscore characters\\." + ) }) test_that("xportr_write: expect error when label contains non-ASCII symbols or special characters", { - tmpdir <- tempdir() - tmp <- file.path(tmpdir, "xyz.xpt") - - on.exit(unlink(tmpdir)) - + skip_if_not_installed("withr") expect_error( xportr_write( - data_to_save, - tmp, - expect_error( - xportr_write( - data_to_save, - domain = "data_to_save", - tmp, - metadata = data.frame( - dataset = "data_to_save", - label = "çtestç" - ) - ) + data_to_save(), + domain = "data_to_save", + path = withr::local_file("xyz.xpt"), + metadata = data.frame( + dataset = "data_to_save", + label = "çtestç" ) - ) + ), + "`label` cannot contain any non-ASCII, symbol or special characters" ) }) test_that("xportr_write: expect error when label is over 40 characters", { - tmpdir <- tempdir() - tmp <- file.path(tmpdir, "xyz.xpt") - - on.exit(unlink(tmpdir)) - + skip_if_not_installed("withr") expect_error( xportr_write( - data_to_save, + data_to_save(), domain = "data_to_save", - tmp, + path = withr::local_file("xyz.xpt"), metadata = data.frame( dataset = "data_to_save", label = paste(rep("a", 41), collapse = "") ) - ) + ), + "Length of dataset label must be 40 characters or less" ) }) test_that("xportr_write: expect error when an xpt validation fails with strict_checks set to TRUE", { - tmpdir <- tempdir() - tmp <- file.path(tmpdir, "xyz.xpt") - attr(data_to_save$X, "format.sas") <- "foo" - - on.exit(unlink(tmpdir)) + skip_if_not_installed("withr") + local_data <- data_to_save() + attr(local_data$X, "format.sas") <- "foo" expect_error( xportr_write( - data_to_save, tmp, + local_data, + withr::local_file("xyz.xpt"), domain = "data_to_save", metadata = data.frame( dataset = "data_to_save", label = "label" ), strict_checks = TRUE - ) + ), + "Format 'X' must have a valid format\\." ) }) test_that("xportr_write: expect warning when an xpt validation fails with strict_checks set to FALSE", { - tmpdir <- tempdir() - tmp <- file.path(tmpdir, "xyz.xpt") - attr(data_to_save$X, "format.sas") <- "foo" - - on.exit(unlink(tmpdir)) + skip_if_not_installed("withr") + local_data <- data_to_save() + attr(local_data$X, "format.sas") <- "foo" expect_warning( xportr_write( - data_to_save, tmp, + local_data, + withr::local_file("xyz.xpt"), domain = "data_to_save", metadata = data.frame( dataset = "data_to_save", label = "label" ), strict_checks = FALSE - ) + ), + "Format 'X' must have a valid format\\." ) }) - test_that("xportr_write: Capture errors by haven and report them as such", { - tmpdir <- tempdir() - tmp <- file.path(tmpdir, "xyz.xpt") - attr(data_to_save$X, "format.sas") <- "E8601LXw.asdf" - - on.exit(unlink(tmpdir)) - + skip_if_not_installed("withr") + local_data <- data_to_save() + attr(local_data$X, "format.sas") <- "E8601LXw.asdf" expect_error( suppressWarnings( xportr_write( - data_to_save, tmp, + local_data, + withr::local_file("xyz.xpt"), domain = "data_to_save", metadata = data.frame( dataset = "data_to_save", @@ -216,3 +197,64 @@ test_that("xportr_write: Capture errors by haven and report them as such", { "Error reported by haven" ) }) + +test_that("xportr_write: `split_by` attribute is used to split the data", { + tmpdir <- tempdir() + tmp <- file.path(tmpdir, "xyz.xpt") + + on.exit(unlink(tmpdir)) + + dts <- data_to_save() + dts %>% + xportr_split(split_by = "X") %>% + xportr_write(path = tmp) + + expect_true( + file.exists(file.path(tmpdir, "xyz1.xpt")) + ) + expect_true( + file.exists(file.path(tmpdir, "xyz2.xpt")) + ) + expect_true( + file.exists(file.path(tmpdir, "xyz3.xpt")) + ) + expect_equal( + read_xpt(file.path(tmpdir, "xyz1.xpt")) %>% + extract2("X") %>% + unique() %>% + length(), + 1 + ) + expect_equal( + read_xpt(file.path(tmpdir, "xyz2.xpt")) %>% + extract2("X") %>% + unique() %>% + length(), + 1 + ) + expect_equal( + read_xpt(file.path(tmpdir, "xyz3.xpt")) %>% + extract2("X") %>% + unique() %>% + length(), + 1 + ) +}) + +test_that("xportr_write: Large file sizes are reported and warned", { + skip_if_not(test_large_files) + tmpdir <- tempdir() + tmp <- file.path(tmpdir, "xyz.xpt") + + on.exit(unlink(tmpdir)) + + # Large_df should be at least 5GB + large_df <- do.call( + data.frame, replicate(80000, rep("large", 80000), simplify = FALSE) + ) + + expect_warning( + xportr_write(large_df, path = tmp), + class = "xportr.xpt_size" + ) +}) diff --git a/tests/testthat/test-xportr.R b/tests/testthat/test-xportr.R index 60161a72..62d3097c 100644 --- a/tests/testthat/test-xportr.R +++ b/tests/testthat/test-xportr.R @@ -1,4 +1,7 @@ test_that("pipeline results match `xportr()` results", { + data("var_spec", "dataset_spec", "adsl_xportr", envir = environment()) + adsl <- adsl_xportr + if (require(magrittr, quietly = TRUE)) { skip_if_not_installed("withr") pipeline_path <- withr::local_file("adslpipe.xpt") diff --git a/vignettes/deepdive.Rmd b/vignettes/deepdive.Rmd index ae920ef1..15c27667 100644 --- a/vignettes/deepdive.Rmd +++ b/vignettes/deepdive.Rmd @@ -93,8 +93,8 @@ Each of the core `{xportr}` functions for applying labels, types, formats, order In this section, we are going to explore the 5 core `{xportr}` functions using: -* `xportr::adsl` - An ADSL ADaM dataset from the Pilot 3 Submission to the FDA -* `xportr::var_spec` - The ADSL ADaM Specification File from the Pilot 3 Submission to the FDA +* `data("adsl_xportr", package = "xportr")` - An ADSL ADaM dataset from the Pilot 3 Submission to the FDA +* `data("var_spec", package = "xportr")` - The ADSL ADaM Specification File from the Pilot 3 Submission to the FDA We will focus on warning and error messaging with contrived examples from these functions by manipulating either the datasets or the specification files. @@ -120,12 +120,13 @@ Additionally, we have a helper function `xportr_options()` which works just like Let's take a look at our example specification file names available in this package. We can see that all the columns start with an upper case letter and have spaces in several of them. We could convert all the column names to lower case and deal with the spacing using some `{dplyr}` functions or base R, or we could just use `options()`! ```{r, message = FALSE} -library(rlang) library(xportr) library(dplyr) library(haven) +data("adsl_xportr", "var_spec", "dataset_spec", package = "xportr") colnames(var_spec) +ADSL <- adsl_xportr ``` By using `options()` or `xportr_options()` at the beginning of our script we can tell `{xportr}` what the valid names are (see chunk below). Please note that before we set the options the package assumed every thing was in lowercase and there were no spaces in the names. After running `options()` or `xportr_options()`, `{xportr}` sees the column `Variable` as the valid name rather than `variable`. You can inspect `xportr_options` function docs to look at additional options. @@ -177,7 +178,7 @@ xportr_options( Each of the core `{xportr}` functions requires several inputs: A valid dataframe, a metadata object and a domain name, along with optional messaging. For example, here is a simple call using all of the functions. As you can see, a lot of information is repeated in each call. ```{r, eval = FALSE} -adsl %>% +ADSL %>% xportr_type(var_spec, "ADSL", "message") %>% xportr_length(var_spec, "ADSL", verbose = "message") %>% xportr_label(var_spec, "ADSL", "message") %>% @@ -191,7 +192,7 @@ To help reduce these repetitive calls, we have created `xportr_metadata()`. A us ```{r, eval = FALSE} -adsl %>% +ADSL %>% xportr_metadata(var_spec, "ADSL") %>% xportr_type() %>% xportr_length(length_source = "metadata") %>% @@ -217,11 +218,11 @@ Similarly, we can read the Dataset spec file and call it `dataset_spec`. ```{r} var_spec <- var_spec %>% rename(type = "Data Type") %>% - set_names(tolower) + rename_with(tolower) dataset_spec <- dataset_spec %>% rename(label = "Description") %>% - set_names(tolower) + rename_with(tolower) ``` ```{r, echo = FALSE} @@ -264,16 +265,19 @@ datatable( ## `xportr_type()` -We are going to explore the type column in the metadata object. A submission to a Health Authority should only have character and numeric types in the data. In the `ADSL` data we have several columns that are in the Date type: `TRTSDT`, `TRTEDT`, `DISONSDT`, `VISIT1DT` and `RFENDT` - under the hood these are actually numeric values and will be left as is. We will change one variable type to a [factor variable](https://forcats.tidyverse.org/), which is a common data structure in R, to give us some educational opportunities to see `xportr_type()` in action. +We are going to explore the type column in the metadata object. +A submission to a Health Authority should only have character and numeric types in the data. +In the `ADSL` data we have several columns that are in the Date type: `TRTSDT`, `TRTEDT`, `SCRFDT`, `EOSDT`, `FRVDT`, `RANDDT`, `DTHDT`, `LSTALVDT` - under the hood these are actually numeric values and will be left as is. +We will change one variable type to a [factor variable](https://forcats.tidyverse.org/), which is a common data structure in R, to give us some educational opportunities to see `xportr_type()` in action. ```{r} -adsl_fct <- adsl %>% +adsl_fct <- ADSL %>% mutate(STUDYID = as_factor(STUDYID)) ``` ```{r, echo = FALSE} adsl_glimpse <- adsl_fct %>% - select(STUDYID, TRTSDT, TRTEDT, DISONSDT, VISIT1DT, RFENDT) + select(STUDYID, TRTSDT, TRTEDT, SCRFDT, EOSDT, FRVDT, RANDDT, DTHDT, LSTALVDT) ``` ```{r, echo = FALSE} @@ -286,7 +290,7 @@ adsl_type <- xportr_type(.df = adsl_fct, metadata = var_spec, domain = "ADSL", v ```{r, echo = FALSE} adsl_type_glimpse <- adsl_type %>% - select(STUDYID, TRTSDT, TRTEDT, DISONSDT, VISIT1DT, RFENDT) + select(STUDYID, TRTSDT, TRTEDT, SCRFDT, EOSDT, FRVDT, RANDDT, DTHDT, LSTALVDT) ``` Success! As we can see below, `xportr_type()` applied the types from the metadata object to the `STUDYID` variables converting to the proper type. The functions in `{xportr}` also display this coercion to the user in the console, which is seen above. @@ -306,12 +310,12 @@ adsl_type <- xportr_type(.df = adsl_fct, metadata = var_spec, domain = "ADSL", v Next we will use `xportr_length()` to apply the length column of the _metadata object_ to the `ADSL` dataset. Using the `str()` function we have displayed all the variables with their attributes. You can see that each variable has a label, but there is no information on the lengths of the variable. ```{r, max.height='300px', attr.output='.numberLines', echo = FALSE} -str(adsl) +str(ADSL) ``` ```{r, echo = TRUE} adsl_length <- xportr_length( - .df = adsl, + .df = ADSL, metadata = var_spec, domain = "ADSL", verbose = "warn", @@ -332,7 +336,7 @@ Just like we did for `xportr_type()`, setting `verbose = "stop"` immediately sto ```{r, echo = TRUE, error = TRUE} adsl_length <- xportr_length( - .df = adsl, + .df = ADSL, metadata = var_spec, domain = "ADSL", verbose = "stop", @@ -358,9 +362,9 @@ var_spec_lbl <- var_spec %>% "Length of variable label must be 40 characters or less", label )) -adsl_lbl <- adsl +adsl_lbl <- ADSL -adsl_lbl <- haven::zap_label(adsl) +adsl_lbl <- haven::zap_label(ADSL) ``` We have successfully removed all the labels. @@ -392,7 +396,7 @@ adsl_label <- xportr_label(.df = adsl_lbl, metadata = var_spec_lbl, domain = "AD The order of the dataset can greatly increase readability of the dataset for downstream stakeholders. For example, having all the treatment related variables or analysis variables grouped together can help with inspection and understanding of the dataset. `xportr_order()` can take the order information from the metadata and apply it to your dataset. ```{r} -adsl_ord <- xportr_order(.df = adsl, metadata = var_spec, domain = "ADSL", verbose = "warn") +adsl_ord <- xportr_order(.df = ADSL, metadata = var_spec, domain = "ADSL", verbose = "warn") ``` Readers are encouraged to inspect the dataset and metadata to see the past order and updated order after calling the function. Note the messaging from `xportr_order()`: @@ -402,7 +406,7 @@ Readers are encouraged to inspect the dataset and metadata to see the past order ```{r, echo = TRUE, error = TRUE} -adsl_ord <- xportr_order(.df = adsl, metadata = var_spec, domain = "ADSL", verbose = "stop") +adsl_ord <- xportr_order(.df = ADSL, metadata = var_spec, domain = "ADSL", verbose = "stop") ``` Just like we did for the other functions, setting `verbose = "stop"` immediately stops R from processing the order. If variables or metadata are missing from either, the re-ordering will not process until corrective action is performed. @@ -414,7 +418,7 @@ Formats play an important role in the SAS language and have a column in specific This example is slightly different from previous examples. You will need to use `xportr_type()` to coerce R Date variables and others types to character or numeric. Only then can you use `xportr_format()` to apply the format column to the dataset. ```{r, echo = TRUE} -adsl_fmt <- adsl %>% +adsl_fmt <- ADSL %>% xportr_type(metadata = var_spec, domain = "ADSL", verbose = "warn") %>% xportr_format(metadata = var_spec, domain = "ADSL") ``` @@ -435,7 +439,7 @@ We will make use of `xportr_metadata()` to reduce repetitive metadata and domain It is also note worthy that you can set the dataset label using the `xportr_df_label` and a `dataset_spec` which will be used by the `xportr_write()` ```{r, echo = TRUE, error = TRUE} -adsl %>% +ADSL %>% xportr_metadata(var_spec, "ADSL") %>% xportr_type() %>% xportr_length(length_source = "metadata") %>% @@ -451,7 +455,7 @@ Success! We have applied types, lengths, labels, ordering and formats to our dat The next two examples showcase the `strict_checks = TRUE` option in `xportr_write()` where we will look at formats and labels. ```{r, echo = TRUE, error = TRUE} -adsl %>% +ADSL %>% xportr_write(path = "adsl.xpt", metadata = dataset_spec, domain = "ADSL", strict_checks = TRUE) ``` @@ -466,8 +470,7 @@ var_spec_lbl <- var_spec %>% "Length of variable label must be 40 characters or less", label )) - -adsl %>% +ADSL %>% xportr_metadata(var_spec_lbl, "ADSL") %>% xportr_label() %>% xportr_type() %>% diff --git a/vignettes/xportr.Rmd b/vignettes/xportr.Rmd index 7ea6eaa3..3b4a3c8a 100644 --- a/vignettes/xportr.Rmd +++ b/vignettes/xportr.Rmd @@ -80,15 +80,10 @@ datatable_template <- function(input_data) { # Getting Started with xportr -The demo will make use of a small `ADSL` data set that is apart of the [`{admiral}`](https://pharmaverse.github.io/admiral/index.html) package. -The script that generates this `ADSL` dataset can be created by using this command -`admiral::use_ad_template("adsl")`. For a deeper discussion into `{xportr}` be sure -to check out the [Deep Dive](deepdive.html) User Guide. - -The `ADSL` has the following features: +The demo will make use of a small `ADSL` dataset available with the `xportr` package and has the following features: * 306 observations -* 48 variables +* 51 variables * Data types other than character and numeric * Missing labels on variables * Missing label for data set @@ -110,33 +105,29 @@ To create a fully compliant v5 xpt `ADSL` dataset, that was developed using R, w library(dplyr) library(labelled) library(xportr) -library(admiral) -library(rlang) library(readxl) # Loading in our example data -adsl <- admiral::admiral_adsl +data("adsl_xportr", package = "xportr") ``` ```{r, echo = FALSE} -datatable_template(adsl) +datatable_template(adsl_xportr) ``` -**NOTE:** The `ADSL` dataset can be created by using this command `admiral::use_ad_template("adsl")`. - # Preparing your Specification Files -In order to make use of the functions within `{xportr}` you will need to create an R data frame that contains your specification file. You will most likely need to do some pre-processing of your spec sheets after loading in the spec files for them to work appropriately with the `xportr` functions. Please see our example spec sheets in `system.file(paste0("specs/", "ADaM_admiral_spec.xlsx"), package = "xportr")` to see how `xportr` expects the specification sheets. +In order to make use of the functions within `{xportr}` you will need to create an R data frame that contains your specification file. You will most likely need to do some pre-processing of your spec sheets after loading in the spec files for them to work appropriately with the `xportr` functions. Please see our example spec sheets in `system.file(file.path("specs", "ADaM_spec.xlsx"), package = "xportr")` to see how `xportr` expects the specification sheets. ```{r} var_spec <- read_xlsx( - system.file(paste0("specs/", "ADaM_admiral_spec.xlsx"), package = "xportr"), + system.file(file.path("specs/", "ADaM_spec.xlsx"), package = "xportr"), sheet = "Variables" ) %>% rename(type = "Data Type") %>% - set_names(tolower) + rename_with(tolower) ``` Below is a quick snapshot of the specification file pertaining to the `ADSL` data set, which we will make use of in the 6 `{xportr}` function calls below. Take note of the order, label, type, length and format columns. @@ -157,14 +148,14 @@ of the datasets. We have suppressed these calls for the sake of brevity. In order to be compliant with transport v5 specifications an `xpt` file can only have two data types: character and numeric/dbl. Currently the `ADSL` data set has chr, dbl, time, factor and date. ```{r, max_height = "200px", echo = FALSE} -str(adsl) +str(adsl_xportr) ``` Using `xportr_type()` and the supplied specification file, we can *coerce* the variables in the `ADSL` set to be either numeric or character. ```{r, echo = TRUE} -adsl_type <- xportr_type(adsl, var_spec, domain = "ADSL", verbose = "message") +adsl_type <- xportr_type(adsl_xportr, var_spec, domain = "ADSL", verbose = "message") ``` @@ -180,13 +171,13 @@ str(adsl_type) Next we can apply the lengths from a variable level specification file to the data frame. `xportr_length()` will identify variables that are missing from your specification file. The function will also alert you to how many lengths have been applied successfully. Before we apply the lengths lets verify that no lengths have been applied to the original dataframe. ```{r, max_height = "200px", echo = FALSE} -str(adsl) +str(adsl_xportr) ``` No lengths have been applied to the variables as seen in the printout - the lengths would be in the `attr()` part of each variables. Let's now use `xportr_length()` to apply our lengths from the specification file. ```{r} -adsl_length <- adsl %>% xportr_length(var_spec, domain = "ADSL", verbose = "message") +adsl_length <- adsl_xportr %>% xportr_length(var_spec, domain = "ADSL", verbose = "message") ``` @@ -201,7 +192,7 @@ Note the additional `attr(*, "width")=` after each variable with the width. The Please note that the order of the `ADSL` variables, see above, does not match the specification file `order` column. We can quickly remedy this with a call to `xportr_order()`. Note that the variable `SITEID` has been moved as well as many others to match the specification file order column. Variables not in the spec are moved to the end of the data and a message is written to the console. ```{r, echo = TRUE} -adsl_order <- xportr_order(adsl, var_spec, domain = "ADSL", verbose = "message") +adsl_order <- xportr_order(adsl_xportr, var_spec, domain = "ADSL", verbose = "message") ``` ```{r, echo = FALSE} @@ -213,7 +204,7 @@ datatable_template(adsl_order) Now we apply formats to the dataset. These will typically be `DATE9.`, `DATETIME20` or `TIME5`, but many others can be used. Notice that in the `ADSL` dataset there are 8 Date/Time variables and they are missing formats. Here we just take a peak at a few `TRT` variables, which have a `NULL` format. ```{r, max_height = "200px", echo = FALSE} -adsl_fmt_pre <- adsl %>% +adsl_fmt_pre <- adsl_xportr %>% select(TRTSDT, TRTEDT, TRTSDTM, TRTEDTM) tribble( @@ -228,7 +219,7 @@ tribble( Using our `xportr_format()` we can apply our formats to the dataset. ```{r} -adsl_fmt <- adsl %>% xportr_format(var_spec, domain = "ADSL") +adsl_fmt <- adsl_xportr %>% xportr_format(var_spec, domain = "ADSL") ``` ```{r, max_height = "200px", echo = FALSE} @@ -252,7 +243,7 @@ to a dataframe. The above output has these individual calls bound together for e Please observe that our `ADSL` dataset is missing many variable labels. Sometimes these labels can be lost while using R's function. However, a CDISC compliant data set needs to have each variable with a label. ```{r, max_height = "200px", echo = FALSE} -adsl_no_lbls <- haven::zap_label(adsl) +adsl_no_lbls <- haven::zap_label(adsl_xportr) str(adsl_no_lbls) ``` @@ -260,7 +251,7 @@ str(adsl_no_lbls) Using the `xport_label` function we can take the specifications file and label all the variables available. `xportr_label` will produce a warning message if you the variable in the data set is not in the specification file. ```{r} -adsl_lbl <- adsl %>% xportr_label(var_spec, domain = "ADSL", "message") +adsl_lbl <- adsl_xportr %>% xportr_label(var_spec, domain = "ADSL", "message") ``` ```{r, max_height = "200px"} @@ -272,7 +263,7 @@ str(adsl_lbl) Finally, we arrive at exporting the R data frame object as a `xpt` file with `xportr_write()`. The `xpt` file will be written directly to your current working directory. To make it more interesting, we have put together all six functions with the magrittr pipe, `%>%`. A user can now apply types, length, variable labels, formats, data set label and write out their final xpt file in one pipe! Appropriate warnings and messages will be supplied to a user to the console for any potential issues before sending off to standard clinical data set validator application or data reviewers. ```{r} -adsl %>% +adsl_xportr %>% xportr_type(var_spec, "ADSL", "message") %>% xportr_length(var_spec, "ADSL", verbose = "message") %>% xportr_label(var_spec, "ADSL", "message") %>%