Skip to content

Commit

Permalink
state filtering fixed, closes issue #16
Browse files Browse the repository at this point in the history
  • Loading branch information
mstrimas committed Mar 28, 2018
1 parent 49d32f2 commit c046aa6
Show file tree
Hide file tree
Showing 56 changed files with 592 additions and 320 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: auk
Title: eBird Data Extraction and Processing with AWK
Version: 0.2.0.9000
Version: 0.2.1
Authors@R: c(
person("Matthew", "Strimas-Mackey", email = "mes335@cornell.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-8929-7776")),
person("Eliot", "Miller", role = "aut"),
Expand Down
6 changes: 4 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# auk 0.2.0.9000
# auk 0.2.1

- Removed all non-ASCII characters from example files, fixing [issue #14](https://github.com/CornellLabofOrnithology/auk/issues/14)
- Patch release fixing a couple of bugs
- Removed all non-ASCII characters from example files, closes [issue #14](https://github.com/CornellLabofOrnithology/auk/issues/14)
- Fixed issue with state filtering not working, closes [issue #16](https://github.com/CornellLabofOrnithology/auk/issues/16)

# auk 0.2.0

Expand Down
26 changes: 22 additions & 4 deletions R/auk-filter.r
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ auk_filter.auk_ebd <- function(x, file, file_sampling, keep, drop, awk_file,
if (!dir.exists(dirname(file))) {
stop("Output directory doesn't exist.")
}
if (!overwrite && file.exists(file)) {
if (!overwrite && file.exists(file) && execute) {
stop("Output file already exists, use overwrite = TRUE.")
}
file <- path.expand(file)
Expand All @@ -137,7 +137,7 @@ auk_filter.auk_ebd <- function(x, file, file_sampling, keep, drop, awk_file,
if (!dir.exists(dirname(file_sampling))) {
stop("Output directory for sampling file doesn't exist.")
}
if (!overwrite && file.exists(file_sampling)) {
if (!overwrite && file.exists(file_sampling) && execute) {
stop("Output sampling file already exists, use overwrite = TRUE.")
}
file_sampling <- path.expand(file_sampling)
Expand Down Expand Up @@ -382,6 +382,23 @@ awk_translate <- function(filters, col_idx, sep, select) {
condition <- paste0("$", idx, " in countries")
filter_strings$country <- str_interp(awk_if, list(condition = condition))
}
# state filter
if (length(filters$state) == 0) {
filter_strings$state_array <- ""
filter_strings$state <- ""
} else {
# generate list
state_list <- paste(filters$state, collapse = "\t")
state_array <- "
split(\"%s\", stateValues, \"\t\")
for (i in stateValues) states[stateValues[i]] = 1"
filter_strings$state_array <- sprintf(state_array, state_list)

# check in list
idx <- col_idx$index[col_idx$id == "state"]
condition <- paste0("$", idx, " in states")
filter_strings$state <- str_interp(awk_if, list(condition = condition))
}
# extent filter
if (length(filters$extent) == 0) {
filter_strings$extent <- ""
Expand Down Expand Up @@ -518,18 +535,19 @@ awk_translate <- function(filters, col_idx, sep, select) {
# awk script template
awk_filter <- "
BEGIN {
FS = \"${sep}\"
OFS = \"${sep}\"
FS = OFS = \"${sep}\"
${species_array}
${country_array}
${state_array}
}
{
keep = 1
# filters
${species}
${country}
${state}
${extent}
${date_substr}
${date}
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ drops these erroneous records and removes the blank column.
f_out <- tempfile()
# remove problem records
auk_clean(f, f_out)
#> [1] "/var/folders/mg/qh40qmqd7376xn8qxd6hm5lwjyy0h2/T//RtmpsecHbt/file7fcc25c943e1"
#> [1] "/var/folders/mg/qh40qmqd7376xn8qxd6hm5lwjyy0h2/T//RtmpW4B7dt/file956167ef8afd"
# number of lines in input
length(readLines(f))
#> [1] 51
Expand Down
4 changes: 4 additions & 0 deletions cran-comments.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Patch release

Fixes a bug that prevented state filtering from working and removes all non-ASCII characters from data files.

# Test environments

- local OS X install, R 3.4.4
Expand Down
10 changes: 5 additions & 5 deletions data-raw/ebd-samples.r
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ write_tsv(y, f, na = "")
readLines(f) %>%
stri_trans_general("latin-ascii") %>%
iconv("latin1", "ASCII", sub="") %>%
str_replace_all("'|\"", "") %>%
str_replace_all("\"", "") %>%
writeLines(f)
stopifnot(length(tools::showNonASCII(readLines(f))) == 0)
stopifnot(all(read_ebd(f)$scientific_name %in% ebird_taxonomy$scientific_name))
Expand All @@ -61,7 +61,7 @@ y <- sample_n(y, 50)
f <- "inst/extdata/ebd-sample_messy.txt"
write_tsv(y, f, na = "")
readLines(f) %>%
str_replace_all("'|\"", "") %>%
str_replace_all("\"", "") %>%
writeLines(f)
stopifnot(length(tools::showNonASCII(readLines(f))) == 0)
stopifnot(all(read_ebd(f)$scientific_name %in% ebird_taxonomy$scientific_name))
Expand All @@ -88,7 +88,7 @@ write_tsv(x_ebd, f, na = "")
readLines(f) %>%
stri_trans_general("latin-ascii") %>%
iconv("latin1", "ASCII", sub="") %>%
str_replace_all("'|\"", "") %>%
str_replace_all("\"", "") %>%
writeLines(f)
stopifnot(length(tools::showNonASCII(readLines(f))) == 0)
stopifnot(all(read_ebd(f)$scientific_name %in% ebird_taxonomy$scientific_name))
Expand All @@ -103,7 +103,7 @@ write_tsv(x_samp, f, na = "")
readLines(f) %>%
stri_trans_general("latin-ascii") %>%
iconv("latin1", "ASCII", sub="") %>%
str_replace_all("'|\"", "") %>%
str_replace_all("\"", "") %>%
writeLines(f)
stopifnot(length(tools::showNonASCII(readLines(f))) == 0)

Expand Down Expand Up @@ -134,7 +134,7 @@ write_tsv(ru_ex, f, na = "")
readLines(f) %>%
stri_trans_general("latin-ascii") %>%
iconv("latin1", "ASCII", sub="") %>%
str_replace_all("'|\"", "") %>%
str_replace_all("\"", "") %>%
writeLines(f)
stopifnot(length(tools::showNonASCII(readLines(f))) == 0)
stopifnot(all(read_ebd(f)$scientific_name %in% ebird_taxonomy$scientific_name))
Expand Down
8 changes: 7 additions & 1 deletion docs/CONDUCT.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion docs/CONTRIBUTING.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion docs/LICENSE.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 7 additions & 4 deletions docs/articles/auk.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 6 additions & 2 deletions docs/articles/development.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 8 additions & 2 deletions docs/articles/index.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 8 additions & 2 deletions docs/authors.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit c046aa6

Please sign in to comment.