Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

is.valid #4480

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .dev/.Rprofile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Developer R startup profile for working on data.table.
# Matt's ~/.Rprofile is a link to this file at ~/GitHub/data.table/.dev/.Rprofile

# Alternative CRAN mirrors, kept for reference:
# options(repos = c(CRAN="http://cran.stat.ucla.edu"))
# options(repos = c(CRAN=c("http://cran.stat.ucla.edu", "http://cloud.r-project.org"))) # both needed for revdep checks sometimes

# Session-wide options, set in a single call.
options(
  repos = c(CRAN="http://cloud.r-project.org"),
  help_type = "html",
  error = quote(dump.frames()),
  width = 200,
  digits.secs = 3   # for POSIXct to print milliseconds
)

# so when I create tests in dev there isn't a mismatch when run by cc()
suppressWarnings(RNGversion("3.5.0"))

# Publish the project location through the environment, then load the dev helper script from it.
Sys.setenv(PROJ_PATH=path.expand("~/GitHub/data.table"))
source(file.path(Sys.getenv("PROJ_PATH"), ".dev", "cc.R"))

17 changes: 17 additions & 0 deletions .dev/.bash_aliases
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Developer shell aliases for working on data.table.
# Matt's ~/.bash_aliases is a link to this file ~/GitHub/data.table/.dev/.bash_aliases

# One off configure meld as difftool:
# git config --global diff.tool meld
# git config --global difftool.prompt false
# gd : run git difftool on the working tree, discarding its stdout and stderr
# gdm: same, but diff against the master branch
alias gd='git difftool &> /dev/null'
alias gdm='git difftool master &> /dev/null'

# Launch the locally built R binaries under ~/build with --vanilla.
alias Rdevel='~/build/R-devel/bin/R --vanilla'
alias Rdevel-strict-gcc='~/build/R-devel-strict-gcc/bin/R --vanilla'
alias Rdevel-strict-clang='~/build/R-devel-strict-clang/bin/R --vanilla'
alias Rdevel32='~/build/32bit/R-devel/bin/R --vanilla'
alias R310='~/build/R-3.1.0/bin/R --vanilla'
# revdepsh: cd into the revdep library and export the environment used for reverse-dependency checks
# (TZ, R_LIBS_SITE, R_LIBS, _R_CHECK_FORCE_SUGGESTS_).
alias revdepsh='cd ~/build/revdeplib/ && export TZ=UTC && export R_LIBS_SITE=none && export R_LIBS=~/build/revdeplib/ && export _R_CHECK_FORCE_SUGGESTS_=false'
# revdepr: run revdepsh, then start R-devel with .dev/revdep.R as the user profile for that one command.
alias revdepr='revdepsh; R_PROFILE_USER=~/GitHub/data.table/.dev/revdep.R ~/build/R-devel/bin/R'

# NOTE(review): the single quotes keep the tilde literal in the exported value; this presumably relies on
# R expanding it when it reads R_PROFILE_USER - confirm against ?Startup.
export R_PROFILE_USER='~/.Rprofile' # ignore the .Rprofile now in ~/GitHub/data.table/
11 changes: 6 additions & 5 deletions .dev/CRAN_Release.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ cd R-devel # used for revdep testing: .dev/revdep.R.
./configure CFLAGS="-O2 -Wall -pedantic"
make

# use latest available below `apt cache search gcc-` or `clang-`
# use latest available below `apt-cache search gcc-` or `clang-`
cd ../R-devel-strict-clang
./configure --without-recommended-packages --disable-byte-compiled-packages --disable-openmp --enable-strict-barrier --disable-long-double CC="clang-8 -fsanitize=undefined,address -fno-sanitize=float-divide-by-zero -fno-omit-frame-pointer"
make
Expand Down Expand Up @@ -479,7 +479,7 @@ sudo apt-get -y install r-base r-base-dev
sudo apt-get -y build-dep r-base-dev
sudo apt-get -y build-dep qpdf
sudo apt-get -y install aptitude
sudo aptitude build-dep r-cran-rgl # leads to libglu1-mesa-dev
sudo aptitude -y build-dep r-cran-rgl # leads to libglu1-mesa-dev
sudo apt-get -y build-dep r-cran-rmpi
sudo apt-get -y build-dep r-cran-cairodevice
sudo apt-get -y build-dep r-cran-tkrplot
Expand All @@ -490,8 +490,7 @@ sudo apt-get -y install libv8-dev
sudo apt-get -y install gsl-bin libgsl0-dev
sudo apt-get -y install libgtk2.0-dev netcdf-bin
sudo apt-get -y install libcanberra-gtk-module
sudo apt-get -y install git
sudo apt-get -y install openjdk-8-jdk
sudo apt-get -y install openjdk-11-jdk # solves "fatal error: jni.h: No such file or directory"; change 11 to match "java --version"
sudo apt-get -y install libnetcdf-dev udunits-bin libudunits2-dev
sudo apt-get -y install tk8.6-dev
sudo apt-get -y install clustalo # for package LowMACA
Expand All @@ -512,14 +511,16 @@ sudo apt-get -y install libmagick++-dev # for magick
# The package list must contain package names only: a stray English word here would be passed to apt-get as a package to install.
sudo apt-get -y install libjq-dev libprotoc-dev libprotobuf-dev protobuf-compiler # for protolite
sudo apt-get -y install python-dev # for PythonInR
sudo apt-get -y install gdal-bin libgeos-dev # for rgdal/raster tested via lidR
sudo apt-get build-dep r-cran-rsymphony # for Rsymphony: coinor-libcgl-dev coinor-libclp-dev coinor-libcoinutils-dev coinor-libosi-dev coinor-libsymphony-dev
sudo apt-get -y build-dep r-cran-rsymphony # for Rsymphony: coinor-libcgl-dev coinor-libclp-dev coinor-libcoinutils-dev coinor-libosi-dev coinor-libsymphony-dev
sudo apt-get -y install libtesseract-dev libleptonica-dev tesseract-ocr-eng # for tesseract
sudo apt-get -y install libssl-dev libsasl2-dev
sudo apt-get -y install biber # for ctsem
sudo apt-get -y install libopenblas-dev # for ivmte (+ local R build with default ./configure to pick up shared openblas)
sudo apt-get -y install libhiredis-dev # for redux used by nodbi
sudo apt-get -y install libzmq3-dev # for rzmq
sudo apt-get -y install libimage-exiftool-perl # for camtrapR
sudo apt-get -y install parallel # for revdepr.R
sudo apt-get -y install pandoc-citeproc # for basecallQC
sudo R CMD javareconf
# ENDIF

Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ export(nafill)
export(setnafill)
export(.Last.updated)
export(fcoalesce)
export(is.valid)

S3method("[", data.table)
S3method("[<-", data.table)
Expand Down
65 changes: 65 additions & 0 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -3109,3 +3109,68 @@ isReallyReal = function(x) {
names(on) = xCols
return(list(on = on, ops = idx_op))
}

# Check that `x` is a structurally valid data.table.
#
# Arguments:
#   x     object to validate.
#   deep  when TRUE, additionally recompute the ordering of the key and of every index
#         and compare it with what is stored on `x`.
#   err   when TRUE (default) the first failed check raises an error carrying the reason;
#         when FALSE the function returns FALSE with the reason in attribute "failed".
#
# Value: TRUE when every check passes; otherwise see `err`.
is.valid = function(x, deep=TRUE, err=TRUE) {
  # Report a failed check according to `err`.
  fail = function(msg) {
    if (err) stop(call.=FALSE, msg)
    ans = setattr(copy(FALSE), "failed", msg)
    return(ans)
  }
  if (!is.data.table(x)) return(fail("is not a data.table"))
  nc = ncol(x)
  nr = vapply(x, length, 0L)
  nr = unique(nr) ## integer() or scalar are valid at that point
  if (nc && length(nr)!=1L) return(fail("columns have different length"))
  if (!nc && length(nr)) return(fail("has no columns but has rows")) # nocov ## because nrow uses our C dim that checks ncol, unlike base R where attributes are used
  if (!selfrefok(x)) return(fail("selfref is not ok"))
  cols = names(x)
  key = key(x)
  indices = indices(x, TRUE)
  # A zero-column table must not carry names, key or indices; if it does not, it is valid.
  if (!nc && length(cols)) return(fail("has no columns but has names")) # nocov
  if (!nc && !is.null(key)) return(fail("has no columns but has key"))
  if (!nc && !is.null(indices)) return(fail("has no columns but has indices"))
  if (!nc) return(TRUE)
  if (!length(cols)) return(fail("has columns but no names"))
  if (nc != length(cols)) return(fail("number of columns is different than number of column names"))
  if (anyDuplicated(cols)) return(fail("has duplicated column names"))
  if (anyNA(cols)) return(fail("has NA names"))
  if (anyNA(key)) return(fail("has NA key"))
  if (anyNA(indices)) return(fail("has NA indices"))
  # TRUE for NA entries and for entries that survive a latin1 -> ASCII conversion unchanged.
  # iconv() gives NA for input it cannot convert; that must count as "not ASCII" rather than
  # propagate NA into the `if` condition below.
  is.ascii = function(x) {
    nna = !is.na(x)
    ans = rep(TRUE, length(x))
    conv = iconv(x[nna], "latin1", "ASCII")
    ans[nna] = !is.na(conv) & conv == x[nna]
    ans
  }
  if (any(!is.ascii(cols))) return(fail("column names are not ASCII"))
  if (length(key) && any(!key %chin% cols)) return(fail("key columns not exist"))
  # use the indices already extracted from `x`; do not rely on any variable outside this function
  if (length(indices) && any(!unique(unlist(indices)) %chin% cols)) return(fail("indices columns not exist"))
  if (any(substr(cols, 1L, 2L)=="x.")) return(fail("some column has 'x.' in their names"))
  if (any(substr(cols, 1L, 2L)=="i.")) return(fail("some column has 'i.' in their names"))
  special = c(".GRP",".BY",".I",".NGRP",".N",".EACHI",".SD")
  if (any(special %chin% cols)) return(fail("some column names are overlapping to our special symbols, see ?.N"))
  if (any(vapply(x, isALTREP, NA))) return(fail("some columns are ALTREP"))
  # A column is supported when it is non-NULL, has at most one dimension,
  # is not a data.frame/data.table and is not a language object.
  colSupported = function(x) {
    dm = dim(x)
    if (is.null(x)) return(FALSE) # nocov
    if (length(dm)>1L) return(FALSE)
    if (inherits(x, "data.frame")) return(FALSE) # nocov
    if (inherits(x, "data.table")) return(FALSE) # nocov
    if (is.language(x)) return(FALSE)
    ## TODO add more for POSIXlt, etc.
    TRUE
  }
  if (nc && any(!vapply(x, colSupported, NA))) return(fail("some columns are of unsupported type and should be wrapped into list"))
  if (deep) {
    if (length(key)) {
      o = forderv(x, key) ## lazy=FALSE after lazy-forder merged, otherwise existing key will be re-used and always true!
      # a non-empty order vector means the rows are not already sorted by the key
      if (length(o)) return(fail("key is invalid"))
    }
    if (length(indices)) {
      for (idx_cols in indices) {
        o = forderv(x, idx_cols) ## lazy=FALSE after lazy-forder merged, otherwise existing key will be re-used and always true!
        if (!identical(o, getindex(x, idx_cols))) return(fail("some indices are invalid"))
      }
    }
  }
  # every check passed; return TRUE explicitly rather than the value of the last `if`/`for`
  TRUE
}
2 changes: 2 additions & 0 deletions R/wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ colnamesInt = function(x, cols, check_dups=FALSE) .Call(CcolnamesInt, x, cols, c
# R-level entry point that hands `x` unchanged to the native routine CcoerceFillR.
coerceFill = function(x) {
  .Call(CcoerceFillR, x)
}

# Calls the native routine CtestMsgR. Each argument is coerced to integer and
# only its first element is passed on.
testMsg = function(status=0L, nx=2L, nk=2L) {
  status_int = as.integer(status)[1L]
  nx_int = as.integer(nx)[1L]
  nk_int = as.integer(nk)[1L]
  .Call(CtestMsgR, status_int, nx_int, nk_int)
}

# R-level entry point that hands `x` unchanged to the native routine CisALTREP.
isALTREP = function(x) {
  .Call(CisALTREP, x)
}
58 changes: 58 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -16853,3 +16853,61 @@ A = data.table(A=c(complex(real = 1:3, imaginary=c(0, -1, 1)), NaN))
test(2138.3, rbind(A,B), data.table(A=c(as.character(A$A), B$A)))
A = data.table(A=as.complex(rep(NA, 5)))
test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A)))

# Add is.valid(DT) function #2334
# Each case below builds a deliberately malformed object `d` and expects is.valid(d)
# (with its default err=TRUE) to raise the matching error message.
# a plain data.frame is rejected
d = data.frame(x=1L)
test(2139.01, is.valid(d), error="is not a data.table")
# class attribute set by hand on a list whose columns have lengths 2 and 1
d = structure(list(x=1:2, y=1L), class=c("data.table","data.frame"))
test(2139.02, is.valid(d), error="columns have different length")
# object assembled with structure() rather than a data.table constructor
d = structure(list(1L), .Names="x", row.names=c(NA, -1L), class=c("data.table","data.frame"))
test(2139.03, is.valid(d), error="selfref is not ok")
# not exercised yet
#test(2139.04, is.valid(d), error="has no columns but has names")
# empty data.table carrying a "sorted" (key) attribute
d = data.table()
setattr(d, "sorted", "x")
test(2139.05, is.valid(d), error="has no columns but has key")
# empty data.table carrying an "index" attribute
d = data.table()
setattr(d, "index", setattr(integer(), "__x", integer()))
test(2139.06, is.valid(d), error="has no columns but has indices")
# one column, names attribute removed
d = data.table(1L)
setattr(d, "names", NULL)
test(2139.07, is.valid(d), error="has columns but no names")
# not exercised yet
#test(2139.08, is.valid(d), error="number of columns is different than number of column names")
d = data.table(x=1L, x=2L)
test(2139.09, is.valid(d), error="has duplicated column names")
d = data.table(1L, 2L)
# NOTE(review): presumably this sets the second name of `d` to NA by reference, through a
# temporary table that shares the names vector - confirm
setDT(list(dd=names(d)))[2L, "dd" := NA_character_]
test(2139.10, is.valid(d), error="has NA names")
# key attribute containing NA
d = data.table(1L, 2L)
setattr(d, "sorted", c("V1",NA))
test(2139.11, is.valid(d), error="has NA key")
# not exercised yet
#test(2139.12, is.valid(d), error="has NA indices")
# non-ASCII column name; the corresponding test is disabled
d = data.table("\u3b1"=1L)
#test(2139.13, is.valid(d), error="column names are not ASCII")
# key names a column that is not in the table
d = data.table(a=1L)
setattr(d, "sorted", "b")
test(2139.14, is.valid(d), error="key columns not exist")
# index names a column that is not in the table
d = data.table(a=1L)
setattr(d, "index", setattr(integer(), "__b", integer()))
test(2139.15, is.valid(d), error="indices columns not exist")
# column names starting with the "x." and "i." prefixes
d = data.table(x.a=1L)
test(2139.16, is.valid(d), error="some column has 'x.' in their names")
d = data.table(i.a=1L)
test(2139.17, is.valid(d), error="some column has 'i.' in their names")
# column named like one of the special symbols
d = data.table(.I=1L)
test(2139.18, is.valid(d), error="some column names are overlapping to our special symbols, see ?.N")
# class set by hand on a list holding 1:2, then over-allocated in place
# NOTE(review): presumably 1:2 stays an ALTREP compact sequence because the column is never copied - confirm
d = list(x=1:2)
setattr(d, "class", c("data.table","data.frame"))
invisible(setalloccol(d))
test(2139.19, is.valid(d), error="some columns are ALTREP")
# matrix column
d = list(x=1L, m=matrix(1L))
suppressWarnings(setDT(d))
test(2139.201, is.valid(d), error="some columns are of unsupported type and should be wrapped into list")
# expression column
d = list(x=1L, m=expression(1L))
setDT(d)
test(2139.202, is.valid(d), error="some columns are of unsupported type and should be wrapped into list")
# key attribute claims the table is sorted by x, but x is 2:1
d = data.table(x=2:1)
setattr(d, "sorted", "x")
test(2139.21, is.valid(d), error="key is invalid")
# index on x stored as an empty order vector, but x is 2:1
d = data.table(x=2:1)
setattr(d, "index", setattr(integer(), "__x", integer()))
test(2139.22, is.valid(d), error="some indices are invalid")
1 change: 1 addition & 0 deletions src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ bool islocked(SEXP x);
SEXP islockedR(SEXP x);
bool need2utf8(SEXP x);
SEXP coerceUtf8IfNeeded(SEXP x);
SEXP isALTREP(SEXP x);

// types.c
char *end(char *start);
Expand Down
1 change: 1 addition & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ R_CallMethodDef callMethods[] = {
{"CfrollapplyR", (DL_FUNC) &frollapplyR, -1},
{"CtestMsgR", (DL_FUNC) &testMsgR, -1},
{"C_allNAR", (DL_FUNC) &allNAR, -1},
{"CisALTREP", (DL_FUNC) &isALTREP, -1},
{NULL, NULL, 0}
};

Expand Down
3 changes: 3 additions & 0 deletions src/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -363,3 +363,6 @@ SEXP coerceUtf8IfNeeded(SEXP x) {
return(ans);
}

// R-callable predicate: wraps the value of the ALTREP() macro for x in a length-1 logical vector.
SEXP isALTREP(SEXP x) {
  const int alt = ALTREP(x);
  return ScalarLogical(alt);
}