Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nafill character, logical, etc. #4980

Draft
wants to merge 19 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions inst/tests/nafill.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ x = 1:10
test(3.01, nafill(x, "locf", fill=0L), x)
test(3.02, setnafill(list(copy(x)), "locf", fill=0L), list(x))
test(3.03, setnafill(x, "locf"), error="in-place update is supported only for list")
test(3.04, nafill(letters[1:5], fill=0), error="must be numeric type, or list/data.table")
test(3.05, setnafill(list(letters[1:5]), fill=0), error="must be numeric type, or list/data.table")
test(3.04, nafill(letters[1:5], fill=0), error="must be*type, or list/data.table")
test(3.05, setnafill(list(letters[1:5]), fill=0), error="must be*type, or list/data.table")
test(3.06, nafill(x, fill=1:2), error="fill must be a vector of length 1")
test(3.07, nafill(x, fill="asd"), x, warning=c("Coercing.*character.*integer","NAs introduced by coercion"))

Expand Down
67 changes: 56 additions & 11 deletions src/nafill.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,28 @@ void nafillInteger64(int64_t *x, uint_fast64_t nx, unsigned int type, int64_t fi
if (verbose)
snprintf(ans->message[0], 500, "%s: took %.3fs\n", __func__, omp_get_wtime()-tic);
}
void nafillString(const SEXP *x, uint_fast64_t nx, unsigned int type, SEXP fill, ans_t *ans, bool verbose) {
double tic=0.0;
if (verbose)
tic = omp_get_wtime();
if (type==0) { // const
for (uint_fast64_t i=0; i<nx; i++) {
((SEXP*)ans->char_v)[i] = x[i]==NA_STRING ? fill : x[i];
}
} else if (type==1) { // locf
((SEXP*)ans->char_v)[0] = x[0]==NA_STRING ? fill : x[0];
for (uint_fast64_t i=1; i<nx; i++) {
((SEXP*)ans->char_v)[i] = x[i]==NA_STRING ? ((SEXP*)ans->char_v)[i-1] : x[i];
}
} else if (type==2) { // nocb
((SEXP*)ans->char_v)[nx-1] = x[nx-1]==NA_STRING ? fill : x[nx-1];
for (int_fast64_t i=nx-2; i>=0; i--) {
((SEXP*)ans->char_v)[i] = x[i]==NA_STRING ? ((SEXP*)ans->char_v)[i+1] : x[i];
}
}
if (verbose)
snprintf(ans->message[0], 500, "%s: took %.3fs\n", __func__, omp_get_wtime()-tic);
}

SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, SEXP cols) {
int protecti=0;
Expand All @@ -108,26 +130,37 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S
if (obj_scalar) {
if (binplace)
error(_("'x' argument is atomic vector, in-place update is supported only for list/data.table"));
else if (!isReal(obj) && !isInteger(obj))
error(_("'x' argument must be numeric type, or list/data.table of numeric types"));
else if (!isReal(obj) && !isInteger(obj) && !isLogical(obj) && !isFactor(obj) && !isString(obj))
error(_("'x' argument must be numeric/integer/logical/factor/character/integer64, or list/data.table of such types"));
SEXP obj1 = obj;
obj = PROTECT(allocVector(VECSXP, 1)); protecti++; // wrap into list
SET_VECTOR_ELT(obj, 0, obj1);
}
SEXP ricols = PROTECT(colnamesInt(obj, cols, ScalarLogical(TRUE))); protecti++; // nafill cols=NULL which turns into seq_along(obj)
x = PROTECT(allocVector(VECSXP, length(ricols))); protecti++;
int *icols = INTEGER(ricols);
bool hadChar = false;
bool* wasChar = (bool*)R_alloc(length(ricols), sizeof(bool)); // this is not yet used but can be used to run alll non-char columns in parallel region and char in single threaded
for (int i=0; i<length(ricols); i++) {
SEXP this_col = VECTOR_ELT(obj, icols[i]-1);
if (!isReal(this_col) && !isInteger(this_col))
error(_("'x' argument must be numeric type, or list/data.table of numeric types"));
if (isString(this_col)) {
hadChar = true;
wasChar[i] = true;
} else {
wasChar[i] = false;
if (!isReal(this_col) && !isInteger(this_col) && !isLogical(this_col) && !isFactor(this_col))
error(_("'x' argument must be numeric/integer/logical/factor/character/integer64, or list/data.table of such types"));
}
SET_VECTOR_ELT(x, i, this_col);
}
R_len_t nx = length(x);

// data pointers
double **dx = (double**)R_alloc(nx, sizeof(double*));
int32_t **ix = (int32_t**)R_alloc(nx, sizeof(int32_t*));
int32_t **ix = (int32_t**)R_alloc(nx, sizeof(int32_t*)); // also logical and factor
const SEXP **sx = (const SEXP**)R_alloc(nx, sizeof(SEXP*)); // character
int64_t **i64x = (int64_t**)R_alloc(nx, sizeof(int64_t*));
// nrows of columns
uint_fast64_t *inx = (uint_fast64_t*)R_alloc(nx, sizeof(uint_fast64_t));
SEXP ans = R_NilValue;
ans_t *vans = (ans_t *)R_alloc(nx, sizeof(ans_t));
Expand All @@ -139,23 +172,32 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S
dx[i] = REAL(xi);
i64x[i] = (int64_t *)REAL(xi);
ix[i] = NULL;
} else {
sx[i] = NULL;
} else if (isInteger(xi) || isLogical(xi) || isFactor(xi)) {
ix[i] = INTEGER(xi);
dx[i] = NULL;
i64x[i] = NULL;
sx[i] = NULL;
} else if (isString(xi)) {
ix[i] = NULL;
dx[i] = NULL;
i64x[i] = NULL;
sx[i] = SEXPPTR_RO(xi);
} else {
error(_("internal error: unknown column type, should have been caught by now, please report")); // # nocov
}
}
if (!binplace) {
ans = PROTECT(allocVector(VECSXP, nx)); protecti++;
for (R_len_t i=0; i<nx; i++) {
SET_VECTOR_ELT(ans, i, allocVector(TYPEOF(VECTOR_ELT(x, i)), inx[i]));
const SEXP ansi = VECTOR_ELT(ans, i);
const void *p = isReal(ansi) ? (void *)REAL(ansi) : (void *)INTEGER(ansi);
vans[i] = ((ans_t) { .dbl_v=(double *)p, .int_v=(int *)p, .int64_v=(int64_t *)p, .status=0, .message={"\0","\0","\0","\0"} });
const void *p = isReal(ansi) ? (void *)REAL(ansi) : (isInteger(ansi) ? (void *)INTEGER(ansi) : (void*) SEXPPTR_RO(ansi));
vans[i] = ((ans_t) { .dbl_v=(double *)p, .int_v=(int *)p, .int64_v=(int64_t *)p, .char_v=(void*)p, .status=0, .message={"\0","\0","\0","\0"} });
}
} else {
for (R_len_t i=0; i<nx; i++) {
vans[i] = ((ans_t) { .dbl_v=dx[i], .int_v=ix[i], .int64_v=i64x[i], .status=0, .message={"\0","\0","\0","\0"} });
vans[i] = ((ans_t) { .dbl_v=dx[i], .int_v=ix[i], .int64_v=i64x[i], .char_v=(void*)sx[i], .status=0, .message={"\0","\0","\0","\0"} });
}
}

Expand Down Expand Up @@ -190,7 +232,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S
fillp[i] = SEXPPTR_RO(VECTOR_ELT(fill, i)); // do like this so we can use in parallel region
}
}
#pragma omp parallel for if (nx>1) num_threads(getDTthreads(nx, true))
#pragma omp parallel for if (nx>1 && !hadChar) num_threads(getDTthreads(nx, true))
for (R_len_t i=0; i<nx; i++) {
switch (TYPEOF(VECTOR_ELT(x, i))) {
case REALSXP : {
Expand All @@ -200,9 +242,12 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S
nafillDouble(dx[i], inx[i], itype, hasFill ? ((double *)fillp[i])[0] : NA_REAL, nan_is_na, &vans[i], verbose);
}
} break;
case INTSXP : {
case LGLSXP: case INTSXP : {
nafillInteger(ix[i], inx[i], itype, hasFill ? ((int32_t *)fillp[i])[0] : NA_INTEGER, &vans[i], verbose);
} break;
case STRSXP : {
nafillString(sx[i], inx[i], itype, hasFill ? ((SEXP *)fillp[i])[0] : NA_STRING, &vans[i], verbose);
} break;
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ typedef struct ans_t {
int32_t *int_v; // used in nafill
double *dbl_v; // used in froll, nafill
int64_t *int64_v; // used in nafill
//void *char_v; // to be used in nafill but then must escape parallelism
void *char_v; // used in nafill
uint8_t status; // 0:ok, 1:message, 2:warning, 3:error; unix return signal: {0,1,2}=0, {3}=1
char message[4][ANS_MSG_SIZE]; // STDOUT: output, STDERR: message, warning, error
// implicit n_message limit discussed here: https://github.com/Rdatatable/data.table/issues/3423#issuecomment-487722586
Expand Down