Skip to content

Commit

Permalink
replace and replace_all
Browse files Browse the repository at this point in the history
  • Loading branch information
sc1f committed Oct 28, 2021
1 parent 8100fec commit 7ed7ffd
Show file tree
Hide file tree
Showing 12 changed files with 449 additions and 73 deletions.
12 changes: 9 additions & 3 deletions cpp/perspective/src/cpp/computed_expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,11 +436,15 @@ t_computed_function_store::t_computed_function_store(t_expression_vocab& vocab,
, m_lower_fn(computed_function::lower(vocab, is_type_validator))
, m_to_string_fn(computed_function::to_string(vocab, is_type_validator))
, m_match_fn(computed_function::match(regex_mapping))
, m_fullmatch_fn(computed_function::fullmatch(regex_mapping))
, m_match_all_fn(computed_function::match_all(regex_mapping))
, m_search_fn(
computed_function::search(vocab, regex_mapping, is_type_validator))
, m_indexof_fn(computed_function::indexof(regex_mapping))
, m_substring_fn(computed_function::substring(vocab, is_type_validator)) {}
, m_substring_fn(computed_function::substring(vocab, is_type_validator))
, m_replace_fn(
computed_function::replace(vocab, regex_mapping, is_type_validator))
, m_replace_all_fn(
computed_function::replace_all(vocab, regex_mapping, is_type_validator)) {}

void
t_computed_function_store::register_computed_functions(
Expand Down Expand Up @@ -490,10 +494,12 @@ t_computed_function_store::register_computed_functions(

// Regex functions
sym_table.add_function("match", m_match_fn);
sym_table.add_function("fullmatch", m_fullmatch_fn);
sym_table.add_function("match_all", m_match_all_fn);
sym_table.add_function("search", m_search_fn);
sym_table.add_function("indexof", m_indexof_fn);
sym_table.add_function("substring", m_substring_fn);
sym_table.add_function("replace", m_replace_fn);
sym_table.add_function("replace_all", m_replace_all_fn);

// And scalar constants
sym_table.add_constant("True", t_computed_expression_parser::TRUE_SCALAR);
Expand Down
205 changes: 197 additions & 8 deletions cpp/perspective/src/cpp/computed_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,6 @@ namespace computed_function {
std::string temp_str
= std::string(temp_string.begin(), temp_string.end());

// Don't allow empty strings from the user
if (temp_str == "")
return rval;

if (m_is_type_validator) {
// Return the sentinel value which indicates a valid output from
// type checking, as the output value is not STATUS_CLEAR
Expand Down Expand Up @@ -443,14 +439,14 @@ namespace computed_function {
return rval;
}

fullmatch::fullmatch(t_regex_mapping& regex_mapping)
match_all::match_all(t_regex_mapping& regex_mapping)
: exprtk::igeneric_function<t_tscalar>("TS")
, m_regex_mapping(regex_mapping) {}

fullmatch::~fullmatch() {}
match_all::~match_all() {}

t_tscalar
fullmatch::operator()(t_parameter_list parameters) {
match_all::operator()(t_parameter_list parameters) {
t_tscalar rval;
rval.clear();
rval.m_type = DTYPE_BOOL;
Expand Down Expand Up @@ -673,7 +669,7 @@ namespace computed_function {
}

// Passed type checking, assign values
if (i == 0 && !m_is_type_validator) {
if (i == 0) {
search_string = temp_scalar.to_string();
} else if (i == 1) {
start_idx = temp_scalar.to_double();
Expand All @@ -686,6 +682,11 @@ namespace computed_function {
return rval;
}
}

// done type checking
if (m_is_type_validator) {
return rval;
}

std::size_t length = search_string.length();

Expand Down Expand Up @@ -718,6 +719,194 @@ namespace computed_function {
return rval;
}

/**
 * @brief Builds the `replace` expression function.
 *
 * Registers the exprtk parameter sequence "TS?"; operator() reads the three
 * arguments as a scalar, a string and a generic (scalar or string) value.
 *
 * @param expression_vocab vocab used to intern result strings.
 * @param regex_mapping store used to intern compiled regex patterns.
 * @param is_type_validator when true, operator() stops after type checking.
 */
replace::replace(
    t_expression_vocab& expression_vocab,
    t_regex_mapping& regex_mapping,
    bool is_type_validator)
    : exprtk::igeneric_function<t_tscalar>("TS?"),
      m_expression_vocab(expression_vocab),
      m_regex_mapping(regex_mapping),
      m_is_type_validator(is_type_validator) {
}

// Nothing to release: the members initialized by the constructor are
// references and a flag.
replace::~replace() = default;

/**
 * @brief Evaluates `replace(string, pattern, replacer)`: substitutes the
 * first match of `pattern` inside `string` with `replacer`.
 *
 * @param parameters [0] scalar holding the input string, [1] string holding
 * the regex pattern, [2] the replacer - either a scalar or the empty string
 * literal ''.
 * @return a DTYPE_STR scalar. Its status is set to STATUS_CLEAR when an
 * argument fails type checking or the pattern cannot be interned. When no
 * replacement happens, the input scalar itself is returned.
 */
t_tscalar
replace::operator()(t_parameter_list parameters) {
    t_tscalar rval;
    rval.clear();
    rval.m_type = DTYPE_STR;

    // Shared exit for every failed type check.
    const auto type_error = [&rval]() {
        rval.m_status = STATUS_CLEAR;
        return rval;
    };

    // Argument 0: the string in which the replacement takes place.
    t_scalar_view input_view(parameters[0]);
    t_tscalar string_scalar = input_view();

    // Argument 1: the regex pattern.
    t_string_view pattern_view(parameters[1]);
    std::string match_pattern(pattern_view.begin(), pattern_view.end());

    // Argument 2: the replacer. Interning does not pick up empty strings,
    // yet replacing with '' must be possible, so the replacer is accepted
    // either as a scalar or as the empty string literal.
    const t_generic_type& replacer_arg(parameters[2]);
    t_tscalar replacer_scalar;

    switch (replacer_arg.type) {
        case t_generic_type::e_scalar: {
            t_scalar_view as_scalar(replacer_arg);
            replacer_scalar = as_scalar();
        } break;
        case t_generic_type::e_string: {
            t_string_view as_literal(replacer_arg);
            const std::string literal(as_literal.begin(), as_literal.end());

            // Any non-empty replacer has to be interned by the caller
            // first; only '' may arrive as a raw string literal.
            if (!literal.empty()) {
                return type_error();
            }

            // Stand in the vocab's own empty string.
            replacer_scalar.set(m_expression_vocab.get_empty_string());
        } break;
        default:
            return type_error();
    }

    const bool operands_ok = string_scalar.m_type == DTYPE_STR
        && replacer_scalar.m_type == DTYPE_STR && !match_pattern.empty();

    if (!operands_ok) {
        return type_error();
    }

    // Interning the pattern is the regex type check: a null result is
    // reported as a type error.
    RE2* regex = m_regex_mapping.intern(match_pattern);

    if (regex == nullptr) {
        return type_error();
    }

    // Type checking ends here; the validator never performs a replacement.
    if (m_is_type_validator) {
        return rval;
    }

    // Work on a private copy: RE2::Replace rewrites its first argument in
    // place and the string owned by the vocab must not be modified.
    std::string subject = string_scalar.to_string();

    if (subject.empty()) {
        return rval;
    }

    const std::string replacement_text = replacer_scalar.to_string();
    re2::StringPiece replacement(replacement_text);

    if (RE2::Replace(&subject, *regex, replacement)) {
        // A replacement was made - intern and return the new string.
        rval.set(m_expression_vocab.intern(subject));
        return rval;
    }

    // Nothing was replaced: hand back the input scalar unchanged.
    return string_scalar;
}

/**
 * @brief Builds the `replace_all` expression function.
 *
 * Registers the exprtk parameter sequence "TS?"; operator() reads the three
 * arguments as a scalar, a string and a generic (scalar or string) value.
 *
 * @param expression_vocab vocab used to intern result strings.
 * @param regex_mapping store used to intern compiled regex patterns.
 * @param is_type_validator when true, operator() stops after type checking.
 */
replace_all::replace_all(
    t_expression_vocab& expression_vocab,
    t_regex_mapping& regex_mapping,
    bool is_type_validator)
    : exprtk::igeneric_function<t_tscalar>("TS?"),
      m_expression_vocab(expression_vocab),
      m_regex_mapping(regex_mapping),
      m_is_type_validator(is_type_validator) {
}

// Nothing to release: the members initialized by the constructor are
// references and a flag.
replace_all::~replace_all() = default;

/**
 * @brief Evaluates `replace_all(string, pattern, replacer)`: substitutes
 * every match of `pattern` inside `string` with `replacer`.
 *
 * @param parameters [0] scalar holding the input string, [1] string holding
 * the regex pattern, [2] the replacer - either a scalar or the empty string
 * literal ''.
 * @return a DTYPE_STR scalar. Its status is set to STATUS_CLEAR when an
 * argument fails type checking or the pattern cannot be interned. When no
 * replacement happens, the input scalar itself is returned.
 */
t_tscalar
replace_all::operator()(t_parameter_list parameters) {
    t_tscalar rval;
    rval.clear();
    rval.m_type = DTYPE_STR;

    // Shared exit for every failed type check.
    const auto type_error = [&rval]() {
        rval.m_status = STATUS_CLEAR;
        return rval;
    };

    // Argument 0: the string in which the replacements take place.
    t_scalar_view input_view(parameters[0]);
    t_tscalar string_scalar = input_view();

    // Argument 1: the regex pattern.
    t_string_view pattern_view(parameters[1]);
    std::string match_pattern(pattern_view.begin(), pattern_view.end());

    // Argument 2: the replacer. Interning does not pick up empty strings,
    // yet replacing with '' must be possible, so the replacer is accepted
    // either as a scalar or as the empty string literal.
    const t_generic_type& replacer_arg(parameters[2]);
    t_tscalar replacer_scalar;

    switch (replacer_arg.type) {
        case t_generic_type::e_scalar: {
            t_scalar_view as_scalar(replacer_arg);
            replacer_scalar = as_scalar();
        } break;
        case t_generic_type::e_string: {
            t_string_view as_literal(replacer_arg);
            const std::string literal(as_literal.begin(), as_literal.end());

            // Any non-empty replacer has to be interned by the caller
            // first; only '' may arrive as a raw string literal.
            if (!literal.empty()) {
                return type_error();
            }

            // Stand in the vocab's own empty string.
            replacer_scalar.set(m_expression_vocab.get_empty_string());
        } break;
        default:
            return type_error();
    }

    const bool operands_ok = string_scalar.m_type == DTYPE_STR
        && replacer_scalar.m_type == DTYPE_STR && !match_pattern.empty();

    if (!operands_ok) {
        return type_error();
    }

    // Interning the pattern is the regex type check: a null result is
    // reported as a type error.
    RE2* regex = m_regex_mapping.intern(match_pattern);

    if (regex == nullptr) {
        return type_error();
    }

    // Type checking ends here; the validator never performs a replacement.
    if (m_is_type_validator) {
        return rval;
    }

    // Work on a private copy: RE2::GlobalReplace rewrites its first
    // argument in place and the string owned by the vocab must not be
    // modified.
    std::string subject = string_scalar.to_string();

    if (subject.empty()) {
        return rval;
    }

    const std::string replacement_text = replacer_scalar.to_string();
    re2::StringPiece replacement(replacement_text);

    const std::size_t num_replaced
        = RE2::GlobalReplace(&subject, *regex, replacement);

    if (num_replaced != 0) {
        // At least one replacement was made - intern and return the result.
        rval.set(m_expression_vocab.intern(subject));
        return rval;
    }

    // Nothing was replaced: hand back the input scalar unchanged.
    return string_scalar;
}

// Registers the exprtk parameter sequence "T" with the base class.
// NOTE(review): presumably "T" declares a single scalar argument - confirm
// against the exprtk parameter-sequence documentation.
hour_of_day::hour_of_day() : exprtk::igeneric_function<t_tscalar>("T") {
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,12 @@ struct PERSPECTIVE_EXPORT t_computed_function_store {
computed_function::lower m_lower_fn;
computed_function::to_string m_to_string_fn;
computed_function::match m_match_fn;
computed_function::fullmatch m_fullmatch_fn;
computed_function::match_all m_match_all_fn;
computed_function::search m_search_fn;
computed_function::indexof m_indexof_fn;
computed_function::substring m_substring_fn;
computed_function::replace m_replace_fn;
computed_function::replace_all m_replace_all_fn;
};

} // end namespace perspective
17 changes: 12 additions & 5 deletions cpp/perspective/src/include/perspective/computed_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,10 @@ namespace computed_function {
REGEX_FUNCTION_HEADER(match)

/**
* @brief fullmatch(string, pattern) => True if the string fully matches
* @brief match_all(string, pattern) => True if the string fully matches
* pattern, and False otherwise.
*/
REGEX_FUNCTION_HEADER(fullmatch)
REGEX_FUNCTION_HEADER(match_all)

/**
* @brief search(string, pattern) => Returns the substring in the first
Expand All @@ -159,12 +159,19 @@ namespace computed_function {
STRING_FUNCTION_HEADER(substring)

/**
* @brief replace(string, pattern, replace_str) => string with all matches
* of pattern replaced with replace_str, or the original string without
* any replacements if the string does not match pattern.
* @brief replace(string, pattern, replace_str) => Replaces the first match
* of pattern inside string with replace_str, or returns the original
* string if no replacements were made.
*/
REGEX_STRING_FUNCTION_HEADER(replace)

/**
* @brief replace_all(string, pattern, replace_str) => Replaces all matches
* of pattern inside string with replace_str, or returns the original
* string if no replacements were made.
*/
REGEX_STRING_FUNCTION_HEADER(replace_all)

#define FUNCTION_HEADER(NAME) \
struct NAME : public exprtk::igeneric_function<t_tscalar> { \
NAME(); \
Expand Down
34 changes: 25 additions & 9 deletions packages/perspective/src/js/perspective.js
Original file line number Diff line number Diff line change
Expand Up @@ -1466,22 +1466,19 @@ export default function (Module) {
// First, look for a column alias, which is a // style comment
// on the first line of the expression.
let expression_alias;
let alias_match = expression_string.match(/^\/\/(?<alias>.+?)$/m);

let parsed_expression_string = expression_string.replace(
/\/\/(.+?)$/m,
(_, alias) => {
expression_alias = alias.trim();
return "";
}
);
if (alias_match?.groups?.alias) {
expression_alias = alias_match.groups.alias.trim();
}

// If an alias does not exist, the alias is the expression itself.
if (!expression_alias || expression_alias.length == 0) {
expression_alias = expression_string;
}

// Replace `true` and `false` reserved words with symbols
parsed_expression_string = parsed_expression_string.replace(
let parsed_expression_string = expression_string.replace(
/([a-zA-Z_]+[a-zA-Z0-9_]*)/g,
(match) => {
if (match == "true") {
Expand Down Expand Up @@ -1526,7 +1523,26 @@ export default function (Module) {
// way of handling it.
// TODO I concur -- texodus
parsed_expression_string = parsed_expression_string.replace(
/(bucket|match|fullmatch|search|indexof)\(.*?,\s*(intern\(\'(.+)\'\)).*\)/g,
/(bucket|match|match_all|search|indexof)\(.*?,\s*(intern\(\'(.+)\'\)).*\)/g,
(match, _, intern_fn, value) => {
// Takes a string of the form fn(x, intern('y'))
// and removes intern() to create fn(x, 'y')
const intern_idx = match.indexOf(intern_fn);
return `${match.substring(
0,
intern_idx
)}'${value}'${match.substring(
intern_idx + intern_fn.length
)}`;
}
);

// replace and replace_all have multiple string params, only one of
// which needs to be interned - the regex differs from the one
// above as it asserts the middle parameter is the one to be
// replaced.
parsed_expression_string = parsed_expression_string.replace(
/(replace_all|replace)\(.*?,\s*(intern\(\'(.*)\'\)),.*\)/g,
(match, _, intern_fn, value) => {
// Takes a string of the form fn(x, intern('y'), z)
// and removes intern() to create fn(x, 'y', z)
Expand Down
Loading

0 comments on commit 7ed7ffd

Please sign in to comment.