Skip to content

Commit

Permalink
Add code + a few extra comments from the scalar UDF examples in the
Browse files Browse the repository at this point in the history
docs. Didn't do extra testing, just reused the code straight from
the docs with 2-space indentation.
  • Loading branch information
John Russell committed Feb 10, 2014
1 parent ed1ca5a commit c182fe2
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 0 deletions.
70 changes: 70 additions & 0 deletions udf-sample.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,74 @@ BooleanVal FuzzyEquals(FunctionContext* ctx, const DoubleVal& x, const DoubleVal
return BooleanVal(delta < EPSILON);
}

// Check if the input string has any occurrences of the letters (a,e,i,o,u).
// Case-insensitive, so also detects (A,E,I,O,U).
//
// Returns NULL for a NULL input, true as soon as the first vowel is found,
// and false otherwise (including for the empty string).
BooleanVal HasVowels(FunctionContext* context, const StringVal& input)
{
  if (input.is_null) return BooleanVal::null();

  // Only the bytes ptr[0] .. ptr[len - 1] belong to the string, so the loop
  // bound must be strictly less than len. Using <= would inspect one byte
  // past the end, which can yield a false positive or an invalid read.
  const uint8_t* bytes = input.ptr;
  for (int i = 0; i < input.len; ++i)
  {
    const uint8_t c = tolower(bytes[i]);
    if (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u')
    {
      return BooleanVal(true);
    }
  }
  return BooleanVal(false);
}

// Count all occurrences of the letters (a,e,i,o,u) in the input string.
// Case-insensitive, so also counts (A,E,I,O,U).
//
// Returns NULL for a NULL input; otherwise the number of vowel bytes found
// (0 for the empty string).
IntVal CountVowels(FunctionContext* context, const StringVal& arg1)
{
  if (arg1.is_null) return IntVal::null();

  // Only the bytes ptr[0] .. ptr[len - 1] belong to the string, so the loop
  // bound must be strictly less than len. Using <= would also examine the
  // byte just past the end, which can over-count or cause an invalid read.
  const uint8_t* bytes = arg1.ptr;
  int count = 0;
  for (int i = 0; i < arg1.len; ++i)
  {
    const uint8_t c = tolower(bytes[i]);
    if (c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u')
    {
      count++;
    }
  }
  return IntVal(count);
}

// Produce a copy of the input string with every (a,e,i,o,u) removed.
// The match ignores case, so (A,E,I,O,U) are dropped as well.
// A NULL input yields a NULL result.
StringVal StripVowels(FunctionContext* context, const StringVal& arg1)
{
  if (arg1.is_null) return StringVal::null();

  const std::string source((const char *)arg1.ptr, arg1.len);
  std::string kept;

  for (std::string::size_type pos = 0; pos < source.length(); ++pos)
  {
    const uint8_t ch = source[pos];
    const uint8_t lowered = tolower(ch);
    const bool is_vowel = (lowered == 'a' || lowered == 'e' || lowered == 'i' ||
                           lowered == 'o' || lowered == 'u');

    // Keep the byte exactly as it appeared (original case) when it is not a vowel.
    if (!is_vowel)
    {
      kept.push_back((char)ch);
    }
  }

  // 'kept' is a local and goes away when this function returns, so its bytes
  // are copied into a StringVal built with the context-taking constructor
  // (the constructor overload that allocates a new buffer for the result).
  StringVal result(context, kept.size());
  memcpy(result.ptr, kept.data(), kept.size());
  return result;
}
23 changes: 23 additions & 0 deletions udf-sample.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,27 @@ IntVal AddUdf(FunctionContext* context, const IntVal& arg1, const IntVal& arg2);
// > select fuzzy_equals(1, 1.00000001);
BooleanVal FuzzyEquals(FunctionContext* context, const DoubleVal& x, const DoubleVal& y);

// Perform tests, calculations, and transformations
// on a string value, using the set of letters 'aeiou'.

// Returns true if the string contains at least one of the letters 'aeiou'
// (in either case), false if it contains none, and NULL for a NULL input.
// Usage: > create function hasvowels(string) returns boolean
// location '/user/cloudera/libudfsample.so' SYMBOL='HasVowels';
// > select hasvowels('banana');
// > select hasvowels('grr hm shhh');
// > select hasvowels(c1) from t1;
BooleanVal HasVowels(FunctionContext* context, const StringVal& input);


// Returns the number of letters from 'aeiou' (in either case) found in the
// string, or NULL for a NULL input.
// Usage: > create function countvowels(string) returns int
// location '/user/cloudera/libudfsample.so' SYMBOL='CountVowels';
// > select countvowels('abracadabra hocus pocus');
// > select countvowels(c1) from t1;
IntVal CountVowels(FunctionContext* context, const StringVal& arg1);

// Returns a copy of the string with every letter from 'aeiou' (in either
// case) removed, or NULL for a NULL input.
// Usage: > create function stripvowels(string) returns string
// location '/user/cloudera/libudfsample.so' SYMBOL='StripVowels';
// > select stripvowels('colour color');
// > select stripvowels(c1) from t1;
StringVal StripVowels(FunctionContext* context, const StringVal& arg1);

#endif

0 comments on commit c182fe2

Please sign in to comment.