diff --git a/java/.gitignore b/java/.gitignore new file mode 100644 index 0000000..eb5a316 --- /dev/null +++ b/java/.gitignore @@ -0,0 +1 @@ +target diff --git a/java/pom.xml b/java/pom.xml new file mode 100644 index 0000000..3925bf1 --- /dev/null +++ b/java/pom.xml @@ -0,0 +1,67 @@ + + 4.0.0 + + com.cloudera.impala + hive-udf-samples + 1.0 + jar + + hive-udf-samples + + + UTF-8 + ${env.IMPALA_HADOOP_VERSION} + ${env.IMPALA_HIVE_VERSION} + + + + + org.apache.hadoop + hadoop-common + ${hadoop.version} + + + org.apache.hive + hive-exec + system + ${hive.version} + ${env.IMPALA_HOME}/thirdparty/hive-${hive.version}/lib/hive-exec-${hive.version}.jar + + + + + + cdh.rcs.releases.repo + https://repository.cloudera.com/content/groups/cdh-releases-rcs + CDH Releases Repository + + true + + + + cdh.releases.repo + https://repository.cloudera.com/content/repositories/releases + CDH Releases Repository + + false + + + + cdh.snapshots.repo + https://repository.cloudera.com/content/repositories/snapshots + CDH Snapshots Repository + + true + + + + cloudera.thirdparty.repo + https://repository.cloudera.com/content/repositories/third-party + Cloudera Third Party Repository + + false + + + + diff --git a/java/src/main/java/com/cloudera/FuzzyEqualsUdf.java b/java/src/main/java/com/cloudera/FuzzyEqualsUdf.java new file mode 100644 index 0000000..07265d9 --- /dev/null +++ b/java/src/main/java/com/cloudera/FuzzyEqualsUdf.java @@ -0,0 +1,44 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.cloudera.impala;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Udf that returns true if two double arguments are approximately equal.
+ * Usage: > create function fuzzy_equals(double, double) returns boolean
+ *          location '/user/cloudera/hive-udf-samples-1.0.jar'
+ *          SYMBOL='com.cloudera.impala.FuzzyEqualsUdf';
+ *        > select fuzzy_equals(1, 1.00000001);
+ */
+public class FuzzyEqualsUdf extends UDF {
+  // Double literal on purpose: 0.000001f would widen to ~9.99999997e-7, not 1e-6.
+  private static final double EPSILON = 1e-6;
+
+  /** Returns null if either argument is null, else {@code |x - y| < EPSILON}. */
+  public BooleanWritable evaluate(DoubleWritable x, DoubleWritable y) {
+    if (x == null || y == null) return null;
+    return new BooleanWritable(Math.abs(x.get() - y.get()) < EPSILON);
+  }
+}
diff --git a/udf-sample-test.cc b/udf-sample-test.cc
index d282bf0..35c3f20 100644
--- a/udf-sample-test.cc
+++ b/udf-sample-test.cc
@@ -32,6 +32,12 @@ int main(int argc, char** argv) {
   passed &= UdfTestHarness::ValidateUdf(
       AddUdf, IntVal::null(), IntVal(2), IntVal::null());
 
+  // Test FuzzyEquals sample.
+  passed &= UdfTestHarness::ValidateUdf(
+      FuzzyEquals, DoubleVal(1.0), DoubleVal(1.0000000001), BooleanVal(true));
+  passed &= UdfTestHarness::ValidateUdf(
+      FuzzyEquals, DoubleVal(1.1), DoubleVal(1.0), BooleanVal(false));
+
   cout << "Tests " << (passed ? "Passed."
: "Failed.") << endl;
   return !passed;
 }
diff --git a/udf-sample.cc b/udf-sample.cc
index ad15235..e66ea63 100644
--- a/udf-sample.cc
+++ b/udf-sample.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "udf-sample.h"
+#include <math.h>
 
 // In this sample we are declaring a UDF that adds two ints and returns an int.
 IntVal AddUdf(FunctionContext* context, const IntVal& arg1, const IntVal& arg2) {
@@ -22,3 +23,12 @@ IntVal AddUdf(FunctionContext* context, const IntVal& arg1, const IntVal& arg2)
 
 // Multiple UDFs can be defined in the same file
 
+// Returns true if the absolute difference between x and y is below EPSILON.
+// Returns NULL if either argument is NULL.
+BooleanVal FuzzyEquals(FunctionContext* ctx, const DoubleVal& x, const DoubleVal& y) {
+  // Double literal on purpose: 0.000001f would widen to ~9.99999997e-7, not 1e-6.
+  const double EPSILON = 1e-6;
+  if (x.is_null || y.is_null) return BooleanVal::null();
+  const double delta = fabs(x.val - y.val);
+  return BooleanVal(delta < EPSILON);
+}
diff --git a/udf-sample.h b/udf-sample.h
index 8cede06..afe14b1 100644
--- a/udf-sample.h
+++ b/udf-sample.h
@@ -20,6 +20,16 @@
 
 using namespace impala_udf;
 
+// Usage: > create function add(int, int) returns int
+//          location '/user/cloudera/libudfsample.so' SYMBOL='AddUdf';
+//        > select add(1, 2);
 IntVal AddUdf(FunctionContext* context, const IntVal& arg1, const IntVal& arg2);
 
+// Returns true if x is approximately equal to y.
+// Returns NULL if either argument is NULL.
+// Usage: > create function fuzzy_equals(double, double) returns boolean
+//          location '/user/cloudera/libudfsample.so' SYMBOL='FuzzyEquals';
+//        > select fuzzy_equals(1, 1.00000001);
+BooleanVal FuzzyEquals(FunctionContext* context, const DoubleVal& x, const DoubleVal& y);
+
 #endif