Skip to content

Commit

Permalink
Add FuzzyEquals example.
Browse files Browse the repository at this point in the history
  • Loading branch information
Nong Li committed Jan 22, 2014
1 parent 059e595 commit ed1ca5a
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 0 deletions.
1 change: 1 addition & 0 deletions java/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
target
67 changes: 67 additions & 0 deletions java/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.cloudera.impala</groupId>
<artifactId>hive-udf-samples</artifactId>
<version>1.0</version>
<packaging>jar</packaging>

<name>hive-udf-samples</name>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- The Hadoop and Hive versions are taken from the build environment:
IMPALA_HADOOP_VERSION and IMPALA_HIVE_VERSION must be set before invoking Maven. -->
<hadoop.version>${env.IMPALA_HADOOP_VERSION}</hadoop.version>
<hive.version>${env.IMPALA_HIVE_VERSION}</hive.version>
</properties>

<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<!-- hive-exec is system-scoped: it is not downloaded from a repository but resolved
from the jar at the systemPath below, so IMPALA_HOME must be set in the environment
and contain thirdparty/hive-<version>/lib/hive-exec-<version>.jar. -->
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<scope>system</scope>
<version>${hive.version}</version>
<systemPath>${env.IMPALA_HOME}/thirdparty/hive-${hive.version}/lib/hive-exec-${hive.version}.jar</systemPath>
</dependency>
</dependencies>

<repositories>
<repository>
<id>cdh.rcs.releases.repo</id>
<url>https://repository.cloudera.com/content/groups/cdh-releases-rcs</url>
<name>CDH Releases Repository</name>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
<repository>
<id>cdh.releases.repo</id>
<url>https://repository.cloudera.com/content/repositories/releases</url>
<name>CDH Releases Repository</name>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>cdh.snapshots.repo</id>
<url>https://repository.cloudera.com/content/repositories/snapshots</url>
<name>CDH Snapshots Repository</name>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
<repository>
<id>cloudera.thirdparty.repo</id>
<url>https://repository.cloudera.com/content/repositories/third-party</url>
<name>Cloudera Third Party Repository</name>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
</repositories>
</project>
44 changes: 44 additions & 0 deletions java/src/main/java/com/cloudera/FuzzyEqualsUdf.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.cloudera.impala;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

/**
 * Udf that returns true if two double arguments are approximately equal.
 *
 * <p>Two values are considered approximately equal when the absolute value of their
 * difference is strictly less than 1e-6. If the difference is NaN (for example when
 * either argument is NaN) the comparison is false.
 *
 * <pre>
 * Usage: &gt; create function fuzzy_equals(double, double) returns boolean
 *          location '/user/cloudera/hive-udf-samples-1.0.jar'
 *          SYMBOL='com.cloudera.impala.FuzzyEqualsUdf';
 *        &gt; select fuzzy_equals(1, 1.000001);
 * </pre>
 */
public class FuzzyEqualsUdf extends UDF {
  /**
   * Maximum (exclusive) absolute difference for two values to be treated as equal.
   * Deliberately a double literal: a float literal such as {@code 0.000001f} is rounded
   * to float precision before being widened, which yields a value slightly below 1e-6.
   */
  private static final double EPSILON = 1e-6;

  public FuzzyEqualsUdf() {
  }

  /**
   * Compares two doubles for approximate equality.
   *
   * @param x first value; may be null (SQL NULL)
   * @param y second value; may be null (SQL NULL)
   * @return null if either argument is null; otherwise a {@code BooleanWritable} that is
   *     true exactly when {@code |x - y| < 1e-6}
   */
  public BooleanWritable evaluate(DoubleWritable x, DoubleWritable y) {
    // A NULL input yields a NULL result rather than true or false.
    if (x == null || y == null) {
      return null;
    }
    double delta = Math.abs(x.get() - y.get());
    return new BooleanWritable(delta < EPSILON);
  }
}
6 changes: 6 additions & 0 deletions udf-sample-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ int main(int argc, char** argv) {
passed &= UdfTestHarness::ValidateUdf<IntVal, IntVal, IntVal>(
AddUdf, IntVal::null(), IntVal(2), IntVal::null());

// Test FuzzyEquals sample.
passed &= UdfTestHarness::ValidateUdf<BooleanVal, DoubleVal, DoubleVal>(
FuzzyEquals, DoubleVal(1.0), DoubleVal(1.0000000001), BooleanVal(true));
passed &= UdfTestHarness::ValidateUdf<BooleanVal, DoubleVal, DoubleVal>(
FuzzyEquals, DoubleVal(1.1), DoubleVal(1.0), BooleanVal(false));

cout << "Tests " << (passed ? "Passed." : "Failed.") << endl;
return !passed;
}
9 changes: 9 additions & 0 deletions udf-sample.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.

#include "udf-sample.h"
#include <math.h>

// In this sample we are declaring a UDF that adds two ints and returns an int.
IntVal AddUdf(FunctionContext* context, const IntVal& arg1, const IntVal& arg2) {
Expand All @@ -22,3 +23,11 @@ IntVal AddUdf(FunctionContext* context, const IntVal& arg1, const IntVal& arg2)

// Multiple UDFs can be defined in the same file

// Returns true if the absolute difference between x and y is strictly less than
// EPSILON (1e-6), and false otherwise. Returns NULL if either argument is NULL.
// The function context is not used.
BooleanVal FuzzyEquals(FunctionContext* context, const DoubleVal& x, const DoubleVal& y) {
  // Deliberately a double literal: a float literal (e.g. 0.000001f) is rounded to float
  // precision before being widened to double, giving a threshold slightly below 1e-6.
  const double EPSILON = 1e-6;
  if (x.is_null || y.is_null) return BooleanVal::null();
  const double delta = fabs(x.val - y.val);
  return BooleanVal(delta < EPSILON);
}


9 changes: 9 additions & 0 deletions udf-sample.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@

using namespace impala_udf;

// Usage: > create function add(int, int) returns int
// location '/user/cloudera/libudfsample.so' SYMBOL='AddUdf';
// > select add(1, 2);
IntVal AddUdf(FunctionContext* context, const IntVal& arg1, const IntVal& arg2);

// Returns true if x is approximately equal to y, i.e. the absolute difference between
// the two values is below a small fixed tolerance (about 1e-6).
// Returns NULL if either x or y is NULL.
// Usage: > create function fuzzy_equals(double, double) returns boolean
// location '/user/cloudera/libudfsample.so' SYMBOL='FuzzyEquals';
// > select fuzzy_equals(1, 1.00000001);
BooleanVal FuzzyEquals(FunctionContext* context, const DoubleVal& x, const DoubleVal& y);

#endif

0 comments on commit ed1ca5a

Please sign in to comment.