From 534bd6f4ac570887d69a85fa4df0a3fc9338adf5 Mon Sep 17 00:00:00 2001 From: philo Date: Thu, 21 Apr 2022 16:44:49 +0800 Subject: [PATCH 1/5] Add substring_index support --- .../ColumnarExpressionConverter.scala | 2 ++ .../expression/ColumnarTernaryOperator.scala | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarExpressionConverter.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarExpressionConverter.scala index cddd56bd5..3a40341f0 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarExpressionConverter.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarExpressionConverter.scala @@ -422,6 +422,8 @@ object ColumnarExpressionConverter extends Logging { case regexp: RegExpReplace => containsSubquery(regexp.subject) || containsSubquery( regexp.regexp) || containsSubquery(regexp.rep) || containsSubquery(regexp.pos) + case substrIndex: ColumnarSubstringIndex => + substrIndex.children.map(containsSubquery).exists(_ == true) case expr => throw new UnsupportedOperationException( s" --> ${expr.getClass} | ${expr} is not currently supported.") diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala index 8d8440e21..23b3fcd17 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala @@ -202,6 +202,23 @@ class ColumnarRegExpExtract(subject: Expression, regexp: Expression, idx: Expres } } +class ColumnarSubstringIndex(strExpr: Expression, delimExpr: Expression, + countExpr: Expression, original: Expression) + extends SubstringIndex(strExpr, delimExpr, countExpr) with ColumnarExpression { + + override def doColumnarCodeGen(args: Object): (TreeNode, ArrowType) = { + val (str_node, _): (TreeNode, ArrowType) = + strExpr.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args) + val (delim_node, _): (TreeNode, ArrowType) = + delimExpr.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args) + val (count_node, _): (TreeNode, ArrowType) = + countExpr.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args) + val resultType = new ArrowType.Utf8() + (TreeBuilder.makeFunction("substr_index", + Lists.newArrayList(str_node, delim_node, count_node), resultType), resultType) + } +} + object ColumnarTernaryOperator { def create(src: Expression, arg1: Expression, arg2: Expression, @@ -217,6 +234,8 @@ object ColumnarTernaryOperator { new ColumnarStringLocate(src, arg1, arg2, sl) case re: RegExpExtract => new ColumnarRegExpExtract(src, arg1, arg2, re) + case substrIndex: SubstringIndex => + new ColumnarSubstringIndex(src, arg1, arg2, re) case other => throw new UnsupportedOperationException(s"not currently supported: $other.") } From dce2cd3a814fa8706fe7e28f65ae62ad11869a37 Mon Sep 17 00:00:00 2001 From: philo Date: Thu, 21 Apr 2022 17:10:11 +0800 Subject: [PATCH 2/5] Fix a compile issue --- .../com/intel/oap/expression/ColumnarTernaryOperator.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala index 23b3fcd17..b26e8c15f 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala @@ -235,7 +235,7 @@ object ColumnarTernaryOperator { case re: RegExpExtract => new ColumnarRegExpExtract(src, arg1, arg2, re) case substrIndex: SubstringIndex => - new ColumnarSubstringIndex(src, arg1, arg2, re) + new ColumnarSubstringIndex(src, arg1, arg2, substrIndex) case other => throw new UnsupportedOperationException(s"not currently supported: $other.") } From e11e9db54891bc974024fa08309818c4f8955c9f Mon Sep 17 00:00:00 2001 From: philo Date: Thu, 21 Apr 2022 17:24:17 +0800 Subject: [PATCH 3/5] Change arrow branch for test [will revert at last] --- arrow-data-source/script/build_arrow.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-data-source/script/build_arrow.sh b/arrow-data-source/script/build_arrow.sh index d8ec40128..a6f223966 100755 --- a/arrow-data-source/script/build_arrow.sh +++ b/arrow-data-source/script/build_arrow.sh @@ -62,7 +62,7 @@ echo "ARROW_SOURCE_DIR=${ARROW_SOURCE_DIR}" echo "ARROW_INSTALL_DIR=${ARROW_INSTALL_DIR}" mkdir -p $ARROW_SOURCE_DIR mkdir -p $ARROW_INSTALL_DIR -git clone https://github.com/oap-project/arrow.git --branch arrow-4.0.0-oap $ARROW_SOURCE_DIR +git clone https://github.com/PHILO-HE/arrow.git --branch substring_index $ARROW_SOURCE_DIR pushd $ARROW_SOURCE_DIR cmake ./cpp \ From 9a1c7d9204198e1774128c201a7303604af182ac Mon Sep 17 00:00:00 2001 From: philo Date: Fri, 22 Apr 2022 09:45:36 +0800 Subject: [PATCH 4/5] Revert "Change arrow branch for test [will revert at last]" This reverts commit e11e9db54891bc974024fa08309818c4f8955c9f. --- arrow-data-source/script/build_arrow.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-data-source/script/build_arrow.sh b/arrow-data-source/script/build_arrow.sh index a6f223966..d8ec40128 100755 --- a/arrow-data-source/script/build_arrow.sh +++ b/arrow-data-source/script/build_arrow.sh @@ -62,7 +62,7 @@ echo "ARROW_SOURCE_DIR=${ARROW_SOURCE_DIR}" echo "ARROW_INSTALL_DIR=${ARROW_INSTALL_DIR}" mkdir -p $ARROW_SOURCE_DIR mkdir -p $ARROW_INSTALL_DIR -git clone https://github.com/PHILO-HE/arrow.git --branch substring_index $ARROW_SOURCE_DIR +git clone https://github.com/oap-project/arrow.git --branch arrow-4.0.0-oap $ARROW_SOURCE_DIR pushd $ARROW_SOURCE_DIR cmake ./cpp \ From b378f12f4c5ff192ddbd17c421cf990864b0f6e1 Mon Sep 17 00:00:00 2001 From: philo Date: Fri, 22 Apr 2022 10:11:00 +0800 Subject: [PATCH 5/5] Return false for checking codegen support --- .../com/intel/oap/expression/ColumnarTernaryOperator.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala index b26e8c15f..4d7b8e777 100644 --- a/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala +++ b/native-sql-engine/core/src/main/scala/com/intel/oap/expression/ColumnarTernaryOperator.scala @@ -206,6 +206,10 @@ class ColumnarSubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr: Expression, original: Expression) extends SubstringIndex(strExpr, delimExpr, countExpr) with ColumnarExpression { + override def supportColumnarCodegen(args: java.lang.Object): Boolean = { + false + } + override def doColumnarCodeGen(args: Object): (TreeNode, ArrowType) = { val (str_node, _): (TreeNode, ArrowType) = strExpr.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)