diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 0a1215e2464b5..d6f1d70bfd722 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -29,12 +29,21 @@ authors = { workspace = true } rust-version = { workspace = true } [features] +# enable string functions +string_expressions = [] # enable core functions core_expressions = [] # enable datetime functions datetime_expressions = [] # Enable encoding by default so the doctests work. In general don't automatically enable all packages. -default = ["core_expressions", "datetime_expressions", "encoding_expressions", "math_expressions", "regex_expressions"] +default = [ + "core_expressions", + "datetime_expressions", + "encoding_expressions", + "math_expressions", + "regex_expressions", + "string_expressions", +] # enable encode/decode functions encoding_expressions = ["base64", "hex"] # enable math functions diff --git a/datafusion/functions/src/lib.rs b/datafusion/functions/src/lib.rs index 1d48dcadbebfd..1be41f373bfd7 100644 --- a/datafusion/functions/src/lib.rs +++ b/datafusion/functions/src/lib.rs @@ -84,6 +84,10 @@ use log::debug; #[macro_use] pub mod macros; +#[cfg(feature = "string_expressions")] +pub mod string; +make_stub_package!(string, "string_expressions"); + /// Core datafusion expressions /// Enabled via feature flag `core_expressions` #[cfg(feature = "core_expressions")] @@ -137,7 +141,8 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> { .chain(datetime::functions()) .chain(encoding::functions()) .chain(math::functions()) - .chain(regex::functions()); + .chain(regex::functions()) + .chain(string::functions()); all_functions.try_for_each(|udf| { let existing_udf = registry.register_udf(udf)?; diff --git a/datafusion/functions/src/string/mod.rs b/datafusion/functions/src/string/mod.rs new file mode 100644 index 0000000000000..47701396d9e63 --- /dev/null +++ b/datafusion/functions/src/string/mod.rs @@ -0,0 +1,22 @@ +// 
Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! "string" DataFusion functions + +pub mod starts_with; +// create UDFs +export_functions!(); diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs new file mode 100644 index 0000000000000..0474684788a9b --- /dev/null +++ b/datafusion/functions/src/string/starts_with.rs @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+use arrow::array::{Array, ArrayRef, BooleanArray, OffsetSizeTrait};
+use arrow::compute::kernels::regexp;
+use arrow::datatypes::DataType;
+use datafusion_common::exec_err;
+use datafusion_common::ScalarValue;
+use datafusion_common::{arrow_datafusion_err, plan_err};
+use datafusion_common::{
+    cast::as_generic_string_array, internal_err, DataFusionError, Result,
+};
+use datafusion_expr::ColumnarValue;
+use datafusion_expr::TypeSignature::*;
+use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+use std::sync::Arc;
+
+/// Scalar UDF `starts_with(string, prefix)`.
+///
+/// Evaluates to `true` when `string` begins with `prefix`, `false` when it
+/// does not, and NULL when either argument is NULL.
+#[derive(Debug)]
+pub(super) struct StartsWithFunc {
+    signature: Signature,
+}
+
+impl Default for StartsWithFunc {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl StartsWithFunc {
+    /// Creates the UDF with a signature that accepts every two-argument
+    /// combination of `Utf8` and `LargeUtf8`.
+    pub fn new() -> Self {
+        use DataType::*;
+        Self {
+            signature: Signature::one_of(
+                vec![
+                    Exact(vec![Utf8, Utf8]),
+                    Exact(vec![Utf8, LargeUtf8]),
+                    Exact(vec![LargeUtf8, Utf8]),
+                    Exact(vec![LargeUtf8, LargeUtf8]),
+                ],
+                Volatility::Immutable,
+            ),
+        }
+    }
+}
+
+/// Compares `haystack` and `prefix` row by row.
+///
+/// `L` and `R` are the offset widths of the two string arrays (`i32` for
+/// `Utf8`, `i64` for `LargeUtf8`); they are independent because the signature
+/// allows the two arguments to use different string types. A NULL on either
+/// side of a row produces a NULL in the output.
+fn starts_with_arrays<L: OffsetSizeTrait, R: OffsetSizeTrait>(
+    haystack: &ArrayRef,
+    prefix: &ArrayRef,
+) -> Result<ArrayRef> {
+    let haystack = as_generic_string_array::<L>(haystack)?;
+    let prefix = as_generic_string_array::<R>(prefix)?;
+
+    let result: BooleanArray = haystack
+        .iter()
+        .zip(prefix.iter())
+        .map(|(value, prefix)| value.zip(prefix).map(|(v, p)| v.starts_with(p)))
+        .collect();
+
+    Ok(Arc::new(result) as ArrayRef)
+}
+
+impl ScalarUDFImpl for StartsWithFunc {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "starts_with"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// The result is always `Boolean`, whatever the string types of the inputs.
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        Ok(DataType::Boolean)
+    }
+
+    /// Evaluates `starts_with` for a batch.
+    ///
+    /// Returns an error (instead of panicking) when the argument count or the
+    /// argument types do not match the declared signature.
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        if args.len() != 2 {
+            return exec_err!(
+                "starts_with expects exactly 2 arguments, got {}",
+                args.len()
+            );
+        }
+
+        // Scalars are expanded to the length of the array arguments. When
+        // every argument is a scalar the function is evaluated on one row
+        // and the result is turned back into a scalar below.
+        let num_rows = args.iter().find_map(|arg| match arg {
+            ColumnarValue::Array(array) => Some(array.len()),
+            ColumnarValue::Scalar(_) => None,
+        });
+        let arrays = args
+            .iter()
+            .map(|arg| arg.clone().into_array(num_rows.unwrap_or(1)))
+            .collect::<Result<Vec<_>>>()?;
+        let (haystack, prefix) = (&arrays[0], &arrays[1]);
+
+        let result = match (haystack.data_type(), prefix.data_type()) {
+            (DataType::Utf8, DataType::Utf8) => {
+                starts_with_arrays::<i32, i32>(haystack, prefix)
+            }
+            (DataType::Utf8, DataType::LargeUtf8) => {
+                starts_with_arrays::<i32, i64>(haystack, prefix)
+            }
+            (DataType::LargeUtf8, DataType::Utf8) => {
+                starts_with_arrays::<i64, i32>(haystack, prefix)
+            }
+            (DataType::LargeUtf8, DataType::LargeUtf8) => {
+                starts_with_arrays::<i64, i64>(haystack, prefix)
+            }
+            (left, right) => internal_err!(
+                "Unsupported data types {:?}, {:?} for function starts_with",
+                left,
+                right
+            ),
+        }?;
+
+        if num_rows.is_some() {
+            Ok(ColumnarValue::Array(result))
+        } else {
+            ScalarValue::try_from_array(&result, 0).map(ColumnarValue::Scalar)
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow::array::StringArray;
+
+    fn utf8(value: &str) -> ColumnarValue {
+        ColumnarValue::Scalar(ScalarValue::Utf8(Some(value.to_string())))
+    }
+
+    #[test]
+    fn scalar_inputs_produce_a_scalar() -> Result<()> {
+        let func = StartsWithFunc::new();
+
+        match func.invoke(&[utf8("alphabet"), utf8("alph")])? {
+            ColumnarValue::Scalar(ScalarValue::Boolean(Some(true))) => {}
+            other => panic!("unexpected result: {other:?}"),
+        }
+        match func.invoke(&[utf8("alphabet"), utf8("bet")])? {
+            ColumnarValue::Scalar(ScalarValue::Boolean(Some(false))) => {}
+            other => panic!("unexpected result: {other:?}"),
+        }
+        Ok(())
+    }
+
+    #[test]
+    fn array_input_is_compared_row_by_row() -> Result<()> {
+        let haystack: ArrayRef = Arc::new(StringArray::from(vec![
+            Some("alphabet"),
+            Some("beta"),
+            None,
+        ]));
+
+        let result = StartsWithFunc::new()
+            .invoke(&[ColumnarValue::Array(haystack), utf8("alph")])?;
+
+        match result {
+            ColumnarValue::Array(array) => {
+                let actual = array
+                    .as_any()
+                    .downcast_ref::<BooleanArray>()
+                    .expect("starts_with must produce a BooleanArray");
+                let expected = BooleanArray::from(vec![Some(true), Some(false), None]);
+                assert_eq!(actual, &expected);
+            }
+            other => panic!("unexpected result: {other:?}"),
+        }
+        Ok(())
+    }
+
+    #[test]
+    fn wrong_argument_count_is_an_error() {
+        assert!(StartsWithFunc::new().invoke(&[utf8("alphabet")]).is_err());
+    }
+}