Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change functions in Velox directory to use the new writer interface. #1415

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions velox/examples/SimpleFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,15 +288,15 @@ struct MySimpleSplitFunction {
const char splitChar{' '};

FOLLY_ALWAYS_INLINE bool call(
out_type<Array<Varchar>>& out,
out_type<ArrayWriterT<Varchar>>& out,
const arg_type<Varchar>& input) {
auto start = input.begin();
auto cur = start;

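// Split the input on splitChar. Note: the old append(StringView) form does
// not copy the string contents; the new add_item().copy_from() form
// presumably copies each piece into the output (TODO confirm).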
do {
cur = std::find(start, input.end(), splitChar);
out.append(out_type<Varchar>(StringView(start, cur - start)));
out.add_item().copy_from(StringView(start, cur - start));
start = cur + 1;
} while (cur < input.end());
return true;
Expand All @@ -307,7 +307,7 @@ void register6() {
registerFunction<MyAsciiAwareFunction, Varchar, Varchar>(
{"my_ascii_aware_func"});

registerFunction<MySimpleSplitFunction, Array<Varchar>, Varchar>(
registerFunction<MySimpleSplitFunction, ArrayWriterT<Varchar>, Varchar>(
{"my_simple_split_func"});
}

Expand Down Expand Up @@ -405,19 +405,19 @@ struct MyComplexTimesTwoFunction {
// are currently implemented based on std::vector. Vector elements are
// currently wrapped by std::optional to represent their nullability.
FOLLY_ALWAYS_INLINE bool call(
out_type<Array<int64_t>>& result,
out_type<ArrayWriterT<int64_t>>& result,
const arg_type<Array<int64_t>>& inputArray) {
result.reserve(inputArray.size());
for (const auto& it : inputArray) {
result.append(it.has_value() ? it.value() * 2 : 0);
result.push_back(it.has_value() ? it.value() * 2 : 0);
}
return true;
}

// This method takes and returns a Map. Map proxy objects are implemented
// using std::unordered_map; values are wrapped by std::optional.
FOLLY_ALWAYS_INLINE bool call(
out_type<Map<int64_t, double>>& result,
out_type<MapWriterT<int64_t, double>>& result,
const arg_type<Map<int64_t, double>>& inputMap) {
result.reserve(inputMap.size());
for (const auto& it : inputMap) {
Expand All @@ -430,7 +430,7 @@ struct MyComplexTimesTwoFunction {
// Takes and returns a Row. Rows are backed by std::tuple; individual elements
// are std::optional.
FOLLY_ALWAYS_INLINE bool call(
out_type<Row<int64_t, double>>& result,
out_type<RowWriterT<int64_t, double>>& result,
const arg_type<Row<int64_t, double>>& inputRow) {
const auto& elem0 = inputRow.template at<0>();
const auto& elem1 = inputRow.template at<1>();
Expand All @@ -456,15 +456,17 @@ struct MyComplexTimesTwoFunction {
};

void register8() {
registerFunction<MyComplexTimesTwoFunction, Array<int64_t>, Array<int64_t>>(
{"my_array_func"});
registerFunction<
MyComplexTimesTwoFunction,
Map<int64_t, double>,
ArrayWriterT<int64_t>,
Array<int64_t>>({"my_array_func"});
registerFunction<
MyComplexTimesTwoFunction,
MapWriterT<int64_t, double>,
Map<int64_t, double>>({"my_map_func"});
registerFunction<
MyComplexTimesTwoFunction,
Row<int64_t, double>,
RowWriterT<int64_t, double>,
Row<int64_t, double>>({"my_row_func"});
registerFunction<
MyComplexTimesTwoFunction,
Expand Down
18 changes: 10 additions & 8 deletions velox/expression/tests/SimpleFunctionCallNullFreeTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,14 @@ struct NonDefaultBehaviorFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

bool callNullable(
out_type<Array<int32_t>>& out,
out_type<ArrayWriterT<int32_t>>& out,
const arg_type<Array<int32_t>>* input) {
out.append(kCallNullable);
out.push_back(kCallNullable);

if (input) {
for (auto i : *input) {
if (i.has_value()) {
out.append(i.value());
out.push_back(*i);
}
}
}
Expand All @@ -87,21 +87,23 @@ struct NonDefaultBehaviorFunction {
}

bool callNullFree(
out_type<Array<int32_t>>& out,
out_type<ArrayWriterT<int32_t>>& out,
const null_free_arg_type<Array<int32_t>>& input) {
out.append(kCallNullFree);
out.push_back(kCallNullFree);

for (auto i : input) {
out.append(i);
out.push_back(i);
}

return true;
}
};

TEST_F(SimpleFunctionCallNullFreeTest, nonDefaultBehavior) {
registerFunction<NonDefaultBehaviorFunction, Array<int32_t>, Array<int32_t>>(
{"non_default_behavior"});
registerFunction<
NonDefaultBehaviorFunction,
ArrayWriterT<int32_t>,
Array<int32_t>>({"non_default_behavior"});

// Make a vector with a NULL.
auto arrayVectorWithNull = makeVectorWithNullArrays<int32_t>(
Expand Down
8 changes: 4 additions & 4 deletions velox/expression/tests/SimpleFunctionInitTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ struct NonDefaultWithArrayInitFunction {
}

bool callNullable(
out_type<Array<int32_t>>& out,
out_type<ArrayWriterT<int32_t>>& out,
const arg_type<int32_t>* first,
const arg_type<Array<int32_t>>* /*second*/) {
if (!first) {
Expand All @@ -64,10 +64,10 @@ struct NonDefaultWithArrayInitFunction {

if (!elements_.empty()) {
for (auto i : elements_) {
out.append(i + *first);
out.push_back(i + *first);
}
} else {
out.append(*first);
out.push_back(*first);
}

return true;
Expand All @@ -83,7 +83,7 @@ struct NonDefaultWithArrayInitFunction {
TEST_F(SimpleFunctionInitTest, initializationArray) {
registerFunction<
NonDefaultWithArrayInitFunction,
Array<int32_t>,
ArrayWriterT<int32_t>,
int32_t,
Array<int32_t>>({"non_default_behavior_with_init"});

Expand Down
46 changes: 26 additions & 20 deletions velox/expression/tests/SimpleFunctionTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,18 +94,18 @@ struct ArrayWriterFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

FOLLY_ALWAYS_INLINE void call(
out_type<Array<int64_t>>& out,
out_type<ArrayWriterT<int64_t>>& out,
const arg_type<int64_t>& input) {
const size_t size = arrayData[input].size();
out.reserve(size);
for (const auto i : arrayData[input]) {
out.append(i);
out.push_back(i);
}
}
};

TEST_F(SimpleFunctionTest, arrayWriter) {
registerFunction<ArrayWriterFunction, Array<int64_t>, int64_t>(
registerFunction<ArrayWriterFunction, ArrayWriterT<int64_t>, int64_t>(
{"array_writer_func"}, ARRAY(BIGINT()));

const size_t rows = arrayData.size();
Expand All @@ -131,19 +131,21 @@ struct ArrayOfStringsWriterFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

FOLLY_ALWAYS_INLINE void call(
out_type<Array<Varchar>>& out,
out_type<ArrayWriterT<Varchar>>& out,
const arg_type<int64_t>& input) {
const size_t size = stringArrayData[input].size();
out.reserve(size);
for (const auto value : stringArrayData[input]) {
out.append(out_type<Varchar>(StringView(value)));
out.add_item().copy_from(value);
}
}
};

TEST_F(SimpleFunctionTest, arrayOfStringsWriter) {
registerFunction<ArrayOfStringsWriterFunction, Array<Varchar>, int64_t>(
{"array_of_strings_writer_func"}, ARRAY(VARCHAR()));
registerFunction<
ArrayOfStringsWriterFunction,
ArrayWriterT<Varchar>,
int64_t>({"array_of_strings_writer_func"}, ARRAY(VARCHAR()));

const size_t rows = stringArrayData.size();
auto flatVector = makeFlatVector<int64_t>(rows, [](auto row) { return row; });
Expand Down Expand Up @@ -240,15 +242,15 @@ struct RowWriterFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

FOLLY_ALWAYS_INLINE bool call(
out_type<Row<int64_t, double>>& out,
out_type<RowWriterT<int64_t, double>>& out,
const arg_type<int64_t>& input) {
out = std::make_tuple(rowVectorCol1[input], rowVectorCol2[input]);
return true;
}
};

TEST_F(SimpleFunctionTest, rowWriter) {
registerFunction<RowWriterFunction, Row<int64_t, double>, int64_t>(
registerFunction<RowWriterFunction, RowWriterT<int64_t, double>, int64_t>(
{"row_writer_func"}, ROW({BIGINT(), DOUBLE()}));

const size_t rows = rowVectorCol1.size();
Expand Down Expand Up @@ -336,21 +338,21 @@ struct ArrayRowWriterFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

FOLLY_ALWAYS_INLINE bool call(
out_type<Array<Row<int64_t, double>>>& out,
out_type<ArrayWriterT<RowWriterT<int64_t, double>>>& out,
const arg_type<int32_t>& input) {
// Appends each row three times.
auto tuple = std::make_tuple(rowVectorCol1[input], rowVectorCol2[input]);
out.append(std::optional(tuple));
out.append(std::optional(tuple));
out.append(std::optional(tuple));
out.add_item() = tuple;
out.add_item() = tuple;
out.add_item() = tuple;
return true;
}
};

TEST_F(SimpleFunctionTest, arrayRowWriter) {
registerFunction<
ArrayRowWriterFunction,
Array<Row<int64_t, double>>,
ArrayWriterT<RowWriterT<int64_t, double>>,
int32_t>({"array_row_writer_func"}, ARRAY(ROW({BIGINT(), DOUBLE()})));

const size_t rows = rowVectorCol1.size();
Expand Down Expand Up @@ -731,20 +733,22 @@ struct MyArrayStringReuseFunction {

static constexpr int32_t reuse_strings_from_arg = 0;

void call(out_type<Array<Varchar>>& out, const arg_type<Varchar>& input) {
void call(
out_type<ArrayWriterT<Varchar>>& out,
const arg_type<Varchar>& input) {
auto start = input.begin();
auto cur = start;

do {
cur = std::find(start, input.end(), ' ');
out.append(std::optional{StringView(start, cur - start)});
out.add_item().copy_from(StringView(start, cur - start));
start = cur + 1;
} while (cur < input.end());
}
};

TEST_F(SimpleFunctionTest, arrayStringReuse) {
registerFunction<MyArrayStringReuseFunction, Array<Varchar>, Varchar>(
registerFunction<MyArrayStringReuseFunction, ArrayWriterT<Varchar>, Varchar>(
{"my_array_string_reuse_func"});

std::vector<StringView> inputData = {
Expand Down Expand Up @@ -772,15 +776,17 @@ template <typename T>
struct MapStringOut {
VELOX_DEFINE_FUNCTION_TYPES(T);

void call(out_type<Map<Varchar, Varchar>>& out, int64_t n) {
void call(out_type<MapWriterT<Varchar, Varchar>>& out, int64_t n) {
auto string = std::to_string(n);
out.emplace(StringView(string), std::optional{StringView(string)});
auto [key, value] = out.add_item();
key.copy_from(string);
value.copy_from(string);
}
};

// Output map with string.
TEST_F(SimpleFunctionTest, mapStringOut) {
registerFunction<MapStringOut, Map<Varchar, Varchar>, int64_t>(
registerFunction<MapStringOut, MapWriterT<Varchar, Varchar>, int64_t>(
{"func_map_string_out"});

auto input = vectorMaker_.flatVector<int64_t>({1, 2, 3, 4});
Expand Down
16 changes: 0 additions & 16 deletions velox/functions/prestosql/benchmarks/ArrayWriterBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,21 +126,6 @@ struct SimpleGeneralInterface {
}
};

// Benchmark functor that fills `out` through its append() method.
// For every index in [0, n) it appends either the index itself or, when
// WITH_NULLS is set and the index is not a multiple of 5, std::nullopt.
// Always reports success by returning true.
template <typename T>
struct SimpleOld {
  template <typename TOut>
  bool call(TOut& out, const int64_t& n) {
    int idx = 0;
    while (idx < n) {
      // Non-zero remainder means "not a multiple of 5".
      const bool writeNull = WITH_NULLS && idx % 5;
      if (writeNull) {
        out.append(std::nullopt);
      } else {
        out.append(idx);
      }
      idx++;
    }
    return true;
  }
};

class ArrayWriterBenchmark : public functions::test::FunctionBenchmarkBase {
public:
ArrayWriterBenchmark() : FunctionBenchmarkBase() {
Expand All @@ -150,7 +135,6 @@ class ArrayWriterBenchmark : public functions::test::FunctionBenchmarkBase {
{"simple_push_back"});
registerFunction<SimpleGeneralInterface, ArrayWriterT<int64_t>, int64_t>(
{"simple_general"});
registerFunction<SimpleOld, Array<int64_t>, int64_t>({"simple_old"});

facebook::velox::exec::registerVectorFunction(
"vector_resize_optimized",
Expand Down