Skip to content

Commit

Permalink
Change functions in Velox directory to use the new writer interface. (f…
Browse files Browse the repository at this point in the history
…acebookincubator#1415)

Summary:
Pull Request resolved: facebookincubator#1415

This change is the first step towards deprecating the old writers.
A follow-up diff will replace ArrayWriterT and switch the meaning of out_type<Array>; the same will be done for all other types.

Reviewed By: mbasmanova

Differential Revision: D35625851

fbshipit-source-id: 29b01bdbce75fa0f694d17495240981ad380b9c7
  • Loading branch information
laithsakka authored and artem.malyshev committed May 31, 2022
1 parent e13d2bc commit 2b50557
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 59 deletions.
24 changes: 13 additions & 11 deletions velox/examples/SimpleFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -288,15 +288,15 @@ struct MySimpleSplitFunction {
const char splitChar{' '};

FOLLY_ALWAYS_INLINE bool call(
out_type<Array<Varchar>>& out,
out_type<ArrayWriterT<Varchar>>& out,
const arg_type<Varchar>& input) {
auto start = input.begin();
auto cur = start;

// This code doesn't copy the string contents.
do {
cur = std::find(start, input.end(), splitChar);
out.append(out_type<Varchar>(StringView(start, cur - start)));
out.add_item().copy_from(StringView(start, cur - start));
start = cur + 1;
} while (cur < input.end());
return true;
Expand All @@ -307,7 +307,7 @@ void register6() {
registerFunction<MyAsciiAwareFunction, Varchar, Varchar>(
{"my_ascii_aware_func"});

registerFunction<MySimpleSplitFunction, Array<Varchar>, Varchar>(
registerFunction<MySimpleSplitFunction, ArrayWriterT<Varchar>, Varchar>(
{"my_simple_split_func"});
}

Expand Down Expand Up @@ -405,19 +405,19 @@ struct MyComplexTimesTwoFunction {
// are currently implemented based on std::vector. Vector elements are
// currently wrapped by std::optional to represent their nullability.
FOLLY_ALWAYS_INLINE bool call(
out_type<Array<int64_t>>& result,
out_type<ArrayWriterT<int64_t>>& result,
const arg_type<Array<int64_t>>& inputArray) {
result.reserve(inputArray.size());
for (const auto& it : inputArray) {
result.append(it.has_value() ? it.value() * 2 : 0);
result.push_back(it.has_value() ? it.value() * 2 : 0);
}
return true;
}

// This method takes and returns a Map. Map proxy objects are implemented
// using std::unordered_map; values are wrapped by std::optional.
FOLLY_ALWAYS_INLINE bool call(
out_type<Map<int64_t, double>>& result,
out_type<MapWriterT<int64_t, double>>& result,
const arg_type<Map<int64_t, double>>& inputMap) {
result.reserve(inputMap.size());
for (const auto& it : inputMap) {
Expand All @@ -430,7 +430,7 @@ struct MyComplexTimesTwoFunction {
// Takes and returns a Row. Rows are backed by std::tuple; individual elements
// are std::optional.
FOLLY_ALWAYS_INLINE bool call(
out_type<Row<int64_t, double>>& result,
out_type<RowWriterT<int64_t, double>>& result,
const arg_type<Row<int64_t, double>>& inputRow) {
const auto& elem0 = inputRow.template at<0>();
const auto& elem1 = inputRow.template at<1>();
Expand All @@ -456,15 +456,17 @@ struct MyComplexTimesTwoFunction {
};

void register8() {
registerFunction<MyComplexTimesTwoFunction, Array<int64_t>, Array<int64_t>>(
{"my_array_func"});
registerFunction<
MyComplexTimesTwoFunction,
Map<int64_t, double>,
ArrayWriterT<int64_t>,
Array<int64_t>>({"my_array_func"});
registerFunction<
MyComplexTimesTwoFunction,
MapWriterT<int64_t, double>,
Map<int64_t, double>>({"my_map_func"});
registerFunction<
MyComplexTimesTwoFunction,
Row<int64_t, double>,
RowWriterT<int64_t, double>,
Row<int64_t, double>>({"my_row_func"});
registerFunction<
MyComplexTimesTwoFunction,
Expand Down
18 changes: 10 additions & 8 deletions velox/expression/tests/SimpleFunctionCallNullFreeTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,14 @@ struct NonDefaultBehaviorFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

bool callNullable(
out_type<Array<int32_t>>& out,
out_type<ArrayWriterT<int32_t>>& out,
const arg_type<Array<int32_t>>* input) {
out.append(kCallNullable);
out.push_back(kCallNullable);

if (input) {
for (auto i : *input) {
if (i.has_value()) {
out.append(i.value());
out.push_back(*i);
}
}
}
Expand All @@ -87,21 +87,23 @@ struct NonDefaultBehaviorFunction {
}

bool callNullFree(
out_type<Array<int32_t>>& out,
out_type<ArrayWriterT<int32_t>>& out,
const null_free_arg_type<Array<int32_t>>& input) {
out.append(kCallNullFree);
out.push_back(kCallNullFree);

for (auto i : input) {
out.append(i);
out.push_back(i);
}

return true;
}
};

TEST_F(SimpleFunctionCallNullFreeTest, nonDefaultBehavior) {
registerFunction<NonDefaultBehaviorFunction, Array<int32_t>, Array<int32_t>>(
{"non_default_behavior"});
registerFunction<
NonDefaultBehaviorFunction,
ArrayWriterT<int32_t>,
Array<int32_t>>({"non_default_behavior"});

// Make a vector with a NULL.
auto arrayVectorWithNull = makeVectorWithNullArrays<int32_t>(
Expand Down
8 changes: 4 additions & 4 deletions velox/expression/tests/SimpleFunctionInitTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ struct NonDefaultWithArrayInitFunction {
}

bool callNullable(
out_type<Array<int32_t>>& out,
out_type<ArrayWriterT<int32_t>>& out,
const arg_type<int32_t>* first,
const arg_type<Array<int32_t>>* /*second*/) {
if (!first) {
Expand All @@ -64,10 +64,10 @@ struct NonDefaultWithArrayInitFunction {

if (!elements_.empty()) {
for (auto i : elements_) {
out.append(i + *first);
out.push_back(i + *first);
}
} else {
out.append(*first);
out.push_back(*first);
}

return true;
Expand All @@ -83,7 +83,7 @@ struct NonDefaultWithArrayInitFunction {
TEST_F(SimpleFunctionInitTest, initializationArray) {
registerFunction<
NonDefaultWithArrayInitFunction,
Array<int32_t>,
ArrayWriterT<int32_t>,
int32_t,
Array<int32_t>>({"non_default_behavior_with_init"});

Expand Down
46 changes: 26 additions & 20 deletions velox/expression/tests/SimpleFunctionTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,18 +94,18 @@ struct ArrayWriterFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

FOLLY_ALWAYS_INLINE void call(
out_type<Array<int64_t>>& out,
out_type<ArrayWriterT<int64_t>>& out,
const arg_type<int64_t>& input) {
const size_t size = arrayData[input].size();
out.reserve(size);
for (const auto i : arrayData[input]) {
out.append(i);
out.push_back(i);
}
}
};

TEST_F(SimpleFunctionTest, arrayWriter) {
registerFunction<ArrayWriterFunction, Array<int64_t>, int64_t>(
registerFunction<ArrayWriterFunction, ArrayWriterT<int64_t>, int64_t>(
{"array_writer_func"}, ARRAY(BIGINT()));

const size_t rows = arrayData.size();
Expand All @@ -131,19 +131,21 @@ struct ArrayOfStringsWriterFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

FOLLY_ALWAYS_INLINE void call(
out_type<Array<Varchar>>& out,
out_type<ArrayWriterT<Varchar>>& out,
const arg_type<int64_t>& input) {
const size_t size = stringArrayData[input].size();
out.reserve(size);
for (const auto value : stringArrayData[input]) {
out.append(out_type<Varchar>(StringView(value)));
out.add_item().copy_from(value);
}
}
};

TEST_F(SimpleFunctionTest, arrayOfStringsWriter) {
registerFunction<ArrayOfStringsWriterFunction, Array<Varchar>, int64_t>(
{"array_of_strings_writer_func"}, ARRAY(VARCHAR()));
registerFunction<
ArrayOfStringsWriterFunction,
ArrayWriterT<Varchar>,
int64_t>({"array_of_strings_writer_func"}, ARRAY(VARCHAR()));

const size_t rows = stringArrayData.size();
auto flatVector = makeFlatVector<int64_t>(rows, [](auto row) { return row; });
Expand Down Expand Up @@ -240,15 +242,15 @@ struct RowWriterFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

FOLLY_ALWAYS_INLINE bool call(
out_type<Row<int64_t, double>>& out,
out_type<RowWriterT<int64_t, double>>& out,
const arg_type<int64_t>& input) {
out = std::make_tuple(rowVectorCol1[input], rowVectorCol2[input]);
return true;
}
};

TEST_F(SimpleFunctionTest, rowWriter) {
registerFunction<RowWriterFunction, Row<int64_t, double>, int64_t>(
registerFunction<RowWriterFunction, RowWriterT<int64_t, double>, int64_t>(
{"row_writer_func"}, ROW({BIGINT(), DOUBLE()}));

const size_t rows = rowVectorCol1.size();
Expand Down Expand Up @@ -336,21 +338,21 @@ struct ArrayRowWriterFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

FOLLY_ALWAYS_INLINE bool call(
out_type<Array<Row<int64_t, double>>>& out,
out_type<ArrayWriterT<RowWriterT<int64_t, double>>>& out,
const arg_type<int32_t>& input) {
// Appends each row three times.
auto tuple = std::make_tuple(rowVectorCol1[input], rowVectorCol2[input]);
out.append(std::optional(tuple));
out.append(std::optional(tuple));
out.append(std::optional(tuple));
out.add_item() = tuple;
out.add_item() = tuple;
out.add_item() = tuple;
return true;
}
};

TEST_F(SimpleFunctionTest, arrayRowWriter) {
registerFunction<
ArrayRowWriterFunction,
Array<Row<int64_t, double>>,
ArrayWriterT<RowWriterT<int64_t, double>>,
int32_t>({"array_row_writer_func"}, ARRAY(ROW({BIGINT(), DOUBLE()})));

const size_t rows = rowVectorCol1.size();
Expand Down Expand Up @@ -731,20 +733,22 @@ struct MyArrayStringReuseFunction {

static constexpr int32_t reuse_strings_from_arg = 0;

void call(out_type<Array<Varchar>>& out, const arg_type<Varchar>& input) {
void call(
out_type<ArrayWriterT<Varchar>>& out,
const arg_type<Varchar>& input) {
auto start = input.begin();
auto cur = start;

do {
cur = std::find(start, input.end(), ' ');
out.append(std::optional{StringView(start, cur - start)});
out.add_item().copy_from(StringView(start, cur - start));
start = cur + 1;
} while (cur < input.end());
}
};

TEST_F(SimpleFunctionTest, arrayStringReuse) {
registerFunction<MyArrayStringReuseFunction, Array<Varchar>, Varchar>(
registerFunction<MyArrayStringReuseFunction, ArrayWriterT<Varchar>, Varchar>(
{"my_array_string_reuse_func"});

std::vector<StringView> inputData = {
Expand Down Expand Up @@ -772,15 +776,17 @@ template <typename T>
struct MapStringOut {
VELOX_DEFINE_FUNCTION_TYPES(T);

void call(out_type<Map<Varchar, Varchar>>& out, int64_t n) {
void call(out_type<MapWriterT<Varchar, Varchar>>& out, int64_t n) {
auto string = std::to_string(n);
out.emplace(StringView(string), std::optional{StringView(string)});
auto [key, value] = out.add_item();
key.copy_from(string);
value.copy_from(string);
}
};

// Output map with string.
TEST_F(SimpleFunctionTest, mapStringOut) {
registerFunction<MapStringOut, Map<Varchar, Varchar>, int64_t>(
registerFunction<MapStringOut, MapWriterT<Varchar, Varchar>, int64_t>(
{"func_map_string_out"});

auto input = vectorMaker_.flatVector<int64_t>({1, 2, 3, 4});
Expand Down
16 changes: 0 additions & 16 deletions velox/functions/prestosql/benchmarks/ArrayWriterBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,21 +126,6 @@ struct SimpleGeneralInterface {
}
};

// Benchmark function that fills an array output through the old writer
// interface, i.e. by calling append() on the output object.
//
// For each index in [0, n) it appends either the index itself or, when
// WITH_NULLS is enabled and the index is not a multiple of 5, a null
// (std::nullopt). Always returns true.
template <typename T>
struct SimpleOld {
  template <typename TOut>
  bool call(TOut& out, const int64_t& n) {
    int idx = 0;
    while (idx < n) {
      // Nulls are produced only when WITH_NULLS is on, and then for every
      // index that is not divisible by 5.
      const bool emitNull = WITH_NULLS && (idx % 5);
      if (!emitNull) {
        out.append(idx);
      } else {
        out.append(std::nullopt);
      }
      idx++;
    }
    return true;
  }
};

class ArrayWriterBenchmark : public functions::test::FunctionBenchmarkBase {
public:
ArrayWriterBenchmark() : FunctionBenchmarkBase() {
Expand All @@ -150,7 +135,6 @@ class ArrayWriterBenchmark : public functions::test::FunctionBenchmarkBase {
{"simple_push_back"});
registerFunction<SimpleGeneralInterface, ArrayWriterT<int64_t>, int64_t>(
{"simple_general"});
registerFunction<SimpleOld, Array<int64_t>, int64_t>({"simple_old"});

facebook::velox::exec::registerVectorFunction(
"vector_resize_optimized",
Expand Down

0 comments on commit 2b50557

Please sign in to comment.