diff --git a/cpp-client/deephaven/dhclient/CMakeLists.txt b/cpp-client/deephaven/dhclient/CMakeLists.txt index 23bd8e5e033..f4fa3a9a8fe 100644 --- a/cpp-client/deephaven/dhclient/CMakeLists.txt +++ b/cpp-client/deephaven/dhclient/CMakeLists.txt @@ -74,6 +74,7 @@ set(ALL_FILES include/private/deephaven/client/impl/util.h src/arrowutil/arrow_client_table.cc + src/arrowutil/arrow_column_source.cc include/private/deephaven/client/arrowutil/arrow_client_table.h include/private/deephaven/client/arrowutil/arrow_column_source.h include/private/deephaven/client/arrowutil/arrow_value_converter.h @@ -107,6 +108,7 @@ set(ALL_FILES src/utility/table_maker.cc include/public/deephaven/client/utility/arrow_util.h + include/public/deephaven/client/utility/internal_types.h include/public/deephaven/client/utility/misc_types.h include/public/deephaven/client/utility/table_maker.h ) diff --git a/cpp-client/deephaven/dhclient/include/private/deephaven/client/arrowutil/arrow_column_source.h b/cpp-client/deephaven/dhclient/include/private/deephaven/client/arrowutil/arrow_column_source.h index 16b5c30e987..c292d06b91c 100644 --- a/cpp-client/deephaven/dhclient/include/private/deephaven/client/arrowutil/arrow_column_source.h +++ b/cpp-client/deephaven/dhclient/include/private/deephaven/client/arrowutil/arrow_column_source.h @@ -22,9 +22,22 @@ namespace internal { // null-ness by determining whether the optional has a value. // kTimestamp is its own special case, where nullness is determined by the underlying nanos // being equal to Deephaven's NULL_LONG. -// kLocalDate and kLocalTime are like kTimestamp except they resolve to different data types. +// kLocalDate and kLocalTime are similar to kTimestamp in nullness except they resolve to different +// data types. enum class ArrowProcessingStyle { kNormal, kBooleanOrString, kTimestamp, kLocalDate, kLocalTime }; +/** + * When 'array' has dynamic type arrow::TimestampArray or arrow::Time64Array, look at the + * underlying time resolution of the arrow type and calculate a conversion factor from that unit + * to nanoseconds. For example if the underlying time unit is arrow::TimeUnit::MILLI, then the + * conversion factor would be 1_000_000, meaning that one needs to multiply incoming millisecond + * values by one million to convert them to nanoseconds. If 'array' is not one of those types, + * return 1. + * @param array The Arrow array + * @return For supported time types, the conversion factor to nanoseconds. Otherwise, 1. + */ +size_t CalcTimeNanoScaleFactor(const arrow::Array &array); + template class GenericArrowColumnSource final : public TColumnSourceBase { using BooleanChunk = deephaven::dhcore::chunk::BooleanChunk; @@ -37,7 +50,8 @@ class GenericArrowColumnSource final : public TColumnSourceBase { using UInt64Chunk = deephaven::dhcore::chunk::UInt64Chunk; public: - static std::shared_ptr OfArrowArray(std::shared_ptr array) { + static std::shared_ptr + OfArrowArray(std::shared_ptr array) { std::vector> arrays{std::move(array)}; return OfArrowArrayVec(std::move(arrays)); } @@ -48,7 +62,9 @@ class GenericArrowColumnSource final : public TColumnSourceBase { } explicit GenericArrowColumnSource(std::vector> arrays) : - arrays_(std::move(arrays)) {} + arrays_(std::move(arrays)) { + time_nano_scale_factor_ = arrays_.empty() ? 1 : CalcTimeNanoScaleFactor(*arrays_.front()); + } ~GenericArrowColumnSource() final = default; @@ -67,13 +83,14 @@ class GenericArrowColumnSource final : public TColumnSourceBase { // This algorithm is a little tricky because the source data and RowSequence are both // segmented, perhaps in different ways. - auto *typed_dest = VerboseCast(DEEPHAVEN_LOCATION_EXPR(dest_data)); + auto *typed_dest = VerboseCast(DEEPHAVEN_LOCATION_EXPR(dest_data)); auto *destp = typed_dest->data(); auto outerp = arrays_.begin(); size_t src_segment_begin = 0; size_t src_segment_end = (*outerp)->length(); - auto *null_destp = optional_dest_null_flags != nullptr ? optional_dest_null_flags->data() : nullptr; + auto *null_destp = + optional_dest_null_flags != nullptr ? optional_dest_null_flags->data() : nullptr; rows.ForEachInterval([&](uint64_t requested_segment_begin, uint64_t requested_segment_end) { while (true) { @@ -147,11 +164,12 @@ class GenericArrowColumnSource final : public TColumnSourceBase { const auto *src_endp = innerp->raw_values() + relative_end; for (const auto *ip = src_beginp; ip != src_endp; ++ip) { - *destp = DateTime::FromNanos(*ip); + auto is_null = *ip == DeephavenTraits::kNullValue; + *destp = DateTime::FromNanos(is_null ? *ip : (*ip * time_nano_scale_factor_)); ++destp; if (null_destp != nullptr) { - *null_destp = *ip == DeephavenTraits::kNullValue; + *null_destp = is_null; ++null_destp; } } @@ -175,11 +193,12 @@ class GenericArrowColumnSource final : public TColumnSourceBase { const auto *src_endp = innerp->raw_values() + relative_end; for (const auto *ip = src_beginp; ip != src_endp; ++ip) { - *destp = LocalTime::FromNanos(*ip); + auto is_null = *ip == DeephavenTraits::kNullValue; + *destp = LocalTime::FromNanos(is_null ? *ip : (*ip * time_nano_scale_factor_)); ++destp; if (null_destp != nullptr) { - *null_destp = *ip == DeephavenTraits::kNullValue; + *null_destp = is_null; ++null_destp; } } @@ -200,6 +219,18 @@ class GenericArrowColumnSource final : public TColumnSourceBase { private: std::vector> arrays_; + /** + * This value is valid for Style == ArrowProcessingStyle::kTimestamp and + * ArrowProcessingStyle::kLocalTime, and ignored for other ArrowProcessingStyle enumeration + * values. + * + * These ArrowProcessingStyles come into play when processing the arrow types + * arrow::TimestampType and arrow::Time64Type respectively. + * + * The value stores a conversion factor from whatever the input scale is to nanoseconds. + * For example, if the input timescale is milliseconds, this value will be 1_000_000. + */ + size_t time_nano_scale_factor_ = 1; }; } // namespace internal diff --git a/cpp-client/deephaven/dhclient/include/public/deephaven/client/utility/internal_types.h b/cpp-client/deephaven/dhclient/include/public/deephaven/client/utility/internal_types.h new file mode 100644 index 00000000000..bedc713f20f --- /dev/null +++ b/cpp-client/deephaven/dhclient/include/public/deephaven/client/utility/internal_types.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending + */ +#pragma once + +#include + +namespace deephaven::client::utility { +/** + * For Deephaven use only + */ +namespace internal { +/** + * This class exists only for the benefit of the unit tests. Our normal DateTime class has a + * native time resolution of nanoseconds. This class allows our unit tests to upload a + * DateTime having a different time unit so we can confirm that the client and server both handle + * it correctly. + */ +template +struct InternalDateTime { + explicit InternalDateTime(int64_t value) : value_(value) {} + + int64_t value_ = 0; +}; + +/** + * This class exists only for the benefit of the unit tests. Our normal LocalTime class has a + * native time resolution of nanoseconds. This class allows our unit tests to upload a + * LocalTime having a different time unit so we can confirm that the client and server both handle + * it correctly. + */ +template +struct InternalLocalTime { + // Arrow Time64 only supports micro and nano units + static_assert(UNIT == arrow::TimeUnit::MICRO || UNIT == arrow::TimeUnit::NANO); + + explicit InternalLocalTime(int64_t value) : value_(value) {} + + int64_t value_ = 0; +}; +} // namespace internal +} // namespace deephaven::client::utility diff --git a/cpp-client/deephaven/dhclient/include/public/deephaven/client/utility/table_maker.h b/cpp-client/deephaven/dhclient/include/public/deephaven/client/utility/table_maker.h index 02f39647c82..56e56be2043 100644 --- a/cpp-client/deephaven/dhclient/include/public/deephaven/client/utility/table_maker.h +++ b/cpp-client/deephaven/dhclient/include/public/deephaven/client/utility/table_maker.h @@ -19,6 +19,9 @@ #include "deephaven/client/client.h" #include "deephaven/client/utility/arrow_util.h" +#include "deephaven/client/utility/internal_types.h" +#include "deephaven/client/utility/misc_types.h" +#include "deephaven/dhcore/types.h" #include "deephaven/dhcore/utility/utility.h" #include "deephaven/third_party/fmt/format.h" @@ -356,6 +359,38 @@ struct TypeConverterTraits> { } }; +template +struct TypeConverterTraits> { + static std::shared_ptr GetDataType() { + return arrow::timestamp(UNIT, "UTC"); + } + static arrow::TimestampBuilder GetBuilder() { + return arrow::TimestampBuilder(GetDataType(), arrow::default_memory_pool()); + } + static int64_t Reinterpret(const deephaven::client::utility::internal::InternalDateTime &o) { + return o.value_; + } + static std::string_view GetDeephavenTypeName() { + return "java.time.ZonedDateTime"; + } +}; + +template +struct TypeConverterTraits> { + static std::shared_ptr GetDataType() { + return arrow::time64(UNIT); + } + static arrow::Time64Builder GetBuilder() { + return arrow::Time64Builder(GetDataType(), arrow::default_memory_pool()); + } + static int64_t Reinterpret(const deephaven::client::utility::internal::InternalLocalTime &o) { + return o.value_; + } + static std::string_view GetDeephavenTypeName() { + return "java.time.LocalTime"; + } +}; + template TypeConverter TypeConverter::CreateNew(const std::vector &values) { using deephaven::client::utility::OkOrThrow; diff --git a/cpp-client/deephaven/dhclient/src/arrowutil/arrow_column_source.cc b/cpp-client/deephaven/dhclient/src/arrowutil/arrow_column_source.cc new file mode 100644 index 00000000000..1131eba68a3 --- /dev/null +++ b/cpp-client/deephaven/dhclient/src/arrowutil/arrow_column_source.cc @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending + */ +#include "deephaven/client/arrowutil/arrow_column_source.h" +#include "deephaven/client/utility/arrow_util.h" + +using deephaven::client::utility::OkOrThrow; + +namespace deephaven::client::arrowutil { +namespace internal { + +namespace { +struct NanoScaleFactorVisitor final : public arrow::TypeVisitor { + size_t result_ = 1; + + arrow::Status Visit(const arrow::Int8Type &/*type*/) final { + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::Int16Type &/*type*/) final { + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::Int32Type &/*type*/) final { + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::Int64Type &/*type*/) final { + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::FloatType &/*type*/) final { + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::DoubleType &/*type*/) final { + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::BooleanType &/*type*/) final { + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::UInt16Type &/*type*/) final { + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::StringType &/*type*/) final { + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::TimestampType &type) final { + result_ = ScaleFromUnit(type.unit()); + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::Date64Type &/*type*/) final { + return arrow::Status::OK(); + } + + arrow::Status Visit(const arrow::Time64Type &type) final { + result_ = ScaleFromUnit(type.unit()); + return arrow::Status::OK(); + } + + static size_t ScaleFromUnit(arrow::TimeUnit::type unit) { + switch (unit) { + case arrow::TimeUnit::SECOND: return 1'000'000'000; + case arrow::TimeUnit::MILLI: return 1'000'000; + case arrow::TimeUnit::MICRO: return 1'000; + case arrow::TimeUnit::NANO: return 1; + default: { + auto message = fmt::format("Unhandled arrow::TimeUnit {}", static_cast(unit)); + throw std::runtime_error(DEEPHAVEN_LOCATION_STR(message)); + } + } + } +}; +} // namespace + +size_t CalcTimeNanoScaleFactor(const arrow::Array &array) { + NanoScaleFactorVisitor visitor; + OkOrThrow(DEEPHAVEN_LOCATION_EXPR(array.type()->Accept(&visitor))); + return visitor.result_; +} +} // namespace internal +} // namespace deephaven::client::arrowutil diff --git a/cpp-client/deephaven/dhclient/src/utility/arrow_util.cc b/cpp-client/deephaven/dhclient/src/utility/arrow_util.cc index e71848bafef..bee1613764c 100644 --- a/cpp-client/deephaven/dhclient/src/utility/arrow_util.cc +++ b/cpp-client/deephaven/dhclient/src/utility/arrow_util.cc @@ -81,12 +81,7 @@ struct ArrowToElementTypeId final : public arrow::TypeVisitor { return arrow::Status::OK(); } - arrow::Status Visit(const arrow::TimestampType &type) final { - if (type.unit() != arrow::TimeUnit::NANO) { - auto message = fmt::format("Expected TimestampType with nano units, got {}", - type.ToString()); - throw std::runtime_error(DEEPHAVEN_LOCATION_STR(message)); - } + arrow::Status Visit(const arrow::TimestampType &/*type*/) final { type_id_ = ElementTypeId::kTimestamp; return arrow::Status::OK(); } @@ -96,12 +91,7 @@ struct ArrowToElementTypeId final : public arrow::TypeVisitor { return arrow::Status::OK(); } - arrow::Status Visit(const arrow::Time64Type &type) final { - if (type.unit() != arrow::TimeUnit::NANO) { - auto message = fmt::format("Expected Time64Type with nano units, got {}", - type.ToString()); - throw std::runtime_error(DEEPHAVEN_LOCATION_STR(message)); - } + arrow::Status Visit(const arrow::Time64Type &/*type*/) final { type_id_ = ElementTypeId::kLocalTime; return arrow::Status::OK(); } diff --git a/cpp-client/deephaven/dhcore/include/public/deephaven/dhcore/types.h b/cpp-client/deephaven/dhcore/include/public/deephaven/dhcore/types.h index 1f5cd02763c..a87b819b30d 100644 --- a/cpp-client/deephaven/dhcore/include/public/deephaven/dhcore/types.h +++ b/cpp-client/deephaven/dhcore/include/public/deephaven/dhcore/types.h @@ -541,7 +541,6 @@ class LocalTime { /** * Converts nanoseconds-since-start-of-day to LocalTime. The Deephaven null value sentinel is * turned into LocalTime(0). - * TODO(kosak): find out null convention * @param nanos Nanoseconds since the start of the day. * @return The corresponding LocalTime. */ @@ -579,7 +578,6 @@ class LocalTime { return !(lhs == rhs); } }; - } // namespace deephaven::dhcore template<> struct fmt::formatter : ostream_formatter {}; diff --git a/cpp-client/deephaven/tests/CMakeLists.txt b/cpp-client/deephaven/tests/CMakeLists.txt index c6a246386f1..307e3c704e5 100644 --- a/cpp-client/deephaven/tests/CMakeLists.txt +++ b/cpp-client/deephaven/tests/CMakeLists.txt @@ -27,6 +27,7 @@ add_executable(dhclient_tests src/table_test.cc src/test_util.cc src/ticking_test.cc + src/time_unit_test.cc src/ungroup_test.cc src/update_by_test.cc src/utility_test.cc diff --git a/cpp-client/deephaven/tests/src/time_unit_test.cc b/cpp-client/deephaven/tests/src/time_unit_test.cc new file mode 100644 index 00000000000..f3b605b9539 --- /dev/null +++ b/cpp-client/deephaven/tests/src/time_unit_test.cc @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending + */ +#include +#include "deephaven/third_party/catch.hpp" +#include "deephaven/tests/test_util.h" +#include "deephaven/client/utility/internal_types.h" +#include "deephaven/client/utility/misc_types.h" +#include "deephaven/dhcore/container/row_sequence.h" +#include "deephaven/dhcore/types.h" + +using deephaven::client::Client; +using deephaven::client::TableHandle; +using deephaven::client::utility::TableMaker; +using deephaven::client::utility::internal::InternalDateTime; +using deephaven::client::utility::internal::InternalLocalTime; +using deephaven::dhcore::DateTime; +using deephaven::dhcore::DeephavenConstants; +using deephaven::dhcore::LocalDate; +using deephaven::dhcore::LocalTime; +using deephaven::dhcore::chunk::BooleanChunk; +using deephaven::dhcore::chunk::DateTimeChunk; +using deephaven::dhcore::chunk::LocalTimeChunk; +using deephaven::dhcore::container::RowSequence; + +namespace deephaven::client::tests { +TEST_CASE("Uploaded Arrow Timestamp units get normalized to nanos at FillChunk time", "[timeunit]") { + auto tm = TableMakerForTests::Create(); + + std::vector>> dt_sec; + std::vector>> dt_milli; + std::vector>> dt_micro; + std::vector>> dt_nano; + + // First row: one second (in various units) + dt_sec.emplace_back(1); + dt_milli.emplace_back(1'000); + dt_micro.emplace_back(1'000'000); + dt_nano.emplace_back(1'000'000'000); + + // Second row: null + dt_sec.emplace_back(); + dt_milli.emplace_back(); + dt_micro.emplace_back(); + dt_nano.emplace_back(); + + TableMaker maker; + maker.AddColumn("dt_sec", dt_sec); + maker.AddColumn("dt_milli", dt_milli); + maker.AddColumn("dt_micro", dt_micro); + maker.AddColumn("dt_nano", dt_nano); + auto t = maker.MakeTable(tm.Client().GetManager()); + + std::cout << t.Stream(true) << '\n'; + + auto client_table = t.ToClientTable(); + + auto rs = RowSequence::CreateSequential(0, 2); + auto dest = DateTimeChunk::Create(2); + auto nulls = BooleanChunk::Create(2); + + auto expected = DateTime::FromNanos(1'000'000'000); + + for (size_t i = 0; i != client_table->NumColumns(); ++i) { + auto col = client_table->GetColumn(i); + col->FillChunk(*rs, &dest, &nulls); + + CHECK(expected == dest.data()[0]); + CHECK(false == nulls.data()[0]); + + CHECK(true == nulls.data()[1]); + } +} + +TEST_CASE("Uploaded Arrow Time64 units get normalized to nanos at FillChunk time", "[timeunit][.hidden]") { + auto tm = TableMakerForTests::Create(); + + std::vector>> lt_micro; + std::vector>> lt_nano; + + // First row: one second (in various units) + lt_micro.emplace_back(1'000'000); + lt_nano.emplace_back(1'000'000'000); + + // Second row: null + lt_micro.emplace_back(); + lt_nano.emplace_back(); + + TableMaker maker; + maker.AddColumn("lt_micro", lt_micro); + maker.AddColumn("lt_nano", lt_nano); + auto t = maker.MakeTable(tm.Client().GetManager()); + + std::cout << t.Stream(true) << '\n'; + + auto client_table = t.ToClientTable(); + + auto rs = RowSequence::CreateSequential(0, 2); + auto dest = LocalTimeChunk::Create(2); + auto nulls = BooleanChunk::Create(2); + + auto expected = LocalTime::FromNanos(1'000'000'000); + + for (size_t i = 0; i != client_table->NumColumns(); ++i) { + auto col = client_table->GetColumn(i); + col->FillChunk(*rs, &dest, &nulls); + + CHECK(expected == dest.data()[0]); + CHECK(false == nulls.data()[0]); + + CHECK(true == nulls.data()[1]); + } +} +} // namespace deephaven::client::tests