Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8371297134 Type promotion between integral and floating types #2173

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/arcticdb/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -937,7 +937,7 @@ if(${TEST})
processing/test/test_component_manager.cpp
processing/test/test_expression.cpp
processing/test/test_filter_and_project_sparse.cpp
processing/test/test_has_valid_type_promotion.cpp
processing/test/test_type_promotion.cpp
processing/test/test_operation_dispatch.cpp
processing/test/test_parallel_processing.cpp
processing/test/test_resample.cpp
Expand Down
170 changes: 99 additions & 71 deletions cpp/arcticdb/entity/type_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,32 +39,57 @@ namespace arcticdb {
return false;
}

std::optional<entity::TypeDescriptor> has_valid_type_promotion(
constexpr bool is_mixed_float_and_integer(entity::DataType left, entity::DataType right) {
return (is_integer_type(left) && is_floating_point_type(right)) || (is_floating_point_type(left) && is_integer_type(right));
}

static std::optional<entity::TypeDescriptor> common_type_float_integer(const entity::TypeDescriptor& left, const entity::TypeDescriptor& right) {
util::check(left.dimension() == right.dimension(), "Dimensions should match but were left={} right={}", left.dimension(), right.dimension());
auto dimension = left.dimension();
auto left_type = left.data_type();
auto right_type = right.data_type();
auto left_size = slice_bit_size(left_type);
auto right_size = slice_bit_size(right_type);
internal::check<ErrorCode::E_ASSERTION_FAILURE>(is_mixed_float_and_integer(left_type, right_type),
"Expected one int and one float in common_type_floats_integer");

auto target_size = entity::SizeBits::UNKNOWN_SIZE_BITS;
auto floating_size = is_floating_point_type(left_type) ? left_size : right_size;
auto integral_size = is_integer_type(left_type) ? left_size : right_size;
if (floating_size == entity::SizeBits::S64 || integral_size >= entity::SizeBits::S32) {
// (u)int64 up to float64 will lose precision, we accept that
target_size = entity::SizeBits::S64;
} else {
// (u)int(8/16) can fit in float32 since float32 has 24 precision bits
target_size = entity::SizeBits::S32;
}

return std::make_optional<entity::TypeDescriptor>(combine_data_type(entity::ValueType::FLOAT, target_size), dimension);
}

bool is_valid_type_promotion_to_target(
const entity::TypeDescriptor& source,
const entity::TypeDescriptor& target
) {

if (source.dimension() != target.dimension()) {
// Empty of dimension 0 means lack of any given type and can be promoted to anything (even if the dimensions
// don't match), e.g. empty type can become int or array of ints. Empty type of higher dimension is used to
// specify an empty array or an empty matrix, thus it cannot become any other type unless the dimensionality
// matches
if (is_empty_type(source.data_type()) && source.dimension() == entity::Dimension::Dim0)
return target;
return std::nullopt;
return is_empty_type(source.data_type()) && source.dimension() == entity::Dimension::Dim0;
}

if (source == target)
return target;
return true;

// Empty type is coercible to any type
if (is_empty_type(source.data_type())) {
return target;
return true;
}

// Nothing is coercible to the empty type.
if (is_empty_type(target.data_type())) {
return std::nullopt;
return false;
}

auto source_type = source.data_type();
Expand All @@ -73,101 +98,104 @@ namespace arcticdb {
auto target_size = slice_bit_size(target_type);

if (is_time_type(source_type)) {
if (!is_time_type(target_type))
return std::nullopt;
return is_time_type(target_type);
} else if (is_unsigned_type(source_type)) {
if (is_unsigned_type(target_type)) {
// UINT->UINT, target_size must be >= source_size
if (source_size > target_size)
return std::nullopt;
// UINT->UINT
return target_size >= source_size;
} else if (is_signed_type(target_type)) {
// UINT->INT, target_size must be > source_size
if (source_size >= target_size)
return std::nullopt;
// UINT->INT
return target_size > source_size;
} else if (is_floating_point_type(target_type)) {
// UINT->FLOAT, no restrictions on relative sizes
// UINT->FLOAT
return target_size == entity::SizeBits::S64 || source_size < entity::SizeBits::S32;
} else {
// Non-numeric target type
return std::nullopt;
return false;
}
} else if (is_signed_type(source_type)) {
if (is_unsigned_type(target_type)) {
// INT->UINT never promotable
return std::nullopt;
return false;
} else if (is_signed_type(target_type)) {
// INT->INT, target_size must be >= source_size
if (source_size > target_size)
return std::nullopt;
// INT->INT
return target_size >= source_size;
} else if (is_floating_point_type(target_type)) {
// INT->FLOAT, no restrictions on relative sizes
// INT->FLOAT
return target_size == entity::SizeBits::S64 || source_size < entity::SizeBits::S32;
} else {
// Non-numeric target type
return std::nullopt;
return false;
}
} else if (is_floating_point_type(source_type)) {
if (is_unsigned_type(target_type) || is_signed_type(target_type)) {
if (is_integer_type(target_type)) {
// FLOAT->U/INT never promotable
return std::nullopt;
return false;
} else if (is_floating_point_type(target_type)) {
// FLOAT->FLOAT, target_size must be >= source_size
if (source_size > target_size)
return std::nullopt;
// FLOAT->FLOAT
return target_size >= source_size;
} else {
// Non-numeric target type
return std::nullopt;
return false;
}
} else if (is_sequence_type(source_type) && is_sequence_type(target_type)) {
// Only allow promotion with UTF strings, and only to dynamic (never to fixed width)
if (!is_utf_type(source_type) || !is_utf_type(target_type) || !is_dynamic_string_type(target_type))
return std::nullopt;
return is_utf_type(source_type) && is_utf_type(target_type) && is_dynamic_string_type(target_type);
} else if (is_bool_object_type(source_type)) {
return std::nullopt;
return false;
} else {
// Non-numeric source type
return std::nullopt;
return false;
}
}

static std::optional<entity::TypeDescriptor> common_type_mixed_sign_ints(const entity::TypeDescriptor& left, const entity::TypeDescriptor& right) {
auto dimension = left.dimension();
auto left_type = left.data_type();
auto right_type = right.data_type();
auto left_size = slice_bit_size(left_type);
auto right_size = slice_bit_size(right_type);
// To get here we must have one signed and one unsigned type, with the width of the signed type <= the width of the unsigned type
internal::check<ErrorCode::E_ASSERTION_FAILURE>(is_signed_type(left_type) ^ is_signed_type(right_type),
"Expected one signed and one unsigned int in has_valid_common_type");
if (is_signed_type(left_type)) {
internal::check<ErrorCode::E_ASSERTION_FAILURE>(left_size <= right_size,
"Expected left_size <= right_size in has_valid_common_type");
} else {
// is_signed_type(right_type)
internal::check<ErrorCode::E_ASSERTION_FAILURE>(right_size <= left_size,
"Expected right_size <= left_size in has_valid_common_type");
}

return target;
auto target_size = entity::SizeBits(uint8_t(std::max(left_size, right_size)) + 1);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I get why this works but it seems a bit hacky and fragile.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just the old logic moved around, so I'd rather not change it

if (target_size < entity::SizeBits::COUNT) {
return std::make_optional<entity::TypeDescriptor>(combine_data_type(entity::ValueType::INT, target_size), dimension);
} else {
return std::nullopt;
}
}

std::optional<entity::TypeDescriptor> has_valid_common_type(
const entity::TypeDescriptor& left,
const entity::TypeDescriptor& right
) {
auto maybe_common_type = has_valid_type_promotion(left, right);
if (!maybe_common_type) {
maybe_common_type = has_valid_type_promotion(right, left);
std::optional<entity::TypeDescriptor> has_valid_common_type(const entity::TypeDescriptor& left, const entity::TypeDescriptor& right) {
if (is_valid_type_promotion_to_target(left, right)) {
return right;
} else if (is_valid_type_promotion_to_target(right, left)) {
return left;
}

if (left.dimension() != right.dimension()) {
return std::nullopt;
}
// has_valid_type_promotion checks if the second type can represent all values of the first type
// We also want to handle cases where there is a third type that can represent both
// In practice, this is only the case when there is one signed and one unsigned integer right now
if (!maybe_common_type &&
left.dimension() == right.dimension() &&
is_integer_type(left.data_type()) &&
is_integer_type(right.data_type())) {
auto dimension = left.dimension();
auto left_type = left.data_type();
auto right_type = right.data_type();
auto left_size = slice_bit_size(left_type);
auto right_size = slice_bit_size(right_type);
// To get here we must have one signed and one unsigned type, with the width of the signed type <= the width of the unsigned type
internal::check<ErrorCode::E_ASSERTION_FAILURE>(is_signed_type(left_type) ^ is_signed_type(right_type),

if (is_integer_type(left.data_type()) && is_integer_type(right.data_type())) {
// One must be signed and the other unsigned, since if they matched is_valid_type_promotion_to_target would have handled them already
internal::check<ErrorCode::E_ASSERTION_FAILURE>(is_signed_type(left.data_type()) ^ is_signed_type(right.data_type()),
"Expected one signed and one unsigned int in has_valid_common_type");
if (is_signed_type(left_type)) {
internal::check<ErrorCode::E_ASSERTION_FAILURE>(left_size <= right_size,
"Expected left_size <= right_size in has_valid_common_type");
} else {
// is_signed_type(right_type)
internal::check<ErrorCode::E_ASSERTION_FAILURE>(right_size <= left_size,
"Expected right_size <= left_size in has_valid_common_type");
}
auto target_size = entity::SizeBits(uint8_t(std::max(left_size, right_size)) + 1);
if (target_size < entity::SizeBits::COUNT) {
maybe_common_type = entity::TypeDescriptor{
combine_data_type(entity::ValueType::INT, target_size),
dimension};
}
return common_type_mixed_sign_ints(left, right);
} else if (is_mixed_float_and_integer(left.data_type(), right.data_type())) {
return common_type_float_integer(left, right);
} else {
return std::nullopt;
}
return maybe_common_type;
}

}
12 changes: 1 addition & 11 deletions cpp/arcticdb/entity/type_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,16 @@ namespace entity {
/// n elements of left type from one buffer to n elements of type right in another buffer and get the same result
[[nodiscard]] bool trivially_compatible_types(const entity::TypeDescriptor& left, const entity::TypeDescriptor& right);

[[nodiscard]] std::optional<entity::TypeDescriptor> has_valid_type_promotion(
[[nodiscard]] bool is_valid_type_promotion_to_target(
const entity::TypeDescriptor& source,
const entity::TypeDescriptor& target
);

[[nodiscard]] std::optional<entity::TypeDescriptor> has_valid_type_promotion(
const proto::descriptors::TypeDescriptor& source,
const proto::descriptors::TypeDescriptor& target
);

[[nodiscard]] std::optional<entity::TypeDescriptor> has_valid_common_type(
const entity::TypeDescriptor& left,
const entity::TypeDescriptor& right
);

[[nodiscard]] std::optional<entity::TypeDescriptor> has_valid_common_type(
const proto::descriptors::TypeDescriptor& left,
const proto::descriptors::TypeDescriptor& right
);

inline std::string get_user_friendly_type_string(const entity::TypeDescriptor& type) {
return is_sequence_type(type.data_type()) ? fmt::format("TD<type=STRING, dim={}>", type.dimension_) : fmt::format("{}", type);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/arcticdb/pipeline/execution.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ struct ColumnExpression {
ColumnExpression(const IterableContainer& left, const IterableContainer& right) :
left_(left),
right_(right) {
auto promoted_type = has_valid_type_promotion(left_->type(), right_->type());
auto promoted_type = has_valid_common_type(left_->type(), right_->type());
util::check(static_cast<bool>(promoted_type), "Cannot promote from type {} and type {} in column expression", left_->type(), right_->type());
type_ = promoted_type.value();
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/arcticdb/pipeline/read_frame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ void decode_into_frame_static(

void check_mapping_type_compatibility(const ColumnMapping& m) {
util::check(
static_cast<bool>(has_valid_type_promotion(m.source_type_desc_, m.dest_type_desc_)),
is_valid_type_promotion_to_target(m.source_type_desc_, m.dest_type_desc_),
"Can't promote type {} to type {} in field {}",
m.source_type_desc_,
m.dest_type_desc_,
Expand Down
1 change: 0 additions & 1 deletion cpp/arcticdb/processing/clause_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,6 @@ std::vector<std::vector<EntityId>> offsets_to_entity_ids(const std::vector<std::
* On entry to a clause, construct ProcessingUnits from the input entity IDs. These will be provided by the
* structure_for_processing method for the first clause in the pipeline and for clauses whose expected input structure
* differs from the previous clause's output structure. Otherwise, these come directly from the previous clause.
* clauses.
*/
template<class... Args>
ProcessingUnit gather_entities(ComponentManager& component_manager, const std::vector<EntityId>& entity_ids) {
Expand Down
9 changes: 4 additions & 5 deletions cpp/arcticdb/processing/operation_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,10 @@ struct type_arithmetic_promoted_type {
double,
float
>,
// Otherwise, if only one type is floating point, promote to this type
std::conditional_t<std::is_floating_point_v<LHS>,
LHS,
RHS
>
// Otherwise, if only one type is floating point, always promote to double
// For example when combining int32 and float32 the result can only fit in float64 without loss of precision
// Special cases like int16 and float32 can fit in float32, but we always promote up to float64 (as does Pandas)
double
>,
// Otherwise, both types are integers
std::conditional_t<std::is_unsigned_v<LHS> && std::is_unsigned_v<RHS>,
Expand Down
Loading
Loading