-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy patharrow_clickhouse_types.h
221 lines (176 loc) · 6.57 KB
/
arrow_clickhouse_types.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#pragma once
#include <cstdint>
#include <list>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>
#include <cstdint>
#include <arrow/api.h>
#include <arrow/compute/api.h>
#include <arrow/util/bitmap.h>
#include <common/StringRef.h>
#include <common/defines.h>
#include <common/extended_types.h>
#include <Common/PODArray_fwd.h>
namespace AH
{
/// Policy describing what to do if a limit is exceeded.
/// NOTE(review): which limit is meant is decided by the code that consumes this enum; it is not visible here.
enum class OverflowMode
{
THROW = 0, /// Throw an exception.
BREAK = 1, /// Abort query execution and return whatever result there is.
/** Only for GROUP BY: do not add new rows to the set,
* but continue to aggregate for keys that are already in the set.
*/
ANY = 2,
};
/// ClickHouse-style vocabulary names expressed through standard-library and Arrow types.

/// Exception is simply an alias for std::runtime_error.
using Exception = std::runtime_error;
/// Vector of column numbers. NOTE: 32-bit elements here, while ClickHouse (CH) uses vector<size_t>.
using ColumnNumbers = std::vector<uint32_t>; // it's vector<size_t> in CH
/// List of names stored as std::string.
using Names = std::vector<std::string>;
/// A block is a shared pointer to an Arrow record batch.
using Block = std::shared_ptr<arrow::RecordBatch>;
using BlocksList = std::list<Block>;
/// Array is a vector of Arrow scalars (arrow::ScalarVector), not an arrow::Array.
using Array = arrow::ScalarVector;
/// Column descriptors map onto Arrow fields; a header maps onto a shared Arrow schema.
using ColumnWithTypeAndName = arrow::Field;
using ColumnsWithTypeAndName = arrow::FieldVector;
using Header = std::shared_ptr<arrow::Schema>;
using Sizes = std::vector<size_t>;
// TODO: replace with arrow::memory_pool
/// Arena is only forward-declared here; its definition lives elsewhere.
class Arena;
using ArenaPtr = std::shared_ptr<Arena>;
using ConstArenaPtr = std::shared_ptr<const Arena>;
using ConstArenas = std::vector<ConstArenaPtr>;
/// Read-only columns are Arrow arrays, held by shared pointer or observed through raw const pointers.
using IColumn = arrow::Array;
using ColumnPtr = std::shared_ptr<IColumn>;
using Columns = std::vector<ColumnPtr>;
using ColumnRawPtrs = std::vector<const IColumn *>;
/// Mutable columns are Arrow array builders.
using MutableColumn = arrow::ArrayBuilder;
using MutableColumnPtr = std::shared_ptr<arrow::ArrayBuilder>;
using MutableColumns = std::vector<MutableColumnPtr>;
/// Holder for column-related helper type aliases; it declares no data members or functions.
/// NOTE(review): presumably stands in for the nested types of ClickHouse's IColumn, since IColumn
/// is aliased to arrow::Array in this file and cannot carry them - confirm.
struct XColumn
{
/// 64-bit offset value and a padded POD array of such offsets.
using Offset = UInt64;
using Offsets = PaddedPODArray<Offset>;
/// 64-bit column index and a padded POD array of such indexes.
using ColumnIndex = UInt64;
using Selector = PaddedPODArray<ColumnIndex>;
/// Padded POD array with one UInt8 per element.
using Filter = PaddedPODArray<UInt8>;
};
/// Concrete read-only column types: ClickHouse-style names mapped onto Arrow array classes.

/// Signed integers.
using ColumnInt8 = arrow::NumericArray<arrow::Int8Type>;
using ColumnInt16 = arrow::NumericArray<arrow::Int16Type>;
using ColumnInt32 = arrow::NumericArray<arrow::Int32Type>;
using ColumnInt64 = arrow::NumericArray<arrow::Int64Type>;
/// Unsigned integers.
using ColumnUInt8 = arrow::NumericArray<arrow::UInt8Type>;
using ColumnUInt16 = arrow::NumericArray<arrow::UInt16Type>;
using ColumnUInt32 = arrow::NumericArray<arrow::UInt32Type>;
using ColumnUInt64 = arrow::NumericArray<arrow::UInt64Type>;
/// Floating point: Float32 is Arrow's FloatType, Float64 is Arrow's DoubleType.
using ColumnFloat32 = arrow::NumericArray<arrow::FloatType>;
using ColumnFloat64 = arrow::NumericArray<arrow::DoubleType>;
/// Binary and string data; FixedString maps onto Arrow's fixed-size binary array.
using ColumnBinary = arrow::BinaryArray;
using ColumnString = arrow::StringArray;
using ColumnFixedString = arrow::FixedSizeBinaryArray;
/// Temporal and decimal data.
using ColumnTimestamp = arrow::TimestampArray;
using ColumnDuration = arrow::DurationArray;
using ColumnDecimal = arrow::DecimalArray;
/// Concrete mutable column types: ClickHouse-style names mapped onto Arrow builder classes.

/// Signed integers.
using MutableColumnInt8 = arrow::Int8Builder;
using MutableColumnInt16 = arrow::Int16Builder;
using MutableColumnInt32 = arrow::Int32Builder;
using MutableColumnInt64 = arrow::Int64Builder;
/// Unsigned integers.
using MutableColumnUInt8 = arrow::UInt8Builder;
using MutableColumnUInt16 = arrow::UInt16Builder;
using MutableColumnUInt32 = arrow::UInt32Builder;
using MutableColumnUInt64 = arrow::UInt64Builder;
/// Floating point: Float32 is Arrow's FloatBuilder, Float64 is Arrow's DoubleBuilder.
using MutableColumnFloat32 = arrow::FloatBuilder;
using MutableColumnFloat64 = arrow::DoubleBuilder;
/// Binary and string data; FixedString maps onto Arrow's fixed-size binary builder.
using MutableColumnBinary = arrow::BinaryBuilder;
using MutableColumnString = arrow::StringBuilder;
using MutableColumnFixedString = arrow::FixedSizeBinaryBuilder;
/// Temporal and decimal data.
using MutableColumnTimestamp = arrow::TimestampBuilder;
using MutableColumnDuration = arrow::DurationBuilder;
using MutableColumnDecimal = arrow::DecimalBuilder;
/// Data type descriptors: ClickHouse-style names mapped onto Arrow data type classes.

/// Base data type, shared pointer to it, and a vector of data types (arrow::DataTypeVector).
using IDataType = arrow::DataType;
using DataTypePtr = std::shared_ptr<IDataType>;
using DataTypes = arrow::DataTypeVector;
/// Signed integers.
using DataTypeInt8 = arrow::Int8Type;
using DataTypeInt16 = arrow::Int16Type;
using DataTypeInt32 = arrow::Int32Type;
using DataTypeInt64 = arrow::Int64Type;
/// Unsigned integers.
using DataTypeUInt8 = arrow::UInt8Type;
using DataTypeUInt16 = arrow::UInt16Type;
using DataTypeUInt32 = arrow::UInt32Type;
using DataTypeUInt64 = arrow::UInt64Type;
/// Floating point: Float32 is Arrow's FloatType, Float64 is Arrow's DoubleType.
using DataTypeFloat32 = arrow::FloatType;
using DataTypeFloat64 = arrow::DoubleType;
/// Binary and string data; FixedString maps onto Arrow's fixed-size binary type.
using DataTypeBinary = arrow::BinaryType;
using DataTypeString = arrow::StringType;
using DataTypeFixedString = arrow::FixedSizeBinaryType;
/// Temporal and decimal data.
using DataTypeTimestamp = arrow::TimestampType;
using DataTypeDuration = arrow::DurationType;
using DataTypeDecimal = arrow::DecimalType;
/// Aggregate function interface; only forward-declared in this header.
class IAggregateFunction;
/// Shared pointer to an immutable (const) aggregate function.
using AggregateFunctionPtr = std::shared_ptr<const IAggregateFunction>;
/// Describes a single aggregate: the function, its parameters, its arguments and the result column name.
struct AggregateDescription
{
AggregateFunctionPtr function; /// The aggregate function itself (shared, const).
Array parameters; /// Parameters of the (parametric) aggregate function.
ColumnNumbers arguments; /// Column numbers of the arguments. NOTE(review): presumably positions within a block - confirm.
Names argument_names; /// Used if no `arguments` are specified.
String column_name; /// What name to use for a column with aggregate function values.
};
/// A list of aggregate descriptions.
using AggregateDescriptions = std::vector<AggregateDescription>;
/// Non-owning raw pointers to UInt64 builders (mutable) and UInt64 arrays (const).
/// NOTE(review): presumably one entry per aggregate function - confirm against the users of these aliases.
using AggregateColumnsData = std::vector<arrow::UInt64Builder *>;
using AggregateColumnsConstData = std::vector<const arrow::UInt64Array *>;
/// Creates one all-null column per field of `schema`, each with `num_rows` rows.
///
/// @param schema    Header (shared Arrow schema) whose fields define the column types. Must not be null.
/// @param num_rows  Length of every created column (0 by default).
/// @return Columns in the same order as the fields of the schema.
/// @throws Exception if Arrow fails to build a null array for one of the fields.
inline Columns columnsFromHeader(const Header & schema, size_t num_rows = 0)
{
    Columns columns;
    columns.reserve(schema->num_fields());

    for (const auto & field : schema->fields())
    {
        auto maybe_column = arrow::MakeArrayOfNull(field->type(), num_rows);

        /// Dereferencing a failed arrow::Result terminates the process, so check it and
        /// report the failure as an exception the caller is able to handle.
        if (!maybe_column.ok())
            throw Exception("Cannot create a column of nulls for field '" + field->name() + "': "
                + maybe_column.status().ToString());

        columns.emplace_back(maybe_column.MoveValueUnsafe());
    }

    return columns;
}
/// Builds a block (Arrow record batch) for `schema` with `num_rows` rows.
/// The columns are whatever columnsFromHeader() produces for the same schema and row count.
///
/// @param schema    Header (shared Arrow schema) of the block.
/// @param num_rows  Number of rows of the block and of each of its columns (0 by default).
/// @return The record batch returned by arrow::RecordBatch::Make.
inline Block blockFromHeader(const Header & schema, size_t num_rows = 0)
{
return arrow::RecordBatch::Make(schema, num_rows, columnsFromHeader(schema, num_rows));
}
/// Downcast that is verified only in debug builds.
///
/// With NDEBUG defined this is a plain static_cast. Otherwise the conversion goes through
/// dynamic_cast: a pointer target that does not match (dynamic_cast yields nullptr) results in
/// std::bad_cast, and a reference target relies on dynamic_cast itself throwing std::bad_cast.
///
/// @tparam To    Target pointer or reference type.
/// @tparam From  Deduced type of the source expression.
/// @param from   Pointer or reference to convert.
/// @return `from` converted to `To`.
template <typename To, typename From>
inline To assert_cast(From && from)
{
#ifdef NDEBUG
    return static_cast<To>(from);
#else
    if constexpr (std::is_pointer_v<To>)
    {
        /// Pointers signal a failed dynamic_cast with nullptr; turn that into an exception.
        To converted = dynamic_cast<To>(from);
        if (converted == nullptr)
            throw std::bad_cast();
        return converted;
    }
    else
    {
        /// For references dynamic_cast throws std::bad_cast on its own.
        return dynamic_cast<To>(from);
    }
#endif
}
/// Casts a read-only column pointer to `To`, tolerating a different element type of equal width.
///
/// With NDEBUG defined this is a plain static_cast. Otherwise the cast is accepted without a
/// run-time type check when the column's Arrow type is primitive and its bit width divided by 8
/// equals sizeof of the target's value_type; any other column is routed through assert_cast.
///
/// @tparam To   Target pointer type; the pointee must provide a `value_type` member.
/// @param from  Column to cast.
/// @return `from` converted to `To`.
template <typename To>
inline To assert_same_size_cast(const IColumn * from)
{
#ifdef NDEBUG
    return static_cast<To>(from);
#else
    using TargetArray = std::remove_pointer_t<To>;
    using TargetValue = typename TargetArray::value_type;

    const auto source_id = from->type_id();
    const bool same_width = arrow::is_primitive(source_id) && sizeof(TargetValue) == (bit_width(source_id) / 8);

    /// Only columns that fail the width test need the checked cast.
    if (!same_width)
        return assert_cast<To>(from);
    return static_cast<To>(from);
#endif
}
/// Casts a mutable column (builder) reference to `To`, tolerating a different element type of equal width.
///
/// With NDEBUG defined this is a plain static_cast. Otherwise the cast is accepted without a
/// run-time type check when the builder's Arrow type is primitive and its bit width divided by 8
/// equals sizeof of the target's value_type; any other builder is routed through assert_cast.
///
/// @tparam To   Target reference type; the referred type must provide a `value_type` member.
/// @param from  Builder to cast.
/// @return `from` converted to `To`.
template <typename To>
inline To assert_same_size_cast(MutableColumn & from)
{
#ifdef NDEBUG
    return static_cast<To>(from);
#else
    using TargetBuilder = std::remove_reference_t<To>;
    using TargetValue = typename TargetBuilder::value_type;

    const auto source_id = from.type()->id();
    const bool same_width = arrow::is_primitive(source_id) && sizeof(TargetValue) == (bit_width(source_id) / 8);

    /// Only builders that fail the width test need the checked cast.
    if (!same_width)
        return assert_cast<To>(from);
    return static_cast<To>(from);
#endif
}
}