Skip to content

Commit

Permalink
update to apache-arrow@0.3.0
Browse files Browse the repository at this point in the history
- implements Arrow column reflection as an Arrow TypeVisitor
- updates main.cpp to reflect Arrow 0.3.0's internal API changes
  • Loading branch information
trxcllnt committed Feb 22, 2018
1 parent 8f63509 commit 2458efa
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 122 deletions.
2 changes: 1 addition & 1 deletion packages/perspective/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"author": "",
"license": "Apache",
"dependencies": {
"@apache-arrow/es5-esm": "^0.2.0",
"@apache-arrow/es5-esm": "^0.3.0",
"@jpmorganchase/perspective-common": "^0.1.0",
"babel-runtime": "^6.26.0",
"bluebird": "^3.5.1",
Expand Down
41 changes: 17 additions & 24 deletions packages/perspective/src/cpp/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,15 +179,16 @@ namespace arrow {
{

// Copy out dictionary encoded data
val values = dcol["data"]["values"];
val vdata = values["data"];
val dictionary = dcol["dictionary"];
// ptaylor: This assumes the dictionary is either a Binary or Utf8 Vector. Should it support other Vector types?
val vdata = dictionary["values"];
t_int32 vsize = vdata["length"].as<t_int32>();
std::vector<t_uchar> data;
data.reserve(vsize);
data.resize(vsize);
vecFromTypedArray(vdata, data.data(), vsize);

val voffsets = values["offsets"];
val voffsets = dictionary["valueOffsets"];
t_int32 osize = voffsets["length"].as<t_int32>();
std::vector<t_int32> offsets;
offsets.reserve(osize);
Expand All @@ -197,7 +198,7 @@ namespace arrow {
t_vocab* vocab = col->_get_vocab();
t_str elem;

t_int32 dsize = dcol["data"]["length"].as<t_int32>();
t_int32 dsize = dictionary["length"].as<t_int32>();
for (t_int32 i = 0; i < dsize; ++i) {
t_int32 bidx = offsets[i];
std::size_t es = offsets[i+1] - bidx;
Expand All @@ -219,7 +220,7 @@ _fill_col(val dcol, t_col_sptr col, t_bool is_arrow)
t_uindex nrows = col->size();

if (is_arrow) {
val data = dcol["data"];
val data = dcol["values"];
arrow::vecFromTypedArray(data, col->get_nth<T>(0), nrows);
} else {
for (auto i = 0; i < nrows; ++i)
Expand All @@ -237,7 +238,7 @@ _fill_col<t_int64>(val dcol, t_col_sptr col, t_bool is_arrow)
t_uindex nrows = col->size();

if (is_arrow) {
val data = dcol["data"];
val data = dcol["values"];
// Arrow packs each 64-bit value into two 32-bit ints, so copy nrows * 2 elements
arrow::vecFromTypedArray(data, col->get_nth<t_int64>(0), nrows * 2);
} else {
Expand All @@ -252,17 +253,17 @@ _fill_col<t_time>(val dcol, t_col_sptr col, t_bool is_arrow)
t_uindex nrows = col->size();

if (is_arrow) {
val data = dcol["data"];
val data = dcol["values"];
// Arrow packs each 64-bit value into two 32-bit ints, so copy nrows * 2 elements
arrow::vecFromTypedArray(data, col->get_nth<t_time>(0), nrows*2);

t_str unit = dcol["unit"].as<t_str>();
if (unit != "MILLISECOND") {
t_int8 unit = dcol["type"]["unit"].as<t_int8>();
if (unit != /* Arrow.enum_.TimeUnit.MILLISECOND */ 1) {
// Slow path - need to convert each value
t_int64 factor = 1;
if (unit == "NANOSECOND") {
if (unit == /* Arrow.enum_.TimeUnit.NANOSECOND */ 3) {
factor = 1e6;
} else if (unit == "MICROSECOND") {
} else if (unit == /* Arrow.enum_.TimeUnit.MICROSECOND */ 2) {
factor = 1e3;
}
for (auto i = 0; i < nrows; ++i)
Expand All @@ -287,7 +288,7 @@ _fill_col<t_bool>(val dcol, t_col_sptr col, t_bool is_arrow)

if (is_arrow) {
// arrow packs bools into a bitmap
val data = dcol["data"];
val data = dcol["values"];
for (auto i = 0; i < nrows; ++i)
{
t_uint8 elem = data[i / 8].as<t_uint8>();
Expand All @@ -313,7 +314,7 @@ _fill_col<std::string>(val dcol, t_col_sptr col, t_bool is_arrow)

if (is_arrow) {
if (dcol["constructor"]["name"].as<t_str>() == "DictionaryVector") {
val vkeys = dcol["keys"]["data"];
val vkeys = dcol["indicies"]["values"];

// Perspective stores string indices in a 32-bit unsigned array.
// JavaScript's typed arrays handle copying between arrays of different bit widths properly.
Expand All @@ -333,18 +334,15 @@ _fill_col<std::string>(val dcol, t_col_sptr col, t_bool is_arrow)
}
} else if (dcol["constructor"]["name"].as<t_str>() == "Utf8Vector" ||
dcol["constructor"]["name"].as<t_str>() == "BinaryVector") {
if (dcol["constructor"]["name"].as<t_str>() == "Utf8Vector") {
dcol = dcol["values"];
}

val vdata = dcol["data"];
val vdata = dcol["values"];
t_int32 vsize = vdata["length"].as<t_int32>();
std::vector<t_uint8> data;
data.reserve(vsize);
data.resize(vsize);
arrow::vecFromTypedArray(vdata, data.data(), vsize);

val voffsets = dcol["offsets"];
val voffsets = dcol["valueOffsets"];
t_int32 osize = voffsets["length"].as<t_int32>();
std::vector<t_int32> offsets;
offsets.reserve(osize);
Expand Down Expand Up @@ -460,12 +458,7 @@ _fill_data(t_table_sptr tbl,
if (null_count == 0) {
col->valid_raw_fill(true);
} else {
val validity = dcol;
if (dcol["constructor"]["name"].as<t_str>() == "Utf8Vector") {
validity = dcol["values"]["validity"]["data"];
} else {
validity = dcol["validity"]["data"];
}
val validity = dcol["nullBitmap"];
arrow::fill_col_valid(validity, col);
}
}
Expand Down
Loading

0 comments on commit 2458efa

Please sign in to comment.