Skip to content

Commit

Permalink
Merge pull request #483 from jpmorganchase/data-slice
Browse files Browse the repository at this point in the history
Implement t_data_slice API for data extraction from 0 & 1-sided views
  • Loading branch information
texodus authored Mar 14, 2019
2 parents f019c0f + 0b06ac5 commit 6aae31b
Show file tree
Hide file tree
Showing 10 changed files with 311 additions and 131 deletions.
3 changes: 1 addition & 2 deletions cpp/perspective/src/cpp/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,8 @@ t_config::t_config(const std::vector<std::string>& row_pivots, const t_aggspec&

t_config::t_config(const std::vector<t_pivot>& row_pivots,
const std::vector<t_aggspec>& aggregates, t_filter_op combiner,
const std::vector<t_fterm>& fterms, bool column_only)
const std::vector<t_fterm>& fterms)
: m_row_pivots(row_pivots)
, m_column_only(column_only)
, m_aggregates(aggregates)
, m_totals(TOTALS_BEFORE)
, m_combiner(combiner)
Expand Down
77 changes: 44 additions & 33 deletions cpp/perspective/src/cpp/data_slice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,30 +15,32 @@ namespace perspective {
template <typename CTX_T>
t_data_slice<CTX_T>::t_data_slice(std::shared_ptr<CTX_T> ctx, t_uindex start_row,
t_uindex end_row, t_uindex start_col, t_uindex end_col,
std::shared_ptr<std::vector<t_tscalar>> slice,
std::shared_ptr<std::vector<std::string>> column_names)
const std::shared_ptr<std::vector<t_tscalar>>& slice, std::vector<std::string> column_names)
: m_ctx(ctx)
, m_start_row(start_row)
, m_end_row(end_row)
, m_start_col(start_col)
, m_end_col(end_col)
, m_slice(slice)
, m_column_names(column_names) {}
, m_column_names(column_names) {
m_stride = m_end_col - m_start_col;
}

template <typename CTX_T>
t_data_slice<CTX_T>::t_data_slice(std::shared_ptr<CTX_T> ctx, t_uindex start_row,
t_uindex end_row, t_uindex start_col, t_uindex end_col,
std::shared_ptr<std::vector<t_tscalar>> slice,
std::shared_ptr<std::vector<std::string>> column_names,
std::shared_ptr<std::vector<t_uindex>> column_indices)
const std::shared_ptr<std::vector<t_tscalar>>& slice, std::vector<std::string> column_names,
std::vector<t_uindex> column_indices)
: m_ctx(ctx)
, m_start_row(start_row)
, m_end_row(end_row)
, m_start_col(start_col)
, m_end_col(end_col)
, m_slice(slice)
, m_column_names(column_names)
, m_column_indices(column_indices) {}
, m_column_indices(column_indices) {
m_stride = m_end_col - m_start_col;
}

template <typename CTX_T>
t_data_slice<CTX_T>::~t_data_slice() {}
Expand All @@ -53,34 +55,30 @@ t_data_slice<CTX_T>::~t_data_slice() {}
* @return t_tscalar a valid scalar containing the underlying data, or a new
* t_tscalar initialized with an invalid flag.
*/
template <>
t_tscalar
t_data_slice<t_ctx0>::get(t_uindex ridx, t_uindex col) const {
t_tscalar rv;
rv.clear();
return rv;
}

template <>
template <typename CTX_T>
t_tscalar
t_data_slice<t_ctx1>::get(t_uindex ridx, t_uindex cidx) const {
t_data_slice<CTX_T>::get(t_uindex ridx, t_uindex cidx) const {
t_uindex idx = get_slice_idx(ridx, cidx);
t_tscalar rv;
rv.clear();
if (idx >= m_slice->size()) {
rv.clear();
} else {
rv = m_slice->at(idx);
}
return rv;
}

template <>
t_tscalar
t_data_slice<t_ctx2>::get(t_uindex ridx, t_uindex cidx) const {
t_tscalar rv;
rv.clear();
return rv;
}
t_uindex idx = get_slice_idx(ridx, cidx);
t_tscalar rv = m_slice->operator[](idx);
/* if (m_column_indices.size() > 0) {
t_uindex idx_skip_headers;
} else {
template <>
std::vector<t_tscalar>
t_data_slice<t_ctx0>::get_row_path(t_uindex idx) const {
return std::vector<t_tscalar>();
} */
return rv;
}

template <typename CTX_T>
Expand All @@ -89,6 +87,13 @@ t_data_slice<CTX_T>::get_row_path(t_uindex idx) const {
return m_ctx->unity_get_row_path(idx);
}

template <typename CTX_T>
t_uindex
t_data_slice<CTX_T>::get_slice_idx(t_uindex ridx, t_uindex cidx) const {
t_uindex idx = (ridx - m_start_row) * m_stride + (cidx - m_start_col);
return idx;
}

// Getters
template <typename CTX_T>
std::shared_ptr<CTX_T>
Expand All @@ -103,28 +108,34 @@ t_data_slice<CTX_T>::get_slice() const {
}

template <typename CTX_T>
std::shared_ptr<std::vector<std::string>>
const std::vector<std::string>&
t_data_slice<CTX_T>::get_column_names() const {
return m_column_names;
}

template <typename CTX_T>
std::shared_ptr<std::vector<t_uindex>>
const std::vector<t_uindex>&
t_data_slice<CTX_T>::get_column_indices() const {
return m_column_indices;
}

template <typename CTX_T>
bool
t_data_slice<CTX_T>::is_column_only() const {
auto config = m_ctx->get_config();
return config.is_column_only();
return false;
}

template <>
bool
t_data_slice<t_ctx0>::is_column_only() const {
return false;
t_data_slice<t_ctx2>::is_column_only() const {
auto config = m_ctx->get_config();
return config.is_column_only();
}

template <typename CTX_T>
t_uindex
t_data_slice<CTX_T>::get_stride() const {
return m_stride;
}

template <typename CTX_T>
Expand All @@ -141,4 +152,4 @@ t_data_slice<CTX_T>::get_data_extents() const {
template class t_data_slice<t_ctx0>;
template class t_data_slice<t_ctx1>;
template class t_data_slice<t_ctx2>;
} // end namespace perspective
} // end namespace perspective
86 changes: 70 additions & 16 deletions cpp/perspective/src/cpp/emscripten.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1663,7 +1663,6 @@ namespace binding {
auto schema = gnode->get_tblschema();
t_config view_config = make_view_config<val>(schema, separator, date_parser, config);

bool column_only = view_config.is_column_only();
auto aggregates = view_config.get_aggregates();
auto row_pivots = view_config.get_row_pivots();
auto filter_op = view_config.get_combiner();
Expand All @@ -1676,7 +1675,7 @@ namespace binding {
}

auto ctx = make_context_one(schema, row_pivots, filter_op, filters, aggregates, sorts,
pivot_depth, column_only, pool, gnode, name);
pivot_depth, pool, gnode, name);

auto view_ptr
= std::make_shared<View<t_ctx1>>(pool, ctx, gnode, name, separator, view_config);
Expand Down Expand Up @@ -1765,9 +1764,9 @@ namespace binding {
std::shared_ptr<t_ctx1>
make_context_one(t_schema schema, std::vector<t_pivot> pivots, t_filter_op combiner,
std::vector<t_fterm> filters, std::vector<t_aggspec> aggregates,
std::vector<t_sortspec> sorts, std::int32_t pivot_depth, bool column_only, t_pool* pool,
std::vector<t_sortspec> sorts, std::int32_t pivot_depth, t_pool* pool,
std::shared_ptr<t_gnode> gnode, std::string name) {
auto cfg = t_config(pivots, aggregates, combiner, filters, column_only);
auto cfg = t_config(pivots, aggregates, combiner, filters);
auto ctx1 = std::make_shared<t_ctx1>(schema, cfg);

ctx1->init();
Expand Down Expand Up @@ -1876,7 +1875,7 @@ namespace binding {
std::uint32_t start_col, std::uint32_t end_col) {
val arr = val::array();
auto data_slice = view->get_data(start_row, end_row, start_col, end_col);
auto slice = data_slice.get_slice();
auto slice = data_slice->get_slice();
for (auto idx = 0; idx < slice->size(); ++idx) {
arr.set(idx, scalar_to_val(slice->at(idx)));
}
Expand All @@ -1901,15 +1900,15 @@ namespace binding {
std::uint32_t start_col, std::uint32_t end_col) {
val arr = val::array();
auto data_slice = view->get_data(start_row, end_row, start_col, end_col);
auto slice = data_slice.get_slice();
auto column_indices = data_slice.get_column_indices();
auto slice = data_slice->get_slice();
auto column_indices = data_slice->get_column_indices();

if (column_indices->size() > 0) {
if (column_indices.size() > 0) {
t_uindex i = 0;
auto iter = slice->begin();
while (iter != slice->end()) {
t_uindex prev = column_indices->front();
for (auto idx = column_indices->begin(); idx != column_indices->end();
t_uindex prev = column_indices.front();
for (auto idx = column_indices.begin(); idx != column_indices.end();
idx++, i++) {
t_uindex col_num = *idx;
iter += col_num - prev;
Expand All @@ -1928,6 +1927,44 @@ namespace binding {
return arr;
}

/**
* @brief Get the t_data_slice object, which contains an underlying slice of data and
* metadata required to interact with it.
*
* @param view
* @param start_row
* @param end_row
* @param start_col
* @param end_col
* @return val
*/
template <typename CTX_T>
std::shared_ptr<t_data_slice<CTX_T>>
get_data_slice(std::shared_ptr<View<CTX_T>> view, std::uint32_t start_row,
std::uint32_t end_row, std::uint32_t start_col, std::uint32_t end_col) {
auto data_slice = view->get_data(start_row, end_row, start_col, end_col);
return data_slice;
}

/**
* @brief Retrieve a single value from the data slice and serialize it to an output
* type that interfaces with the binding language.
*
* @param view
* @param start_row
* @param end_row
* @param start_col
* @param end_col
* @return val
*/
template <typename CTX_T>
val
get_from_data_slice(
std::shared_ptr<t_data_slice<CTX_T>> data_slice, t_uindex ridx, t_uindex cidx) {
auto d = data_slice->get(ridx, cidx);
return scalar_to_val(d);
}

} // end namespace binding
} // end namespace perspective

Expand Down Expand Up @@ -1986,7 +2023,6 @@ EMSCRIPTEN_BINDINGS(perspective) {
.function("get_aggregates", &View<t_ctx0>::get_aggregates)
.function("get_filters", &View<t_ctx0>::get_filters)
.function("get_sorts", &View<t_ctx0>::get_sorts)
.function("get_row_path", &View<t_ctx0>::get_row_path)
.function("get_step_delta", &View<t_ctx0>::get_step_delta)
.function("is_column_only", &View<t_ctx0>::is_column_only);

Expand All @@ -2009,7 +2045,6 @@ EMSCRIPTEN_BINDINGS(perspective) {
.function("get_aggregates", &View<t_ctx1>::get_aggregates)
.function("get_filters", &View<t_ctx1>::get_filters)
.function("get_sorts", &View<t_ctx1>::get_sorts)
.function("get_row_path", &View<t_ctx1>::get_row_path)
.function("get_step_delta", &View<t_ctx1>::get_step_delta)
.function("is_column_only", &View<t_ctx1>::is_column_only);

Expand Down Expand Up @@ -2070,9 +2105,22 @@ EMSCRIPTEN_BINDINGS(perspective) {
*
* t_data_slice
*/
class_<t_data_slice<t_ctx0>>("t_data_slice_ctx0");
class_<t_data_slice<t_ctx1>>("t_data_slice_ctx1");
class_<t_data_slice<t_ctx2>>("t_data_slice_ctx2");
class_<t_data_slice<t_ctx0>>("t_data_slice_ctx0")
.smart_ptr<std::shared_ptr<t_data_slice<t_ctx0>>>("shared_ptr<t_data_slice<t_ctx0>>>")
.function<const std::vector<std::string>&>(
"get_column_names", &t_data_slice<t_ctx0>::get_column_names);

class_<t_data_slice<t_ctx1>>("t_data_slice_ctx1")
.smart_ptr<std::shared_ptr<t_data_slice<t_ctx1>>>("shared_ptr<t_data_slice<t_ctx1>>>")
.function<const std::vector<std::string>&>(
"get_column_names", &t_data_slice<t_ctx1>::get_column_names)
.function<std::vector<t_tscalar>>("get_row_path", &t_data_slice<t_ctx1>::get_row_path);

class_<t_data_slice<t_ctx2>>("t_data_slice_ctx2")
.smart_ptr<std::shared_ptr<t_data_slice<t_ctx2>>>("shared_ptr<t_data_slice<t_ctx2>>>")
.function<const std::vector<std::string>&>(
"get_column_names", &t_data_slice<t_ctx2>::get_column_names);

/******************************************************************************
*
* t_ctx0
Expand Down Expand Up @@ -2197,4 +2245,10 @@ EMSCRIPTEN_BINDINGS(perspective) {
function("make_view_zero", &make_view_zero<val>, allow_raw_pointers());
function("make_view_one", &make_view_one<val>, allow_raw_pointers());
function("make_view_two", &make_view_two<val>, allow_raw_pointers());
}
function("get_data_slice_zero", &get_data_slice<t_ctx0>, allow_raw_pointers());
function("get_from_data_slice_zero", &get_from_data_slice<t_ctx0>, allow_raw_pointers());
function("get_data_slice_one", &get_data_slice<t_ctx1>, allow_raw_pointers());
function("get_from_data_slice_one", &get_from_data_slice<t_ctx1>, allow_raw_pointers());
function("get_data_slice_two", &get_data_slice<t_ctx2>, allow_raw_pointers());
function("get_from_data_slice_two", &get_from_data_slice<t_ctx2>, allow_raw_pointers());
}
Loading

0 comments on commit 6aae31b

Please sign in to comment.