Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
willdealtry committed Sep 12, 2024
1 parent d5292a2 commit ac514de
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 6 deletions.
2 changes: 2 additions & 0 deletions cpp/arcticdb/arrow/arrow_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <sparrow/array.hpp>
#include <sparrow/arrow_interface/array_data_to_arrow_array_converters.hpp>
#include <sparrow/arrow_interface/arrow_array/smart_pointers.hpp>
#include <sparrow/external_array.hpp>

#include <arcticdb/column_store/memory_segment.hpp>

Expand Down Expand Up @@ -48,4 +49,5 @@ std::vector<sparrow::arrow_array_unique_ptr> segment_to_arrow_arrays(SegmentInMe
return output;
}


} // namespace arcticdb
2 changes: 1 addition & 1 deletion cpp/arcticdb/entity/output_type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include <cstdint>

namespace arcticdb {
enum class OutputType : uint8_t {
enum class OutputFormat : uint8_t {
PANDAS,
ARROW,
PARQUET
Expand Down
6 changes: 3 additions & 3 deletions cpp/arcticdb/pipeline/read_options.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ struct ReadOptions {
std::optional<bool> set_tz_;
std::optional<bool> optimise_string_memory_;
std::optional<bool> batch_throw_on_error_;
OutputType output_type_ = OutputType::PANDAS;
OutputFormat output_format_ = OutputFormat::PANDAS;

void set_force_strings_to_fixed(const std::optional<bool>& force_strings_to_fixed) {
force_strings_to_fixed_ = force_strings_to_fixed;
Expand Down Expand Up @@ -60,8 +60,8 @@ struct ReadOptions {
batch_throw_on_error_ = batch_throw_on_error;
}

void set_output_type(OutputType output_type) {
output_type_ = output_type;
// Stores the requested output format on these read options, replacing
// whatever value output_format_ held before.
void set_output_format(OutputFormat output_format) {
    this->output_format_ = output_format;
}
};
} //namespace arcticdb
9 changes: 7 additions & 2 deletions cpp/arcticdb/version/python_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,15 +159,19 @@ void register_bindings(py::module &version, py::exception<arcticdb::ArcticExcept
}))
.def(py::init([](py::array value_list){
return std::make_shared<ValueSet>(value_list);
}))
;
}));

// Register VersionQuery with the Python module under the name
// "PythonVersionStoreVersionQuery": default-constructible from Python, with the
// set_snap_name / set_timestamp / set_version member functions bound under the same names.
py::class_<VersionQuery>(version, "PythonVersionStoreVersionQuery")
.def(py::init())
.def("set_snap_name", &VersionQuery::set_snap_name)
.def("set_timestamp", &VersionQuery::set_timestamp)
.def("set_version", &VersionQuery::set_version);

// Register the OutputFormat enum with the Python module as "OutputFormat", exposing the
// PANDAS, ARROW and PARQUET enumerators so Python code can pass them to
// PythonVersionStoreReadOptions.set_output_format.
py::enum_<OutputFormat>(version, "OutputFormat")
.value("PANDAS", OutputFormat::PANDAS)
.value("ARROW", OutputFormat::ARROW)
.value("PARQUET", OutputFormat::PARQUET);

py::class_<ReadOptions>(version, "PythonVersionStoreReadOptions")
.def(py::init())
.def("set_force_strings_to_object", &ReadOptions::set_force_strings_to_object)
Expand All @@ -177,6 +181,7 @@ void register_bindings(py::module &version, py::exception<arcticdb::ArcticExcept
.def("set_set_tz", &ReadOptions::set_set_tz)
.def("set_optimise_string_memory", &ReadOptions::set_optimise_string_memory)
.def("set_batch_throw_on_error", &ReadOptions::set_batch_throw_on_error)
.def("set_output_format", &ReadOptions::set_output_format)
.def_property_readonly("incompletes", &ReadOptions::get_incompletes);

version.def("write_dataframe_to_file", &write_dataframe_to_file);
Expand Down
2 changes: 2 additions & 0 deletions python/arcticdb/version_store/_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
from arcticdb_ext.version_store import StreamDescriptorMismatch
from arcticdb_ext.version_store import DataError
from arcticdb_ext.version_store import sorted_value_name
from arcticdb_ext.version_store import OutputFormat
from arcticdb.authorization.permissions import OpenMode
from arcticdb.exceptions import ArcticDbNotYetImplemented, ArcticNativeException
from arcticdb.flattener import Flattener
Expand Down Expand Up @@ -1624,6 +1625,7 @@ def _get_read_options(self, **kwargs):
read_options = _PythonVersionStoreReadOptions()
read_options.set_force_strings_to_object(_assume_false("force_string_to_object", kwargs))
read_options.set_optimise_string_memory(_assume_false("optimise_string_memory", kwargs))
# Fall back to pandas output when the caller does not pass output_format. The fallback has to be
# given to kwargs.get(): set_output_format is bound without named arguments and accepts one value.
read_options.set_output_format(kwargs.get("output_format", OutputFormat.PANDAS))
read_options.set_dynamic_schema(
self.resolve_defaults("dynamic_schema", proto_cfg, global_default=False, **kwargs)
)
Expand Down
11 changes: 11 additions & 0 deletions python/tests/unit/arcticdb/version_store/test_arrow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from arcticdb_ext.version_store import OutputFormat
import pandas as pd
import numpy as np


def test_basic_roundtrip(lmdb_version_store_v1):
    """Write a ten-row integer frame under the symbol "arrow" and read it back with Arrow output.

    The read result is only printed; nothing is asserted about its contents.
    """
    store = lmdb_version_store_v1
    frame = pd.DataFrame({"x": np.arange(10)})
    store.write("arrow", frame)
    arrow_item = store.read("arrow", output_format=OutputFormat.ARROW)
    print(arrow_item)

0 comments on commit ac514de

Please sign in to comment.