Skip to content

Commit

Permalink
Merge pull request #46 from project-tsurugi/reverse_scan_api
Browse files Browse the repository at this point in the history
reverse scan apiの制限つき実装
  • Loading branch information
kuron99 authored Dec 16, 2024
2 parents 844d987 + 5b11549 commit a68c4ca
Show file tree
Hide file tree
Showing 4 changed files with 277 additions and 13 deletions.
27 changes: 21 additions & 6 deletions include/interface_scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ template<class ValueType>
scan(tree_instance* ti, std::string_view l_key, scan_endpoint l_end,
std::string_view r_key, scan_endpoint r_end,
std::vector<std::tuple<std::string, ValueType*, std::size_t>>& tuple_list,
std::vector<std::pair<node_version64_body, node_version64*>>*
node_version_vec,
std::size_t max_size) {
std::vector<std::pair<node_version64_body, node_version64*>>* node_version_vec,
std::size_t max_size,
bool right_to_left = false) {

/**
* Prohibition : std::string_view{nullptr, non-zero value}.
*/
Expand All @@ -45,6 +46,13 @@ scan(tree_instance* ti, std::string_view l_key, scan_endpoint l_end,
return status::ERR_BAD_USAGE;
}

/**
* Currently right_to_left is restricted to scans whose right endpoint is unbounded (r_end == scan_endpoint::INF) and whose max_size == 1.
*/
if (right_to_left && (r_end != scan_endpoint::INF || max_size != 1)) {
return status::ERR_BAD_USAGE;
}

retry_from_root:
// clear out parameter, this must be after retry_from_root for retry.
tuple_list.clear();
Expand Down Expand Up @@ -73,6 +81,12 @@ scan(tree_instance* ti, std::string_view l_key, scan_endpoint l_end,
traverse_key_view.size());
}
}
if (right_to_left) {
// assuming r_end == scan_endpoint::INF
// put maximum value of key_slice
key_slice = ~key_slice_type{0};
key_slice_length = sizeof(key_slice_type);
}
/**
* traverse tree to border node.
*/
Expand Down Expand Up @@ -108,7 +122,7 @@ scan(tree_instance* ti, std::string_view l_key, scan_endpoint l_end,
check_status = scan_border<ValueType>(
&target_border, traverse_key_view, l_end, r_key, r_end,
tuple_list, std::get<tuple_v_index>(node_and_v),
node_version_vec, key_prefix, max_size);
node_version_vec, key_prefix, max_size, right_to_left);

// check rc, success
if (check_status == status::OK_SCAN_END) { return status::OK; }
Expand All @@ -133,14 +147,15 @@ scan(std::string_view storage_name, std::string_view l_key, // NOLINT
std::vector<std::tuple<std::string, ValueType*, std::size_t>>& tuple_list,
std::vector<std::pair<node_version64_body, node_version64*>>*
node_version_vec = nullptr,
std::size_t max_size = 0) {
std::size_t max_size = 0,
bool right_to_left = false) {
// check storage
tree_instance* ti{};
if (storage::find_storage(storage_name, &ti) != status::OK) {
return status::WARN_STORAGE_NOT_EXIST;
}
return scan(ti, l_key, l_end, r_key, r_end, tuple_list, node_version_vec,
max_size);
max_size, right_to_left);
}

} // namespace yakushima
7 changes: 6 additions & 1 deletion include/kvs.h
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,10 @@ put(Token token, std::string_view storage_name, // NOLINT
* node_version_vec to make sure the values are not overwritten. This advantage
* is effective when the right end point is unknown but you want to scan to a
* specific value.
* @param[in] right_to_left If this argument is true, the scan is performed from the right end.
* When this is set to true, the current implementation has the following limitations: 1. max_size must be 1, so that at most
* one entry is hit and returned as the scan result; 2. r_end must be scan_endpoint::INF, so that the scan is performed from
* the unbounded right end. Status::ERR_BAD_USAGE is returned if these conditions are not met.
* @return Status::ERR_BAD_USAGE The arguments are invalid. Case 1: you use the
* same l_key and r_key and one of the endpoints is exclusive. Case 2: one of the
* endpoints uses a null key but the string size is not zero, like
Expand All @@ -272,6 +276,7 @@ scan(std::string_view storage_name, std::string_view l_key, // NOLINT
std::vector<std::tuple<std::string, ValueType*, std::size_t>>& tuple_list,
std::vector<std::pair<node_version64_body, node_version64*>>*
node_version_vec,
std::size_t max_size);
std::size_t max_size,
bool right_to_left);

} // namespace yakushima
21 changes: 15 additions & 6 deletions include/scan_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ scan(base_node* const root, const std::string_view l_key,
std::vector<std::tuple<std::string, ValueType*, std::size_t>>& tuple_list,
std::vector<std::pair<node_version64_body, node_version64*>>* const
node_version_vec,
const std::string& key_prefix, const std::size_t max_size) {
const std::string& key_prefix, const std::size_t max_size, bool right_to_left) {
/**
* Log size before scanning this node.
* This must be before retry label for retry at find border.
Expand Down Expand Up @@ -106,6 +106,12 @@ scan(base_node* const root, const std::string_view l_key,
} else {
if (!l_key.empty()) { memcpy(&ks, l_key.data(), l_key.size()); }
}
if (right_to_left) {
// assuming r_end == scan_endpoint::INF
// put maximum value of key_slice
ks = ~key_slice_type{0};
kl = sizeof(key_slice_type);
}
node_and_v = find_border(root, ks, kl, check_status);
if (check_status == status::WARN_RETRY_FROM_ROOT_OF_ALL) {
return status::OK_RETRY_AFTER_FB;
Expand All @@ -124,7 +130,7 @@ scan(base_node* const root, const std::string_view l_key,
// scan the border node
check_status = scan_border<ValueType>(
&bn, l_key, l_end, r_key, r_end, tuple_list, check_v,
node_version_vec, key_prefix, max_size);
node_version_vec, key_prefix, max_size, right_to_left);

// check rc, success
if (check_status == status::OK_SCAN_END) { return status::OK; }
Expand Down Expand Up @@ -170,7 +176,7 @@ scan_border(border_node** const target, const std::string_view l_key,
node_version64_body& v_at_fb,
std::vector<std::pair<node_version64_body, node_version64*>>* const
node_version_vec,
const std::string& key_prefix, const std::size_t max_size) {
const std::string& key_prefix, const std::size_t max_size, bool right_to_left) {
/**
* Log size before scanning this node.
* This must be before retry label for retry at find border.
Expand Down Expand Up @@ -215,16 +221,19 @@ scan_border(border_node** const target, const std::string_view l_key,
border_node* bn = *target;
/**
* next node pointer must be logged before optimistic verify.
* When right_to_left is true, we stop at the first border node and don't use this.
* TODO When we extend reverse scan for multiple entries, we need get_prev() here.
*/
border_node* next = bn->get_next();

/**
* get permutation at once.
* After scan border, optimistic verify support this is atomic.
*/
permutation perm(bn->get_permutation().get_body());
// check all elements in border node.
for (std::size_t i = 0; i < perm.get_cnk(); ++i) {
std::size_t index = perm.get_index_of_rank(i);
for (std::size_t i = 0, n = perm.get_cnk(); i < n; ++i) {
std::size_t index = perm.get_index_of_rank(right_to_left ? n-i-1 : i);
key_slice_type ks = bn->get_key_slice_at(index);
key_length_type kl = bn->get_key_length_at(index);
std::string full_key{key_prefix};
Expand Down Expand Up @@ -314,7 +323,7 @@ scan_border(border_node** const target, const std::string_view l_key,
}
check_status =
scan(next_layer, arg_l_key, arg_l_end, arg_r_key, arg_r_end,
tuple_list, node_version_vec, full_key, max_size);
tuple_list, node_version_vec, full_key, max_size, right_to_left);
if (check_status != status::OK) {
// failed. clean up tuple list and node vesion vec.
clean_up_tuple_list_nvc();
Expand Down
235 changes: 235 additions & 0 deletions test/scan/scan_reverse_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
/**
* @file scan_reverse_test.cpp
*/

#include <array>

#include "gtest/gtest.h"

#include "kvs.h"

using namespace yakushima;

namespace yakushima::testing {

// Name of the storage that the fixture's SetUp() creates and that the tests
// below pass to put/scan/remove.
std::string st{"s"}; // NOLINT

/**
 * @brief Fixture for the reverse (right-to-left) scan tests.
 * @details SetUp() initializes the library and creates the storage named by
 * @c st; TearDown() finalizes the library.
 */
class scan_reverse_test : public ::testing::Test {
    void SetUp() override {
        init();
        // Do not silently ignore a failed storage creation: every test body
        // relies on this storage, so report the failure where it happens.
        ASSERT_EQ(status::OK, create_storage(st));
    }

    void TearDown() override { fin(); }
};

/**
 * @brief Returns a view of the key held by one scan result entry.
 * @param[in] t A (key, value pointer, value size) entry.
 * @return View over element 0 of @a t; it refers to the string stored in @a t.
 */
std::string_view key(std::tuple<std::string, char*, std::size_t> const& t) {
    const std::string& entry_key = std::get<0>(t);
    return std::string_view{entry_key.data(), entry_key.size()};
}

/**
 * @brief Returns a view of the value bytes referenced by one scan result entry.
 * @param[in] t A (key, value pointer, value size) entry.
 * @return View starting at element 1 of @a t with the length given by element 2.
 */
std::string_view value(std::tuple<std::string, char*, std::size_t> const& t) {
    const char* const bytes = std::get<1>(t);
    const std::size_t length = std::get<2>(t);
    return {bytes, length};
}

TEST_F(scan_reverse_test, basic_usage) { // NOLINT
    // Two entries are stored; the reverse scans below are expected to return
    // only the k1/v1 entry.
    std::string k0("k0");
    std::string k1("k1");
    std::string v0("v0");
    std::string v1("v1");

    Token token{};
    ASSERT_EQ(enter(token), status::OK);
    ASSERT_EQ(status::OK, put(token, st, k0, v0.data(), v0.size()));
    ASSERT_EQ(status::OK, put(token, st, k1, v1.data(), v1.size()));

    std::vector<std::tuple<std::string, char*, std::size_t>> found{}; // NOLINT
    std::vector<std::pair<node_version64_body, node_version64*>> versions;

    // True when exactly one entry came back, the node version list has the
    // same length, and the entry's value bytes equal v1.
    auto holds_only_v1 = [&found, &versions, &v1]() {
        return found.size() == 1 && found.size() == versions.size() &&
               std::get<2>(found.at(0)) == v1.size() &&
               memcmp(std::get<1>(found.at(0)), v1.data(), v1.size()) == 0;
    };

    // Left endpoint given as INF.
    ASSERT_EQ(status::OK, scan<char>(st, "", scan_endpoint::INF, "",
                                     scan_endpoint::INF, found, &versions, 1,
                                     true));
    ASSERT_TRUE(holds_only_v1());
    ASSERT_EQ(found.size(), 1);
    EXPECT_EQ(key(found[0]), k1);
    EXPECT_EQ(value(found[0]), v1);

    // Left endpoint given as an inclusive empty key.
    ASSERT_EQ(status::OK, scan<char>(st, "", scan_endpoint::INCLUSIVE, "",
                                     scan_endpoint::INF, found, &versions, 1,
                                     true));
    ASSERT_EQ(found.size(), 1);
    EXPECT_EQ(key(found[0]), k1);
    EXPECT_EQ(value(found[0]), v1);

    // currently max_size must be 1 and r_end == INF for reverse scan
    ASSERT_EQ(status::ERR_BAD_USAGE,
              scan<char>(st, "", scan_endpoint::INCLUSIVE, "",
                         scan_endpoint::INCLUSIVE, found, &versions, 1, true));
    ASSERT_EQ(status::ERR_BAD_USAGE,
              scan<char>(st, "", scan_endpoint::INCLUSIVE, "",
                         scan_endpoint::INF, found, &versions, 0, true));

    ASSERT_EQ(leave(token), status::OK);
}

TEST_F(scan_reverse_test, scan_results_zero) { // NOLINT
    // Nothing is put into the storage, so the reverse scan must succeed and
    // must not report any entry.
    Token s{};
    ASSERT_EQ(status::OK, enter(s));
    std::vector<std::tuple<std::string, char*, std::size_t>> tup_lis{}; // NOLINT
    std::vector<std::pair<node_version64_body, node_version64*>> nv;
    ASSERT_EQ(status::OK,
              scan<char>(st, "", scan_endpoint::INF, "", scan_endpoint::INF,
                         tup_lis, &nv, 1, true));
    // The point of this test: zero results for an empty storage.
    ASSERT_TRUE(tup_lis.empty());
    ASSERT_EQ(status::OK, leave(s));
}

TEST_F(scan_reverse_test, long_key_scan) { // NOLINT
    // prepare: this test works on its own storage
    Token s{};
    ASSERT_EQ(status::OK, enter(s));
    std::string long_key_storage{"test"};
    ASSERT_EQ(status::OK, create_storage(long_key_storage));

    constexpr std::size_t kib = 1024;    // NOLINT
    constexpr std::size_t max_kib = 30;  // NOLINT
    for (std::size_t size_kib = 1; size_kib <= max_kib; ++size_kib) {
        const std::size_t key_length = size_kib * kib;

        // put two keys of the same length: all 'a' and all 'b'
        LOG(INFO) << "test key size " << size_kib << " KiB";
        std::string k0(key_length, 'a');
        std::string k1(key_length, 'b');
        std::string v0{"v0"};
        std::string v1{"v1"};
        ASSERT_EQ(status::OK,
                  put(s, long_key_storage, k0, v0.data(), v0.size()));
        ASSERT_EQ(status::OK,
                  put(s, long_key_storage, k1, v1.data(), v1.size()));

        // test: the reverse scan is expected to return the k1 just inserted
        std::vector<std::tuple<std::string, char*, std::size_t>> tup_lis{}; // NOLINT
        std::vector<std::pair<node_version64_body, node_version64*>> nv;
        ASSERT_EQ(status::OK,
                  scan<char>(long_key_storage, "", scan_endpoint::INF, "",
                             scan_endpoint::INF, tup_lis, &nv, 1, true));
        ASSERT_EQ(tup_lis.size(), 1);
        EXPECT_EQ(key(tup_lis[0]), k1);
        EXPECT_EQ(value(tup_lis[0]), v1);
    }

    // cleanup
    ASSERT_EQ(status::OK, leave(s));
}

TEST_F(scan_reverse_test, scan_single_border) { // NOLINT
    Token token{};
    ASSERT_EQ(enter(token), status::OK);

    // Insert the one-byte keys 0x00..0x07 with values "v0".."v7".
    for (int n = 0; n <= 7; ++n) { // NOLINT
        const char c = static_cast<char>(n);
        std::string v = "v" + std::to_string(n);
        ASSERT_EQ(status::OK,
                  put(token, st, std::string_view(&c, 1), v.data(), v.size()));
    }
    /**
     * border node
     * 0, 1, 2, 3, 4, 5, 6, 7
     */
    std::vector<std::tuple<std::string, char*, std::size_t>> result{}; // NOLINT
    std::vector<std::pair<node_version64_body, node_version64*>> versions;
    ASSERT_EQ(status::OK,
              scan<char>(st, "", scan_endpoint::INF, "", scan_endpoint::INF,
                         result, &versions, 1, true));
    ASSERT_EQ(result.size(), 1);               // NOLINT
    ASSERT_EQ(result.size(), versions.size()); // NOLINT

    // The reverse scan must return the entry with the largest key.
    EXPECT_EQ(key(result[0]), "\x07");
    EXPECT_EQ(value(result[0]), "v7");

    ASSERT_EQ(leave(token), status::OK);
}

TEST_F(scan_reverse_test, scan_two_borders) { // NOLINT
    Token token{};
    ASSERT_EQ(enter(token), status::OK);
    // Insert the one-byte keys 0x00..0x10 with values "v0".."v16".
    for (char i = 0; i <= 16; ++i) { // NOLINT
        char c = i;
        std::string v{"v"};
        v += std::to_string(i);
        ASSERT_EQ(status::OK,
                  put(token, st, std::string_view(&c, 1), v.data(), v.size()));
    }
    /**
     * border nodes
     * A: 0, 1, 2, 3, 4, 5, 6, 7,
     * B: 8, 9, 10, 11, 12, 13, 14, 15, 16
     */
    std::vector<std::tuple<std::string, char*, std::size_t>> tup{}; // NOLINT
    std::vector<std::pair<node_version64_body, node_version64*>> nv;
    ASSERT_EQ(status::OK,
              scan<char>(st, "", scan_endpoint::INF, "", scan_endpoint::INF,
                         tup, &nv, 1, true));
    ASSERT_EQ(tup.size(), 1);         // NOLINT
    ASSERT_EQ(tup.size(), nv.size()); // NOLINT

    // The reverse scan must return the entry with the largest key.
    EXPECT_EQ(key(tup[0]), "\x10");
    EXPECT_EQ(value(tup[0]), "v16");

    // Release the session obtained by enter(), as the sibling tests do.
    ASSERT_EQ(leave(token), status::OK);
}

TEST_F(scan_reverse_test, scan_three_borders) { // NOLINT
    Token token{};
    ASSERT_EQ(enter(token), status::OK);
    // Insert the one-byte keys 0x00..0x19 with values "v0".."v25".
    for (char i = 0; i <= 25; ++i) { // NOLINT
        char c = i;
        std::string v{"v"};
        v += std::to_string(i);
        ASSERT_EQ(status::OK,
                  put(token, st, std::string_view(&c, 1), v.data(), v.size()));
    }
    /**
     * now
     * A: 0, 1, 2, 3, 4, 5, 6, 7,
     * B: 8, 9, 10, 11, 12, 13, 14, 15,
     * C: 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
     * branch of A and B is 8
     * branch of B and C is 16
     */
    std::vector<std::tuple<std::string, char*, std::size_t>> tup{}; // NOLINT
    std::vector<std::pair<node_version64_body, node_version64*>> nv;
    ASSERT_EQ(status::OK,
              scan<char>(st, "", scan_endpoint::INF, "", scan_endpoint::INF,
                         tup, &nv, 1, true));
    ASSERT_EQ(tup.size(), 1);         // NOLINT
    ASSERT_EQ(tup.size(), nv.size()); // NOLINT

    // The reverse scan must return the entry with the largest key.
    EXPECT_EQ(key(tup[0]), "\x19");
    EXPECT_EQ(value(tup[0]), "v25");

    // Release the session obtained by enter(), as the sibling tests do.
    ASSERT_EQ(leave(token), status::OK);
}

TEST_F(scan_reverse_test, scan_three_borders_removed_last) { // NOLINT
    Token token{};
    ASSERT_EQ(enter(token), status::OK);
    // Insert the one-byte keys 0x00..0x19 with values "v0".."v25".
    for (char i = 0; i <= 25; ++i) { // NOLINT
        char c = i;
        std::string v{"v"};
        v += std::to_string(i);
        ASSERT_EQ(status::OK,
                  put(token, st, std::string_view(&c, 1), v.data(), v.size()));
    }
    /**
     * now
     * A: 0, 1, 2, 3, 4, 5, 6, 7,
     * B: 8, 9, 10, 11, 12, 13, 14, 15,
     * C: 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
     * branch of A and B is 8
     * branch of B and C is 16
     */
    std::vector<std::tuple<std::string, char*, std::size_t>> tup{}; // NOLINT
    std::vector<std::pair<node_version64_body, node_version64*>> nv;
    ASSERT_EQ(status::OK,
              scan<char>(st, "", scan_endpoint::INF, "", scan_endpoint::INF,
                         tup, &nv, 1, true));
    ASSERT_EQ(tup.size(), 1);         // NOLINT
    ASSERT_EQ(tup.size(), nv.size()); // NOLINT

    // Remove the largest key. This is done directly in the test body rather
    // than in a helper lambda: a fatal assertion inside a lambda only returns
    // from the lambda, so a failed removal would not have stopped the test.
    const char last_key = 25; // NOLINT
    ASSERT_EQ(status::OK, remove(token, st, std::string_view(&last_key, 1)));
    /**
     * now
     * A: 0, 1, 2, 3, 4, 5, 6, 7,
     * B: 8, 9, 10, 11, 12, 13, 14, 15,
     * C: 16, 17, 18, 19, 20, 21, 22, 23, 24
     */
    ASSERT_EQ(status::OK,
              scan<char>(st, "", scan_endpoint::INF, "", scan_endpoint::INF,
                         tup, &nv, 1, true));
    ASSERT_EQ(tup.size(), 1); // NOLINT

    // The new largest key must now be returned.
    EXPECT_EQ(key(tup[0]), "\x18");
    EXPECT_EQ(value(tup[0]), "v24");

    ASSERT_EQ(leave(token), status::OK);
}

} // namespace yakushima::testing

0 comments on commit a68c4ca

Please sign in to comment.