diff --git a/include/interface_scan.h b/include/interface_scan.h index 63f86b1..194d6f0 100644 --- a/include/interface_scan.h +++ b/include/interface_scan.h @@ -19,9 +19,10 @@ template scan(tree_instance* ti, std::string_view l_key, scan_endpoint l_end, std::string_view r_key, scan_endpoint r_end, std::vector>& tuple_list, - std::vector>* - node_version_vec, - std::size_t max_size) { + std::vector>* node_version_vec, + std::size_t max_size, + bool right_to_left = false) { + /** * Prohibition : std::string_view{nullptr, non-zero value}. */ @@ -45,6 +46,13 @@ scan(tree_instance* ti, std::string_view l_key, scan_endpoint l_end, return status::ERR_BAD_USAGE; } + /** + * currently right_to_left is restricted to unbounded scan with max_size == 1 + */ + if (right_to_left && (r_end != scan_endpoint::INF || max_size != 1)) { + return status::ERR_BAD_USAGE; + } + retry_from_root: // clear out parameter, this must be after retry_from_root for retry. tuple_list.clear(); @@ -73,6 +81,12 @@ scan(tree_instance* ti, std::string_view l_key, scan_endpoint l_end, traverse_key_view.size()); } } + if (right_to_left) { + // assuming r_end == scan_endpoint::INF + // put maximum value of key_slice + key_slice = ~key_slice_type{0}; + key_slice_length = sizeof(key_slice_type); + } /** * traverse tree to border node. 
*/ @@ -108,7 +122,7 @@ scan(tree_instance* ti, std::string_view l_key, scan_endpoint l_end, check_status = scan_border( &target_border, traverse_key_view, l_end, r_key, r_end, tuple_list, std::get(node_and_v), - node_version_vec, key_prefix, max_size); + node_version_vec, key_prefix, max_size, right_to_left); // check rc, success if (check_status == status::OK_SCAN_END) { return status::OK; } @@ -133,14 +147,15 @@ scan(std::string_view storage_name, std::string_view l_key, // NOLINT std::vector>& tuple_list, std::vector>* node_version_vec = nullptr, - std::size_t max_size = 0) { + std::size_t max_size = 0, + bool right_to_left = false) { // check storage tree_instance* ti{}; if (storage::find_storage(storage_name, &ti) != status::OK) { return status::WARN_STORAGE_NOT_EXIST; } return scan(ti, l_key, l_end, r_key, r_end, tuple_list, node_version_vec, - max_size); + max_size, right_to_left); } } // namespace yakushima diff --git a/include/kvs.h b/include/kvs.h index dc79788..8f84408 100644 --- a/include/kvs.h +++ b/include/kvs.h @@ -256,6 +256,10 @@ put(Token token, std::string_view storage_name, // NOLINT * node_version_vec to make sure the values ​​are not overwritten. This advantage * is effective when the right end point is unknown but you want to scan to a * specific value. + * @param[in] right_to_left If this argument is true, the scan is performed from the right end. + * When this is set to true, the current implementation has the following limitations: (1) max_size must be 1 so that at most + * one entry is hit and returned as the scan result; (2) r_end must be scan_endpoint::INF so that the scan is performed from + * the unbounded right end. Status::ERR_BAD_USAGE is returned if these conditions are not met. * @return Status::ERR_BAD_USAGE The arguments is invalid. In the case1: you use * same l_key and r_key and one of the endpoint is exclusive. 
case2: one of the * endpoint use null key but the string size is not zero like @@ -272,6 +276,7 @@ scan(std::string_view storage_name, std::string_view l_key, // NOLINT std::vector>& tuple_list, std::vector>* node_version_vec, - std::size_t max_size); + std::size_t max_size, + bool right_to_left); } // namespace yakushima diff --git a/include/scan_helper.h b/include/scan_helper.h index c2b0b6a..d25aa96 100644 --- a/include/scan_helper.h +++ b/include/scan_helper.h @@ -56,7 +56,7 @@ scan(base_node* const root, const std::string_view l_key, std::vector>& tuple_list, std::vector>* const node_version_vec, - const std::string& key_prefix, const std::size_t max_size) { + const std::string& key_prefix, const std::size_t max_size, bool right_to_left) { /** * Log size before scanning this node. * This must be before retry label for retry at find border. @@ -106,6 +106,12 @@ scan(base_node* const root, const std::string_view l_key, } else { if (!l_key.empty()) { memcpy(&ks, l_key.data(), l_key.size()); } } + if (right_to_left) { + // assuming r_end == scan_endpoint::INF + // put maximum value of key_slice + ks = ~key_slice_type{0}; + kl = sizeof(key_slice_type); + } node_and_v = find_border(root, ks, kl, check_status); if (check_status == status::WARN_RETRY_FROM_ROOT_OF_ALL) { return status::OK_RETRY_AFTER_FB; @@ -124,7 +130,7 @@ scan(base_node* const root, const std::string_view l_key, // scan the border node check_status = scan_border( &bn, l_key, l_end, r_key, r_end, tuple_list, check_v, - node_version_vec, key_prefix, max_size); + node_version_vec, key_prefix, max_size, right_to_left); // check rc, success if (check_status == status::OK_SCAN_END) { return status::OK; } @@ -170,7 +176,7 @@ scan_border(border_node** const target, const std::string_view l_key, node_version64_body& v_at_fb, std::vector>* const node_version_vec, - const std::string& key_prefix, const std::size_t max_size) { + const std::string& key_prefix, const std::size_t max_size, bool right_to_left) { /** 
* Log size before scanning this node. * This must be before retry label for retry at find border. @@ -215,16 +221,19 @@ scan_border(border_node** const target, const std::string_view l_key, border_node* bn = *target; /** * next node pointer must be logged before optimistic verify. + * When right_to_left is true, we stop at the first border node and don't use this. + * TODO When we extend reverse scan for multiple entries, we need get_prev() here. */ border_node* next = bn->get_next(); + /** * get permutation at once. * After scan border, optimistic verify support this is atomic. */ permutation perm(bn->get_permutation().get_body()); // check all elements in border node. - for (std::size_t i = 0; i < perm.get_cnk(); ++i) { - std::size_t index = perm.get_index_of_rank(i); + for (std::size_t i = 0, n = perm.get_cnk(); i < n; ++i) { + std::size_t index = perm.get_index_of_rank(right_to_left ? n-i-1 : i); key_slice_type ks = bn->get_key_slice_at(index); key_length_type kl = bn->get_key_length_at(index); std::string full_key{key_prefix}; @@ -314,7 +323,7 @@ scan_border(border_node** const target, const std::string_view l_key, } check_status = scan(next_layer, arg_l_key, arg_l_end, arg_r_key, arg_r_end, - tuple_list, node_version_vec, full_key, max_size); + tuple_list, node_version_vec, full_key, max_size, right_to_left); if (check_status != status::OK) { // failed. clean up tuple list and node vesion vec. 
clean_up_tuple_list_nvc(); diff --git a/test/scan/scan_reverse_test.cpp b/test/scan/scan_reverse_test.cpp new file mode 100644 index 0000000..e040585 --- /dev/null +++ b/test/scan/scan_reverse_test.cpp @@ -0,0 +1,235 @@ +/** + * @file scan_reverse_test.cpp + */ + +#include + +#include "gtest/gtest.h" + +#include "kvs.h" + +using namespace yakushima; + +namespace yakushima::testing { + +std::string st{"s"}; // NOLINT + +class scan_reverse_test : public ::testing::Test { + void SetUp() override { + init(); + create_storage(st); + } + + void TearDown() override { fin(); } +}; + +std::string_view key(std::tuple const& t) { + return std::get<0>(t); +} + +std::string_view value(std::tuple const& t) { + return std::string_view{std::get<1>(t), std::get<2>(t)}; +} + +TEST_F(scan_reverse_test, basic_usage) { // NOLINT + std::string k0("k0"); + std::string k1("k1"); + std::string v0("v0"); + std::string v1("v1"); + Token token{}; + ASSERT_EQ(enter(token), status::OK); + ASSERT_EQ(status::OK, put(token, st, k0, v0.data(), v0.size())); + ASSERT_EQ(status::OK, put(token, st, k1, v1.data(), v1.size())); + std::vector> tup_lis{}; // NOLINT + std::vector> nv; + auto verify = [&tup_lis, &nv, &v1]() { + if (tup_lis.size() != 1) { return false; } + if (tup_lis.size() != nv.size()) { return false; } + if (std::get<2>(tup_lis.at(0)) != v1.size()) { return false; } + if (memcmp(std::get<1>(tup_lis.at(0)), v1.data(), v1.size()) != 0) { + return false; + } + return true; + }; + ASSERT_EQ(status::OK, scan(st, "", scan_endpoint::INF, "", scan_endpoint::INF, tup_lis, &nv, 1, true)); + + ASSERT_EQ(true, verify()); + ASSERT_EQ(tup_lis.size(), 1); + EXPECT_EQ(key(tup_lis[0]), k1); + EXPECT_EQ(value(tup_lis[0]), v1); + + ASSERT_EQ(status::OK, scan(st, "", scan_endpoint::INCLUSIVE, "", scan_endpoint::INF, tup_lis, &nv, 1, true)); + ASSERT_EQ(tup_lis.size(), 1); + EXPECT_EQ(key(tup_lis[0]), k1); + EXPECT_EQ(value(tup_lis[0]), v1); + + // currently max_size must be 1 and r_end == INF for reverse 
scan + ASSERT_EQ(status::ERR_BAD_USAGE, + scan(st, "", scan_endpoint::INCLUSIVE, "", scan_endpoint::INCLUSIVE, tup_lis, &nv, 1, true)); + ASSERT_EQ(status::ERR_BAD_USAGE, + scan(st, "", scan_endpoint::INCLUSIVE, "", scan_endpoint::INF, tup_lis, &nv, 0, true)); + + ASSERT_EQ(leave(token), status::OK); +} + +TEST_F(scan_reverse_test, scan_results_zero) { // NOLINT + Token s{}; + ASSERT_EQ(status::OK, enter(s)); + std::vector> tup_lis{}; // NOLINT + std::vector> nv; + ASSERT_EQ(status::OK, + scan(st, "", scan_endpoint::INF, "", scan_endpoint::INF, tup_lis, &nv, 1, true)); + ASSERT_EQ(status::OK, leave(s)); +} + +TEST_F(scan_reverse_test, long_key_scan) { // NOLINT + // prepare + Token s{}; + ASSERT_EQ(status::OK, enter(s)); + std::string st{"test"}; + ASSERT_EQ(status::OK, create_storage(st)); + + for (std::size_t i = 1024; i <= 1024 * 30; i += 1024) { // NOLINT + // put + LOG(INFO) << "test key size " << i / 1024 << " KiB"; + std::string k0(i, 'a'); + std::string k1(i, 'b'); + std::string v0{"v0"}; + std::string v1{"v1"}; + ASSERT_EQ(status::OK, put(s, st, k0, v0.data(), v0.size())); + ASSERT_EQ(status::OK, put(s, st, k1, v1.data(), v1.size())); + + // test: scan + std::vector> tup_lis{}; // NOLINT + std::vector> nv; + ASSERT_EQ(status::OK, scan(st, "", scan_endpoint::INF, "", scan_endpoint::INF, tup_lis, &nv, 1, true)); + ASSERT_EQ(tup_lis.size(), 1); + EXPECT_EQ(key(tup_lis[0]), k1); + EXPECT_EQ(value(tup_lis[0]), v1); + } + + // cleanup + ASSERT_EQ(status::OK, leave(s)); +} + +TEST_F(scan_reverse_test, scan_single_border) { // NOLINT + Token token{}; + ASSERT_EQ(enter(token), status::OK); + for (char i = 0; i <= 7; ++i) { // NOLINT + char c = i; + std::string v{"v"}; + v += std::to_string(i); + ASSERT_EQ(status::OK, put(token, st, std::string_view(&c, 1), v.data(), v.size())); + } + /** + * border node + * 0, 1, 2, 3, 4, 5, 6, 7 + */ + std::vector> tup{}; // NOLINT + std::vector> nv; + ASSERT_EQ(status::OK, scan(st, "", scan_endpoint::INF, "", scan_endpoint::INF, 
tup, &nv, 1, true)); + ASSERT_EQ(tup.size(), 1); // NOLINT + ASSERT_EQ(tup.size(), nv.size()); // NOLINT + EXPECT_EQ(key(tup[0]), "\x07"); + EXPECT_EQ(value(tup[0]), "v7"); + + ASSERT_EQ(leave(token), status::OK); +} + +TEST_F(scan_reverse_test, scan_two_borders) { // NOLINT + Token token{}; + ASSERT_EQ(enter(token), status::OK); + for (char i = 0; i <= 16; ++i) { // NOLINT + char c = i; + std::string v{"v"}; + v += std::to_string(i); + ASSERT_EQ(status::OK, put(token, st, std::string_view(&c, 1), v.data(), v.size())); + } + /** + * border nodes + * A: 0, 1, 2, 3, 4, 5, 6, 7, + * B: 8, 9, 10, 11, 12, 13, 14, 15, 16 + */ + std::vector> tup{}; // NOLINT + std::vector> nv; + ASSERT_EQ(status::OK, scan(st, "", scan_endpoint::INF, "", scan_endpoint::INF, tup, &nv, 1, true)); + ASSERT_EQ(tup.size(), 1); // NOLINT + ASSERT_EQ(tup.size(), nv.size()); // NOLINT + + EXPECT_EQ(key(tup[0]), "\x10"); + EXPECT_EQ(value(tup[0]), "v16"); +} + +TEST_F(scan_reverse_test, scan_three_borders) { // NOLINT + Token token{}; + ASSERT_EQ(enter(token), status::OK); + for (char i = 0; i <= 25; ++i) { // NOLINT + char c = i; + std::string v{"v"}; + v += std::to_string(i); + ASSERT_EQ(status::OK, + put(token, st, std::string_view(&c, 1), v.data(), v.size())); + } + /** + * now + * A: 0, 1, 2, 3, 4, 5, 6, 7, + * B: 8, 9, 10, 11, 12, 13, 14, 15, + * C: 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 + * branch of A and B is 8 + * branch of B and C is 16 + */ + std::vector> tup{}; // NOLINT + std::vector> nv; + ASSERT_EQ(status::OK, scan(st, "", scan_endpoint::INF, "", scan_endpoint::INF, tup, &nv, 1, true)); + ASSERT_EQ(tup.size(), 1); // NOLINT + ASSERT_EQ(tup.size(), nv.size()); // NOLINT + EXPECT_EQ(key(tup[0]), "\x19"); + EXPECT_EQ(value(tup[0]), "v25"); +} + +TEST_F(scan_reverse_test, scan_three_borders_removed_last) { // NOLINT + Token token{}; + ASSERT_EQ(enter(token), status::OK); + for (char i = 0; i <= 25; ++i) { // NOLINT + char c = i; + std::string v{"v"}; + v += std::to_string(i); + 
ASSERT_EQ(status::OK, + put(token, st, std::string_view(&c, 1), v.data(), v.size())); + } + /** + * now + * A: 0, 1, 2, 3, 4, 5, 6, 7, + * B: 8, 9, 10, 11, 12, 13, 14, 15, + * C: 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 + * branch of A and B is 8 + * branch of B and C is 16 + */ + std::vector> tup{}; // NOLINT + std::vector> nv; + ASSERT_EQ(status::OK, scan(st, "", scan_endpoint::INF, "", scan_endpoint::INF, tup, &nv, 1, true)); + ASSERT_EQ(tup.size(), 1); // NOLINT + ASSERT_EQ(tup.size(), nv.size()); // NOLINT + + auto delete_range = [&token](char begin, char end) { + for (char i = begin; i <= end; ++i) { + char c = i; + ASSERT_EQ(status::OK, remove(token, st, std::string_view(&c, 1))); + } + }; + delete_range(25, 25); // NOLINT + /** + * now + * A: 0, 1, 2, 3, 4, 5, 6, 7, + * B: 8, 9, 10, 11, 12, 13, 14, 15, + * C: 16, 17, 18, 19, 20, 21, 22, 23, 24 + */ + ASSERT_EQ(status::OK, scan(st, "", scan_endpoint::INF, "", scan_endpoint::INF, tup, &nv, 1, true)); + ASSERT_EQ(tup.size(), 1); // NOLINT + EXPECT_EQ(key(tup[0]), "\x18"); + EXPECT_EQ(value(tup[0]), "v24"); + + ASSERT_EQ(leave(token), status::OK); +} + +} // namespace yakushima::testing