From 2baabdaf129c797bb999eccc603f296de0b544fd Mon Sep 17 00:00:00 2001
From: chunmei-liu
Date: Mon, 7 Feb 2022 14:34:33 -0800
Subject: [PATCH 1/3] crimson/seastore: support sparse_read

Add fiemap and readv functions.

ObjectDataHandler::fiemap maps the requested range onto the object's
reserved region and returns the mapped extents as object-relative
offset -> length pairs, skipping pins whose paddr is zero.
SeaStore::fiemap clamps the request to the onode size (len == 0 means
"up to the end of the object") before delegating to it.
SeaStore::readv reads every interval of the supplied set and
concatenates the results; the stale placeholder return that used to
follow it is removed because it became unreachable.

Signed-off-by: chunmei-liu
---
 .../os/seastore/object_data_handler.cc        | 54 +++++++++++++++++
 src/crimson/os/seastore/object_data_handler.h |  8 +++
 src/crimson/os/seastore/seastore.cc           | 60 ++++++++++++++++++-
 src/crimson/os/seastore/seastore.h            |  8 +++
 4 files changed, 128 insertions(+), 2 deletions(-)

diff --git a/src/crimson/os/seastore/object_data_handler.cc b/src/crimson/os/seastore/object_data_handler.cc
index 4e5a55b0d0ad4..624229979159a 100644
--- a/src/crimson/os/seastore/object_data_handler.cc
+++ b/src/crimson/os/seastore/object_data_handler.cc
@@ -551,6 +551,60 @@ ObjectDataHandler::read_ret ObjectDataHandler::read(
   });
 }
 
+ObjectDataHandler::fiemap_ret ObjectDataHandler::fiemap(
+  context_t ctx,
+  objaddr_t obj_offset,
+  extent_len_t len)
+{
+  return seastar::do_with(
+    std::map<uint64_t, uint64_t>(),
+    [ctx, obj_offset, len](auto &ret) {
+    return with_object_data(
+      ctx,
+      [ctx, obj_offset, len, &ret](const auto &object_data) {
+      LOG_PREFIX(ObjectDataHandler::fiemap);
+      DEBUGT(
+        "{}~{}, reservation {}~{}",
+        ctx.t,
+        obj_offset,
+        len,
+        object_data.get_reserved_data_base(),
+        object_data.get_reserved_data_len());
+      /* Assumption: callers ensure that onode size is <= reserved
+       * size and that len is adjusted here prior to call */
+      ceph_assert(!object_data.is_null());
+      ceph_assert((obj_offset + len) <= object_data.get_reserved_data_len());
+      ceph_assert(len > 0);
+      laddr_t loffset =
+        object_data.get_reserved_data_base() + obj_offset;
+      return ctx.tm.get_pins(
+        ctx.t,
+        loffset,
+        len
+      ).si_then([loffset, len, &object_data, &ret](auto &&pins) {
+        ceph_assert(pins.size() >= 1);
+        ceph_assert((*pins.begin())->get_laddr() <= loffset);
+        // Record every pin with a non-zero paddr, clamped to the
+        // requested window [loffset, loffset + len).
+        for (auto &&i: pins) {
+          if (!(i->get_paddr().is_zero())) {
+            auto ret_left = std::max(i->get_laddr(), loffset);
+            auto ret_right = std::min(
+              i->get_laddr() + i->get_length(),
+              loffset + len);
+            ceph_assert(ret_right > ret_left);
+            ret.emplace(
+              ret_left - object_data.get_reserved_data_base(),
+              ret_right - ret_left);
+          }
+        }
+      });
+    }).si_then([&ret] {
+      return std::move(ret);
+    });
+  });
+}
+
 ObjectDataHandler::truncate_ret ObjectDataHandler::truncate(
   context_t ctx,
   objaddr_t offset)
diff --git a/src/crimson/os/seastore/object_data_handler.h b/src/crimson/os/seastore/object_data_handler.h
index c397245125cf8..dd91f343623b3 100644
--- a/src/crimson/os/seastore/object_data_handler.h
+++ b/src/crimson/os/seastore/object_data_handler.h
@@ -74,6 +74,14 @@ class ObjectDataHandler {
     objaddr_t offset,
     extent_len_t len);
 
+  /// Sparse read support: returns mapped extents in [offset, offset + len)
+  using fiemap_iertr = base_iertr;
+  using fiemap_ret = fiemap_iertr::future<std::map<uint64_t, uint64_t>>;
+  fiemap_ret fiemap(
+    context_t ctx,
+    objaddr_t offset,
+    extent_len_t len);
+
   /// Clears data past offset
   using truncate_iertr = base_iertr;
   using truncate_ret = truncate_iertr::future<>;
diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc
index 2702e49730970..0323f81e956e6 100644
--- a/src/crimson/os/seastore/seastore.cc
+++ b/src/crimson/os/seastore/seastore.cc
@@ -484,6 +484,22 @@ SeaStore::read_errorator::future<ceph::bufferlist> SeaStore::readv(
   interval_set<uint64_t>& m,
   uint32_t op_flags)
 {
+  // Read every interval of m and append the data to a single bufferlist.
+  return seastar::do_with(
+    ceph::bufferlist{},
+    [=, &oid, &m](auto &ret) {
+    return crimson::do_for_each(
+      m,
+      [=, &oid, &ret](auto &p) {
+      return read(
+        ch, oid, p.first, p.second, op_flags
+      ).safe_then([&ret](auto bl) {
+        ret.claim_append(bl);
+      });
+    }).safe_then([&ret] {
+      return read_errorator::make_ready_future<ceph::bufferlist>
+        (std::move(ret));
+    });
+  });
-  return read_errorator::make_ready_future<ceph::bufferlist>();
 }
 
@@ -853,13 +869,53 @@ seastar::future SeaStore::get_omap_iterator(
   });
 }
 
+SeaStore::_fiemap_ret SeaStore::_fiemap(
+  Transaction &t,
+  Onode &onode,
+  uint64_t off,
+  uint64_t len) const
+{
+  return seastar::do_with(
+    ObjectDataHandler(max_object_size),
+    [=, &t, &onode] (auto &objhandler) {
+    return objhandler.fiemap(
+      ObjectDataHandler::context_t{
+        *transaction_manager,
+        t,
+        onode,
+      },
+      off,
+      len);
+  });
+}
 seastar::future<std::map<uint64_t, uint64_t>> SeaStore::fiemap(
   CollectionRef ch,
   const ghobject_t& oid,
   uint64_t off,
   uint64_t len)
 {
-  return seastar::make_ready_future<std::map<uint64_t, uint64_t>>();
+  LOG_PREFIX(SeaStore::fiemap);
+  DEBUG("oid: {}, off: {}, len: {} ", oid, off, len);
+  return repeat_with_onode<std::map<uint64_t, uint64_t>>(
+    ch,
+    oid,
+    Transaction::src_t::READ,
+    "fiemap_read",
+    op_type_t::READ,
+    [=](auto &t, auto &onode) -> _fiemap_ret {
+    size_t size = onode.get_layout().size;
+    if (off >= size) {
+      INFOT("fiemap offset is over onode size!", t);
+      return seastar::make_ready_future<std::map<uint64_t, uint64_t>>();
+    }
+    size_t adjust_len = (len == 0) ?
+      size - off:
+      std::min(size - off, len);
+    return _fiemap(t, onode, off, adjust_len);
+  }).handle_error(
+    crimson::ct_error::assert_all{
+      "Invalid error in SeaStore::fiemap"
+    });
 }
 
 void SeaStore::on_error(ceph::os::Transaction &t) {
diff --git a/src/crimson/os/seastore/seastore.h b/src/crimson/os/seastore/seastore.h
index 93e34c28801e3..358370ae59e22 100644
--- a/src/crimson/os/seastore/seastore.h
+++ b/src/crimson/os/seastore/seastore.h
@@ -22,6 +22,7 @@
 #include "crimson/os/seastore/onode_manager.h"
 #include "crimson/os/seastore/omap_manager.h"
 #include "crimson/os/seastore/collection_manager.h"
+#include "crimson/os/seastore/object_data_handler.h"
 
 namespace crimson::os::seastore {
 
@@ -269,6 +270,13 @@ class SeaStore final : public FuturizedStore {
       });
   }
 
+  using _fiemap_ret = ObjectDataHandler::fiemap_ret;
+  _fiemap_ret _fiemap(
+    Transaction &t,
+    Onode &onode,
+    uint64_t off,
+    uint64_t len) const;
+
   using _omap_get_value_iertr = OMapManager::base_iertr::extend<
     crimson::ct_error::enodata
     >;

From f4e20eec8e36d983cd9da148b3efc1672fdc2c10 Mon Sep 17 00:00:00 2001
From: chunmei-liu
Date: Sat, 12 Feb 2022 13:36:33 -0800
Subject: [PATCH 2/3] crimson/seastore: add fiemap and sparse read unit test

Cover fiemap on an object that was truncated but never written (an
empty map is expected), fiemap over 100 small writes placed 16 KiB
apart (full range, inner range and a partially overlapping range), and
readv driven by the fiemap result.  The test object's remove helper now
also removes the collection.

Signed-off-by: chunmei-liu
---
 src/test/crimson/seastore/test_seastore.cc | 125 +++++++++++++++++++++
 1 file changed, 125 insertions(+)

diff --git a/src/test/crimson/seastore/test_seastore.cc b/src/test/crimson/seastore/test_seastore.cc
index 28f359ae4a3c5..df078d5c661c0 100644
--- a/src/test/crimson/seastore/test_seastore.cc
+++ b/src/test/crimson/seastore/test_seastore.cc
@@ -92,9 +92,39 @@ struct seastore_test_t :
         std::move(t)).get0();
     }
 
+    void truncate(
+      CTransaction &t,
+      uint64_t off) {
+      t.truncate(cid, oid, off);
+    }
+
+    void truncate(
+      SeaStore &seastore,
+      uint64_t off) {
+      CTransaction t;
+      truncate(t, off);
+      seastore.do_transaction(
+        coll,
+        std::move(t)).get0();
+    }
+
+    std::map<uint64_t, uint64_t> fiemap(
+      SeaStore &seastore,
+      uint64_t off,
+      uint64_t len) {
+      return seastore.fiemap(coll, oid, off, len).get0();
+    }
+
+    bufferlist readv(
+      SeaStore &seastore,
+      interval_set<uint64_t> &m) {
+      return seastore.readv(coll, oid, m).unsafe_get0();
+    }
+
     void remove(
       CTransaction &t) {
       t.remove(cid, oid);
+      t.remove_collection(cid);
     }
 
     void remove(
@@ -518,3 +548,98 @@ TEST_F(seastore_test_t, simple_extent_test)
     test_obj.check_size(*seastore);
   });
 }
+
+TEST_F(seastore_test_t, fiemap_empty)
+{
+  run_async([this] {
+    auto &test_obj = get_object(make_oid(0));
+    test_obj.touch(*seastore);
+    test_obj.truncate(*seastore, 100000);
+
+    std::map<uint64_t, uint64_t> m;
+    m = test_obj.fiemap(*seastore, 0, 100000);
+    EXPECT_TRUE(m.empty());
+
+    test_obj.remove(*seastore);
+  });
+}
+
+TEST_F(seastore_test_t, fiemap_holes)
+{
+  run_async([this] {
+    const uint64_t MAX_EXTENTS = 100;
+
+    // large enough to ensure that seastore will allocate each write separately
+    const uint64_t SKIP_STEP = 16 << 10;
+    auto &test_obj = get_object(make_oid(0));
+    bufferlist bl;
+    bl.append("foo");
+
+    test_obj.touch(*seastore);
+    for (uint64_t i = 0; i < MAX_EXTENTS; i++) {
+      test_obj.write(*seastore, SKIP_STEP * i, bl);
+    }
+
+    { // fiemap test from 0 to SKIP_STEP * (MAX_EXTENTS - 1) + 3
+      auto m = test_obj.fiemap(
+        *seastore, 0, SKIP_STEP * (MAX_EXTENTS - 1) + 3);
+      ASSERT_EQ(m.size(), MAX_EXTENTS);
+      for (uint64_t i = 0; i < MAX_EXTENTS; i++) {
+        ASSERT_TRUE(m.count(SKIP_STEP * i));
+        ASSERT_GE(m[SKIP_STEP * i], bl.length());
+      }
+    }
+
+    { // fiemap test from SKIP_STEP to SKIP_STEP * (MAX_EXTENTS - 2) + 3
+      auto m = test_obj.fiemap(
+        *seastore, SKIP_STEP, SKIP_STEP * (MAX_EXTENTS - 3) + 3);
+      ASSERT_EQ(m.size(), MAX_EXTENTS - 2);
+      for (uint64_t i = 1; i < MAX_EXTENTS - 1; i++) {
+        ASSERT_TRUE(m.count(SKIP_STEP * i));
+        ASSERT_GE(m[SKIP_STEP * i], bl.length());
+      }
+    }
+
+    { // fiemap test SKIP_STEP + 1 to 2 * SKIP_STEP + 1 (partial overlap)
+      auto m = test_obj.fiemap(
+        *seastore, SKIP_STEP + 1, SKIP_STEP + 1);
+      ASSERT_EQ(m.size(), 2);
+      ASSERT_EQ(m.begin()->first, SKIP_STEP + 1);
+      ASSERT_GE(m.begin()->second, bl.length());
+      ASSERT_LE(m.rbegin()->first, (2 * SKIP_STEP) + 1);
+      ASSERT_EQ(m.rbegin()->first + m.rbegin()->second, 2 * SKIP_STEP + 2);
+    }
+
+    test_obj.remove(*seastore);
+  });
+}
+
+TEST_F(seastore_test_t, sparse_read)
+{
+  run_async([this] {
+    const uint64_t MAX_EXTENTS = 100;
+    const uint64_t SKIP_STEP = 16 << 10;
+    auto &test_obj = get_object(make_oid(0));
+    bufferlist wbl;
+    wbl.append("foo");
+
+    test_obj.touch(*seastore);
+    for (uint64_t i = 0; i < MAX_EXTENTS; i++) {
+      test_obj.write(*seastore, SKIP_STEP * i, wbl);
+    }
+    interval_set<uint64_t> m;
+    m = interval_set<uint64_t>(
+      test_obj.fiemap(*seastore, 0, SKIP_STEP * (MAX_EXTENTS - 1) + 3));
+    ASSERT_TRUE(!m.empty());
+    uint64_t off = 0;
+    auto rbl = test_obj.readv(*seastore, m);
+
+    for (auto &&miter : m) {
+      bufferlist subl;
+      subl.substr_of(rbl, off, std::min(miter.second, uint64_t(wbl.length())));
+      ASSERT_TRUE(subl.contents_equal(wbl));
+      off += miter.second;
+    }
+    test_obj.remove(*seastore);
+  });
+}

From bc1e1116f58ada34c4664791bf77b278f7f82488 Mon Sep 17 00:00:00 2001
From: chunmei-liu
Date: Tue, 15 Feb 2022 19:13:15 -0800
Subject: [PATCH 3/3] crimson/seastore: put ObjectDataHandler instance into do_with

Keep the ObjectDataHandler instance alive until the futures returned by
its member functions are resolved, instead of calling them on a
temporary.

Signed-off-by: chunmei-liu
---
 src/crimson/os/seastore/seastore.cc | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc
index 0323f81e956e6..3c17d90be196a 100644
--- a/src/crimson/os/seastore/seastore.cc
+++ b/src/crimson/os/seastore/seastore.cc
@@ -1145,8 +1145,9 @@ SeaStore::tm_ret SeaStore::_write(
   }
   return seastar::do_with(
     std::move(_bl),
-    [=, &ctx, &onode](auto &bl) {
-      return ObjectDataHandler(max_object_size).write(
+    ObjectDataHandler(max_object_size),
+    [=, &ctx, &onode](auto &bl, auto &objhandler) {
+      return objhandler.write(
         ObjectDataHandler::context_t{
           *transaction_manager,
           *ctx.transaction,
@@ -1278,13 +1279,17 @@ SeaStore::tm_ret SeaStore::_truncate(
   LOG_PREFIX(SeaStore::_truncate);
   DEBUGT("onode={} size={}", *ctx.transaction, *onode, size);
   onode->get_mutable_layout(*ctx.transaction).size = size;
-  return ObjectDataHandler(max_object_size).truncate(
-    ObjectDataHandler::context_t{
-      *transaction_manager,
-      *ctx.transaction,
-      *onode
-    },
-    size);
+  return seastar::do_with(
+    ObjectDataHandler(max_object_size),
+    [=, &ctx, &onode](auto &objhandler) {
+    return objhandler.truncate(
+      ObjectDataHandler::context_t{
+        *transaction_manager,
+        *ctx.transaction,
+        *onode
+      },
+      size);
+  });
 }
 
 SeaStore::tm_ret SeaStore::_setattrs(