Skip to content

Commit

Permalink
Merge pull request ceph#44933 from liu-chunmei/seastore-support-spars…
Browse files Browse the repository at this point in the history
…e-read

crimson/seastore: support sparse_read

Reviewed-by: Samuel Just <sjust@redhat.com>
  • Loading branch information
athanatos authored Feb 17, 2022
2 parents e3463e2 + bc1e111 commit cee3cae
Show file tree
Hide file tree
Showing 5 changed files with 266 additions and 10 deletions.
54 changes: 54 additions & 0 deletions src/crimson/os/seastore/object_data_handler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,60 @@ ObjectDataHandler::read_ret ObjectDataHandler::read(
});
}

/**
 * fiemap
 *
 * Builds a map describing which parts of [obj_offset, obj_offset + len)
 * are backed by a mapping whose paddr is not zero.  Each entry maps the
 * object-relative start of such a range to its length, with both ends
 * clipped to the requested window.
 */
ObjectDataHandler::fiemap_ret ObjectDataHandler::fiemap(
  context_t ctx,
  objaddr_t obj_offset,
  extent_len_t len)
{
  return seastar::do_with(
    std::map<uint64_t, uint64_t>(),
    [ctx, obj_offset, len](auto &extents) {
    return with_object_data(
      ctx,
      [ctx, obj_offset, len, &extents](const auto &object_data) {
      LOG_PREFIX(ObjectDataHandler::fiemap);
      DEBUGT(
        "{}~{}, reservation {}~{}",
        ctx.t,
        obj_offset,
        len,
        object_data.get_reserved_data_base(),
        object_data.get_reserved_data_len());
      // Preconditions: the caller has already clamped the request so that
      // it is non-empty and lies inside the object's reserved region.
      ceph_assert(!object_data.is_null());
      ceph_assert((obj_offset + len) <= object_data.get_reserved_data_len());
      ceph_assert(len > 0);
      laddr_t window_begin =
        object_data.get_reserved_data_base() + obj_offset;
      return ctx.tm.get_pins(
        ctx.t, window_begin, len
      ).si_then([window_begin, len, &object_data, &extents](auto &&pins) {
        ceph_assert(pins.size() >= 1);
        ceph_assert((*pins.begin())->get_laddr() <= window_begin);
        for (auto &&pin: pins) {
          // Pins with a zero paddr are not reported.
          if (pin->get_paddr().is_zero()) {
            continue;
          }
          // Clip the pin to the requested window before recording it.
          auto clipped_begin = std::max(pin->get_laddr(), window_begin);
          auto clipped_end = std::min(
            pin->get_laddr() + pin->get_length(),
            window_begin + len);
          assert(clipped_end > clipped_begin);
          extents.emplace(
            clipped_begin - object_data.get_reserved_data_base(),
            clipped_end - clipped_begin);
        }
      });
    }).si_then([&extents] {
      return std::move(extents);
    });
  });
}

ObjectDataHandler::truncate_ret ObjectDataHandler::truncate(
context_t ctx,
objaddr_t offset)
Expand Down
8 changes: 8 additions & 0 deletions src/crimson/os/seastore/object_data_handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ class ObjectDataHandler {
objaddr_t offset,
extent_len_t len);

/// Reports which parts of [offset, offset + len) are backed by data
/// (used for sparse reads).
///
/// The returned map is keyed by object-relative offset and holds the
/// length of each reported range, clipped to the requested window;
/// mappings whose paddr is zero are left out.  The implementation asserts
/// that the object data is non-null, that len > 0 and that offset + len
/// does not exceed the reserved data length.
using fiemap_iertr = base_iertr;
using fiemap_ret = fiemap_iertr::future<std::map<uint64_t, uint64_t>>;
fiemap_ret fiemap(
context_t ctx,
objaddr_t offset,
extent_len_t len);

/// Clears data past offset
using truncate_iertr = base_iertr;
using truncate_ret = truncate_iertr::future<>;
Expand Down
81 changes: 71 additions & 10 deletions src/crimson/os/seastore/seastore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,22 @@ SeaStore::read_errorator::future<ceph::bufferlist> SeaStore::readv(
interval_set<uint64_t>& m,
uint32_t op_flags)
{
return seastar::do_with(
ceph::bufferlist{},
[=, &oid, &m](auto &ret) {
return crimson::do_for_each(
m,
[=, &oid, &ret](auto &p) {
return read(
ch, oid, p.first, p.second, op_flags
).safe_then([&ret](auto bl) {
ret.claim_append(bl);
});
}).safe_then([&ret] {
return read_errorator::make_ready_future<ceph::bufferlist>
(std::move(ret));
});
});
return read_errorator::make_ready_future<ceph::bufferlist>();
}

Expand Down Expand Up @@ -855,13 +871,53 @@ seastar::future<FuturizedStore::OmapIteratorRef> SeaStore::get_omap_iterator(
});
}

/// Runs ObjectDataHandler::fiemap on onode for [off, off + len) within
/// transaction t.  The handler is kept alive by do_with until the returned
/// future resolves.
SeaStore::_fiemap_ret SeaStore::_fiemap(
  Transaction &t,
  Onode &onode,
  uint64_t off,
  uint64_t len) const
{
  return seastar::do_with(
    ObjectDataHandler(max_object_size),
    [this, off, len, &t, &onode](auto &handler) {
    ObjectDataHandler::context_t ctx{*transaction_manager, t, onode};
    return handler.fiemap(ctx, off, len);
  });
}
/**
 * fiemap
 *
 * Returns the ranges of oid within [off, off + len) reported by _fiemap,
 * as a map of offset -> length.
 *
 * - If off is at or beyond the onode's size, the result is an empty map.
 * - len == 0 means "from off to the end of the object".
 * - Otherwise len is clamped so the range never extends past the onode
 *   size, which is what ObjectDataHandler::fiemap expects of its caller.
 *
 * Any error escaping the onode lookup or the fiemap itself is treated as
 * a bug (assert_all).
 */
seastar::future<std::map<uint64_t, uint64_t>> SeaStore::fiemap(
  CollectionRef ch,
  const ghobject_t& oid,
  uint64_t off,
  uint64_t len)
{
  LOG_PREFIX(SeaStore::fiemap);
  DEBUG("oid: {}, off: {}, len: {} ", oid, off, len);
  return repeat_with_onode<std::map<uint64_t, uint64_t>>(
    ch,
    oid,
    Transaction::src_t::READ,
    "fiemap_read",
    op_type_t::READ,
    [=](auto &t, auto &onode) -> _fiemap_ret {
    size_t size = onode.get_layout().size;
    if (off >= size) {
      INFOT("fiemap offset is over onode size!", t);
      return seastar::make_ready_future<std::map<uint64_t, uint64_t>>();
    }
    // len == 0 requests everything from off to the end of the object;
    // otherwise never look past the end of the object.
    size_t adjust_len = (len == 0) ?
      size - off:
      std::min(size - off, len);
    return _fiemap(t, onode, off, adjust_len);
  }).handle_error(
    crimson::ct_error::assert_all{
      "Invalid error in SeaStore::fiemap"
    });
}

void SeaStore::on_error(ceph::os::Transaction &t) {
Expand Down Expand Up @@ -1091,8 +1147,9 @@ SeaStore::tm_ret SeaStore::_write(
}
return seastar::do_with(
std::move(_bl),
[=, &ctx, &onode](auto &bl) {
return ObjectDataHandler(max_object_size).write(
ObjectDataHandler(max_object_size),
[=, &ctx, &onode](auto &bl, auto &objhandler) {
return objhandler.write(
ObjectDataHandler::context_t{
*transaction_manager,
*ctx.transaction,
Expand Down Expand Up @@ -1224,13 +1281,17 @@ SeaStore::tm_ret SeaStore::_truncate(
LOG_PREFIX(SeaStore::_truncate);
DEBUGT("onode={} size={}", *ctx.transaction, *onode, size);
onode->get_mutable_layout(*ctx.transaction).size = size;
return ObjectDataHandler(max_object_size).truncate(
ObjectDataHandler::context_t{
*transaction_manager,
*ctx.transaction,
*onode
},
size);
return seastar::do_with(
ObjectDataHandler(max_object_size),
[=, &ctx, &onode](auto &objhandler) {
return objhandler.truncate(
ObjectDataHandler::context_t{
*transaction_manager,
*ctx.transaction,
*onode
},
size);
});
}

SeaStore::tm_ret SeaStore::_setattrs(
Expand Down
8 changes: 8 additions & 0 deletions src/crimson/os/seastore/seastore.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "crimson/os/seastore/onode_manager.h"
#include "crimson/os/seastore/omap_manager.h"
#include "crimson/os/seastore/collection_manager.h"
#include "crimson/os/seastore/object_data_handler.h"

namespace crimson::os::seastore {

Expand Down Expand Up @@ -269,6 +270,13 @@ class SeaStore final : public FuturizedStore {
});
}

/// Computes the fiemap of onode for [off, off + len) within transaction t
/// by delegating to ObjectDataHandler::fiemap; the result type is that
/// handler's fiemap_ret.
using _fiemap_ret = ObjectDataHandler::fiemap_ret;
_fiemap_ret _fiemap(
Transaction &t,
Onode &onode,
uint64_t off,
uint64_t len) const;

using _omap_get_value_iertr = OMapManager::base_iertr::extend<
crimson::ct_error::enodata
>;
Expand Down
125 changes: 125 additions & 0 deletions src/test/crimson/seastore/test_seastore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,39 @@ struct seastore_test_t :
std::move(t)).get0();
}

/// Queues a truncate of this object to off on transaction t.
void truncate(CTransaction &t, uint64_t off)
{
  t.truncate(cid, oid, off);
}

/// Truncates this object to off using a fresh transaction submitted to
/// seastore, blocking until the transaction's future resolves.
void truncate(SeaStore &seastore, uint64_t off)
{
  CTransaction txn;
  truncate(txn, off);
  seastore.do_transaction(coll, std::move(txn)).get0();
}

/// Returns seastore's fiemap for this object over the requested off/len,
/// blocking until the result is available.
std::map<uint64_t, uint64_t> fiemap(
  SeaStore &seastore, uint64_t off, uint64_t len)
{
  auto pending = seastore.fiemap(coll, oid, off, len);
  return pending.get0();
}

/// Reads the intervals listed in m from this object via seastore.readv and
/// returns the resulting bufferlist, blocking until it is available.
bufferlist readv(SeaStore &seastore, interval_set<uint64_t>&m)
{
  auto pending = seastore.readv(coll, oid, m);
  return pending.unsafe_get0();
}

/// Queues removal of this object, followed by removal of its collection,
/// on transaction t.
void remove(CTransaction &t)
{
  t.remove(cid, oid);
  t.remove_collection(cid);
}

void remove(
Expand Down Expand Up @@ -518,3 +548,98 @@ TEST_F(seastore_test_t, simple_extent_test)
test_obj.check_size(*seastore);
});
}

// An object that was only touched and then truncated to 100000 has had no
// data written to it; fiemap over that range is expected to be empty.
TEST_F(seastore_test_t, fiemap_empty)
{
  run_async([this] {
    auto &obj = get_object(make_oid(0));
    obj.touch(*seastore);
    obj.truncate(*seastore, 100000);

    const std::map<uint64_t, uint64_t> extents =
      obj.fiemap(*seastore, 0, 100000);
    EXPECT_TRUE(extents.empty());

    obj.remove(*seastore);
  });
}

// Writes a short payload at regularly spaced offsets and checks that fiemap
// reports one extent per write for full, trimmed and partially overlapping
// query windows.
TEST_F(seastore_test_t, fiemap_holes)
{
  run_async([this] {
    const uint64_t num_extents = 100;

    // Spacing between writes; large enough to ensure that seastore will
    // allocate each write separately.
    const uint64_t stride = 16 << 10;
    auto &obj = get_object(make_oid(0));
    bufferlist payload;
    payload.append("foo");

    obj.touch(*seastore);
    for (uint64_t idx = 0; idx < num_extents; ++idx) {
      obj.write(*seastore, stride * idx, payload);
    }

    { // whole object: [0, stride * (num_extents - 1) + 3)
      auto extents = obj.fiemap(
        *seastore, 0, stride * (num_extents - 1) + 3);
      ASSERT_EQ(extents.size(), num_extents);
      for (uint64_t idx = 0; idx < num_extents; ++idx) {
        auto found = extents.find(stride * idx);
        ASSERT_TRUE(found != extents.end());
        ASSERT_GE(found->second, payload.length());
      }
    }

    { // first and last write excluded:
      // [stride, stride * (num_extents - 2) + 3)
      auto extents = obj.fiemap(
        *seastore, stride, stride * (num_extents - 3) + 3);
      ASSERT_EQ(extents.size(), num_extents - 2);
      for (uint64_t idx = 1; idx < num_extents - 1; ++idx) {
        auto found = extents.find(stride * idx);
        ASSERT_TRUE(found != extents.end());
        ASSERT_GE(found->second, payload.length());
      }
    }

    { // partial overlap: [stride + 1, 2 * stride + 2) starts inside the
      // second write and ends just inside the third
      auto extents = obj.fiemap(
        *seastore, stride + 1, stride + 1);
      ASSERT_EQ(extents.size(), 2);
      const auto &first = *extents.begin();
      const auto &last = *extents.rbegin();
      ASSERT_EQ(first.first, stride + 1);
      ASSERT_GE(first.second, payload.length());
      ASSERT_LE(last.first, (2 * stride) + 1);
      ASSERT_EQ(last.first + last.second, 2 * stride + 2);
    }

    obj.remove(*seastore);
  });
}

// Writes a short payload at regularly spaced offsets, feeds the fiemap
// result into readv, and checks that each returned interval begins with
// the payload that was written.
TEST_F(seastore_test_t, sparse_read)
{
  run_async([this] {
    const uint64_t num_extents = 100;
    const uint64_t stride = 16 << 10;
    auto &obj = get_object(make_oid(0));
    bufferlist written;
    written.append("foo");

    obj.touch(*seastore);
    for (uint64_t idx = 0; idx < num_extents; ++idx) {
      obj.write(*seastore, stride * idx, written);
    }

    interval_set<uint64_t> ranges(
      obj.fiemap(*seastore, 0, stride * (num_extents - 1) + 3));
    ASSERT_TRUE(!ranges.empty());
    auto readback = obj.readv(*seastore, ranges);

    // readv concatenates the intervals, so walk the result with a running
    // cursor that advances by each interval's length.
    uint64_t cursor = 0;
    for (auto &&range : ranges) {
      const uint64_t compare_len =
        std::min(range.second, uint64_t(written.length()));
      bufferlist piece;
      piece.substr_of(readback, cursor, compare_len);
      ASSERT_TRUE(piece.contents_equal(written));
      cursor += range.second;
    }
    obj.remove(*seastore);
  });
}

0 comments on commit cee3cae

Please sign in to comment.