Skip to content

Commit

Permalink
mgr: Improve ok-to-stop by using the avail_no_missing for recovery
Browse files: browse the repository at this point in the history
Signed-off-by: David Zafman <dzafman@redhat.com>
  • Loading branch information
dzafman committed Apr 25, 2019
1 parent 4249778 commit 9750061
Showing 1 changed file with 31 additions and 25 deletions.
56 changes: 31 additions & 25 deletions src/mgr/DaemonServer.cc
Original file line number | Diff line number | Diff line change
Expand Up @@ -1505,7 +1505,7 @@ bool DaemonServer::_handle_command(
cmdctx->reply(r, ss);
return true;
}
map<pg_t,int> pg_delta; // pgid -> net acting set size change
int touched_pgs = 0;
int dangerous_pgs = 0;
cluster_state.with_osdmap_and_pgmap([&](const OSDMap& osdmap, const PGMap& pg_map) {
if (pg_map.num_pg_unknown > 0) {
Expand All @@ -1514,35 +1514,40 @@ bool DaemonServer::_handle_command(
r = -EAGAIN;
return;
}
for (auto osd : osds) {
auto p = pg_map.pg_by_osd.find(osd);
if (p != pg_map.pg_by_osd.end()) {
for (auto& pgid : p->second) {
--pg_delta[pgid];
for (const auto& q : pg_map.pg_stat) {
set<int32_t> pg_acting; // net acting sets (with no missing if degraded)
bool found = false;
if (q.second.state & PG_STATE_DEGRADED) {
for (auto& anm : q.second.avail_no_missing) {
if (osds.count(anm.osd)) {
found = true;
continue;
}
pg_acting.insert(anm.osd);
}
} else {
for (auto& a : q.second.acting) {
if (osds.count(a)) {
found = true;
continue;
}
pg_acting.insert(a);
}
}
}
for (auto& p : pg_delta) {
auto q = pg_map.pg_stat.find(p.first);
if (q == pg_map.pg_stat.end()) {
ss << "missing information about " << p.first << "; cannot draw"
<< " any conclusions";
r = -EAGAIN;
return;
if (!found) {
continue;
}
if (!(q->second.state & PG_STATE_ACTIVE) ||
(q->second.state & PG_STATE_DEGRADED)) {
// we don't currently have a good way to tell *how* degraded
// a degraded PG is, so we have to assume we cannot remove
// any more replicas/shards.
touched_pgs++;
if (!(q.second.state & PG_STATE_ACTIVE) ||
(q.second.state & PG_STATE_DEGRADED)) {
++dangerous_pgs;
continue;
}
const pg_pool_t *pi = osdmap.get_pg_pool(p.first.pool());
const pg_pool_t *pi = osdmap.get_pg_pool(q.first.pool());
if (!pi) {
++dangerous_pgs; // pool is creating or deleting
} else {
if (q->second.acting.size() + p.second < pi->min_size) {
if (pg_acting.size() < pi->min_size) {
++dangerous_pgs;
}
}
Expand All @@ -1553,14 +1558,15 @@ bool DaemonServer::_handle_command(
return true;
}
if (dangerous_pgs) {
ss << dangerous_pgs << " PGs are already degraded or might become "
<< "unavailable";
ss << dangerous_pgs << " PGs are already too degraded, would become"
<< " too degraded or might become unavailable";
cmdctx->reply(-EBUSY, ss);
return true;
}
ss << "OSD(s) " << osds << " are ok to stop without reducing"
<< " availability, provided there are no other concurrent failures"
<< " or interventions. " << pg_delta.size() << " PGs are likely to be"
<< " availability or risking data, provided there are no other concurrent failures"
<< " or interventions." << std::endl;
ss << touched_pgs << " PGs are likely to be"
<< " degraded (but remain available) as a result.";
cmdctx->reply(0, ss);
return true;
Expand Down

0 comments on commit 9750061

Please sign in to comment.