Skip to content

Commit

Permalink
Revert "block/mq-deadline: Prioritize high-priority requests"
Browse files Browse the repository at this point in the history
This reverts commit fb92603.

Zhen reports that this commit slows down mq-deadline on a 128 thread
box, going from 258K IOPS to 170-180K. My testing shows that an Optane
gen2 drive on a 64 thread box drops from 2.3M IOPS to 1.2M IOPS.

Looking in detail at the code, the main culprit here is needing to sum
percpu counters in the dispatch hot path, leading to very high CPU
utilization there. To make matters worse, the code currently needs to
sum 2 percpu counters, and it does so in the most naive way of iterating
possible CPUs _twice_.

Since we're close to release, revert this commit and we can re-do it
with regular per-priority counters instead for the 5.15 kernel.

Link: https://lore.kernel.org/linux-block/20210826144039.2143-1-thunder.leizhen@huawei.com/
Reported-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
  • Loading branch information
axboe committed Aug 26, 2021
1 parent b6d2b05 commit 7b05bf7
Showing 1 changed file with 5 additions and 37 deletions.
42 changes: 5 additions & 37 deletions block/mq-deadline.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,6 @@
*/
static const int read_expire = HZ / 2; /* max time before a read is submitted. */
static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
/*
* Time after which to dispatch lower priority requests even if higher
* priority requests are pending.
*/
static const int aging_expire = 10 * HZ;
static const int writes_starved = 2; /* max times reads can starve a write */
static const int fifo_batch = 16; /* # of sequential requests treated as one
by the above parameters. For throughput. */
Expand Down Expand Up @@ -103,7 +98,6 @@ struct deadline_data {
int writes_starved;
int front_merges;
u32 async_depth;
int aging_expire;

spinlock_t lock;
spinlock_t zone_lock;
Expand Down Expand Up @@ -369,11 +363,10 @@ deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,

/*
* deadline_dispatch_requests selects the best request according to
* read/write expire, fifo_batch, etc and with a start time <= @latest.
* read/write expire, fifo_batch, etc
*/
static struct request *__dd_dispatch_request(struct deadline_data *dd,
struct dd_per_prio *per_prio,
u64 latest_start_ns)
struct dd_per_prio *per_prio)
{
struct request *rq, *next_rq;
enum dd_data_dir data_dir;
Expand All @@ -385,8 +378,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
if (!list_empty(&per_prio->dispatch)) {
rq = list_first_entry(&per_prio->dispatch, struct request,
queuelist);
if (rq->start_time_ns > latest_start_ns)
return NULL;
list_del_init(&rq->queuelist);
goto done;
}
Expand Down Expand Up @@ -464,8 +455,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
dd->batching = 0;

dispatch_request:
if (rq->start_time_ns > latest_start_ns)
return NULL;
/*
* rq is the selected appropriate request.
*/
Expand Down Expand Up @@ -494,32 +483,15 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
struct deadline_data *dd = hctx->queue->elevator->elevator_data;
const u64 now_ns = ktime_get_ns();
struct request *rq = NULL;
struct request *rq;
enum dd_prio prio;

spin_lock(&dd->lock);
/*
* Start with dispatching requests whose deadline expired more than
* aging_expire jiffies ago.
*/
for (prio = DD_BE_PRIO; prio <= DD_PRIO_MAX; prio++) {
rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now_ns -
jiffies_to_nsecs(dd->aging_expire));
if (rq)
goto unlock;
}
/*
* Next, dispatch requests in priority order. Ignore lower priority
* requests if any higher priority requests are pending.
*/
for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now_ns);
if (rq || dd_queued(dd, prio))
rq = __dd_dispatch_request(dd, &dd->per_prio[prio]);
if (rq)
break;
}

unlock:
spin_unlock(&dd->lock);

return rq;
Expand Down Expand Up @@ -620,7 +592,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
dd->front_merges = 1;
dd->last_dir = DD_WRITE;
dd->fifo_batch = fifo_batch;
dd->aging_expire = aging_expire;
spin_lock_init(&dd->lock);
spin_lock_init(&dd->zone_lock);

Expand Down Expand Up @@ -842,7 +813,6 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page) \
#define SHOW_JIFFIES(__FUNC, __VAR) SHOW_INT(__FUNC, jiffies_to_msecs(__VAR))
SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]);
SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
SHOW_JIFFIES(deadline_aging_expire_show, dd->aging_expire);
SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
SHOW_INT(deadline_front_merges_show, dd->front_merges);
/* Bug fix: async_depth's show handler must report dd->async_depth, not
 * dd->front_merges (copy-paste error from the line above). Reading the
 * async_depth sysfs attribute otherwise returns the front_merges value. */
SHOW_INT(deadline_async_depth_show, dd->async_depth);
Expand Down Expand Up @@ -872,7 +842,6 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, msecs_to_jiffies)
STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX);
STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX);
STORE_JIFFIES(deadline_aging_expire_store, &dd->aging_expire, 0, INT_MAX);
STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
/* Bug fix: async_depth's store handler must write dd->async_depth, not
 * dd->front_merges. As written, setting the async_depth sysfs attribute
 * silently clobbered front_merges (and allowed it outside its 0..1 range)
 * while async_depth itself could never be changed from userspace. */
STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX);
Expand All @@ -891,7 +860,6 @@ static struct elv_fs_entry deadline_attrs[] = {
DD_ATTR(front_merges),
DD_ATTR(async_depth),
DD_ATTR(fifo_batch),
DD_ATTR(aging_expire),
__ATTR_NULL
};

Expand Down

0 comments on commit 7b05bf7

Please sign in to comment.