Skip to content

Commit

Permalink
block: model freeze & enter queue as lock for supporting lockdep
Browse files Browse the repository at this point in the history
[ Upstream commit f1be178 ]

Recently we got several deadlock reports [1][2][3] caused by
blk_mq_freeze_queue() and blk_enter_queue().

Turns out the two are just like acquiring read/write lock, so model them
as read/write lock for supporting lockdep:

1) model q->q_usage_counter as two locks(io and queue lock)

- queue lock covers sync with blk_enter_queue()

- io lock covers sync with bio_enter_queue()

2) make the lockdep class/key as per-queue:

- different subsystems have very different lock usage patterns; a shared
  lock class easily causes false positives

- freeze_queue degrades to no lock in case that disk state becomes DEAD
  because bio_enter_queue() won't be blocked any more

- freeze_queue degrades to no lock in case that request queue becomes dying
  because blk_enter_queue() won't be blocked any more

3) model blk_mq_freeze_queue() as acquire_exclusive & try_lock
- it is exclusive lock, so dependency with blk_enter_queue() is covered

- it is a trylock because concurrent calls to blk_mq_freeze_queue() are
  allowed

4) model blk_enter_queue() & bio_enter_queue() as acquire_read()
- nested blk_enter_queue() are allowed

- dependency with blk_mq_freeze_queue() is covered

- blk_queue_exit() is often called from other contexts (such as irq), and
  it can't be annotated as lock_release(), so simply do the release in
  blk_enter_queue(); this way still covers as many cases as possible

With lockdep support, such kind of reports may be reported asap and
needn't wait until the real deadlock is triggered.

For example, lockdep report can be triggered in the report[3] with this
patch applied.

[1] occasional block layer hang when setting 'echo noop > /sys/block/sda/queue/scheduler'
https://bugzilla.kernel.org/show_bug.cgi?id=219166

[2] del_gendisk() vs blk_queue_enter() race condition
https://lore.kernel.org/linux-block/20241003085610.GK11458@google.com/

[3] queue_freeze & queue_enter deadlock in scsi
https://lore.kernel.org/linux-block/ZxG38G9BuFdBpBHZ@fedora/T/#u

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20241025003722.3630252-4-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Stable-dep-of: 3802f73 ("block: fix uaf for flush rq while iterating tags")
Signed-off-by: Sasha Levin <sashal@kernel.org>
  • Loading branch information
Ming Lei authored and gregkh committed Dec 5, 2024
1 parent 9c3d53f commit a6fc2ba
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 13 deletions.
18 changes: 16 additions & 2 deletions block/blk-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ static void blk_free_queue(struct request_queue *q)
blk_mq_release(q);

ida_free(&blk_queue_ida, q->id);
lockdep_unregister_key(&q->io_lock_cls_key);
lockdep_unregister_key(&q->q_lock_cls_key);
call_rcu(&q->rcu_head, blk_free_queue_rcu);
}

Expand All @@ -278,18 +280,20 @@ void blk_put_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_put_queue);

void blk_queue_start_drain(struct request_queue *q)
/*
 * Start draining the queue: fail fast for new entries and wake up anyone
 * already waiting in blk_queue_enter().
 *
 * Returns true if this call actually initiated the freeze (first freezer),
 * so the caller knows whether it owns the matching unfreeze/release.
 */
bool blk_queue_start_drain(struct request_queue *q)
{
	/*
	 * When queue DYING flag is set, we need to block new req
	 * entering queue, so we call __blk_freeze_queue_start() to
	 * prevent I/O from crossing blk_queue_enter().
	 */
	bool freeze = __blk_freeze_queue_start(q);

	if (queue_is_mq(q))
		blk_mq_wake_waiters(q);
	/* Make blk_queue_enter() reexamine the DYING flag. */
	wake_up_all(&q->mq_freeze_wq);

	return freeze;
}

/**
Expand Down Expand Up @@ -321,6 +325,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
return -ENODEV;
}

rwsem_acquire_read(&q->q_lockdep_map, 0, 0, _RET_IP_);
rwsem_release(&q->q_lockdep_map, _RET_IP_);
return 0;
}

Expand Down Expand Up @@ -352,6 +358,8 @@ int __bio_queue_enter(struct request_queue *q, struct bio *bio)
goto dead;
}

rwsem_acquire_read(&q->io_lockdep_map, 0, 0, _RET_IP_);
rwsem_release(&q->io_lockdep_map, _RET_IP_);
return 0;
dead:
bio_io_error(bio);
Expand Down Expand Up @@ -441,6 +449,12 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
PERCPU_REF_INIT_ATOMIC, GFP_KERNEL);
if (error)
goto fail_stats;
lockdep_register_key(&q->io_lock_cls_key);
lockdep_register_key(&q->q_lock_cls_key);
lockdep_init_map(&q->io_lockdep_map, "&q->q_usage_counter(io)",
&q->io_lock_cls_key, 0);
lockdep_init_map(&q->q_lockdep_map, "&q->q_usage_counter(queue)",
&q->q_lock_cls_key, 0);

q->nr_requests = BLKDEV_DEFAULT_RQ;

Expand Down
26 changes: 22 additions & 4 deletions block/blk-mq.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,17 +120,29 @@ void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part,
inflight[1] = mi.inflight[1];
}

void blk_freeze_queue_start(struct request_queue *q)
/*
 * Raw freeze primitive: bump the freeze depth and, on the first freeze,
 * kill q->q_usage_counter so that new queue entries block.
 *
 * Returns true only for the outermost (depth 0 -> 1) freeze; callers use
 * that to decide whether to take the lockdep "freeze lock" annotations.
 */
bool __blk_freeze_queue_start(struct request_queue *q)
{
	bool freeze;	/* bool, matching the return type */

	mutex_lock(&q->mq_freeze_lock);
	if (++q->mq_freeze_depth == 1) {
		/* First freezer: block new entries via the percpu ref */
		percpu_ref_kill(&q->q_usage_counter);
		mutex_unlock(&q->mq_freeze_lock);
		if (queue_is_mq(q))
			blk_mq_run_hw_queues(q, false);
		freeze = true;
	} else {
		mutex_unlock(&q->mq_freeze_lock);
		freeze = false;
	}

	return freeze;
}

/*
 * Public freeze entry point: on the outermost freeze, also take the
 * lockdep annotations so freeze/enter ordering problems are reported.
 */
void blk_freeze_queue_start(struct request_queue *q)
{
	if (!__blk_freeze_queue_start(q))
		return;

	blk_freeze_acquire_lock(q, false, false);
}
EXPORT_SYMBOL_GPL(blk_freeze_queue_start);

Expand Down Expand Up @@ -176,8 +188,10 @@ void blk_mq_freeze_queue(struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);

void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
{
int unfreeze = false;

mutex_lock(&q->mq_freeze_lock);
if (force_atomic)
q->q_usage_counter.data->force_atomic = true;
Expand All @@ -186,13 +200,17 @@ void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic)
if (!q->mq_freeze_depth) {
percpu_ref_resurrect(&q->q_usage_counter);
wake_up_all(&q->mq_freeze_wq);
unfreeze = true;
}
mutex_unlock(&q->mq_freeze_lock);

return unfreeze;
}

/*
 * Public unfreeze entry point: when this call drops the last freeze
 * reference, also release the lockdep annotations taken by
 * blk_freeze_queue_start().
 */
void blk_mq_unfreeze_queue(struct request_queue *q)
{
	if (__blk_mq_unfreeze_queue(q, false))
		blk_unfreeze_release_lock(q, false, false);
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);

Expand All @@ -205,7 +223,7 @@ EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
*/
void blk_freeze_queue_start_non_owner(struct request_queue *q)
{
	/*
	 * Unlike blk_freeze_queue_start(), deliberately skip the lockdep
	 * annotations (blk_freeze_acquire_lock()): presumably the matching
	 * unfreeze happens in a different context than this freeze, which
	 * lockdep's owner tracking cannot model — confirm against callers.
	 */
	__blk_freeze_queue_start(q);
}
EXPORT_SYMBOL_GPL(blk_freeze_queue_start_non_owner);

Expand Down
29 changes: 26 additions & 3 deletions block/blk.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include <linux/bio-integrity.h>
#include <linux/blk-crypto.h>
#include <linux/lockdep.h>
#include <linux/memblock.h> /* for max_pfn/max_low_pfn */
#include <linux/sched/sysctl.h>
#include <linux/timekeeping.h>
Expand Down Expand Up @@ -35,8 +36,9 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
void blk_free_flush_queue(struct blk_flush_queue *q);

void blk_freeze_queue(struct request_queue *q);
void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
void blk_queue_start_drain(struct request_queue *q);
bool __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
bool blk_queue_start_drain(struct request_queue *q);
bool __blk_freeze_queue_start(struct request_queue *q);
int __bio_queue_enter(struct request_queue *q, struct bio *bio);
void submit_bio_noacct_nocheck(struct bio *bio);
void bio_await_chain(struct bio *bio);
Expand Down Expand Up @@ -69,8 +71,11 @@ static inline int bio_queue_enter(struct bio *bio)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);

if (blk_try_enter_queue(q, false))
if (blk_try_enter_queue(q, false)) {
rwsem_acquire_read(&q->io_lockdep_map, 0, 0, _RET_IP_);
rwsem_release(&q->io_lockdep_map, _RET_IP_);
return 0;
}
return __bio_queue_enter(q, bio);
}

Expand Down Expand Up @@ -734,4 +739,22 @@ void blk_integrity_verify(struct bio *bio);
void blk_integrity_prepare(struct request *rq);
void blk_integrity_complete(struct request *rq, unsigned int nr_bytes);

/*
 * Take the lockdep write-side annotations modelling a queue freeze.
 * Each map is skipped when the corresponding enter path can no longer
 * block: io when the disk is dead, queue when the queue is dying.
 */
static inline void blk_freeze_acquire_lock(struct request_queue *q,
					   bool disk_dead, bool queue_dying)
{
	if (!disk_dead)
		rwsem_acquire(&q->io_lockdep_map, 0, 1, _RET_IP_);
	if (!queue_dying)
		rwsem_acquire(&q->q_lockdep_map, 0, 1, _RET_IP_);
}

/*
 * Drop the lockdep annotations taken by blk_freeze_acquire_lock(), in
 * reverse acquisition order, with the same dead/dying skip conditions.
 */
static inline void blk_unfreeze_release_lock(struct request_queue *q,
					     bool disk_dead, bool queue_dying)
{
	if (!queue_dying)
		rwsem_release(&q->q_lockdep_map, _RET_IP_);
	if (!disk_dead)
		rwsem_release(&q->io_lockdep_map, _RET_IP_);
}

#endif /* BLK_INTERNAL_H */
15 changes: 11 additions & 4 deletions block/genhd.c
Original file line number Diff line number Diff line change
Expand Up @@ -581,13 +581,13 @@ static void blk_report_disk_dead(struct gendisk *disk, bool surprise)
rcu_read_unlock();
}

static void __blk_mark_disk_dead(struct gendisk *disk)
static bool __blk_mark_disk_dead(struct gendisk *disk)
{
/*
* Fail any new I/O.
*/
if (test_and_set_bit(GD_DEAD, &disk->state))
return;
return false;

if (test_bit(GD_OWNS_QUEUE, &disk->state))
blk_queue_flag_set(QUEUE_FLAG_DYING, disk->queue);
Expand All @@ -600,7 +600,7 @@ static void __blk_mark_disk_dead(struct gendisk *disk)
/*
* Prevent new I/O from crossing bio_queue_enter().
*/
blk_queue_start_drain(disk->queue);
return blk_queue_start_drain(disk->queue);
}

/**
Expand Down Expand Up @@ -641,6 +641,7 @@ void del_gendisk(struct gendisk *disk)
struct request_queue *q = disk->queue;
struct block_device *part;
unsigned long idx;
bool start_drain, queue_dying;

might_sleep();

Expand Down Expand Up @@ -668,7 +669,10 @@ void del_gendisk(struct gendisk *disk)
* Drop all partitions now that the disk is marked dead.
*/
mutex_lock(&disk->open_mutex);
__blk_mark_disk_dead(disk);
start_drain = __blk_mark_disk_dead(disk);
queue_dying = blk_queue_dying(q);
if (start_drain)
blk_freeze_acquire_lock(q, true, queue_dying);
xa_for_each_start(&disk->part_tbl, idx, part, 1)
drop_partition(part);
mutex_unlock(&disk->open_mutex);
Expand Down Expand Up @@ -725,6 +729,9 @@ void del_gendisk(struct gendisk *disk)
if (queue_is_mq(q))
blk_mq_exit_queue(q);
}

if (start_drain)
blk_unfreeze_release_lock(q, true, queue_dying);
}
EXPORT_SYMBOL(del_gendisk);

Expand Down
6 changes: 6 additions & 0 deletions include/linux/blkdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <linux/uuid.h>
#include <linux/xarray.h>
#include <linux/file.h>
#include <linux/lockdep.h>

struct module;
struct request_queue;
Expand Down Expand Up @@ -471,6 +472,11 @@ struct request_queue {
struct xarray hctx_table;

struct percpu_ref q_usage_counter;
struct lock_class_key io_lock_cls_key;
struct lockdep_map io_lockdep_map;

struct lock_class_key q_lock_cls_key;
struct lockdep_map q_lockdep_map;

struct request *last_merge;

Expand Down

0 comments on commit a6fc2ba

Please sign in to comment.