diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 458e1d38577db1..004cc3cc6123b0 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -287,7 +287,8 @@ do { \ break; \ \ mutex_unlock(&(ca)->set->bucket_lock); \ - if (kthread_should_stop()) { \ + if (kthread_should_stop() || \ + test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \ set_current_state(TASK_RUNNING); \ return 0; \ } \ diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index b5ddb848cd31ef..8a0327581d62fa 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -188,6 +188,7 @@ #include #include #include +#include #include "bset.h" #include "util.h" @@ -475,10 +476,15 @@ struct gc_stat { * * CACHE_SET_RUNNING means all cache devices have been registered and journal * replay is complete. + * + * CACHE_SET_IO_DISABLE is set when bcache is stopping the whold cache set, all + * external and internal I/O should be denied when this flag is set. + * */ #define CACHE_SET_UNREGISTERING 0 #define CACHE_SET_STOPPING 1 #define CACHE_SET_RUNNING 2 +#define CACHE_SET_IO_DISABLE 3 struct cache_set { struct closure cl; @@ -868,6 +874,33 @@ static inline void wake_up_allocators(struct cache_set *c) wake_up_process(ca->alloc_thread); } +static inline void closure_bio_submit(struct cache_set *c, + struct bio *bio, + struct closure *cl) +{ + closure_get(cl); + if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) { + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); + return; + } + generic_make_request(bio); +} + +/* + * Prevent the kthread exits directly, and make sure when kthread_stop() + * is called to stop a kthread, it is still alive. If a kthread might be + * stopped by CACHE_SET_IO_DISABLE bit set, wait_for_kthread_stop() is + * necessary before the kthread returns. + */ +static inline void wait_for_kthread_stop(void) +{ + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } +} + /* Forward declarations */ void bch_count_io_errors(struct cache *, blk_status_t, int, const char *); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index fad9fe8817eb1a..39cc8a549091e6 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1744,6 +1744,7 @@ static void bch_btree_gc(struct cache_set *c) btree_gc_start(c); + /* if CACHE_SET_IO_DISABLE set, gc thread should stop too */ do { ret = btree_root(gc_root, c, &op, &writes, &stats); closure_sync(&writes); @@ -1751,7 +1752,7 @@ static void bch_btree_gc(struct cache_set *c) if (ret && ret != -EAGAIN) pr_warn("gc failed!"); - } while (ret); + } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags)); bch_btree_gc_finish(c); wake_up_allocators(c); @@ -1789,15 +1790,19 @@ static int bch_gc_thread(void *arg) while (1) { wait_event_interruptible(c->gc_wait, - kthread_should_stop() || gc_should_run(c)); + kthread_should_stop() || + test_bit(CACHE_SET_IO_DISABLE, &c->flags) || + gc_should_run(c)); - if (kthread_should_stop()) + if (kthread_should_stop() || + test_bit(CACHE_SET_IO_DISABLE, &c->flags)) break; set_gc_sectors(c); bch_btree_gc(c); } + wait_for_kthread_stop(); return 0; } diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index a783c5a41ff166..8013ecbcdbda1f 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c) bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev); b->submit_time_us = local_clock_us(); - closure_bio_submit(bio, bio->bi_private); + closure_bio_submit(c, bio, bio->bi_private); } void bch_submit_bbio(struct bio *bio, struct cache_set *c, diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 1b736b86073999..c94085f400a4d3 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -62,7 +62,7 @@ reread: left = ca->sb.bucket_size - offset; bio_set_op_attrs(bio, REQ_OP_READ, 0); bch_bio_map(bio, data); - closure_bio_submit(bio, &cl); + closure_bio_submit(ca->set, bio, &cl); closure_sync(&cl); /* This function could be simpler now since we no longer write @@ -674,7 +674,7 @@ static void journal_write_unlocked(struct closure *cl) spin_unlock(&c->journal.lock); while ((bio = bio_list_pop(&list))) - closure_bio_submit(bio, cl); + closure_bio_submit(c, bio, cl); continue_at(cl, journal_write_done, NULL); } diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 6422846b546ed2..7aca308bee5b43 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -747,7 +747,7 @@ static void cached_dev_read_error(struct closure *cl) /* XXX: invalidate cache */ - closure_bio_submit(bio, cl); + closure_bio_submit(s->iop.c, bio, cl); } continue_at(cl, cached_dev_cache_miss_done, NULL); @@ -872,7 +872,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, s->cache_miss = miss; s->iop.bio = cache_bio; bio_get(cache_bio); - closure_bio_submit(cache_bio, &s->cl); + closure_bio_submit(s->iop.c, cache_bio, &s->cl); return ret; out_put: @@ -880,7 +880,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, out_submit: miss->bi_end_io = request_endio; miss->bi_private = &s->cl; - closure_bio_submit(miss, &s->cl); + closure_bio_submit(s->iop.c, miss, &s->cl); return ret; } @@ -945,7 +945,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) if ((bio_op(bio) != REQ_OP_DISCARD) || blk_queue_discard(bdev_get_queue(dc->bdev))) - closure_bio_submit(bio, cl); + closure_bio_submit(s->iop.c, bio, cl); } else if (s->iop.writeback) { bch_writeback_add(dc); s->iop.bio = bio; @@ -960,12 +960,12 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) flush->bi_private = cl; flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; - closure_bio_submit(flush, cl); + closure_bio_submit(s->iop.c, flush, cl); } } else { s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split); - closure_bio_submit(bio, cl); + closure_bio_submit(s->iop.c, bio, cl); } closure_call(&s->iop.cl, bch_data_insert, NULL, cl); @@ -981,7 +981,7 @@ static void cached_dev_nodata(struct closure *cl) bch_journal_meta(s->iop.c, cl); /* If it's a flush, we send the flush to the backing device too */ - closure_bio_submit(bio, cl); + closure_bio_submit(s->iop.c, bio, cl); continue_at(cl, cached_dev_bio_complete, NULL); } @@ -996,6 +996,12 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q, struct cached_dev *dc = container_of(d, struct cached_dev, disk); int rw = bio_data_dir(bio); + if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) { + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); + return BLK_QC_T_NONE; + } + atomic_set(&dc->backing_idle, 0); generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); @@ -1112,6 +1118,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q, struct bcache_device *d = bio->bi_disk->private_data; int rw = bio_data_dir(bio); + if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) { + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); + return BLK_QC_T_NONE; + } + generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0); s = search_alloc(bio, d); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index e5be599338c5e9..dda4ccdd1360fd 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -521,7 +521,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op, bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags); bch_bio_map(bio, ca->disk_buckets); - closure_bio_submit(bio, &ca->prio); + closure_bio_submit(ca->set, bio, &ca->prio); closure_sync(cl); } @@ -1350,6 +1350,9 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) test_bit(CACHE_SET_STOPPING, &c->flags)) return false; + if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) + pr_warn("CACHE_SET_IO_DISABLE already set"); + /* XXX: we can be called from atomic context acquire_console_sem(); */ @@ -1585,6 +1588,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) c->congested_read_threshold_us = 2000; c->congested_write_threshold_us = 20000; c->error_limit = DEFAULT_IO_ERROR_LIMIT; + WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags)); return c; err: diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 55673508628f85..a3a45de5626d91 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -95,6 +95,7 @@ read_attribute(partial_stripes_expensive); rw_attribute(synchronous); rw_attribute(journal_delay_ms); +rw_attribute(io_disable); rw_attribute(discard); rw_attribute(running); rw_attribute(label); @@ -591,6 +592,8 @@ SHOW(__bch_cache_set) sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite); sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled); sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); + sysfs_printf(io_disable, "%i", + test_bit(CACHE_SET_IO_DISABLE, &c->flags)); if (attr == &sysfs_bset_tree_stats) return bch_bset_print_stats(c, buf); @@ -680,6 +683,20 @@ STORE(__bch_cache_set) if (attr == &sysfs_io_error_halflife) c->error_decay = strtoul_or_return(buf) / 88; + if (attr == &sysfs_io_disable) { + int v = strtoul_or_return(buf); + + if (v) { + if (test_and_set_bit(CACHE_SET_IO_DISABLE, + &c->flags)) + pr_warn("CACHE_SET_IO_DISABLE already set"); + } else { + if (!test_and_clear_bit(CACHE_SET_IO_DISABLE, + &c->flags)) + pr_warn("CACHE_SET_IO_DISABLE already cleared"); + } + } + sysfs_strtoul(journal_delay_ms, c->journal_delay_ms); sysfs_strtoul(verify, c->verify); sysfs_strtoul(key_merging_disabled, c->key_merging_disabled); @@ -765,6 +782,7 @@ static struct attribute *bch_cache_set_internal_files[] = { &sysfs_gc_always_rewrite, &sysfs_btree_shrinker_disabled, &sysfs_copy_gc_enabled, + &sysfs_io_disable, NULL }; KTYPE(bch_cache_set_internal); diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index a6763db7f061b6..268024529edd66 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -567,12 +567,6 @@ static inline sector_t bdev_sectors(struct block_device *bdev) return bdev->bd_inode->i_size >> 9; } -#define closure_bio_submit(bio, cl) \ -do { \ - closure_get(cl); \ - generic_make_request(bio); \ -} while (0) - uint64_t bch_crc64_update(uint64_t, const void *, size_t); uint64_t bch_crc64(const void *, size_t); diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 8f98ef1038d3ba..70092ada68e68a 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -114,6 +114,7 @@ static void update_writeback_rate(struct work_struct *work) struct cached_dev *dc = container_of(to_delayed_work(work), struct cached_dev, writeback_rate_update); + struct cache_set *c = dc->disk.c; /* * should check BCACHE_DEV_RATE_DW_RUNNING before calling @@ -123,7 +124,12 @@ static void update_writeback_rate(struct work_struct *work) /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */ smp_mb(); - if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) { + /* + * CACHE_SET_IO_DISABLE might be set via sysfs interface, + * check it here too. + */ + if (!test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) || + test_bit(CACHE_SET_IO_DISABLE, &c->flags)) { clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags); /* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */ smp_mb(); @@ -138,7 +144,12 @@ static void update_writeback_rate(struct work_struct *work) up_read(&dc->writeback_lock); - if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags)) { + /* + * CACHE_SET_IO_DISABLE might be set via sysfs interface, + * check it here too. + */ + if (test_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags) && + !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) { schedule_delayed_work(&dc->writeback_rate_update, dc->writeback_rate_update_seconds * HZ); } @@ -278,7 +289,7 @@ static void write_dirty(struct closure *cl) bio_set_dev(&io->bio, io->dc->bdev); io->bio.bi_end_io = dirty_endio; - closure_bio_submit(&io->bio, cl); + closure_bio_submit(io->dc->disk.c, &io->bio, cl); } atomic_set(&dc->writeback_sequence_next, next_sequence); @@ -304,7 +315,7 @@ static void read_dirty_submit(struct closure *cl) { struct dirty_io *io = container_of(cl, struct dirty_io, cl); - closure_bio_submit(&io->bio, cl); + closure_bio_submit(io->dc->disk.c, &io->bio, cl); continue_at(cl, write_dirty, io->dc->writeback_write_wq); } @@ -330,7 +341,9 @@ static void read_dirty(struct cached_dev *dc) next = bch_keybuf_next(&dc->writeback_keys); - while (!kthread_should_stop() && next) { + while (!kthread_should_stop() && + !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) && + next) { size = 0; nk = 0; @@ -427,7 +440,9 @@ static void read_dirty(struct cached_dev *dc) } } - while (!kthread_should_stop() && delay) { + while (!kthread_should_stop() && + !test_bit(CACHE_SET_IO_DISABLE, &dc->disk.c->flags) && + delay) { schedule_timeout_interruptible(delay); delay = writeback_delay(dc, 0); } @@ -583,11 +598,13 @@ static bool refill_dirty(struct cached_dev *dc) static int bch_writeback_thread(void *arg) { struct cached_dev *dc = arg; + struct cache_set *c = dc->disk.c; bool searched_full_index; bch_ratelimit_reset(&dc->writeback_rate); - while (!kthread_should_stop()) { + while (!kthread_should_stop() && + !test_bit(CACHE_SET_IO_DISABLE, &c->flags)) { down_write(&dc->writeback_lock); set_current_state(TASK_INTERRUPTIBLE); /* @@ -601,7 +618,8 @@ static int bch_writeback_thread(void *arg) (!atomic_read(&dc->has_dirty) || !dc->writeback_running)) { up_write(&dc->writeback_lock); - if (kthread_should_stop()) { + if (kthread_should_stop() || + test_bit(CACHE_SET_IO_DISABLE, &c->flags)) { set_current_state(TASK_RUNNING); break; } @@ -637,6 +655,7 @@ static int bch_writeback_thread(void *arg) while (delay && !kthread_should_stop() && + !test_bit(CACHE_SET_IO_DISABLE, &c->flags) && !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) delay = schedule_timeout_interruptible(delay); @@ -644,8 +663,8 @@ static int bch_writeback_thread(void *arg) } } - dc->writeback_thread = NULL; cached_dev_put(dc); + wait_for_kthread_stop(); return 0; }