bcache: add CACHE_SET_IO_DISABLE to struct cache_set flags
When too many I/Os fail on the cache device, bch_cache_set_error() is called
in the error handling code path to retire the whole problematic cache set. If
new I/O requests keep coming in and taking the refcount dc->count, the cache
set cannot be retired immediately, which is a problem.

Furthermore, several kernel threads and self-armed kernel workers may still
be running after bch_cache_set_error() is called. It takes quite a while for
them to stop, or they may not stop at all. They also prevent the cache set
from being retired.

The solution in this patch is to add a per-cache-set flag that disables I/O
requests on the cache and all attached backing devices. New incoming I/O
requests can then be rejected in *_make_request() before taking the refcount,
and kernel threads and self-armed kernel workers can stop very quickly once
the CACHE_SET_IO_DISABLE flag bit is set.
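
For example, the new check added to cached_dev_make_request() (the
flash_dev_make_request() hunk below is identical):

  if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
          /* Cache set is retiring: fail the bio without queueing it */
          bio->bi_status = BLK_STS_IOERR;
          bio_endio(bio);
          return BLK_QC_T_NONE;
  }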

Because bcache also does internal I/O for writeback, garbage collection,
bucket allocation, and journaling, this kind of I/O should be disabled after
bch_cache_set_error() is called as well. So closure_bio_submit() is modified
to check whether CACHE_SET_IO_DISABLE is set in cache_set->flags. If it is,
closure_bio_submit() sets bio->bi_status to BLK_STS_IOERR and returns, and
generic_make_request() is never called.
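
With this change, closure_bio_submit() moves from a macro in util.h to an
inline function in bcache.h that takes the cache set as a parameter:

  static inline void closure_bio_submit(struct cache_set *c,
                                        struct bio *bio,
                                        struct closure *cl)
  {
          closure_get(cl);
          if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
                  /* Fail internal I/O once the cache set is disabled */
                  bio->bi_status = BLK_STS_IOERR;
                  bio_endio(bio);
                  return;
          }
          generic_make_request(bio);
  }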

A sysfs interface is also added to set or clear the CACHE_SET_IO_DISABLE bit
in cache_set->flags, to disable or enable cache set I/O for debugging. It is
helpful for triggering more corner-case issues on a failed cache device.
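
For example, to exercise those paths by hand (a hypothetical session; the
actual cache set UUID directory under /sys/fs/bcache/ will differ):

  # reject all new I/O on the cache set and its backing devices
  echo 1 > /sys/fs/bcache/<cset-uuid>/internal/io_disable
  # allow I/O again
  echo 0 > /sys/fs/bcache/<cset-uuid>/internal/io_disable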

Changelog
v4, add wait_for_kthread_stop(), and call it before the writeback and gc
    kernel threads exit.
v3, change CACHE_SET_IO_DISABLE from 4 to 3, since it is a bit index.
    remove the "bcache: " prefix when printing out kernel messages.
v2, more changes suggested in previous reviews,
- Use CACHE_SET_IO_DISABLE of cache_set->flags, suggested by Junhui.
- Check CACHE_SET_IO_DISABLE in bch_btree_gc() to stop its while-loop; this
  was reported in, and inspired by, the original patch from Pavel Vazharov.
v1, initial version.

Signed-off-by: Coly Li <colyli@suse.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Michael Lyle <mlyle@lyle.org>
Cc: Junhui Tang <tang.junhui@zte.com.cn>
Cc: Michael Lyle <mlyle@lyle.org>
Cc: Pavel Vazharov <freakpv@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Coly Li authored and axboe committed Mar 19, 2018
1 parent 3fd47bf commit 771f393
Showing 10 changed files with 116 additions and 30 deletions.
3 changes: 2 additions & 1 deletion drivers/md/bcache/alloc.c
@@ -287,7 +287,8 @@ do { \
break; \
\
mutex_unlock(&(ca)->set->bucket_lock); \
if (kthread_should_stop()) { \
if (kthread_should_stop() || \
test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \
set_current_state(TASK_RUNNING); \
return 0; \
} \
33 changes: 33 additions & 0 deletions drivers/md/bcache/bcache.h
@@ -188,6 +188,7 @@
#include <linux/refcount.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>

#include "bset.h"
#include "util.h"
@@ -475,10 +476,15 @@ struct gc_stat {
*
* CACHE_SET_RUNNING means all cache devices have been registered and journal
* replay is complete.
*
* CACHE_SET_IO_DISABLE is set when bcache is stopping the whole cache set; all
* external and internal I/O should be denied when this flag is set.
*
*/
#define CACHE_SET_UNREGISTERING 0
#define CACHE_SET_STOPPING 1
#define CACHE_SET_RUNNING 2
#define CACHE_SET_IO_DISABLE 3

struct cache_set {
struct closure cl;
@@ -868,6 +874,33 @@ static inline void wake_up_allocators(struct cache_set *c)
wake_up_process(ca->alloc_thread);
}

static inline void closure_bio_submit(struct cache_set *c,
struct bio *bio,
struct closure *cl)
{
closure_get(cl);
if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return;
}
generic_make_request(bio);
}

/*
* Prevent the kthread from exiting directly, and make sure it is still
* alive when kthread_stop() is called to stop it. If a kthread might be
* stopped by the CACHE_SET_IO_DISABLE bit being set, wait_for_kthread_stop()
* is necessary before the kthread returns.
*/
static inline void wait_for_kthread_stop(void)
{
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
schedule();
}
}

/* Forward declarations */

void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
11 changes: 8 additions & 3 deletions drivers/md/bcache/btree.c
@@ -1744,14 +1744,15 @@ static void bch_btree_gc(struct cache_set *c)

btree_gc_start(c);

/* if CACHE_SET_IO_DISABLE set, gc thread should stop too */
do {
ret = btree_root(gc_root, c, &op, &writes, &stats);
closure_sync(&writes);
cond_resched();

if (ret && ret != -EAGAIN)
pr_warn("gc failed!");
} while (ret);
} while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));

bch_btree_gc_finish(c);
wake_up_allocators(c);
@@ -1789,15 +1790,19 @@ static int bch_gc_thread(void *arg)

while (1) {
wait_event_interruptible(c->gc_wait,
kthread_should_stop() || gc_should_run(c));
kthread_should_stop() ||
test_bit(CACHE_SET_IO_DISABLE, &c->flags) ||
gc_should_run(c));

if (kthread_should_stop())
if (kthread_should_stop() ||
test_bit(CACHE_SET_IO_DISABLE, &c->flags))
break;

set_gc_sectors(c);
bch_btree_gc(c);
}

wait_for_kthread_stop();
return 0;
}

2 changes: 1 addition & 1 deletion drivers/md/bcache/io.c
@@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);

b->submit_time_us = local_clock_us();
closure_bio_submit(bio, bio->bi_private);
closure_bio_submit(c, bio, bio->bi_private);
}

void bch_submit_bbio(struct bio *bio, struct cache_set *c,
4 changes: 2 additions & 2 deletions drivers/md/bcache/journal.c
@@ -62,7 +62,7 @@ reread: left = ca->sb.bucket_size - offset;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
bch_bio_map(bio, data);

closure_bio_submit(bio, &cl);
closure_bio_submit(ca->set, bio, &cl);
closure_sync(&cl);

/* This function could be simpler now since we no longer write
@@ -674,7 +674,7 @@ static void journal_write_unlocked(struct closure *cl)
spin_unlock(&c->journal.lock);

while ((bio = bio_list_pop(&list)))
closure_bio_submit(bio, cl);
closure_bio_submit(c, bio, cl);

continue_at(cl, journal_write_done, NULL);
}
26 changes: 19 additions & 7 deletions drivers/md/bcache/request.c
@@ -747,7 +747,7 @@ static void cached_dev_read_error(struct closure *cl)

/* XXX: invalidate cache */

closure_bio_submit(bio, cl);
closure_bio_submit(s->iop.c, bio, cl);
}

continue_at(cl, cached_dev_cache_miss_done, NULL);
@@ -872,15 +872,15 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
s->cache_miss = miss;
s->iop.bio = cache_bio;
bio_get(cache_bio);
closure_bio_submit(cache_bio, &s->cl);
closure_bio_submit(s->iop.c, cache_bio, &s->cl);

return ret;
out_put:
bio_put(cache_bio);
out_submit:
miss->bi_end_io = request_endio;
miss->bi_private = &s->cl;
closure_bio_submit(miss, &s->cl);
closure_bio_submit(s->iop.c, miss, &s->cl);
return ret;
}

Expand Down Expand Up @@ -945,7 +945,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)

if ((bio_op(bio) != REQ_OP_DISCARD) ||
blk_queue_discard(bdev_get_queue(dc->bdev)))
closure_bio_submit(bio, cl);
closure_bio_submit(s->iop.c, bio, cl);
} else if (s->iop.writeback) {
bch_writeback_add(dc);
s->iop.bio = bio;
@@ -960,12 +960,12 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
flush->bi_private = cl;
flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;

closure_bio_submit(flush, cl);
closure_bio_submit(s->iop.c, flush, cl);
}
} else {
s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);

closure_bio_submit(bio, cl);
closure_bio_submit(s->iop.c, bio, cl);
}

closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
Expand All @@ -981,7 +981,7 @@ static void cached_dev_nodata(struct closure *cl)
bch_journal_meta(s->iop.c, cl);

/* If it's a flush, we send the flush to the backing device too */
closure_bio_submit(bio, cl);
closure_bio_submit(s->iop.c, bio, cl);

continue_at(cl, cached_dev_bio_complete, NULL);
}
@@ -996,6 +996,12 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
int rw = bio_data_dir(bio);

if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return BLK_QC_T_NONE;
}

atomic_set(&dc->backing_idle, 0);
generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);

@@ -1112,6 +1118,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
struct bcache_device *d = bio->bi_disk->private_data;
int rw = bio_data_dir(bio);

if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return BLK_QC_T_NONE;
}

generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);

s = search_alloc(bio, d);
6 changes: 5 additions & 1 deletion drivers/md/bcache/super.c
@@ -521,7 +521,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
bch_bio_map(bio, ca->disk_buckets);

closure_bio_submit(bio, &ca->prio);
closure_bio_submit(ca->set, bio, &ca->prio);
closure_sync(cl);
}

@@ -1350,6 +1350,9 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
test_bit(CACHE_SET_STOPPING, &c->flags))
return false;

if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
pr_warn("CACHE_SET_IO_DISABLE already set");

/* XXX: we can be called from atomic context
acquire_console_sem();
*/
@@ -1585,6 +1588,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->congested_read_threshold_us = 2000;
c->congested_write_threshold_us = 20000;
c->error_limit = DEFAULT_IO_ERROR_LIMIT;
WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));

return c;
err:
18 changes: 18 additions & 0 deletions drivers/md/bcache/sysfs.c
@@ -95,6 +95,7 @@ read_attribute(partial_stripes_expensive);

rw_attribute(synchronous);
rw_attribute(journal_delay_ms);
rw_attribute(io_disable);
rw_attribute(discard);
rw_attribute(running);
rw_attribute(label);
@@ -591,6 +592,8 @@ SHOW(__bch_cache_set)
sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
sysfs_printf(io_disable, "%i",
test_bit(CACHE_SET_IO_DISABLE, &c->flags));

if (attr == &sysfs_bset_tree_stats)
return bch_bset_print_stats(c, buf);
@@ -680,6 +683,20 @@ STORE(__bch_cache_set)
if (attr == &sysfs_io_error_halflife)
c->error_decay = strtoul_or_return(buf) / 88;

if (attr == &sysfs_io_disable) {
int v = strtoul_or_return(buf);

if (v) {
if (test_and_set_bit(CACHE_SET_IO_DISABLE,
&c->flags))
pr_warn("CACHE_SET_IO_DISABLE already set");
} else {
if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
&c->flags))
pr_warn("CACHE_SET_IO_DISABLE already cleared");
}
}

sysfs_strtoul(journal_delay_ms, c->journal_delay_ms);
sysfs_strtoul(verify, c->verify);
sysfs_strtoul(key_merging_disabled, c->key_merging_disabled);
@@ -765,6 +782,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
&sysfs_gc_always_rewrite,
&sysfs_btree_shrinker_disabled,
&sysfs_copy_gc_enabled,
&sysfs_io_disable,
NULL
};
KTYPE(bch_cache_set_internal);
6 changes: 0 additions & 6 deletions drivers/md/bcache/util.h
@@ -567,12 +567,6 @@ static inline sector_t bdev_sectors(struct block_device *bdev)
return bdev->bd_inode->i_size >> 9;
}

#define closure_bio_submit(bio, cl) \
do { \
closure_get(cl); \
generic_make_request(bio); \
} while (0)

uint64_t bch_crc64_update(uint64_t, const void *, size_t);
uint64_t bch_crc64(const void *, size_t);
