Skip to content

Commit

Permalink
ck: jbd2: add proc entry to control whether doing buffer copy-out
Browse files Browse the repository at this point in the history
fix #32728070

When jbd2 tries to get write access to one buffer, and if this buffer
is under writeback with BH_Shadow flag, jbd2 will wait until this buffer
has been written to disk, but sometimes the time taken to wait may be
much long, especially disk capacity is almost full.

Here add a proc entry "force-copy", if its value is not zero, jbd2 will
always do meta buffer copy-cout, then we can eliminate the unnecessary
waiting time here, and reduce long tail latency for buffered-write.

I construct such test case below:

$cat offline.fio
; fio-rand-RW.job for fiotest

[global]
name=fio-rand-RW
filename=fio-rand-RW
rw=randrw
rwmixread=60
rwmixwrite=40
bs=4K
direct=0
numjobs=4
time_based=1
runtime=900

[file1]
size=60G
ioengine=sync
iodepth=16

$cat online.fio
; fio-seq-write.job for fiotest

[global]
name=fio-seq-write
filename=fio-seq-write
rw=write
bs=256K
direct=0
numjobs=1
time_based=1
runtime=60

[file1]
rate=50m
size=10G
ioengine=sync
iodepth=16

With this patch:
$cat /proc/fs/jbd2/sda5-8/force_copy
0

online fio almost always get such long tail latency:

Jobs: 1 (f=1), 0B/s-0B/s: [W(1)][100.0%][w=50.0MiB/s][w=200 IOPS][eta
00m:00s]
file1: (groupid=0, jobs=1): err= 0: pid=17855: Thu Nov 15 09:45:57 2018
  write: IOPS=200, BW=50.0MiB/s (52.4MB/s)(3000MiB/60001msec)
    clat (usec): min=135, max=4086.6k, avg=867.21, stdev=50338.22
     lat (usec): min=139, max=4086.6k, avg=871.16, stdev=50338.22
    clat percentiles (usec):
     |  1.00th=[    141],  5.00th=[    143], 10.00th=[    145],
     | 20.00th=[    147], 30.00th=[    147], 40.00th=[    149],
     | 50.00th=[    149], 60.00th=[    151], 70.00th=[    153],
     | 80.00th=[    155], 90.00th=[    159], 95.00th=[    163],
     | 99.00th=[    255], 99.50th=[    273], 99.90th=[    429],
     | 99.95th=[    441], 99.99th=[3640656]

$cat /proc/fs/jbd2/sda5-8/force_copy
1

online fio latency is much better.

Jobs: 1 (f=1), 0B/s-0B/s: [W(1)][100.0%][w=50.0MiB/s][w=200 IOPS][eta
00m:00s]
file1: (groupid=0, jobs=1): err= 0: pid=8084: Thu Nov 15 09:31:15 2018
  write: IOPS=200, BW=50.0MiB/s (52.4MB/s)(3000MiB/60001msec)
    clat (usec): min=137, max=545, avg=151.35, stdev=16.22
     lat (usec): min=140, max=548, avg=155.31, stdev=16.65
    clat percentiles (usec):
     |  1.00th=[  143],  5.00th=[  145], 10.00th=[  145], 20.00th=[
147],
     | 30.00th=[  147], 40.00th=[  147], 50.00th=[  149], 60.00th=[
149],
     | 70.00th=[  151], 80.00th=[  155], 90.00th=[  157], 95.00th=[
161],
     | 99.00th=[  239], 99.50th=[  269], 99.90th=[  420], 99.95th=[
429],
     | 99.99th=[  537]

As to the cost: because we'll always need to copy meta buffer, will
consume minor cpu time and some memory(at most 32MB for 128MB journal
size).

Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Reviewed-by: Liu Bo <bo.liu@linux.alibaba.com>
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Acked-by: Caspar Zhang <caspar@linux.alibaba.com>
Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
Acked-by: Joseph Qi <joseph.qi@linux.alibaba.com>

Conflicts:
	fs/jbd2/journal.c
  • Loading branch information
Hao Xu authored and shiloong committed Mar 12, 2021
1 parent bbbbf3a commit bed50b9
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 0 deletions.
57 changes: 57 additions & 0 deletions fs/jbd2/journal.c
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,9 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
}
kunmap_atomic(mapped_data);

/* force copy-out */
if (need_copy_out == 0 && journal->j_force_copy)
need_copy_out = 1;
/*
* Do we need to do a data copy?
*/
Expand Down Expand Up @@ -1406,6 +1409,57 @@ static const struct proc_ops jbd2_stats_proc_ops = {
.proc_release = jbd2_seq_stats_release,
};

static int jbd2_seq_force_copy_show(struct seq_file *m, void *v)
{
journal_t *journal = m->private;

seq_printf(m, "%u\n", journal->j_force_copy);
return 0;
}

static int jbd2_seq_force_copy_open(struct inode *inode, struct file *filp)
{
journal_t *journal = PDE_DATA(inode);

return single_open(filp, jbd2_seq_force_copy_show, journal);
}

/* Worst case buffer size needed for holding an integer. */
#define PROC_NUMBUF 13

static ssize_t jbd2_seq_force_copy_write(struct file *file,
const char __user *buf, size_t count, loff_t *offset)
{
struct inode *inode = file_inode(file);
journal_t *journal = PDE_DATA(inode);
char buffer[PROC_NUMBUF];
unsigned int force_copy;
int err;

memset(buffer, 0, sizeof(buffer));
if (count > sizeof(buffer) - 1)
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count)) {
err = -EFAULT;
goto out;
}

err = kstrtouint(strstrip(buffer), 0, &force_copy);
if (err)
goto out;
journal->j_force_copy = force_copy;
out:
return err < 0 ? err : count;
}

static const struct proc_ops jbd2_force_copy_proc_ops = {
.proc_open = jbd2_seq_force_copy_open,
.proc_read = seq_read,
.proc_write = jbd2_seq_force_copy_write,
.proc_lseek = seq_lseek,
.proc_release = single_release,
};

static struct proc_dir_entry *proc_jbd2_stats;

static void jbd2_stats_proc_init(journal_t *journal)
Expand All @@ -1414,6 +1468,8 @@ static void jbd2_stats_proc_init(journal_t *journal)
if (journal->j_proc_entry) {
proc_create_data("info", S_IRUGO, journal->j_proc_entry,
&jbd2_info_proc_ops, journal);
proc_create_data("force_copy", 0644, journal->j_proc_entry,
&jbd2_force_copy_proc_ops, journal);
proc_create_data("stats", 0444, journal->j_proc_entry,
&jbd2_stats_proc_ops, journal);
}
Expand All @@ -1422,6 +1478,7 @@ static void jbd2_stats_proc_init(journal_t *journal)
static void jbd2_stats_proc_exit(journal_t *journal)
{
remove_proc_entry("info", journal->j_proc_entry);
remove_proc_entry("force_copy", journal->j_proc_entry);
remove_proc_entry("stats", journal->j_proc_entry);
remove_proc_entry(journal->j_devname, proc_jbd2_stats);
}
Expand Down
5 changes: 5 additions & 0 deletions include/linux/jbd2.h
Original file line number Diff line number Diff line change
Expand Up @@ -1227,6 +1227,11 @@ struct journal_s
*/
struct transaction_stats_s j_stats;

/**
* @j_force_copy: if not zero, force to do buffer copy-out.
*/
unsigned int j_force_copy;

/**
* @j_failed_commit: Failed journal commit ID.
*/
Expand Down

0 comments on commit bed50b9

Please sign in to comment.