Skip to content

Commit

Permalink
Add 2 Module Parameters Regarding Log Size Limit
Browse files Browse the repository at this point in the history
zfs_wrlog_data_max
The upper limit of TX_WRITE log data. Once it is reached,
write operations are blocked until the log data is cleared out
after a txg sync. Only TX_WRITE log records with WR_COPIED
or WR_NEED_COPY are counted.
This defaults to the value of zfs_dirty_data_max.

zfs_wrlog_data_sync_percent
The minimum amount of TX_WRITE log data (as a percentage
of zfs_wrlog_data_max) that triggers a txg sync.

Signed-off-by: jxdking <lostking2008@hotmail.com>
  • Loading branch information
jxdking committed Apr 11, 2021
1 parent e778b04 commit 6003160
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 2 deletions.
8 changes: 8 additions & 0 deletions include/sys/dsl_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ struct dsl_deadlist;

extern unsigned long zfs_dirty_data_max;
extern unsigned long zfs_dirty_data_max_max;
extern unsigned long zfs_wrlog_data_max;
extern int zfs_wrlog_data_sync_percent;
extern int zfs_dirty_data_sync_percent;
extern int zfs_dirty_data_max_percent;
extern int zfs_dirty_data_max_max_percent;
Expand Down Expand Up @@ -119,6 +121,11 @@ typedef struct dsl_pool {
uint64_t dp_mos_compressed_delta;
uint64_t dp_mos_uncompressed_delta;

/* Write-log accounting; the counters below are protected by dp_wrlog_lock */
kmutex_t dp_wrlog_lock;
uint64_t dp_wrlog_pertxg[TXG_SIZE];
uint64_t dp_wrlog_total;

/*
* Time of most recently scheduled (furthest in the future)
* wakeup for delayed transactions.
Expand Down Expand Up @@ -158,6 +165,7 @@ int dsl_pool_sync_context(dsl_pool_t *dp);
uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, zfs_space_check_t slop_policy);
uint64_t dsl_pool_unreserved_space(dsl_pool_t *dp,
zfs_space_check_t slop_policy);
void dsl_pool_wrlog_delay(int64_t resid, dsl_pool_t *dp, uint64_t txg);
void dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
void dsl_pool_undirty_space(dsl_pool_t *dp, int64_t space, uint64_t txg);
void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp);
Expand Down
4 changes: 4 additions & 0 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -7706,6 +7706,10 @@ arc_init(void)
zfs_dirty_data_max = MIN(zfs_dirty_data_max,
zfs_dirty_data_max_max);
}

if (zfs_wrlog_data_max == 0) {
zfs_wrlog_data_max = zfs_dirty_data_max;
}
}

void
Expand Down
68 changes: 68 additions & 0 deletions module/zfs/dsl_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,20 @@ unsigned long zfs_dirty_data_max_max = 0;
int zfs_dirty_data_max_percent = 10;
int zfs_dirty_data_max_max_percent = 25;

/*
 * zfs_wrlog_data_max: the upper limit of outstanding TX_WRITE log data.
 * Once the accounted total exceeds this limit, write operations are
 * blocked (see dsl_pool_wrlog_delay()) until log data is cleared out
 * after a txg sync. Only TX_WRITE log records in the WR_COPIED or
 * WR_NEED_COPY state are counted.
 * The initial value of 0 means "not set"; arc_init() replaces it with
 * the value of zfs_dirty_data_max.
 *
 * zfs_wrlog_data_sync_percent: the minimum amount of TX_WRITE log data
 * (as a percentage of zfs_wrlog_data_max) at which a txg is pushed.
 */
unsigned long zfs_wrlog_data_max = 0;
int zfs_wrlog_data_sync_percent = 30;


/*
* If there's at least this much dirty data (as a percentage of
* zfs_dirty_data_max), push out a txg. This should be less than
Expand Down Expand Up @@ -218,6 +232,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT);

mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&dp->dp_wrlog_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);

dp->dp_zrele_taskq = taskq_create("z_zrele", 100, defclsyspri,
Expand Down Expand Up @@ -415,6 +430,7 @@ dsl_pool_close(dsl_pool_t *dp)

rrw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock);
mutex_destroy(&dp->dp_wrlog_lock);
cv_destroy(&dp->dp_spaceavail_cv);
taskq_destroy(dp->dp_unlinked_drain_taskq);
taskq_destroy(dp->dp_zrele_taskq);
Expand Down Expand Up @@ -593,6 +609,49 @@ dsl_pool_dirty_delta(dsl_pool_t *dp, int64_t delta)
cv_signal(&dp->dp_spaceavail_cv);
}

/*
 * Account for `size` units of TX_WRITE log data generated in `txg` and
 * throttle the caller when too much log data is outstanding.
 *
 * The amount is added to both the pool-wide total and the per-txg bucket
 * under dp_wrlog_lock. The throttling decisions are based on the total as
 * it was *before* this call's contribution was added:
 *
 *  - if it was above zfs_wrlog_data_max, wait for txg - 1 to be synced;
 *  - if it was at or above zfs_wrlog_data_sync_percent percent of
 *    zfs_wrlog_data_max, kick a txg.
 *
 * size: amount of log data to account for.
 * dp:   pool whose write-log counters are updated.
 * txg:  transaction group the log data belongs to.
 */
void
dsl_pool_wrlog_delay(int64_t size, dsl_pool_t *dp, uint64_t txg)
{
	/*
	 * Read the tunable once so both checks below use the same limit
	 * even if it is changed concurrently.
	 */
	const uint64_t wrlog_max = zfs_wrlog_data_max;
	uint64_t last_total;
	uint64_t sync_min;

	mutex_enter(&dp->dp_wrlog_lock);
	last_total = dp->dp_wrlog_total;
	dp->dp_wrlog_total += size;
	dp->dp_wrlog_pertxg[txg & TXG_MASK] += size;
	mutex_exit(&dp->dp_wrlog_lock);

	if (last_total > wrlog_max) {
		/* %llu needs an unsigned long long; uint64_t may differ. */
		dprintf("write log total exceeds zfs_wrlog_data_max. "
		    "dp_wrlog_total: %llu", (unsigned long long)last_total);

		/*
		 * Current txg need to stay open to process current
		 * write transaction. We only can wait until txg - 1
		 * is synced.
		 */
		if (txg > 2) {
			txg_wait_synced(dp, txg - 1);
		}
	}

	/*
	 * Do the percentage arithmetic in 64 bits: with a 32-bit
	 * unsigned long the product percent * max would wrap for
	 * limits above roughly 4 GiB / percent.
	 */
	sync_min = wrlog_max * zfs_wrlog_data_sync_percent / 100;
	if (last_total >= sync_min) {
		txg_kick(dp);
	}
}

/*
 * Drop the write-log accounting of `txg`: subtract that txg's bucket from
 * the pool-wide total and reset the bucket to zero. Called from
 * dsl_pool_sync_done().
 *
 * dp:  pool whose write-log counters are updated.
 * txg: transaction group whose bucket is cleared.
 */
static void
dsl_pool_wrlog_clear(dsl_pool_t *dp, uint64_t txg)
{
	mutex_enter(&dp->dp_wrlog_lock);
	dp->dp_wrlog_total -= dp->dp_wrlog_pertxg[txg & TXG_MASK];
	dp->dp_wrlog_pertxg[txg & TXG_MASK] = 0;

	/*
	 * Report the new total while still holding dp_wrlog_lock so the
	 * value logged is the one this call produced, not a concurrent
	 * writer's update. The casts make the arguments match %llu.
	 */
	dprintf("write log total cleared for txg: %llu, "
	    "dp_wrlog_total: %llu", (unsigned long long)txg,
	    (unsigned long long)dp->dp_wrlog_total);
	mutex_exit(&dp->dp_wrlog_lock);
}

#ifdef ZFS_DEBUG
static boolean_t
dsl_early_sync_task_verify(dsl_pool_t *dp, uint64_t txg)
Expand Down Expand Up @@ -817,6 +876,9 @@ dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg)
ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg));
dmu_buf_rele(ds->ds_dbuf, zilog);
}

dsl_pool_wrlog_clear(dp, txg);

ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg));
}

Expand Down Expand Up @@ -1393,6 +1455,12 @@ ZFS_MODULE_PARAM(zfs, zfs_, delay_min_dirty_percent, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max, ULONG, ZMOD_RW,
"Determines the dirty space limit");

/*
 * Write-log throttle tunables, both registered with ZMOD_RW.
 * zfs_wrlog_data_max is the limit checked by dsl_pool_wrlog_delay();
 * zfs_wrlog_data_sync_percent is the percentage of that limit at which
 * dsl_pool_wrlog_delay() kicks a txg.
 */
ZFS_MODULE_PARAM(zfs, zfs_, wrlog_data_max, ULONG, ZMOD_RW,
"The write log limit");

ZFS_MODULE_PARAM(zfs, zfs_, wrlog_data_sync_percent, INT, ZMOD_RW,
"The data txg sync threshold as a percentage of zfs_wrlog_data_max");

/* zfs_dirty_data_max_max only applied at module load in arc_init(). */
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max, ULONG, ZMOD_RD,
"zfs_dirty_data_max upper bound in bytes");
Expand Down
7 changes: 7 additions & 0 deletions module/zfs/zfs_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
#include <sys/spa.h>
#include <sys/zfs_fuid.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_pool.h>


/*
* These zfs_log_* functions must be called within a dmu tx, in one
Expand Down Expand Up @@ -541,6 +543,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
itx_wr_state_t write_state;
uintptr_t fsync_cnt;
uint64_t gen = 0;
ssize_t size = resid;

if (zil_replaying(zilog, tx) || zp->z_unlinked ||
zfs_xattr_owner_unlinked(zp)) {
Expand Down Expand Up @@ -626,6 +629,10 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
off += len;
resid -= len;
}

if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
dsl_pool_wrlog_delay(size, zilog->zl_dmu_pool, tx->tx_txg);
}
}

/*
Expand Down
8 changes: 6 additions & 2 deletions module/zfs/zvol.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,8 @@
#include <sys/zfs_rlock.h>
#include <sys/spa_impl.h>
#include <sys/zvol.h>

#include <sys/zvol_impl.h>

#include <sys/dsl_pool.h>

unsigned int zvol_inhibit_dev = 0;
unsigned int zvol_volmode = ZFS_VOLMODE_GEOM;
Expand Down Expand Up @@ -579,6 +578,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
uint32_t blocksize = zv->zv_volblocksize;
zilog_t *zilog = zv->zv_zilog;
itx_wr_state_t write_state;
uint64_t sz = size;

if (zil_replaying(zilog, tx))
return;
Expand Down Expand Up @@ -630,6 +630,10 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
offset += len;
size -= len;
}

if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
dsl_pool_wrlog_delay(sz, zilog->zl_dmu_pool, tx->tx_txg);
}
}

/*
Expand Down

0 comments on commit 6003160

Please sign in to comment.