Skip to content
This repository was archived by the owner on Nov 7, 2019. It is now read-only.

Commit ff5177e

Browse files
alek-pahrens
authored and committed
6569 large file delete can starve out write ops
Reviewed by: Matt Ahrens <mahrens@delphix.com> Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com> Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com> Approved by: Dan McDonald <danmcd@omniti.com>
1 parent 7748149 commit ff5177e

File tree

3 files changed

+64
-6
lines changed

3 files changed

+64
-6
lines changed

usr/src/uts/common/fs/zfs/dmu.c

+51 -6
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,14 @@
5656
*/
5757
int zfs_nopwrite_enabled = 1;
5858

59+
/*
60+
* Tunable to control percentage of dirtied blocks from frees in one TXG.
61+
* After this threshold is crossed, additional dirty blocks from frees
62+
* wait until the next TXG.
63+
* A value of zero will disable this throttle.
64+
*/
65+
uint32_t zfs_per_txg_dirty_frees_percent = 30;
66+
5967
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
6068
{ DMU_BSWAP_UINT8, TRUE, "unallocated" },
6169
{ DMU_BSWAP_ZAP, TRUE, "object directory" },
@@ -717,15 +725,25 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
717725
{
718726
uint64_t object_size = (dn->dn_maxblkid + 1) * dn->dn_datablksz;
719727
int err;
728+
uint64_t dirty_frees_threshold;
729+
dsl_pool_t *dp = dmu_objset_pool(os);
720730

721731
if (offset >= object_size)
722732
return (0);
723733

734+
if (zfs_per_txg_dirty_frees_percent <= 100)
735+
dirty_frees_threshold =
736+
zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100;
737+
else
738+
dirty_frees_threshold = zfs_dirty_data_max / 4;
739+
724740
if (length == DMU_OBJECT_END || offset + length > object_size)
725741
length = object_size - offset;
726742

727743
while (length != 0) {
728-
uint64_t chunk_end, chunk_begin;
744+
uint64_t chunk_end, chunk_begin, chunk_len;
745+
uint64_t long_free_dirty_all_txgs = 0;
746+
dmu_tx_t *tx;
729747

730748
if (dmu_objset_zfs_unmounting(dn->dn_objset))
731749
return (SET_ERROR(EINTR));
@@ -739,9 +757,28 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
739757
ASSERT3U(chunk_begin, >=, offset);
740758
ASSERT3U(chunk_begin, <=, chunk_end);
741759

742-
dmu_tx_t *tx = dmu_tx_create(os);
743-
dmu_tx_hold_free(tx, dn->dn_object,
744-
chunk_begin, chunk_end - chunk_begin);
760+
chunk_len = chunk_end - chunk_begin;
761+
762+
mutex_enter(&dp->dp_lock);
763+
for (int t = 0; t < TXG_SIZE; t++) {
764+
long_free_dirty_all_txgs +=
765+
dp->dp_long_free_dirty_pertxg[t];
766+
}
767+
mutex_exit(&dp->dp_lock);
768+
769+
/*
770+
* To avoid filling up a TXG with just frees wait for
771+
* the next TXG to open before freeing more chunks if
772+
* we have reached the threshold of frees
773+
*/
774+
if (dirty_frees_threshold != 0 &&
775+
long_free_dirty_all_txgs >= dirty_frees_threshold) {
776+
txg_wait_open(dp, 0);
777+
continue;
778+
}
779+
780+
tx = dmu_tx_create(os);
781+
dmu_tx_hold_free(tx, dn->dn_object, chunk_begin, chunk_len);
745782

746783
/*
747784
* Mark this transaction as typically resulting in a net
@@ -753,10 +790,18 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
753790
dmu_tx_abort(tx);
754791
return (err);
755792
}
756-
dnode_free_range(dn, chunk_begin, chunk_end - chunk_begin, tx);
793+
794+
mutex_enter(&dp->dp_lock);
795+
dp->dp_long_free_dirty_pertxg[dmu_tx_get_txg(tx) & TXG_MASK] +=
796+
chunk_len;
797+
mutex_exit(&dp->dp_lock);
798+
DTRACE_PROBE3(free__long__range,
799+
uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len,
800+
uint64_t, dmu_tx_get_txg(tx));
801+
dnode_free_range(dn, chunk_begin, chunk_len, tx);
757802
dmu_tx_commit(tx);
758803

759-
length -= chunk_end - chunk_begin;
804+
length -= chunk_len;
760805
}
761806
return (0);
762807
}

usr/src/uts/common/fs/zfs/dsl_pool.c

+11
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
* Copyright (c) 2013 Steven Hartland. All rights reserved.
2525
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
2626
* Copyright (c) 2014 Integros [integros.com]
27+
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
2728
*/
2829

2930
#include <sys/dsl_pool.h>
@@ -492,6 +493,16 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
492493
*/
493494
dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg);
494495

496+
/*
497+
* Update the long range free counter after
498+
* we're done syncing user data
499+
*/
500+
mutex_enter(&dp->dp_lock);
501+
ASSERT(spa_sync_pass(dp->dp_spa) == 1 ||
502+
dp->dp_long_free_dirty_pertxg[txg & TXG_MASK] == 0);
503+
dp->dp_long_free_dirty_pertxg[txg & TXG_MASK] = 0;
504+
mutex_exit(&dp->dp_lock);
505+
495506
/*
496507
* After the data blocks have been written (ensured by the zio_wait()
497508
* above), update the user/group space accounting.

usr/src/uts/common/fs/zfs/sys/dsl_pool.h

+2
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@
2121
/*
2222
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
2323
* Copyright (c) 2013 by Delphix. All rights reserved.
24+
* Copyright 2016 Nexenta Systems, Inc. All rights reserved.
2425
*/
2526

2627
#ifndef _SYS_DSL_POOL_H
@@ -103,6 +104,7 @@ typedef struct dsl_pool {
103104
kcondvar_t dp_spaceavail_cv;
104105
uint64_t dp_dirty_pertxg[TXG_SIZE];
105106
uint64_t dp_dirty_total;
107+
uint64_t dp_long_free_dirty_pertxg[TXG_SIZE];
106108
uint64_t dp_mos_used_delta;
107109
uint64_t dp_mos_compressed_delta;
108110
uint64_t dp_mos_uncompressed_delta;

0 commit comments

Comments
 (0)