Skip to content

Commit

Permalink
Allow kicking a taskq to spawn more threads
Browse files Browse the repository at this point in the history
This patch add a module parameter spl_taskq_kick. When writing non-zero value
to it, it will scan all the taskq, if a taskq contains a task pending for more
than 5 seconds, it will be forced to spawn a new thread. This is use as an
emergency recovery from deadlock, not a general solution.

Signed-off-by: Chunwei Chen <david.chen@osnexus.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes openzfs#529
  • Loading branch information
Chunwei Chen authored and behlendorf committed Feb 5, 2016
1 parent d112232 commit 8f3b403
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 0 deletions.
1 change: 1 addition & 0 deletions include/sys/taskq.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ typedef struct taskq_ent {
void *tqent_arg;
taskq_t *tqent_taskq;
uintptr_t tqent_flags;
unsigned long tqent_birth;
} taskq_ent_t;

#define TQENT_FLAG_PREALLOC 0x1
Expand Down
14 changes: 14 additions & 0 deletions man/man5/spl-module-parameters.5
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,20 @@ may be overridden for non-standard configurations.
Default value: \fB/etc/hostid\fR
.RE

.sp
.ne 2
.na
\fBspl_taskq_kick\fR (uint)
.ad
.RS 12n
Kick stuck taskq to spawn threads. When writing a non-zero value to it, it will
scan all the taskqs. If any of them have a pending task more than 5 seconds old,
it will kick it to spawn more threads. This can be used if you find a rare
deadlock occurs because one or more taskqs didn't spawn a thread when it should.
.sp
Default value: \fB0\fR
.RE

.sp
.ne 2
.na
Expand Down
60 changes: 60 additions & 0 deletions module/spl/spl-taskq.c
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ task_expire(unsigned long data)
return;
}

t->tqent_birth = jiffies;
/*
* The priority list must be maintained in strict task id order
* from lowest to highest for lowest_id to be easily calculable.
Expand Down Expand Up @@ -583,6 +584,7 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
t->tqent_timer.data = 0;
t->tqent_timer.function = NULL;
t->tqent_timer.expires = 0;
t->tqent_birth = jiffies;

ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));

Expand Down Expand Up @@ -682,6 +684,7 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
t->tqent_func = func;
t->tqent_arg = arg;
t->tqent_taskq = tq;
t->tqent_birth = jiffies;

spin_unlock(&t->tqent_lock);

Expand Down Expand Up @@ -1134,6 +1137,63 @@ taskq_destroy(taskq_t *tq)
}
EXPORT_SYMBOL(taskq_destroy);


static unsigned int spl_taskq_kick = 0;

/*
* 2.6.36 API Change
* module_param_cb is introduced to take kernel_param_ops and
* module_param_call is marked as obsolete. Also set and get operations
* were changed to take a 'const struct kernel_param *'.
*/
static int
#ifdef module_param_cb
param_set_taskq_kick(const char *val, const struct kernel_param *kp)
#else
param_set_taskq_kick(const char *val, struct kernel_param *kp)
#endif
{
int ret;
taskq_t *tq;
taskq_ent_t *t;
unsigned long flags;

ret = param_set_uint(val, kp);
if (ret < 0 || !spl_taskq_kick)
return (ret);
/* reset value */
spl_taskq_kick = 0;

down_read(&tq_list_sem);
list_for_each_entry(tq, &tq_list, tq_taskqs) {
spin_lock_irqsave_nested(&tq->tq_lock, flags,
tq->tq_lock_class);
/* Check if the first pending is older than 5 seconds */
t = taskq_next_ent(tq);
if (t && time_after(jiffies, t->tqent_birth + 5*HZ)) {
(void) taskq_thread_spawn(tq);
printk(KERN_INFO "spl: Kicked taskq %s/%d\n",
tq->tq_name, tq->tq_instance);
}
spin_unlock_irqrestore(&tq->tq_lock, flags);
}
up_read(&tq_list_sem);
return (ret);
}

#ifdef module_param_cb
static const struct kernel_param_ops param_ops_taskq_kick = {
.set = param_set_taskq_kick,
.get = param_get_uint,
};
module_param_cb(spl_taskq_kick, &param_ops_taskq_kick, &spl_taskq_kick, 0644);
#else
module_param_call(spl_taskq_kick, param_set_taskq_kick, param_get_uint,
&spl_taskq_kick, 0644);
#endif
MODULE_PARM_DESC(spl_taskq_kick,
"Write nonzero to kick stuck taskqs to spawn more threads");

int
spl_taskq_init(void)
{
Expand Down

0 comments on commit 8f3b403

Please sign in to comment.