Skip to content

Commit

Permalink
freezer,sched: Rewrite core freezer logic
Browse files Browse the repository at this point in the history
Rewrite the core freezer to behave better wrt thawing and be simpler
in general.

By replacing PF_FROZEN with TASK_FROZEN, a special block state, it is
ensured frozen tasks stay frozen until thawed and don't randomly wake
up early, as is currently possible.

As such, it does away with PF_FROZEN and PF_FREEZER_SKIP, freeing up
two PF_flags (yay!).

Specifically; the current scheme works a little like:

	freezer_do_not_count();
	schedule();
	freezer_count();

And either the task is blocked, or it lands in try_to_freezer()
through freezer_count(). Now, when it is blocked, the freezer
considers it frozen and continues.

However, on thawing, once pm_freezing is cleared, freezer_count()
stops working, and any random/spurious wakeup will let a task run
before its time.

That is, thawing tries to thaw things in explicit order; kernel
threads and workqueues before doing bringing SMP back before userspace
etc.. However due to the above mentioned races it is entirely possible
for userspace tasks to thaw (by accident) before SMP is back.

This can be a fatal problem in asymmetric ISA architectures (eg ARMv9)
where the userspace task requires a special CPU to run.

As said; replace this with a special task state TASK_FROZEN and add
the following state transitions:

	TASK_FREEZABLE	-> TASK_FROZEN
	__TASK_STOPPED	-> TASK_FROZEN
	__TASK_TRACED	-> TASK_FROZEN

The new TASK_FREEZABLE can be set on any state part of TASK_NORMAL
(IOW. TASK_INTERRUPTIBLE and TASK_UNINTERRUPTIBLE) -- any such state
is already required to deal with spurious wakeups and the freezer
causes one such when thawing the task (since the original state is
lost).

The special __TASK_{STOPPED,TRACED} states *can* be restored since
their canonical state is in ->jobctl.

With this, frozen tasks need an explicit TASK_FROZEN wakeup and are
free of undue (early / spurious) wakeups.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/r/20220822114649.055452969@infradead.org
  • Loading branch information
Peter Zijlstra committed Sep 7, 2022
1 parent 9963e44 commit f5d39b0
Show file tree
Hide file tree
Showing 32 changed files with 210 additions and 395 deletions.
4 changes: 1 addition & 3 deletions drivers/android/binder.c
Original file line number Diff line number Diff line change
Expand Up @@ -4247,10 +4247,9 @@ static int binder_wait_for_work(struct binder_thread *thread,
struct binder_proc *proc = thread->proc;
int ret = 0;

freezer_do_not_count();
binder_inner_proc_lock(proc);
for (;;) {
prepare_to_wait(&thread->wait, &wait, TASK_INTERRUPTIBLE);
prepare_to_wait(&thread->wait, &wait, TASK_INTERRUPTIBLE|TASK_FREEZABLE);
if (binder_has_work_ilocked(thread, do_proc_work))
break;
if (do_proc_work)
Expand All @@ -4267,7 +4266,6 @@ static int binder_wait_for_work(struct binder_thread *thread,
}
finish_wait(&thread->wait, &wait);
binder_inner_proc_unlock(proc);
freezer_count();

return ret;
}
Expand Down
4 changes: 2 additions & 2 deletions drivers/media/pci/pt3/pt3.c
Original file line number Diff line number Diff line change
Expand Up @@ -445,8 +445,8 @@ static int pt3_fetch_thread(void *data)
pt3_proc_dma(adap);

delay = ktime_set(0, PT3_FETCH_DELAY * NSEC_PER_MSEC);
set_current_state(TASK_UNINTERRUPTIBLE);
freezable_schedule_hrtimeout_range(&delay,
set_current_state(TASK_UNINTERRUPTIBLE|TASK_FREEZABLE);
schedule_hrtimeout_range(&delay,
PT3_FETCH_DELAY_DELTA * NSEC_PER_MSEC,
HRTIMER_MODE_REL);
}
Expand Down
4 changes: 2 additions & 2 deletions fs/cifs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -2326,7 +2326,7 @@ cifs_invalidate_mapping(struct inode *inode)
static int
cifs_wait_bit_killable(struct wait_bit_key *key, int mode)
{
freezable_schedule_unsafe();
schedule();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
return 0;
Expand All @@ -2344,7 +2344,7 @@ cifs_revalidate_mapping(struct inode *inode)
return 0;

rc = wait_on_bit_lock_action(flags, CIFS_INO_LOCK, cifs_wait_bit_killable,
TASK_KILLABLE);
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (rc)
return rc;

Expand Down
5 changes: 3 additions & 2 deletions fs/cifs/transport.c
Original file line number Diff line number Diff line change
Expand Up @@ -757,8 +757,9 @@ wait_for_response(struct TCP_Server_Info *server, struct mid_q_entry *midQ)
{
int error;

error = wait_event_freezekillable_unsafe(server->response_q,
midQ->mid_state != MID_REQUEST_SUBMITTED);
error = wait_event_state(server->response_q,
midQ->mid_state != MID_REQUEST_SUBMITTED,
(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE));
if (error < 0)
return -ERESTARTSYS;

Expand Down
5 changes: 2 additions & 3 deletions fs/coredump.c
Original file line number Diff line number Diff line change
Expand Up @@ -402,9 +402,8 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
if (core_waiters > 0) {
struct core_thread *ptr;

freezer_do_not_count();
wait_for_completion(&core_state->startup);
freezer_count();
wait_for_completion_state(&core_state->startup,
TASK_UNINTERRUPTIBLE|TASK_FREEZABLE);
/*
* Wait for all the threads to become inactive, so that
* all the thread context (extended register state, like
Expand Down
3 changes: 2 additions & 1 deletion fs/nfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -570,7 +570,8 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
}

wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
nfs_wait_bit_killable, TASK_KILLABLE);
nfs_wait_bit_killable,
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);

lock_page(page);
mapping = page_file_mapping(page);
Expand Down
12 changes: 4 additions & 8 deletions fs/nfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,18 +72,13 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
return nfs_fileid_to_ino_t(fattr->fileid);
}

static int nfs_wait_killable(int mode)
int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
{
freezable_schedule_unsafe();
schedule();
if (signal_pending_state(mode, current))
return -ERESTARTSYS;
return 0;
}

int nfs_wait_bit_killable(struct wait_bit_key *key, int mode)
{
return nfs_wait_killable(mode);
}
EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);

/**
Expand Down Expand Up @@ -1331,7 +1326,8 @@ int nfs_clear_invalid_mapping(struct address_space *mapping)
*/
for (;;) {
ret = wait_on_bit_action(bitlock, NFS_INO_INVALIDATING,
nfs_wait_bit_killable, TASK_KILLABLE);
nfs_wait_bit_killable,
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (ret)
goto out;
spin_lock(&inode->i_lock);
Expand Down
3 changes: 2 additions & 1 deletion fs/nfs/nfs3proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
res = rpc_call_sync(clnt, msg, flags);
if (res != -EJUKEBOX)
break;
freezable_schedule_timeout_killable_unsafe(NFS_JUKEBOX_RETRY_TIME);
__set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
schedule_timeout(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
} while (!fatal_signal_pending(current));
return res;
Expand Down
14 changes: 7 additions & 7 deletions fs/nfs/nfs4proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -416,8 +416,8 @@ static int nfs4_delay_killable(long *timeout)
{
might_sleep();

freezable_schedule_timeout_killable_unsafe(
nfs4_update_delay(timeout));
__set_current_state(TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
schedule_timeout(nfs4_update_delay(timeout));
if (!__fatal_signal_pending(current))
return 0;
return -EINTR;
Expand All @@ -427,7 +427,8 @@ static int nfs4_delay_interruptible(long *timeout)
{
might_sleep();

freezable_schedule_timeout_interruptible_unsafe(nfs4_update_delay(timeout));
__set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE_UNSAFE);
schedule_timeout(nfs4_update_delay(timeout));
if (!signal_pending(current))
return 0;
return __fatal_signal_pending(current) ? -EINTR :-ERESTARTSYS;
Expand Down Expand Up @@ -7406,7 +7407,8 @@ nfs4_retry_setlk_simple(struct nfs4_state *state, int cmd,
status = nfs4_proc_setlk(state, cmd, request);
if ((status != -EAGAIN) || IS_SETLK(cmd))
break;
freezable_schedule_timeout_interruptible(timeout);
__set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
schedule_timeout(timeout);
timeout *= 2;
timeout = min_t(unsigned long, NFS4_LOCK_MAXTIMEOUT, timeout);
status = -ERESTARTSYS;
Expand Down Expand Up @@ -7474,10 +7476,8 @@ nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
break;

status = -ERESTARTSYS;
freezer_do_not_count();
wait_woken(&waiter.wait, TASK_INTERRUPTIBLE,
wait_woken(&waiter.wait, TASK_INTERRUPTIBLE|TASK_FREEZABLE,
NFS4_LOCK_MAXTIMEOUT);
freezer_count();
} while (!signalled());

remove_wait_queue(q, &waiter.wait);
Expand Down
3 changes: 2 additions & 1 deletion fs/nfs/nfs4state.c
Original file line number Diff line number Diff line change
Expand Up @@ -1314,7 +1314,8 @@ int nfs4_wait_clnt_recover(struct nfs_client *clp)

refcount_inc(&clp->cl_count);
res = wait_on_bit_action(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
nfs_wait_bit_killable, TASK_KILLABLE);
nfs_wait_bit_killable,
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (res)
goto out;
if (clp->cl_cons_state < 0)
Expand Down
4 changes: 2 additions & 2 deletions fs/nfs/pnfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1908,7 +1908,7 @@ static int pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo)
pnfs_layoutcommit_inode(lo->plh_inode, false);
return wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN,
nfs_wait_bit_killable,
TASK_KILLABLE);
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
}

static void nfs_layoutget_begin(struct pnfs_layout_hdr *lo)
Expand Down Expand Up @@ -3193,7 +3193,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
status = wait_on_bit_lock_action(&nfsi->flags,
NFS_INO_LAYOUTCOMMITTING,
nfs_wait_bit_killable,
TASK_KILLABLE);
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (status)
goto out;
}
Expand Down
8 changes: 4 additions & 4 deletions fs/xfs/xfs_trans_ail.c
Original file line number Diff line number Diff line change
Expand Up @@ -602,9 +602,9 @@ xfsaild(

while (1) {
if (tout && tout <= 20)
set_current_state(TASK_KILLABLE);
set_current_state(TASK_KILLABLE|TASK_FREEZABLE);
else
set_current_state(TASK_INTERRUPTIBLE);
set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);

/*
* Check kthread_should_stop() after we set the task state to
Expand Down Expand Up @@ -653,14 +653,14 @@ xfsaild(
ailp->ail_target == ailp->ail_target_prev &&
list_empty(&ailp->ail_buf_list)) {
spin_unlock(&ailp->ail_lock);
freezable_schedule();
schedule();
tout = 0;
continue;
}
spin_unlock(&ailp->ail_lock);

if (tout)
freezable_schedule_timeout(msecs_to_jiffies(tout));
schedule_timeout(msecs_to_jiffies(tout));

__set_current_state(TASK_RUNNING);

Expand Down
Loading

0 comments on commit f5d39b0

Please sign in to comment.