diff --git a/Documentation/scheduler/sched-ext.rst b/Documentation/scheduler/sched-ext.rst index b67346cf523549..3e1e0a4e974d73 100644 --- a/Documentation/scheduler/sched-ext.rst +++ b/Documentation/scheduler/sched-ext.rst @@ -77,19 +77,57 @@ optional. The following modified excerpt is from .. code-block:: c - s32 BPF_STRUCT_OPS(simple_init) + /* + * Decide which CPU a task should be migrated to before being + * enqueued (either at wakeup, fork time, or exec time). If an + * idle core is found by the default ops.select_cpu() implementation, + * then dispatch the task directly to SCX_DSQ_LOCAL and skip the + * ops.enqueue() callback. + * + * Note that this implementation has exactly the same behavior as the + * default ops.select_cpu() implementation. The behavior of the scheduler + * would be exactly the same if the implementation just didn't define the + * simple_select_cpu() struct_ops prog. + */ + s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, + s32 prev_cpu, u64 wake_flags) { - if (!switch_partial) - scx_bpf_switch_all(); - return 0; + s32 cpu; + /* Need to initialize or the BPF verifier will reject the program */ + bool direct = false; + + cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &direct); + + if (direct) + scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); + + return cpu; } + /* + * Do a direct dispatch of a task to the global DSQ. This ops.enqueue() + * callback will only be invoked if we failed to find a core to dispatch + * to in ops.select_cpu() above. + * + * Note that this implementation has exactly the same behavior as the + * default ops.enqueue() implementation, which just dispatches the task + * to SCX_DSQ_GLOBAL. The behavior of the scheduler would be exactly the same + * if the implementation just didn't define the simple_enqueue() struct_ops + * prog. + */ void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags) { - if (enq_flags & SCX_ENQ_LOCAL) - scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, enq_flags); - else - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + } + + s32 BPF_STRUCT_OPS(simple_init) + { + /* + * All SCHED_OTHER, SCHED_IDLE, and SCHED_BATCH tasks should + * use sched_ext. + */ + scx_bpf_switch_all(); + return 0; } void BPF_STRUCT_OPS(simple_exit, struct scx_exit_info *ei) @@ -99,6 +137,7 @@ optional. The following modified excerpt is from SEC(".struct_ops") struct sched_ext_ops simple_ops = { + .select_cpu = (void *)simple_select_cpu, .enqueue = (void *)simple_enqueue, .init = (void *)simple_init, .exit = (void *)simple_exit, @@ -142,11 +181,19 @@ The following briefly shows how a waking task is scheduled and executed. scheduler can wake up any cpu using the ``scx_bpf_kick_cpu()`` helper, using ``ops.select_cpu()`` judiciously can be simpler and more efficient. + A task can be immediately dispatched to a DSQ from ``ops.select_cpu()`` by + calling ``scx_bpf_dispatch()``. If the task is dispatched to + ``SCX_DSQ_LOCAL`` from ``ops.select_cpu()``, it will be dispatched to the + local DSQ of whichever CPU is returned from ``ops.select_cpu()``. + Additionally, dispatching directly from ``ops.select_cpu()`` will cause the + ``ops.enqueue()`` callback to be skipped. + Note that the scheduler core will ignore an invalid CPU selection, for example, if it's outside the allowed cpumask of the task. -2. Once the target CPU is selected, ``ops.enqueue()`` is invoked. It can - make one of the following decisions: +2. 
Once the target CPU is selected, ``ops.enqueue()`` is invoked (unless the + task was dispatched directly from ``ops.select_cpu()``). ``ops.enqueue()`` + can make one of the following decisions: * Immediately dispatch the task to either the global or local DSQ by calling ``scx_bpf_dispatch()`` with ``SCX_DSQ_GLOBAL`` or diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index b20a7620b93d7c..5a033634939248 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -184,6 +184,11 @@ struct sched_ext_ops { * If an idle CPU is returned, the CPU is kicked and will try to * dispatch. While an explicit custom mechanism can be added, * select_cpu() serves as the default way to wake up idle CPUs. + * + * @p may be dispatched directly by calling scx_bpf_dispatch(). If @p + * is dispatched, the ops.enqueue() callback will be skipped. Finally, + * if @p is dispatched to SCX_DSQ_LOCAL, it will be dispatched to the + * local DSQ of whatever CPU is returned by this callback. */ s32 (*select_cpu)(struct task_struct *p, s32 prev_cpu, u64 wake_flags); @@ -196,6 +201,9 @@ struct sched_ext_ops { * or enqueue on the BPF scheduler. If not directly dispatched, the bpf * scheduler owns @p and if it fails to dispatch @p, the task will * stall. + * + * If @p was dispatched from ops.select_cpu(), this callback is + * skipped. */ void (*enqueue)(struct task_struct *p, u64 enq_flags); @@ -597,7 +605,7 @@ struct scx_dispatch_q { enum scx_ent_flags { SCX_TASK_QUEUED = 1 << 0, /* on ext runqueue */ SCX_TASK_BAL_KEEP = 1 << 1, /* balance decided to keep current */ - SCX_TASK_ENQ_LOCAL = 1 << 2, /* used by scx_select_cpu_dfl() to set SCX_ENQ_LOCAL */ + SCX_TASK_DDSP_PRIQ = 1 << 2, /* task should be enqueued on priq when directly dispatched */ SCX_TASK_OPS_PREPPED = 1 << 8, /* prepared for BPF scheduler enable */ SCX_TASK_OPS_ENABLED = 1 << 9, /* task has BPF scheduler enabled */ @@ -630,12 +638,13 @@ enum scx_kf_mask { SCX_KF_CPU_RELEASE = 1 << 2, /* ops.cpu_release() */ /* ops.dequeue (in REST) may be nested inside DISPATCH */ SCX_KF_DISPATCH = 1 << 3, /* ops.dispatch() */ - SCX_KF_ENQUEUE = 1 << 4, /* ops.enqueue() */ - SCX_KF_REST = 1 << 5, /* other rq-locked operations */ + SCX_KF_ENQUEUE = 1 << 4, /* ops.enqueue() and ops.select_cpu() */ + SCX_KF_SELECT_CPU = 1 << 5, /* ops.select_cpu() */ + SCX_KF_REST = 1 << 6, /* other rq-locked operations */ __SCX_KF_RQ_LOCKED = SCX_KF_CPU_RELEASE | SCX_KF_DISPATCH | - SCX_KF_ENQUEUE | SCX_KF_REST, - __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_REST, + SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, + __SCX_KF_TERMINAL = SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU | SCX_KF_REST, }; /* @@ -685,6 +694,12 @@ struct sched_ext_entity { */ u64 dsq_vtime; + /* + * Used to track when a task has requested a direct dispatch from the + * ops.select_cpu() path. + */ + u64 ddsq_id; + /* * If set, reject future sched_setscheduler(2) calls updating the policy * to %SCHED_EXT with -%EACCES. 
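As an illustration of the contract documented above, the following is a minimal sketch of a custom ops.select_cpu() that direct-dispatches on its own, mirroring the select_cpu_dispatch selftest added later in this series. The include name and the sketch_* identifiers are illustrative placeholders, not part of this patch; the kfuncs used are the idle-tracking helpers already exported to BPF schedulers.

/*
 * Minimal sketch, not part of this patch: direct dispatch from a custom
 * ops.select_cpu() implementation.
 */
#include <scx_common.bpf.h>	/* assumed helper header, as in the selftests */

char _license[] SEC("license") = "GPL";

s32 BPF_STRUCT_OPS(sketch_select_cpu, struct task_struct *p,
		   s32 prev_cpu, u64 wake_flags)
{
	s32 cpu;

	/* If the task's previous CPU is still idle, keep the task there. */
	if (scx_bpf_test_and_clear_cpu_idle(prev_cpu)) {
		scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
		return prev_cpu;
	}

	/* Otherwise look for any idle CPU the task is allowed to run on. */
	cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
	if (cpu >= 0) {
		/*
		 * SCX_DSQ_LOCAL resolves to the local DSQ of whichever CPU
		 * this callback returns, and ops.enqueue() is then skipped.
		 */
		scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
		return cpu;
	}

	/* No direct dispatch: ops.enqueue() will be invoked as usual. */
	return prev_cpu;
}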
diff --git a/init/init_task.c b/init/init_task.c index 20fa6efc07f2ec..56c49c02d830f8 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -113,6 +113,7 @@ struct task_struct init_task .ops_state = ATOMIC_INIT(0), .runnable_at = INITIAL_JIFFIES, .slice = SCX_SLICE_DFL, + .ddsq_id = SCX_DSQ_INVALID, }, #endif .ptraced = LIST_HEAD_INIT(init_task.ptraced), diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d50e9dfee51725..c8885037f2a308 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4564,6 +4564,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) atomic_long_set(&p->scx.ops_state, 0); p->scx.runnable_at = INITIAL_JIFFIES; p->scx.slice = SCX_SLICE_DFL; + p->scx.ddsq_id = SCX_DSQ_INVALID; #endif #ifdef CONFIG_PREEMPT_NOTIFIERS diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 634fcb7cb24312..7b4825ec19aa85 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -272,7 +272,7 @@ do { \ */ #define SCX_CALL_OP_TASK(mask, op, task, args...) \ do { \ - BUILD_BUG_ON(mask & ~__SCX_KF_TERMINAL); \ + BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \ current->scx.kf_tasks[0] = task; \ SCX_CALL_OP(mask, op, task, ##args); \ current->scx.kf_tasks[0] = NULL; \ @@ -281,7 +281,7 @@ do { \ #define SCX_CALL_OP_TASK_RET(mask, op, task, args...) \ ({ \ __typeof__(scx_ops.op(task, ##args)) __ret; \ - BUILD_BUG_ON(mask & ~__SCX_KF_TERMINAL); \ + BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \ current->scx.kf_tasks[0] = task; \ __ret = SCX_CALL_OP_RET(mask, op, task, ##args); \ current->scx.kf_tasks[0] = NULL; \ @@ -291,7 +291,7 @@ do { \ #define SCX_CALL_OP_2TASKS_RET(mask, op, task0, task1, args...) \ ({ \ __typeof__(scx_ops.op(task0, task1, ##args)) __ret; \ - BUILD_BUG_ON(mask & ~__SCX_KF_TERMINAL); \ + BUILD_BUG_ON((mask) & ~__SCX_KF_TERMINAL); \ current->scx.kf_tasks[0] = task0; \ current->scx.kf_tasks[1] = task1; \ __ret = SCX_CALL_OP_RET(mask, op, task0, task1, ##args); \ @@ -786,18 +786,24 @@ static struct scx_dispatch_q *find_dsq_for_dispatch(struct rq *rq, u64 dsq_id, return dsq; } -static void direct_dispatch(struct task_struct *ddsp_task, struct task_struct *p, - u64 dsq_id, u64 enq_flags) +static void mark_direct_dispatch(struct task_struct *ddsp_task, + struct task_struct *p, u64 dsq_id, + u64 enq_flags) { - struct scx_dispatch_q *dsq; + /* + * Mark that dispatch already happened from ops.select_cpu() or + * ops.enqueue() by spoiling direct_dispatch_task with a non-NULL value + * which can never match a valid task pointer. 
+ */ + __this_cpu_write(direct_dispatch_task, ERR_PTR(-ESRCH)); - /* @p must match the task which is being enqueued */ + /* @p must match the task on the enqueue path */ if (unlikely(p != ddsp_task)) { if (IS_ERR(ddsp_task)) scx_ops_error("%s[%d] already direct-dispatched", p->comm, p->pid); else - scx_ops_error("enqueueing %s[%d] but trying to direct-dispatch %s[%d]", + scx_ops_error("scheduling for %s[%d] but trying to direct-dispatch %s[%d]", ddsp_task->comm, ddsp_task->pid, p->comm, p->pid); return; @@ -814,16 +820,28 @@ static void direct_dispatch(struct task_struct *ddsp_task, struct task_struct *p return; } + WARN_ON_ONCE(p->scx.ddsq_id != SCX_DSQ_INVALID); + WARN_ON_ONCE(p->scx.flags & SCX_TASK_DDSP_PRIQ); + + p->scx.ddsq_id = dsq_id; + if (enq_flags & SCX_ENQ_DSQ_PRIQ) + p->scx.flags |= SCX_TASK_DDSP_PRIQ; +} + +static void direct_dispatch(struct task_struct *p, u64 enq_flags) +{ + struct scx_dispatch_q *dsq; + touch_core_sched_dispatch(task_rq(p), p); - dsq = find_dsq_for_dispatch(task_rq(p), dsq_id, p); - dispatch_enqueue(dsq, p, enq_flags | SCX_ENQ_CLEAR_OPSS); + if (p->scx.flags & SCX_TASK_DDSP_PRIQ) { + enq_flags |= SCX_ENQ_DSQ_PRIQ; + p->scx.flags &= ~SCX_TASK_DDSP_PRIQ; + } - /* - * Mark that dispatch already happened by spoiling direct_dispatch_task - * with a non-NULL value which can never match a valid task pointer. - */ - __this_cpu_write(direct_dispatch_task, ERR_PTR(-ESRCH)); + dsq = find_dsq_for_dispatch(task_rq(p), p->scx.ddsq_id, p); + dispatch_enqueue(dsq, p, enq_flags | SCX_ENQ_CLEAR_OPSS); + p->scx.ddsq_id = SCX_DSQ_INVALID; } static bool test_rq_online(struct rq *rq) @@ -843,10 +861,8 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags, WARN_ON_ONCE(!(p->scx.flags & SCX_TASK_QUEUED)); - if (p->scx.flags & SCX_TASK_ENQ_LOCAL) { - enq_flags |= SCX_ENQ_LOCAL; - p->scx.flags &= ~SCX_TASK_ENQ_LOCAL; - } + if (p->scx.ddsq_id != SCX_DSQ_INVALID) + goto direct; /* rq migration */ if (sticky_cpu == cpu_of(rq)) @@ -870,12 +886,8 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags, (enq_flags & SCX_ENQ_LAST)) goto local; - if (!SCX_HAS_OP(enqueue)) { - if (enq_flags & SCX_ENQ_LOCAL) - goto local; - else - goto global; - } + if (!SCX_HAS_OP(enqueue)) + goto global; /* DSQ bypass didn't trigger, enqueue on the BPF scheduler */ qseq = rq->scx.ops_qseq++ << SCX_OPSS_QSEQ_SHIFT; @@ -889,13 +901,19 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags, SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, p, enq_flags); + *ddsp_taskp = NULL; + if (p->scx.ddsq_id != SCX_DSQ_INVALID) + goto direct; + /* * If not directly dispatched, QUEUEING isn't clear yet and dispatch or * dequeue may be waiting. The store_release matches their load_acquire. */ - if (*ddsp_taskp == p) - atomic_long_set_release(&p->scx.ops_state, SCX_OPSS_QUEUED | qseq); - *ddsp_taskp = NULL; + atomic_long_set_release(&p->scx.ops_state, SCX_OPSS_QUEUED | qseq); + return; + +direct: + direct_dispatch(p, enq_flags); return; local: @@ -1771,7 +1789,7 @@ static void put_prev_task_scx(struct rq *rq, struct task_struct *p) * follow-up scheduling event. 
*/ if (list_empty(&rq->scx.local_dsq.fifo)) - do_enqueue_task(rq, p, SCX_ENQ_LAST | SCX_ENQ_LOCAL, -1); + do_enqueue_task(rq, p, SCX_ENQ_LAST, -1); else do_enqueue_task(rq, p, 0, -1); } @@ -1995,10 +2013,13 @@ static s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, u64 flags) goto retry; } -static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags) +static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, + u64 wake_flags, bool *found) { s32 cpu; + *found = false; + if (!static_branch_likely(&scx_builtin_idle_enabled)) { scx_ops_error("built-in idle tracking is disabled"); return prev_cpu; @@ -2011,10 +2032,8 @@ static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flag if ((wake_flags & SCX_WAKE_SYNC) && p->nr_cpus_allowed > 1 && !cpumask_empty(idle_masks.cpu) && !(current->flags & PF_EXITING)) { cpu = smp_processor_id(); - if (cpumask_test_cpu(cpu, p->cpus_ptr)) { - p->scx.flags |= SCX_TASK_ENQ_LOCAL; - return cpu; - } + if (cpumask_test_cpu(cpu, p->cpus_ptr)) + goto cpu_found; } if (p->nr_cpus_allowed == 1) @@ -2027,38 +2046,55 @@ static s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flag if (sched_smt_active()) { if (cpumask_test_cpu(prev_cpu, idle_masks.smt) && test_and_clear_cpu_idle(prev_cpu)) { - p->scx.flags |= SCX_TASK_ENQ_LOCAL; - return prev_cpu; + cpu = prev_cpu; + goto cpu_found; } cpu = scx_pick_idle_cpu(p->cpus_ptr, SCX_PICK_IDLE_CORE); - if (cpu >= 0) { - p->scx.flags |= SCX_TASK_ENQ_LOCAL; - return cpu; - } + if (cpu >= 0) + goto cpu_found; } if (test_and_clear_cpu_idle(prev_cpu)) { - p->scx.flags |= SCX_TASK_ENQ_LOCAL; - return prev_cpu; + cpu = prev_cpu; + goto cpu_found; } cpu = scx_pick_idle_cpu(p->cpus_ptr, 0); - if (cpu >= 0) { - p->scx.flags |= SCX_TASK_ENQ_LOCAL; - return cpu; - } + if (cpu >= 0) + goto cpu_found; return prev_cpu; + +cpu_found: + *found = true; + return cpu; +} + +s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, + bool *found) +{ + if (!scx_kf_allowed(SCX_KF_SELECT_CPU)) { + *found = false; + return prev_cpu; + } + + return scx_select_cpu_dfl(p, prev_cpu, wake_flags, found); } static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flags) { if (SCX_HAS_OP(select_cpu)) { s32 cpu; + struct task_struct **ddsp_taskp; + + ddsp_taskp = this_cpu_ptr(&direct_dispatch_task); + WARN_ON_ONCE(*ddsp_taskp); + *ddsp_taskp = p; - cpu = SCX_CALL_OP_TASK_RET(SCX_KF_REST, select_cpu, p, prev_cpu, - wake_flags); + cpu = SCX_CALL_OP_TASK_RET(SCX_KF_ENQUEUE | SCX_KF_SELECT_CPU, + select_cpu, p, prev_cpu, wake_flags); + *ddsp_taskp = NULL; if (ops_cpu_valid(cpu)) { return cpu; } else { @@ -2066,7 +2102,13 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag return prev_cpu; } } else { - return scx_select_cpu_dfl(p, prev_cpu, wake_flags); + bool found; + s32 cpu; + + cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, &found); + if (found) + p->scx.ddsq_id = SCX_DSQ_LOCAL; + return cpu; } } @@ -3191,6 +3233,20 @@ static struct kthread_worker *scx_create_rt_helper(const char *name) return helper; } +static int validate_ops(const struct sched_ext_ops *ops) +{ + /* + * It doesn't make sense to specify the SCX_OPS_ENQ_LAST flag if the + * ops.enqueue() callback isn't implemented. 
+ */ + if ((ops->flags & SCX_OPS_ENQ_LAST) && !ops->enqueue) { + scx_ops_error("SCX_OPS_ENQ_LAST requires ops.enqueue() to be implemented"); + return -EINVAL; + } + + return 0; +} + static int scx_ops_enable(struct sched_ext_ops *ops) { struct scx_task_iter sti; @@ -3254,6 +3310,10 @@ static int scx_ops_enable(struct sched_ext_ops *ops) goto err_disable; } + ret = validate_ops(ops); + if (ret) + goto err_disable; + WARN_ON_ONCE(scx_dsp_buf); scx_dsp_max_batch = ops->dispatch_max_batch ?: SCX_DSP_DFL_MAX_BATCH; scx_dsp_buf = __alloc_percpu(sizeof(scx_dsp_buf[0]) * scx_dsp_max_batch, @@ -3870,7 +3930,7 @@ static void scx_dispatch_commit(struct task_struct *p, u64 dsq_id, u64 enq_flags ddsp_task = __this_cpu_read(direct_dispatch_task); if (ddsp_task) { - direct_dispatch(ddsp_task, p, dsq_id, enq_flags); + mark_direct_dispatch(ddsp_task, p, dsq_id, enq_flags); return; } @@ -4447,6 +4507,7 @@ BTF_ID_FLAGS(func, scx_bpf_test_and_clear_cpu_idle) BTF_ID_FLAGS(func, scx_bpf_pick_idle_cpu, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_pick_any_cpu, KF_RCU) BTF_ID_FLAGS(func, scx_bpf_destroy_dsq) +BTF_ID_FLAGS(func, scx_bpf_select_cpu_dfl, KF_RCU) BTF_SET8_END(scx_kfunc_ids_ops_only) static const struct btf_kfunc_id_set scx_kfunc_set_ops_only = { diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index 27248760f4ccb6..a8f72efe39b362 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -43,8 +43,8 @@ enum scx_enq_flags { /* * The task being enqueued is the only task available for the cpu. By * default, ext core keeps executing such tasks but when - * %SCX_OPS_ENQ_LAST is specified, they're ops.enqueue()'d with - * %SCX_ENQ_LAST and %SCX_ENQ_LOCAL flags set. + * %SCX_OPS_ENQ_LAST is specified, they're ops.enqueue()'d with the + * %SCX_ENQ_LAST flag set. * * If the BPF scheduler wants to continue executing the task, * ops.enqueue() should dispatch the task to %SCX_DSQ_LOCAL immediately. @@ -54,13 +54,6 @@ enum scx_enq_flags { */ SCX_ENQ_LAST = 1LLU << 41, - /* - * A hint indicating that it's advisable to enqueue the task on the - * local dsq of the currently selected CPU. Currently used by - * select_cpu_dfl() and together with %SCX_ENQ_LAST. 
- */ - SCX_ENQ_LOCAL = 1LLU << 42, - /* high 8 bits are internal */ __SCX_ENQ_INTERNAL_MASK = 0xffLLU << 56, diff --git a/tools/testing/selftests/scx/.gitignore b/tools/testing/selftests/scx/.gitignore index 72fc34154e98b0..8e5d7c1aab5b0a 100644 --- a/tools/testing/selftests/scx/.gitignore +++ b/tools/testing/selftests/scx/.gitignore @@ -1,2 +1,9 @@ +enq_last_no_enq_fails +enqueue_select_cpu_fails minimal +select_cpu_dfl +select_cpu_dfl_nodispatch +select_cpu_dispatch +select_cpu_dispatch_dbl_dsp +select_cpu_dispatch_bad_dsq build/ diff --git a/tools/testing/selftests/scx/Makefile b/tools/testing/selftests/scx/Makefile index c331cfc380b6c7..3af9edc08c07f6 100644 --- a/tools/testing/selftests/scx/Makefile +++ b/tools/testing/selftests/scx/Makefile @@ -42,6 +42,8 @@ SCXOBJ_DIR := $(OBJ_DIR)/sched_ext BPFOBJ := $(BPFOBJ_DIR)/libbpf.a LIBBPF_OUTPUT := $(OBJ_DIR)/libbpf/libbpf.a DEFAULT_BPFTOOL := $(OUTPUT_DIR)/sbin/bpftool +HOST_BUILD_DIR := $(OBJ_DIR) +HOST_OUTPUT_DIR := $(OUTPUT_DIR) VMLINUX_BTF_PATHS ?= ../../../../vmlinux \ /sys/kernel/btf/vmlinux \ @@ -145,7 +147,15 @@ $(INCLUDE_DIR)/%.bpf.skel.h: $(SCXOBJ_DIR)/%.bpf.o $(INCLUDE_DIR)/vmlinux.h $(BP ################ # C schedulers # ################ -c-sched-targets := minimal +c-sched-targets := \ + minimal \ + select_cpu_dfl \ + select_cpu_dfl_nodispatch \ + select_cpu_dispatch \ + select_cpu_dispatch_dbl_dsp \ + select_cpu_dispatch_bad_dsq \ + enqueue_select_cpu_fails \ + enq_last_no_enq_fails $(c-sched-targets): %: $(filter-out %.bpf.c,%.c) $(INCLUDE_DIR)/%.bpf.skel.h $(eval sched=$(notdir $@)) diff --git a/tools/testing/selftests/scx/enq_last_no_enq_fails.bpf.c b/tools/testing/selftests/scx/enq_last_no_enq_fails.bpf.c new file mode 100644 index 00000000000000..4b0f84568dc151 --- /dev/null +++ b/tools/testing/selftests/scx/enq_last_no_enq_fails.bpf.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * A scheduler that validates that loading fails if the SCX_OPS_ENQ_LAST ops + * flag is specified without ops.enqueue() being implemented. + * + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2023 David Vernet + * Copyright (c) 2023 Tejun Heo + */ + +#include + +char _license[] SEC("license") = "GPL"; + +s32 BPF_STRUCT_OPS(enq_last_no_enq_fails_init) +{ + scx_bpf_switch_all(); + + return 0; +} + +SEC(".struct_ops.link") +struct sched_ext_ops enq_last_no_enq_fails_ops = { + .init = enq_last_no_enq_fails_init, + .name = "enq_last_no_enq_fails", + /* Need to define ops.enqueue() with SCX_OPS_ENQ_LAST */ + .flags = SCX_OPS_ENQ_LAST, + .timeout_ms = 1000U, +}; diff --git a/tools/testing/selftests/scx/enq_last_no_enq_fails.c b/tools/testing/selftests/scx/enq_last_no_enq_fails.c new file mode 100644 index 00000000000000..1f3d4d8adcc7f5 --- /dev/null +++ b/tools/testing/selftests/scx/enq_last_no_enq_fails.c @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
+ * Copyright (c) 2023 David Vernet + * Copyright (c) 2023 Tejun Heo + */ +#include +#include +#include +#include +#include +#include +#include +#include "enq_last_no_enq_fails.bpf.skel.h" +#include "scx_test.h" + +int main(int argc, char **argv) +{ + struct enq_last_no_enq_fails *skel; + struct bpf_link *link; + + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + + skel = enq_last_no_enq_fails__open_and_load(); + SCX_BUG_ON(!skel, "Failed to open and load skel"); + + link = bpf_map__attach_struct_ops(skel->maps.enq_last_no_enq_fails_ops); + SCX_BUG_ON(link, "Succeeded in attaching struct_ops"); + + bpf_link__destroy(link); + enq_last_no_enq_fails__destroy(skel); + + return 0; +} diff --git a/tools/testing/selftests/scx/enqueue_select_cpu_fails.bpf.c b/tools/testing/selftests/scx/enqueue_select_cpu_fails.bpf.c new file mode 100644 index 00000000000000..61f04fa4ce2b38 --- /dev/null +++ b/tools/testing/selftests/scx/enqueue_select_cpu_fails.bpf.c @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * A scheduler that validates that scx_bpf_select_cpu_dfl() may only be called + * from ops.select_cpu(), and that calling it from ops.enqueue() is treated as + * an error. + * + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2023 David Vernet + * Copyright (c) 2023 Tejun Heo + */ + +#include + +char _license[] SEC("license") = "GPL"; + +/* Manually specify the signature until the kfunc is added to the scx repo. */ +s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, + bool *found) __ksym; + +s32 BPF_STRUCT_OPS(enqueue_select_cpu_fails_select_cpu, struct task_struct *p, + s32 prev_cpu, u64 wake_flags) +{ + return prev_cpu; +} + +void BPF_STRUCT_OPS(enqueue_select_cpu_fails_enqueue, struct task_struct *p, + u64 enq_flags) +{ + /* + * Need to initialize the variable or the verifier will fail to load. + * Improving these semantics is actively being worked on. + */ + bool found = false; + + /* Can only call from ops.select_cpu() */ + scx_bpf_select_cpu_dfl(p, 0, 0, &found); + + scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); +} + +s32 BPF_STRUCT_OPS(enqueue_select_cpu_fails_init) +{ + scx_bpf_switch_all(); + + return 0; +} + +SEC(".struct_ops.link") +struct sched_ext_ops enqueue_select_cpu_fails_ops = { + .select_cpu = enqueue_select_cpu_fails_select_cpu, + .enqueue = enqueue_select_cpu_fails_enqueue, + .init = enqueue_select_cpu_fails_init, + .name = "enqueue_select_cpu_fails", + .timeout_ms = 1000U, +}; diff --git a/tools/testing/selftests/scx/enqueue_select_cpu_fails.c b/tools/testing/selftests/scx/enqueue_select_cpu_fails.c new file mode 100644 index 00000000000000..f45740370f5086 --- /dev/null +++ b/tools/testing/selftests/scx/enqueue_select_cpu_fails.c @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
+ * Copyright (c) 2023 David Vernet + * Copyright (c) 2023 Tejun Heo + */ +#include +#include +#include +#include +#include +#include +#include +#include "enqueue_select_cpu_fails.bpf.skel.h" +#include "scx_test.h" + +int main(int argc, char **argv) +{ + struct enqueue_select_cpu_fails *skel; + struct bpf_link *link; + + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + + skel = enqueue_select_cpu_fails__open_and_load(); + SCX_BUG_ON(!skel, "Failed to open and load skel"); + + link = bpf_map__attach_struct_ops(skel->maps.enqueue_select_cpu_fails_ops); + SCX_BUG_ON(!link, "Failed to attach struct_ops"); + + sleep(1); + + bpf_link__destroy(link); + enqueue_select_cpu_fails__destroy(skel); + + return 0; +} diff --git a/tools/testing/selftests/scx/scx_test.h b/tools/testing/selftests/scx/scx_test.h new file mode 100644 index 00000000000000..6a61763b19ab55 --- /dev/null +++ b/tools/testing/selftests/scx/scx_test.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2023 Tejun Heo + * Copyright (c) 2023 David Vernet + */ + +#ifndef __SCX_TEST_H__ +#define __SCX_TEST_H__ + +#include + +#define SCX_GT(_x, _y) SCX_BUG_ON((_x) <= (_y), "Expected %s > %s (%lu > %lu)", \ + #_x, #_y, (u64)(_x), (u64)(_y)) +#define SCX_GE(_x, _y) SCX_BUG_ON((_x) < (_y), "Expected %s >= %s (%lu >= %lu)", \ + #_x, #_y, (u64)(_x), (u64)(_y)) +#define SCX_LT(_x, _y) SCX_BUG_ON((_x) >= (_y), "Expected %s < %s (%lu < %lu)", \ + #_x, #_y, (u64)(_x), (u64)(_y)) +#define SCX_LE(_x, _y) SCX_BUG_ON((_x) > (_y), "Expected %s <= %s (%lu <= %lu)", \ + #_x, #_y, (u64)(_x), (u64)(_y)) +#define SCX_EQ(_x, _y) SCX_BUG_ON((_x) != (_y), "Expected %s == %s (%lu == %lu)", \ + #_x, #_y, (u64)(_x), (u64)(_y)) +#define SCX_ASSERT(_x) SCX_BUG_ON(!(_x), "Expected %s to be true (%lu)", \ + #_x, (u64)(_x)) + +#endif // # __SCX_TEST_H__ diff --git a/tools/testing/selftests/scx/select_cpu_dfl.bpf.c b/tools/testing/selftests/scx/select_cpu_dfl.bpf.c new file mode 100644 index 00000000000000..091bf1ed9bec0a --- /dev/null +++ b/tools/testing/selftests/scx/select_cpu_dfl.bpf.c @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * A scheduler that validates the behavior of direct dispatching with a default + * select_cpu implementation. + * + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2023 David Vernet + * Copyright (c) 2023 Tejun Heo + */ + +#include + +char _license[] SEC("license") = "GPL"; + +bool saw_local = false; + +void BPF_STRUCT_OPS(select_cpu_dfl_enqueue, struct task_struct *p, + u64 enq_flags) +{ + const struct cpumask *idle_mask = scx_bpf_get_idle_cpumask(); + + if (p->nr_cpus_allowed > 1 && + bpf_cpumask_test_cpu(scx_bpf_task_cpu(p), idle_mask)) + saw_local = true; + scx_bpf_put_idle_cpumask(idle_mask); + + scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); +} + +s32 BPF_STRUCT_OPS(select_cpu_dfl_init) +{ + scx_bpf_switch_all(); + + return 0; +} + +SEC(".struct_ops.link") +struct sched_ext_ops select_cpu_dfl_ops = { + .enqueue = select_cpu_dfl_enqueue, + .init = select_cpu_dfl_init, + .name = "select_cpu_dfl", +}; diff --git a/tools/testing/selftests/scx/select_cpu_dfl.c b/tools/testing/selftests/scx/select_cpu_dfl.c new file mode 100644 index 00000000000000..2962be1bec5181 --- /dev/null +++ b/tools/testing/selftests/scx/select_cpu_dfl.c @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
+ * Copyright (c) 2023 David Vernet + * Copyright (c) 2023 Tejun Heo + */ +#include +#include +#include +#include +#include +#include +#include +#include "select_cpu_dfl.bpf.skel.h" +#include "scx_test.h" + +#define NUM_CHILDREN 1028 + +int main(int argc, char **argv) +{ + struct select_cpu_dfl *skel; + struct bpf_link *link; + pid_t pids[NUM_CHILDREN]; + int i, status; + + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + + skel = select_cpu_dfl__open_and_load(); + SCX_BUG_ON(!skel, "Failed to open and load skel"); + + link = bpf_map__attach_struct_ops(skel->maps.select_cpu_dfl_ops); + SCX_BUG_ON(!link, "Failed to attach struct_ops"); + + for (i = 0; i < NUM_CHILDREN; i++) { + pids[i] = fork(); + if (pids[i] == 0) { + sleep(1); + exit(0); + } + } + + for (i = 0; i < NUM_CHILDREN; i++) { + SCX_EQ(waitpid(pids[i], &status, 0), pids[i]); + SCX_EQ(status, 0); + } + + SCX_ASSERT(!skel->bss->saw_local); + bpf_link__destroy(link); + select_cpu_dfl__destroy(skel); + + return 0; +} diff --git a/tools/testing/selftests/scx/select_cpu_dfl_nodispatch.bpf.c b/tools/testing/selftests/scx/select_cpu_dfl_nodispatch.bpf.c new file mode 100644 index 00000000000000..9d026e0cbdbb47 --- /dev/null +++ b/tools/testing/selftests/scx/select_cpu_dfl_nodispatch.bpf.c @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * A scheduler that validates the behavior of invoking the default CPU + * selection logic via the scx_bpf_select_cpu_dfl() kfunc from a custom + * ops.select_cpu() implementation which never directly dispatches, instead + * propagating the result to ops.enqueue() through task-local storage. + * + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2023 David Vernet + * Copyright (c) 2023 Tejun Heo + */ + +#include + +char _license[] SEC("license") = "GPL"; + +bool saw_local = false; + +/* Per-task scheduling context */ +struct task_ctx { + bool force_local; /* Dispatch to the local DSQ: idle CPU found by ops.select_cpu() */ +}; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct task_ctx); +} task_ctx_stor SEC(".maps"); + +/* Manually specify the signature until the kfunc is added to the scx repo. */
+s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, + bool *found) __ksym; + +s32 BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_select_cpu, struct task_struct *p, + s32 prev_cpu, u64 wake_flags) +{ + struct task_ctx *tctx; + s32 cpu; + + tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0); + if (!tctx) { + scx_bpf_error("task_ctx lookup failed"); + return -ESRCH; + } + + cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, + &tctx->force_local); + + return cpu; +} + +void BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_enqueue, struct task_struct *p, + u64 enq_flags) +{ + u64 dsq_id = SCX_DSQ_GLOBAL; + struct task_ctx *tctx; + + tctx = bpf_task_storage_get(&task_ctx_stor, p, 0, 0); + if (!tctx) { + scx_bpf_error("task_ctx lookup failed"); + return; + } + + if (tctx->force_local) { + dsq_id = SCX_DSQ_LOCAL; + tctx->force_local = false; + saw_local = true; + } + + scx_bpf_dispatch(p, dsq_id, SCX_SLICE_DFL, enq_flags); +} + +s32 BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_prep_enable, + struct task_struct *p, struct scx_enable_args *args) +{ + if (bpf_task_storage_get(&task_ctx_stor, p, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE)) + return 0; + else + return -ENOMEM; +} + +s32 BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_init) +{ + scx_bpf_switch_all(); + + return 0; +} + +SEC(".struct_ops.link") +struct sched_ext_ops select_cpu_dfl_nodispatch_ops = { + .select_cpu = select_cpu_dfl_nodispatch_select_cpu, + .enqueue = select_cpu_dfl_nodispatch_enqueue, + .prep_enable = select_cpu_dfl_nodispatch_prep_enable, + .init = select_cpu_dfl_nodispatch_init, + .name = "select_cpu_dfl_nodispatch", +}; diff --git a/tools/testing/selftests/scx/select_cpu_dfl_nodispatch.c b/tools/testing/selftests/scx/select_cpu_dfl_nodispatch.c new file mode 100644 index 00000000000000..3121b28c81ed03 --- /dev/null +++ b/tools/testing/selftests/scx/select_cpu_dfl_nodispatch.c @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2023 Tejun Heo + * Copyright (c) 2023 David Vernet + */ +#include +#include +#include +#include +#include +#include +#include +#include "select_cpu_dfl_nodispatch.bpf.skel.h" +#include "scx_test.h" + +#define NUM_CHILDREN 1028 + +int main(int argc, char **argv) +{ + struct select_cpu_dfl_nodispatch *skel; + struct bpf_link *link; + pid_t pids[NUM_CHILDREN]; + int i, status; + + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + + skel = select_cpu_dfl_nodispatch__open_and_load(); + SCX_BUG_ON(!skel, "Failed to open and load skel"); + + link = bpf_map__attach_struct_ops(skel->maps.select_cpu_dfl_nodispatch_ops); + SCX_BUG_ON(!link, "Failed to attach struct_ops"); + + for (i = 0; i < NUM_CHILDREN; i++) { + pids[i] = fork(); + if (pids[i] == 0) { + sleep(1); + exit(0); + } + } + + for (i = 0; i < NUM_CHILDREN; i++) { + SCX_EQ(waitpid(pids[i], &status, 0), pids[i]); + SCX_EQ(status, 0); + } + + SCX_ASSERT(skel->bss->saw_local); + bpf_link__destroy(link); + select_cpu_dfl_nodispatch__destroy(skel); + + return 0; +} diff --git a/tools/testing/selftests/scx/select_cpu_dispatch.bpf.c b/tools/testing/selftests/scx/select_cpu_dispatch.bpf.c new file mode 100644 index 00000000000000..0fda9776972514 --- /dev/null +++ b/tools/testing/selftests/scx/select_cpu_dispatch.bpf.c @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * A scheduler that validates the behavior of direct dispatching from a custom + * ops.select_cpu() implementation. + * + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
+ * Copyright (c) 2023 David Vernet + * Copyright (c) 2023 Tejun Heo + */ + +#include + +char _license[] SEC("license") = "GPL"; + +s32 BPF_STRUCT_OPS(select_cpu_dispatch_select_cpu, struct task_struct *p, + s32 prev_cpu, u64 wake_flags) +{ + u64 dsq_id = SCX_DSQ_LOCAL; + s32 cpu = prev_cpu; + + if (scx_bpf_test_and_clear_cpu_idle(cpu)) + goto dispatch; + + cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0); + if (cpu >= 0) + goto dispatch; + + dsq_id = SCX_DSQ_GLOBAL; + cpu = prev_cpu; + +dispatch: + scx_bpf_dispatch(p, dsq_id, SCX_SLICE_DFL, 0); + return cpu; +} + +s32 BPF_STRUCT_OPS(select_cpu_dispatch_init) +{ + scx_bpf_switch_all(); + + return 0; +} + +SEC(".struct_ops.link") +struct sched_ext_ops select_cpu_dispatch_ops = { + .select_cpu = select_cpu_dispatch_select_cpu, + .init = select_cpu_dispatch_init, + .name = "select_cpu_dispatch", + .timeout_ms = 1000U, +}; diff --git a/tools/testing/selftests/scx/select_cpu_dispatch.c b/tools/testing/selftests/scx/select_cpu_dispatch.c new file mode 100644 index 00000000000000..a3625f75db720d --- /dev/null +++ b/tools/testing/selftests/scx/select_cpu_dispatch.c @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2023 David Vernet + * Copyright (c) 2023 Tejun Heo + */ +#include +#include +#include +#include +#include +#include +#include +#include "select_cpu_dispatch.bpf.skel.h" +#include "scx_test.h" + +#define NUM_CHILDREN 1028 + +int main(int argc, char **argv) +{ + struct select_cpu_dispatch *skel; + struct bpf_link *link; + pid_t pids[NUM_CHILDREN]; + int i, status; + + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + + skel = select_cpu_dispatch__open_and_load(); + SCX_BUG_ON(!skel, "Failed to open and load skel"); + + link = bpf_map__attach_struct_ops(skel->maps.select_cpu_dispatch_ops); + SCX_BUG_ON(!link, "Failed to attach struct_ops"); + + for (i = 0; i < NUM_CHILDREN; i++) { + pids[i] = fork(); + if (pids[i] == 0) { + sleep(1); + exit(0); + } + } + + for (i = 0; i < NUM_CHILDREN; i++) { + SCX_EQ(waitpid(pids[i], &status, 0), pids[i]); + SCX_EQ(status, 0); + } + + bpf_link__destroy(link); + select_cpu_dispatch__destroy(skel); + + return 0; +} diff --git a/tools/testing/selftests/scx/select_cpu_dispatch_bad_dsq.bpf.c b/tools/testing/selftests/scx/select_cpu_dispatch_bad_dsq.bpf.c new file mode 100644 index 00000000000000..c9105add924d5c --- /dev/null +++ b/tools/testing/selftests/scx/select_cpu_dispatch_bad_dsq.bpf.c @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * A scheduler that validates that dispatching from ops.select_cpu() to a DSQ + * which was never created is treated as an error. + * + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2023 David Vernet + * Copyright (c) 2023 Tejun Heo + */ + +#include + +char _license[] SEC("license") = "GPL"; + +struct user_exit_info uei; + +s32 BPF_STRUCT_OPS(select_cpu_dispatch_bad_dsq_select_cpu, struct task_struct *p, + s32 prev_cpu, u64 wake_flags) +{ + /* Dispatching to an invalid, never-created DSQ should fail. */
+ scx_bpf_dispatch(p, 0xcafef00d, SCX_SLICE_DFL, 0); + + return prev_cpu; +} + +s32 BPF_STRUCT_OPS(select_cpu_dispatch_bad_dsq_init) +{ + scx_bpf_switch_all(); + + return 0; +} + +void BPF_STRUCT_OPS(select_cpu_dispatch_bad_dsq_exit, struct scx_exit_info *ei) +{ + uei_record(&uei, ei); +} + +SEC(".struct_ops.link") +struct sched_ext_ops select_cpu_dispatch_bad_dsq_ops = { + .select_cpu = select_cpu_dispatch_bad_dsq_select_cpu, + .init = select_cpu_dispatch_bad_dsq_init, + .exit = select_cpu_dispatch_bad_dsq_exit, + .name = "select_cpu_dispatch_bad_dsq", + .timeout_ms = 1000U, +}; diff --git a/tools/testing/selftests/scx/select_cpu_dispatch_bad_dsq.c b/tools/testing/selftests/scx/select_cpu_dispatch_bad_dsq.c new file mode 100644 index 00000000000000..f1094e3645d619 --- /dev/null +++ b/tools/testing/selftests/scx/select_cpu_dispatch_bad_dsq.c @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2023 David Vernet + * Copyright (c) 2023 Tejun Heo + */ +#include +#include +#include +#include +#include +#include +#include +#include "select_cpu_dispatch_bad_dsq.bpf.skel.h" +#include "scx_test.h" + +#define NUM_CHILDREN 1028 +#define SCX_EXIT_ERROR 1024 + +int main(int argc, char **argv) +{ + struct select_cpu_dispatch_bad_dsq *skel; + struct bpf_link *link; + pid_t pids[NUM_CHILDREN]; + int i, status; + + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + + skel = select_cpu_dispatch_bad_dsq__open_and_load(); + SCX_BUG_ON(!skel, "Failed to open and load skel"); + + /* + * The scheduler is expected to gracefully exit after erroneously + * dispatching to an invalid DSQ from ops.select_cpu(). + */ + link = bpf_map__attach_struct_ops(skel->maps.select_cpu_dispatch_bad_dsq_ops); + SCX_BUG_ON(!link, "Failed to attach struct_ops"); + + for (i = 0; i < NUM_CHILDREN; i++) { + pids[i] = fork(); + if (pids[i] == 0) { + sleep(1); + exit(0); + } + } + + for (i = 0; i < NUM_CHILDREN; i++) { + SCX_EQ(waitpid(pids[i], &status, 0), pids[i]); + SCX_EQ(status, 0); + } + + SCX_EQ(skel->bss->uei.kind, SCX_EXIT_ERROR); + bpf_link__destroy(link); + select_cpu_dispatch_bad_dsq__destroy(skel); + + return 0; +} diff --git a/tools/testing/selftests/scx/select_cpu_dispatch_dbl_dsp.bpf.c b/tools/testing/selftests/scx/select_cpu_dispatch_dbl_dsp.bpf.c new file mode 100644 index 00000000000000..82d8148399f28b --- /dev/null +++ b/tools/testing/selftests/scx/select_cpu_dispatch_dbl_dsp.bpf.c @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * A scheduler that validates that dispatching a task twice from + * ops.select_cpu() is treated as an error. + * + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2023 David Vernet + * Copyright (c) 2023 Tejun Heo + */ + +#include + +char _license[] SEC("license") = "GPL"; + +struct user_exit_info uei; + +s32 BPF_STRUCT_OPS(select_cpu_dispatch_dbl_dsp_select_cpu, struct task_struct *p, + s32 prev_cpu, u64 wake_flags) +{ + /* Dispatching twice in a row is disallowed. 
*/ + scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); + scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0); + + return prev_cpu; +} + +s32 BPF_STRUCT_OPS(select_cpu_dispatch_dbl_dsp_init) +{ + scx_bpf_switch_all(); + + return 0; +} + +void BPF_STRUCT_OPS(select_cpu_dispatch_dbl_dsp_exit, struct scx_exit_info *ei) +{ + uei_record(&uei, ei); +} + +SEC(".struct_ops.link") +struct sched_ext_ops select_cpu_dispatch_dbl_dsp_ops = { + .select_cpu = select_cpu_dispatch_dbl_dsp_select_cpu, + .init = select_cpu_dispatch_dbl_dsp_init, + .exit = select_cpu_dispatch_dbl_dsp_exit, + .name = "select_cpu_dispatch_dbl_dsp", + .timeout_ms = 1000U, +}; diff --git a/tools/testing/selftests/scx/select_cpu_dispatch_dbl_dsp.c b/tools/testing/selftests/scx/select_cpu_dispatch_dbl_dsp.c new file mode 100644 index 00000000000000..9736b65f79bd0c --- /dev/null +++ b/tools/testing/selftests/scx/select_cpu_dispatch_dbl_dsp.c @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2023 Meta Platforms, Inc. and affiliates. + * Copyright (c) 2023 David Vernet + * Copyright (c) 2023 Tejun Heo + */ +#include +#include +#include +#include +#include +#include +#include +#include "select_cpu_dispatch_dbl_dsp.bpf.skel.h" +#include "scx_test.h" + +#define NUM_CHILDREN 1028 +#define SCX_EXIT_ERROR 1024 + +int main(int argc, char **argv) +{ + struct select_cpu_dispatch_dbl_dsp *skel; + struct bpf_link *link; + pid_t pids[NUM_CHILDREN]; + int i, status; + + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + + skel = select_cpu_dispatch_dbl_dsp__open_and_load(); + SCX_BUG_ON(!skel, "Failed to open and load skel"); + + /* + * The scheduler is expected to gracefully exit after + * double-dispatching from ops.select_cpu(). + */ + link = bpf_map__attach_struct_ops(skel->maps.select_cpu_dispatch_dbl_dsp_ops); + SCX_BUG_ON(!link, "Failed to attach struct_ops"); + + for (i = 0; i < NUM_CHILDREN; i++) { + pids[i] = fork(); + if (pids[i] == 0) { + sleep(1); + exit(0); + } + } + + for (i = 0; i < NUM_CHILDREN; i++) { + SCX_EQ(waitpid(pids[i], &status, 0), pids[i]); + SCX_EQ(status, 0); + } + + SCX_EQ(skel->bss->uei.kind, SCX_EXIT_ERROR); + bpf_link__destroy(link); + select_cpu_dispatch_dbl_dsp__destroy(skel); + + return 0; +}
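The new tests above share the same userspace harness shape, so the following condensed sketch is included for orientation only. The "example" identifiers and the include list are placeholders (the original include targets are not reproduced here), and SCX_EXIT_ERROR mirrors the value the tests hardcode to match the kernel's exit kind.

/* Condensed sketch of the shared selftest harness; "example" is a placeholder. */
#include <bpf/bpf.h>
#include <sys/wait.h>
#include <unistd.h>
#include "example.bpf.skel.h"	/* hypothetical generated skeleton */
#include "scx_test.h"

#define NUM_CHILDREN 1028
#define SCX_EXIT_ERROR 1024	/* hardcoded to match the kernel's exit kind */

int main(int argc, char **argv)
{
	struct example *skel;
	struct bpf_link *link;
	pid_t pids[NUM_CHILDREN];
	int i, status;

	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);

	skel = example__open_and_load();
	SCX_BUG_ON(!skel, "Failed to open and load skel");

	/*
	 * Attach the struct_ops scheduler. enq_last_no_enq_fails instead
	 * asserts that this attach fails, via SCX_BUG_ON(link, ...).
	 */
	link = bpf_map__attach_struct_ops(skel->maps.example_ops);
	SCX_BUG_ON(!link, "Failed to attach struct_ops");

	/* Fork children so wakeups repeatedly exercise ops.select_cpu(). */
	for (i = 0; i < NUM_CHILDREN; i++) {
		pids[i] = fork();
		if (pids[i] == 0) {
			sleep(1);
			exit(0);
		}
	}

	for (i = 0; i < NUM_CHILDREN; i++) {
		SCX_EQ(waitpid(pids[i], &status, 0), pids[i]);
		SCX_EQ(status, 0);
	}

	/* The error-path tests then assert that the scheduler aborted. */
	SCX_EQ(skel->bss->uei.kind, SCX_EXIT_ERROR);

	bpf_link__destroy(link);
	example__destroy(skel);
	return 0;
}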