Skip to content

Commit 46fe18b

Browse files
committed
io_uring: move to using create_io_thread()
This allows us to do task creation and setup without needing to use completions to try and synchronize with the starting thread. Get rid of the old io_wq_fork_thread() wrapper, and the 'wq' and 'worker' startup completion events - we can now do setup before the task is running.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent cc440e8 commit 46fe18b

3 files changed

Lines changed: 54 additions & 109 deletions

File tree

fs/io-wq.c

Lines changed: 35 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ struct io_worker {
5454
spinlock_t lock;
5555

5656
struct completion ref_done;
57-
struct completion started;
5857

5958
struct rcu_head rcu;
6059
};
@@ -116,7 +115,6 @@ struct io_wq {
116115
struct io_wq_hash *hash;
117116

118117
refcount_t refs;
119-
struct completion started;
120118
struct completion exited;
121119

122120
atomic_t worker_refs;
@@ -199,6 +197,7 @@ static void io_worker_exit(struct io_worker *worker)
199197
kfree_rcu(worker, rcu);
200198
if (atomic_dec_and_test(&wqe->wq->worker_refs))
201199
complete(&wqe->wq->worker_done);
200+
do_exit(0);
202201
}
203202

204203
static inline bool io_wqe_run_queue(struct io_wqe *wqe)
@@ -273,14 +272,6 @@ static void io_wqe_dec_running(struct io_worker *worker)
273272
io_wqe_wake_worker(wqe, acct);
274273
}
275274

276-
static void io_worker_start(struct io_worker *worker)
277-
{
278-
current->flags |= PF_NOFREEZE;
279-
worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
280-
io_wqe_inc_running(worker);
281-
complete(&worker->started);
282-
}
283-
284275
/*
285276
* Worker will start processing some work. Move it to the busy list, if
286277
* it's currently on the freelist
@@ -489,8 +480,13 @@ static int io_wqe_worker(void *data)
489480
struct io_worker *worker = data;
490481
struct io_wqe *wqe = worker->wqe;
491482
struct io_wq *wq = wqe->wq;
483+
char buf[TASK_COMM_LEN];
492484

493-
io_worker_start(worker);
485+
worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
486+
io_wqe_inc_running(worker);
487+
488+
sprintf(buf, "iou-wrk-%d", wq->task_pid);
489+
set_task_comm(current, buf);
494490

495491
while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
496492
set_current_state(TASK_INTERRUPTIBLE);
@@ -565,67 +561,11 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
565561
raw_spin_unlock_irq(&worker->wqe->lock);
566562
}
567563

568-
static int task_thread(void *data, int index)
569-
{
570-
struct io_worker *worker = data;
571-
struct io_wqe *wqe = worker->wqe;
572-
struct io_wqe_acct *acct = &wqe->acct[index];
573-
struct io_wq *wq = wqe->wq;
574-
char buf[TASK_COMM_LEN];
575-
576-
sprintf(buf, "iou-wrk-%d", wq->task_pid);
577-
set_task_comm(current, buf);
578-
579-
current->pf_io_worker = worker;
580-
worker->task = current;
581-
582-
set_cpus_allowed_ptr(current, cpumask_of_node(wqe->node));
583-
current->flags |= PF_NO_SETAFFINITY;
584-
585-
raw_spin_lock_irq(&wqe->lock);
586-
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
587-
list_add_tail_rcu(&worker->all_list, &wqe->all_list);
588-
worker->flags |= IO_WORKER_F_FREE;
589-
if (index == IO_WQ_ACCT_BOUND)
590-
worker->flags |= IO_WORKER_F_BOUND;
591-
if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
592-
worker->flags |= IO_WORKER_F_FIXED;
593-
acct->nr_workers++;
594-
raw_spin_unlock_irq(&wqe->lock);
595-
596-
io_wqe_worker(data);
597-
do_exit(0);
598-
}
599-
600-
static int task_thread_bound(void *data)
601-
{
602-
return task_thread(data, IO_WQ_ACCT_BOUND);
603-
}
604-
605-
static int task_thread_unbound(void *data)
606-
{
607-
return task_thread(data, IO_WQ_ACCT_UNBOUND);
608-
}
609-
610-
pid_t io_wq_fork_thread(int (*fn)(void *), void *arg)
611-
{
612-
unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
613-
CLONE_IO|SIGCHLD;
614-
struct kernel_clone_args args = {
615-
.flags = ((lower_32_bits(flags) | CLONE_VM |
616-
CLONE_UNTRACED) & ~CSIGNAL),
617-
.exit_signal = (lower_32_bits(flags) & CSIGNAL),
618-
.stack = (unsigned long)fn,
619-
.stack_size = (unsigned long)arg,
620-
};
621-
622-
return kernel_clone(&args);
623-
}
624-
625564
static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
626565
{
566+
struct io_wqe_acct *acct = &wqe->acct[index];
627567
struct io_worker *worker;
628-
pid_t pid;
568+
struct task_struct *tsk;
629569

630570
__set_current_state(TASK_RUNNING);
631571

@@ -638,21 +578,33 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
638578
worker->wqe = wqe;
639579
spin_lock_init(&worker->lock);
640580
init_completion(&worker->ref_done);
641-
init_completion(&worker->started);
642581

643582
atomic_inc(&wq->worker_refs);
644583

645-
if (index == IO_WQ_ACCT_BOUND)
646-
pid = io_wq_fork_thread(task_thread_bound, worker);
647-
else
648-
pid = io_wq_fork_thread(task_thread_unbound, worker);
649-
if (pid < 0) {
584+
tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
585+
if (IS_ERR(tsk)) {
650586
if (atomic_dec_and_test(&wq->worker_refs))
651587
complete(&wq->worker_done);
652588
kfree(worker);
653589
return false;
654590
}
655-
wait_for_completion(&worker->started);
591+
592+
tsk->pf_io_worker = worker;
593+
worker->task = tsk;
594+
set_cpus_allowed_ptr(tsk, cpumask_of_node(wqe->node));
595+
tsk->flags |= PF_NOFREEZE | PF_NO_SETAFFINITY;
596+
597+
raw_spin_lock_irq(&wqe->lock);
598+
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
599+
list_add_tail_rcu(&worker->all_list, &wqe->all_list);
600+
worker->flags |= IO_WORKER_F_FREE;
601+
if (index == IO_WQ_ACCT_BOUND)
602+
worker->flags |= IO_WORKER_F_BOUND;
603+
if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
604+
worker->flags |= IO_WORKER_F_FIXED;
605+
acct->nr_workers++;
606+
raw_spin_unlock_irq(&wqe->lock);
607+
wake_up_new_task(tsk);
656608
return true;
657609
}
658610

@@ -696,6 +648,7 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe,
696648

697649
static bool io_wq_worker_wake(struct io_worker *worker, void *data)
698650
{
651+
set_notify_signal(worker->task);
699652
wake_up_process(worker->task);
700653
return false;
701654
}
@@ -752,10 +705,6 @@ static int io_wq_manager(void *data)
752705

753706
sprintf(buf, "iou-mgr-%d", wq->task_pid);
754707
set_task_comm(current, buf);
755-
current->flags |= PF_IO_WORKER;
756-
wq->manager = get_task_struct(current);
757-
758-
complete(&wq->started);
759708

760709
do {
761710
set_current_state(TASK_INTERRUPTIBLE);
@@ -815,21 +764,20 @@ static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
815764

816765
static int io_wq_fork_manager(struct io_wq *wq)
817766
{
818-
int ret;
767+
struct task_struct *tsk;
819768

820769
if (wq->manager)
821770
return 0;
822771

823772
reinit_completion(&wq->worker_done);
824-
current->flags |= PF_IO_WORKER;
825-
ret = io_wq_fork_thread(io_wq_manager, wq);
826-
current->flags &= ~PF_IO_WORKER;
827-
if (ret >= 0) {
828-
wait_for_completion(&wq->started);
773+
tsk = create_io_thread(io_wq_manager, wq, NUMA_NO_NODE);
774+
if (!IS_ERR(tsk)) {
775+
wq->manager = get_task_struct(tsk);
776+
wake_up_new_task(tsk);
829777
return 0;
830778
}
831779

832-
return ret;
780+
return PTR_ERR(tsk);
833781
}
834782

835783
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
@@ -1062,7 +1010,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
10621010
}
10631011

10641012
wq->task_pid = current->pid;
1065-
init_completion(&wq->started);
10661013
init_completion(&wq->exited);
10671014
refcount_set(&wq->refs, 1);
10681015

fs/io-wq.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,6 @@ void io_wq_put_and_exit(struct io_wq *wq);
119119
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
120120
void io_wq_hash_work(struct io_wq_work *work, void *val);
121121

122-
pid_t io_wq_fork_thread(int (*fn)(void *), void *arg);
123-
124122
static inline bool io_wq_is_hashed(struct io_wq_work *work)
125123
{
126124
return work->flags & IO_WQ_WORK_HASHED;

fs/io_uring.c

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6668,7 +6668,6 @@ static int io_sq_thread(void *data)
66686668

66696669
sprintf(buf, "iou-sqp-%d", sqd->task_pid);
66706670
set_task_comm(current, buf);
6671-
sqd->thread = current;
66726671
current->pf_io_worker = NULL;
66736672

66746673
if (sqd->sq_cpu != -1)
@@ -6677,8 +6676,6 @@ static int io_sq_thread(void *data)
66776676
set_cpus_allowed_ptr(current, cpu_online_mask);
66786677
current->flags |= PF_NO_SETAFFINITY;
66796678

6680-
complete(&sqd->completion);
6681-
66826679
wait_for_completion(&sqd->startup);
66836680

66846681
while (!io_sq_thread_should_stop(sqd)) {
@@ -7818,21 +7815,22 @@ void __io_uring_free(struct task_struct *tsk)
78187815

78197816
static int io_sq_thread_fork(struct io_sq_data *sqd, struct io_ring_ctx *ctx)
78207817
{
7818+
struct task_struct *tsk;
78217819
int ret;
78227820

78237821
clear_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
78247822
reinit_completion(&sqd->completion);
78257823
ctx->sqo_exec = 0;
78267824
sqd->task_pid = current->pid;
7827-
current->flags |= PF_IO_WORKER;
7828-
ret = io_wq_fork_thread(io_sq_thread, sqd);
7829-
current->flags &= ~PF_IO_WORKER;
7830-
if (ret < 0) {
7831-
sqd->thread = NULL;
7832-
return ret;
7833-
}
7834-
wait_for_completion(&sqd->completion);
7835-
return io_uring_alloc_task_context(sqd->thread, ctx);
7825+
tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE);
7826+
if (IS_ERR(tsk))
7827+
return PTR_ERR(tsk);
7828+
ret = io_uring_alloc_task_context(tsk, ctx);
7829+
if (ret)
7830+
set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
7831+
sqd->thread = tsk;
7832+
wake_up_new_task(tsk);
7833+
return ret;
78367834
}
78377835

78387836
static int io_sq_offload_create(struct io_ring_ctx *ctx,
@@ -7855,6 +7853,7 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
78557853
fdput(f);
78567854
}
78577855
if (ctx->flags & IORING_SETUP_SQPOLL) {
7856+
struct task_struct *tsk;
78587857
struct io_sq_data *sqd;
78597858

78607859
ret = -EPERM;
@@ -7896,15 +7895,16 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
78967895
}
78977896

78987897
sqd->task_pid = current->pid;
7899-
current->flags |= PF_IO_WORKER;
7900-
ret = io_wq_fork_thread(io_sq_thread, sqd);
7901-
current->flags &= ~PF_IO_WORKER;
7902-
if (ret < 0) {
7903-
sqd->thread = NULL;
7898+
tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE);
7899+
if (IS_ERR(tsk)) {
7900+
ret = PTR_ERR(tsk);
79047901
goto err;
79057902
}
7906-
wait_for_completion(&sqd->completion);
7907-
ret = io_uring_alloc_task_context(sqd->thread, ctx);
7903+
ret = io_uring_alloc_task_context(tsk, ctx);
7904+
if (ret)
7905+
set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
7906+
sqd->thread = tsk;
7907+
wake_up_new_task(tsk);
79087908
if (ret)
79097909
goto err;
79107910
} else if (p->flags & IORING_SETUP_SQ_AFF) {

0 commit comments

Comments
 (0)