Merge tag 'io_uring-6.17-20250919' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

 - Fix for a regression introduced in the io-wq worker creation logic.

 - Remove the allocation cache for the msg_ring io_kiocb allocations. I
   have a suspicion that there's a bug there, and since we just fixed
   one in that area, let's just yank the use of that cache entirely.
   It's not that important, and it kills some code.

 - Treat a closed ring like task exiting in that any requests that
   trigger post that condition should just get canceled. Doesn't fix any
   real issues, outside of having tasks being able to rely on that
   guarantee.

 - Fix for a bug in the network zero-copy notification mechanism, where
   a comparison for matching tctx/ctx for notifications was buggy in
   that it didn't correctly compare with the previous notification.

* tag 'io_uring-6.17-20250919' of git://git.kernel.dk/linux:
  io_uring: fix incorrect io_kiocb reference in io_link_skb
  io_uring/msg_ring: kill alloc_cache for io_kiocb allocations
  io_uring: include dying ring in task_work "should cancel" state
  io_uring/io-wq: fix `max_workers` breakage and `nr_workers` underflow
This commit is contained in:
Linus Torvalds
2025-09-19 12:10:49 -07:00
9 changed files with 15 additions and 40 deletions

View File

@@ -352,16 +352,16 @@ static void create_worker_cb(struct callback_head *cb)
struct io_wq *wq;
struct io_wq_acct *acct;
bool do_create = false;
bool activated_free_worker, do_create = false;
worker = container_of(cb, struct io_worker, create_work);
wq = worker->wq;
acct = worker->acct;
rcu_read_lock();
do_create = !io_acct_activate_free_worker(acct);
activated_free_worker = io_acct_activate_free_worker(acct);
rcu_read_unlock();
if (!do_create)
if (activated_free_worker)
goto no_need_create;
raw_spin_lock(&acct->workers_lock);

View File

@@ -290,7 +290,6 @@ static void io_free_alloc_caches(struct io_ring_ctx *ctx)
io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free);
io_alloc_cache_free(&ctx->cmd_cache, io_cmd_cache_free);
io_alloc_cache_free(&ctx->msg_cache, kfree);
io_futex_cache_free(ctx);
io_rsrc_cache_free(ctx);
}
@@ -337,9 +336,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
ret |= io_alloc_cache_init(&ctx->cmd_cache, IO_ALLOC_CACHE_MAX,
sizeof(struct io_async_cmd),
sizeof(struct io_async_cmd));
spin_lock_init(&ctx->msg_lock);
ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX,
sizeof(struct io_kiocb), 0);
ret |= io_futex_cache_init(ctx);
ret |= io_rsrc_cache_init(ctx);
if (ret)
@@ -1406,8 +1402,10 @@ static void io_req_task_cancel(struct io_kiocb *req, io_tw_token_t tw)
void io_req_task_submit(struct io_kiocb *req, io_tw_token_t tw)
{
io_tw_lock(req->ctx, tw);
if (unlikely(io_should_terminate_tw()))
struct io_ring_ctx *ctx = req->ctx;
io_tw_lock(ctx, tw);
if (unlikely(io_should_terminate_tw(ctx)))
io_req_defer_failed(req, -EFAULT);
else if (req->flags & REQ_F_FORCE_ASYNC)
io_queue_iowq(req);

View File

@@ -476,9 +476,9 @@ static inline bool io_allowed_run_tw(struct io_ring_ctx *ctx)
* 2) PF_KTHREAD is set, in which case the invoker of the task_work is
* our fallback task_work.
*/
static inline bool io_should_terminate_tw(void)
static inline bool io_should_terminate_tw(struct io_ring_ctx *ctx)
{
return current->flags & (PF_KTHREAD | PF_EXITING);
return (current->flags & (PF_KTHREAD | PF_EXITING)) || percpu_ref_is_dying(&ctx->refs);
}
static inline void io_req_queue_tw_complete(struct io_kiocb *req, s32 res)

View File

@@ -11,7 +11,6 @@
#include "io_uring.h"
#include "rsrc.h"
#include "filetable.h"
#include "alloc_cache.h"
#include "msg_ring.h"
/* All valid masks for MSG_RING */
@@ -76,13 +75,7 @@ static void io_msg_tw_complete(struct io_kiocb *req, io_tw_token_t tw)
struct io_ring_ctx *ctx = req->ctx;
io_add_aux_cqe(ctx, req->cqe.user_data, req->cqe.res, req->cqe.flags);
if (spin_trylock(&ctx->msg_lock)) {
if (io_alloc_cache_put(&ctx->msg_cache, req))
req = NULL;
spin_unlock(&ctx->msg_lock);
}
if (req)
kfree_rcu(req, rcu_head);
kfree_rcu(req, rcu_head);
percpu_ref_put(&ctx->refs);
}
@@ -104,26 +97,13 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
return 0;
}
static struct io_kiocb *io_msg_get_kiocb(struct io_ring_ctx *ctx)
{
struct io_kiocb *req = NULL;
if (spin_trylock(&ctx->msg_lock)) {
req = io_alloc_cache_get(&ctx->msg_cache);
spin_unlock(&ctx->msg_lock);
if (req)
return req;
}
return kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
}
static int io_msg_data_remote(struct io_ring_ctx *target_ctx,
struct io_msg *msg)
{
struct io_kiocb *target;
u32 flags = 0;
target = io_msg_get_kiocb(target_ctx);
target = kmem_cache_alloc(req_cachep, GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO) ;
if (unlikely(!target))
return -ENOMEM;

View File

@@ -85,7 +85,7 @@ static int io_link_skb(struct sk_buff *skb, struct ubuf_info *uarg)
return -EEXIST;
prev_nd = container_of(prev_uarg, struct io_notif_data, uarg);
prev_notif = cmd_to_io_kiocb(nd);
prev_notif = cmd_to_io_kiocb(prev_nd);
/* make sure all noifications can be finished in the same task_work */
if (unlikely(notif->ctx != prev_notif->ctx ||

View File

@@ -224,7 +224,7 @@ static int io_poll_check_events(struct io_kiocb *req, io_tw_token_t tw)
{
int v;
if (unlikely(io_should_terminate_tw()))
if (unlikely(io_should_terminate_tw(req->ctx)))
return -ECANCELED;
do {

View File

@@ -324,7 +324,7 @@ static void io_req_task_link_timeout(struct io_kiocb *req, io_tw_token_t tw)
int ret;
if (prev) {
if (!io_should_terminate_tw()) {
if (!io_should_terminate_tw(req->ctx)) {
struct io_cancel_data cd = {
.ctx = req->ctx,
.data = prev->cqe.user_data,

View File

@@ -118,7 +118,7 @@ static void io_uring_cmd_work(struct io_kiocb *req, io_tw_token_t tw)
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
unsigned int flags = IO_URING_F_COMPLETE_DEFER;
if (io_should_terminate_tw())
if (io_should_terminate_tw(req->ctx))
flags |= IO_URING_F_TASK_DEAD;
/* task_work executor checks the deffered list completion */