mirror of https://github.com/torvalds/linux.git
Merge tag 'io_uring-6.18-20251023' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring fixes from Jens Axboe:

 - Add MAINTAINERS entry for zcrx, mostly so that netdev gets
   automatically CC'ed by default on any changes there too.

 - Fix for the SQPOLL busy vs work time accounting. It was using
   getrusage(), which was both broken from a thread point of view (we
   only care about the SQPOLL thread itself), and vastly overkill as
   only the systime was used. On top of that, also be a bit smarter in
   when it's queried. It used excessive CPU before this change. Marked
   for stable as well.

 - Fix provided ring buffer auto commit for uring_cmd.

 - Fix a few style issues and sparse annotation for a lock.

* tag 'io_uring-6.18-20251023' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring: fix buffer auto-commit for multishot uring_cmd
  io_uring: correct __must_hold annotation in io_install_fixed_file
  io_uring zcrx: add MAINTAINERS entry
  io_uring: Fix code indentation error
  io_uring/sqpoll: be smarter on when to update the stime usage
  io_uring/sqpoll: switch away from getrusage() for CPU accounting
  io_uring: fix incorrect unlikely() usage in io_waitid_prep()
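For context on the accounting changes pulled here: the SQPOLL totals surface in the ring fd's fdinfo output, which is what the fdinfo.c hunk below switches over to io_sq_cpu_usec(). A minimal userspace sketch, not part of this merge, assuming liburing and the SqTotalTime/SqWorkTime fields printed by io_uring/fdinfo.c, that sets up an SQPOLL ring and dumps those counters:

/*
 * Hedged sketch (not from the patches in this merge): observe the SQPOLL
 * time accounting these fixes touch. The kernel exports the totals via
 * /proc/<pid>/fdinfo/<ring fd>; field names assumed from io_uring/fdinfo.c.
 * Requires liburing and IORING_SETUP_SQPOLL support.
 */
#include <stdio.h>
#include <string.h>
#include <liburing.h>

int main(void)
{
        struct io_uring_params p;
        struct io_uring ring;
        char path[64], line[256];
        FILE *f;
        int ret;

        memset(&p, 0, sizeof(p));
        p.flags = IORING_SETUP_SQPOLL;
        p.sq_thread_idle = 1000;        /* ms before the SQPOLL thread idles */

        ret = io_uring_queue_init_params(8, &ring, &p);
        if (ret < 0) {
                fprintf(stderr, "queue_init: %s\n", strerror(-ret));
                return 1;
        }

        /* fdinfo for the ring fd carries the SQPOLL accounting */
        snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", ring.ring_fd);
        f = fopen(path, "r");
        if (f) {
                while (fgets(line, sizeof(line), f))
                        if (strstr(line, "Sq"))
                                fputs(line, stdout);
                fclose(f);
        }

        io_uring_queue_exit(&ring);
        return 0;
}

With these fixes, the reported total reflects only the SQPOLL thread's system time, in microseconds, instead of process-wide getrusage() data.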
diff --git a/MAINTAINERS b/MAINTAINERS
@@ -13116,6 +13116,15 @@ F:	include/uapi/linux/io_uring.h
 F:	include/uapi/linux/io_uring/
 F:	io_uring/
 
+IO_URING ZCRX
+M:	Pavel Begunkov <asml.silence@gmail.com>
+L:	io-uring@vger.kernel.org
+L:	netdev@vger.kernel.org
+T:	git https://github.com/isilence/linux.git zcrx/for-next
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux.git
+S:	Maintained
+F:	io_uring/zcrx.*
+
 IPMI SUBSYSTEM
 M:	Corey Minyard <corey@minyard.net>
 L:	openipmi-developer@lists.sourceforge.net (moderated for non-subscribers)
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
@@ -59,7 +59,6 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 {
 	struct io_overflow_cqe *ocqe;
 	struct io_rings *r = ctx->rings;
-	struct rusage sq_usage;
 	unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
 	unsigned int sq_head = READ_ONCE(r->sq.head);
 	unsigned int sq_tail = READ_ONCE(r->sq.tail);
@@ -152,14 +151,15 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 	 * thread termination.
 	 */
 	if (tsk) {
+		u64 usec;
+
 		get_task_struct(tsk);
 		rcu_read_unlock();
-		getrusage(tsk, RUSAGE_SELF, &sq_usage);
+		usec = io_sq_cpu_usec(tsk);
 		put_task_struct(tsk);
 		sq_pid = sq->task_pid;
 		sq_cpu = sq->sq_cpu;
-		sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000
-				 + sq_usage.ru_stime.tv_usec);
+		sq_total_time = usec;
 		sq_work_time = sq->work_time;
 	} else {
 		rcu_read_unlock();
diff --git a/io_uring/filetable.c b/io_uring/filetable.c
@@ -57,7 +57,7 @@ void io_free_file_tables(struct io_ring_ctx *ctx, struct io_file_table *table)
 
 static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
 				 u32 slot_index)
-	__must_hold(&req->ctx->uring_lock)
+	__must_hold(&ctx->uring_lock)
 {
 	struct io_rsrc_node *node;
 
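The one-line annotation fix above matters because sparse evaluates the __must_hold() expression in the annotated function's scope, and io_install_fixed_file() has no req parameter, so the old annotation named a variable that does not exist there. A self-contained sketch, using a stub macro and a hypothetical install_fixed_file()/ring_ctx pair of my own, of how the annotation is meant to line up with an actual parameter:

/*
 * Illustration only: under sparse (__CHECKER__), __must_hold(x) expands to
 * __attribute__((context(x,1,1))) and x must be resolvable in the function.
 * Outside sparse the macro expands to nothing, so this builds normally.
 */
#include <stdio.h>
#include <pthread.h>

#ifdef __CHECKER__
# define __must_hold(x)	__attribute__((context(x, 1, 1)))
#else
# define __must_hold(x)
#endif

struct ring_ctx {
        pthread_mutex_t uring_lock;
        int installed;
};

/* caller must already hold ctx->uring_lock, as the annotation documents */
static int install_fixed_file(struct ring_ctx *ctx, int fd)
        __must_hold(&ctx->uring_lock)
{
        ctx->installed = fd;
        return 0;
}

int main(void)
{
        struct ring_ctx ctx = { .uring_lock = PTHREAD_MUTEX_INITIALIZER };

        pthread_mutex_lock(&ctx.uring_lock);
        install_fixed_file(&ctx, 3);
        pthread_mutex_unlock(&ctx.uring_lock);
        printf("installed fd %d\n", ctx.installed);
        return 0;
}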
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
@@ -879,7 +879,7 @@ static inline struct io_cqe io_init_cqe(u64 user_data, s32 res, u32 cflags)
 }
 
 static __cold void io_cqe_overflow(struct io_ring_ctx *ctx, struct io_cqe *cqe,
-		struct io_big_cqe *big_cqe)
+				   struct io_big_cqe *big_cqe)
 {
 	struct io_overflow_cqe *ocqe;
 
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
@@ -155,6 +155,27 @@ static int io_provided_buffers_select(struct io_kiocb *req, size_t *len,
 	return 1;
 }
 
+static bool io_should_commit(struct io_kiocb *req, unsigned int issue_flags)
+{
+	/*
+	 * If we came in unlocked, we have no choice but to consume the
+	 * buffer here, otherwise nothing ensures that the buffer won't
+	 * get used by others. This does mean it'll be pinned until the
+	 * IO completes, coming in unlocked means we're being called from
+	 * io-wq context and there may be further retries in async hybrid
+	 * mode. For the locked case, the caller must call commit when
+	 * the transfer completes (or if we get -EAGAIN and must poll of
+	 * retry).
+	 */
+	if (issue_flags & IO_URING_F_UNLOCKED)
+		return true;
+
+	/* uring_cmd commits kbuf upfront, no need to auto-commit */
+	if (!io_file_can_poll(req) && req->opcode != IORING_OP_URING_CMD)
+		return true;
+	return false;
+}
+
 static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len,
 					      struct io_buffer_list *bl,
 					      unsigned int issue_flags)
@@ -181,17 +202,7 @@ static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len,
 	sel.buf_list = bl;
 	sel.addr = u64_to_user_ptr(buf->addr);
 
-	if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) {
-		/*
-		 * If we came in unlocked, we have no choice but to consume the
-		 * buffer here, otherwise nothing ensures that the buffer won't
-		 * get used by others. This does mean it'll be pinned until the
-		 * IO completes, coming in unlocked means we're being called from
-		 * io-wq context and there may be further retries in async hybrid
-		 * mode. For the locked case, the caller must call commit when
-		 * the transfer completes (or if we get -EAGAIN and must poll of
-		 * retry).
-		 */
+	if (io_should_commit(req, issue_flags)) {
 		io_kbuf_commit(req, sel.buf_list, *len, 1);
 		sel.buf_list = NULL;
 	}
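The new io_should_commit() helper decides when the kernel consumes a provided ring buffer on the application's behalf. For orientation, a hedged userspace sketch, assuming liburing's io_uring_setup_buf_ring()/io_uring_buf_ring_add() helpers and an arbitrary buffer group id, of how such a buffer ring is registered in the first place:

/*
 * Hedged userspace sketch (not from this merge): register a provided
 * buffer ring and hand its buffers to the kernel. Requests submitted
 * with IOSQE_BUFFER_SELECT and this group id pick buffers from here;
 * the kbuf.c change above governs when the kernel commits them.
 */
#include <stdio.h>
#include <stdlib.h>
#include <liburing.h>

#define BUF_COUNT	8
#define BUF_SIZE	4096
#define BGID		0	/* buffer group id, arbitrary for this example */

int main(void)
{
        struct io_uring_buf_ring *br;
        struct io_uring ring;
        void *bufs;
        int ret, i;

        if (io_uring_queue_init(8, &ring, 0) < 0)
                return 1;

        /* one contiguous allocation backing all provided buffers */
        if (posix_memalign(&bufs, 4096, BUF_COUNT * BUF_SIZE))
                return 1;

        br = io_uring_setup_buf_ring(&ring, BUF_COUNT, BGID, 0, &ret);
        if (!br) {
                fprintf(stderr, "setup_buf_ring failed: %d\n", ret);
                return 1;
        }

        /* publish every buffer to the ring, then make them visible */
        for (i = 0; i < BUF_COUNT; i++)
                io_uring_buf_ring_add(br, (char *)bufs + i * BUF_SIZE, BUF_SIZE,
                                      i, io_uring_buf_ring_mask(BUF_COUNT), i);
        io_uring_buf_ring_advance(br, BUF_COUNT);

        printf("buffer ring registered, group %d\n", BGID);

        io_uring_free_buf_ring(&ring, br, BUF_COUNT, BGID);
        io_uring_queue_exit(&ring);
        free(bufs);
        return 0;
}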
diff --git a/io_uring/net.c b/io_uring/net.c
@@ -383,7 +383,7 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return 0;
 
 	if (sr->flags & IORING_SEND_VECTORIZED)
-	return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);
+		return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);
 
 	return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
 }
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
@@ -11,6 +11,7 @@
 #include <linux/audit.h>
 #include <linux/security.h>
 #include <linux/cpuset.h>
+#include <linux/sched/cputime.h>
 #include <linux/io_uring.h>
 
 #include <uapi/linux/io_uring.h>
@@ -169,7 +170,38 @@ static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
 	return READ_ONCE(sqd->state);
 }
 
-static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
+struct io_sq_time {
+	bool started;
+	u64 usec;
+};
+
+u64 io_sq_cpu_usec(struct task_struct *tsk)
+{
+	u64 utime, stime;
+
+	task_cputime_adjusted(tsk, &utime, &stime);
+	do_div(stime, 1000);
+	return stime;
+}
+
+static void io_sq_update_worktime(struct io_sq_data *sqd, struct io_sq_time *ist)
+{
+	if (!ist->started)
+		return;
+	ist->started = false;
+	sqd->work_time += io_sq_cpu_usec(current) - ist->usec;
+}
+
+static void io_sq_start_worktime(struct io_sq_time *ist)
+{
+	if (ist->started)
+		return;
+	ist->started = true;
+	ist->usec = io_sq_cpu_usec(current);
+}
+
+static int __io_sq_thread(struct io_ring_ctx *ctx, struct io_sq_data *sqd,
+			  bool cap_entries, struct io_sq_time *ist)
 {
 	unsigned int to_submit;
 	int ret = 0;
@@ -182,6 +214,8 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
 	if (to_submit || !wq_list_empty(&ctx->iopoll_list)) {
 		const struct cred *creds = NULL;
 
+		io_sq_start_worktime(ist);
+
 		if (ctx->sq_creds != current_cred())
 			creds = override_creds(ctx->sq_creds);
 
@@ -255,23 +289,11 @@ static bool io_sq_tw_pending(struct llist_node *retry_list)
 	return retry_list || !llist_empty(&tctx->task_list);
 }
 
-static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start)
-{
-	struct rusage end;
-
-	getrusage(current, RUSAGE_SELF, &end);
-	end.ru_stime.tv_sec -= start->ru_stime.tv_sec;
-	end.ru_stime.tv_usec -= start->ru_stime.tv_usec;
-
-	sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000;
-}
-
 static int io_sq_thread(void *data)
 {
 	struct llist_node *retry_list = NULL;
 	struct io_sq_data *sqd = data;
 	struct io_ring_ctx *ctx;
-	struct rusage start;
 	unsigned long timeout = 0;
 	char buf[TASK_COMM_LEN] = {};
 	DEFINE_WAIT(wait);
@@ -309,6 +331,7 @@ static int io_sq_thread(void *data)
 	mutex_lock(&sqd->lock);
 	while (1) {
 		bool cap_entries, sqt_spin = false;
+		struct io_sq_time ist = { };
 
 		if (io_sqd_events_pending(sqd) || signal_pending(current)) {
 			if (io_sqd_handle_event(sqd))
@@ -317,9 +340,8 @@ static int io_sq_thread(void *data)
 		}
 
 		cap_entries = !list_is_singular(&sqd->ctx_list);
-		getrusage(current, RUSAGE_SELF, &start);
 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
-			int ret = __io_sq_thread(ctx, cap_entries);
+			int ret = __io_sq_thread(ctx, sqd, cap_entries, &ist);
 
 			if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list)))
 				sqt_spin = true;
@@ -327,15 +349,18 @@ static int io_sq_thread(void *data)
 		if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE))
 			sqt_spin = true;
 
-		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
-			if (io_napi(ctx))
+		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+			if (io_napi(ctx)) {
+				io_sq_start_worktime(&ist);
 				io_napi_sqpoll_busy_poll(ctx);
+			}
+		}
+
+		io_sq_update_worktime(sqd, &ist);
 
 		if (sqt_spin || !time_after(jiffies, timeout)) {
-			if (sqt_spin) {
-				io_sq_update_worktime(sqd, &start);
+			if (sqt_spin)
 				timeout = jiffies + sqd->sq_thread_idle;
-			}
 			if (unlikely(need_resched())) {
 				mutex_unlock(&sqd->lock);
 				cond_resched();
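As the pull message notes, getrusage() was the wrong tool here: RUSAGE_SELF aggregates CPU time across every thread of the process, while the accounting only cares about the SQPOLL thread, and only about its system time. A standalone illustration of that scope difference (my example, not kernel code):

/*
 * Illustration only (not from this merge): RUSAGE_SELF sums CPU time over
 * all threads of the process, RUSAGE_THREAD covers just the caller. The
 * sqpoll.c change sidesteps both by reading the task's cputime directly,
 * and the kernel side cares about system time specifically.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <pthread.h>
#include <sys/resource.h>

static void *spin(void *arg)
{
        volatile unsigned long x = 0;

        for (unsigned long i = 0; i < 200000000UL; i++)
                x += i;
        return NULL;
}

int main(void)
{
        struct rusage self, thread;
        pthread_t t;

        /* burn CPU in a sibling thread, then compare the two scopes */
        pthread_create(&t, NULL, spin, NULL);
        pthread_join(t, NULL);

        getrusage(RUSAGE_SELF, &self);          /* includes the sibling's time */
        getrusage(RUSAGE_THREAD, &thread);      /* main thread only */

        printf("process utime: %ld.%06ld s\n",
               (long)self.ru_utime.tv_sec, (long)self.ru_utime.tv_usec);
        printf("thread  utime: %ld.%06ld s\n",
               (long)thread.ru_utime.tv_sec, (long)thread.ru_utime.tv_usec);
        return 0;
}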
diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h
@@ -29,6 +29,7 @@ void io_sq_thread_unpark(struct io_sq_data *sqd);
 void io_put_sq_data(struct io_sq_data *sqd);
 void io_sqpoll_wait_sq(struct io_ring_ctx *ctx);
 int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask);
+u64 io_sq_cpu_usec(struct task_struct *tsk);
 
 static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd)
 {
diff --git a/io_uring/waitid.c b/io_uring/waitid.c
@@ -250,7 +250,7 @@ int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return -EINVAL;
 
 	iwa = io_uring_alloc_async_data(NULL, req);
-	if (!unlikely(iwa))
+	if (unlikely(!iwa))
 		return -ENOMEM;
 	iwa->req = req;
 
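On the unlikely() fix: !unlikely(iwa) and unlikely(!iwa) evaluate to the same truth value, but the branch hint applies to the expression inside the macro, so the old form told the compiler that a NULL allocation was the expected case. A small userspace illustration, with fallback macros standing in for the kernel's definitions:

/*
 * Illustration only: both spellings take the error branch when the pointer
 * is NULL, but only unlikely(!ptr) marks allocation failure as the cold path.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define likely(x)	__builtin_expect(!!(x), 1)
#define unlikely(x)	__builtin_expect(!!(x), 0)

static int prep(size_t sz)
{
        void *data = malloc(sz);

        /* hint matches reality: failure is the cold path */
        if (unlikely(!data))
                return -ENOMEM;
        free(data);
        return 0;
}

int main(void)
{
        printf("prep: %d\n", prep(64));
        return 0;
}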