io_uring/sqpoll: switch away from getrusage() for CPU accounting

getrusage() does a lot more than what the SQPOLL accounting needs; the
latter only cares about (and uses) the stime. Rather than do a full
RUSAGE_SELF summation, just query the used stime instead.

Cc: stable@vger.kernel.org
Fixes: 3fcb9d1720 ("io_uring/sqpoll: statistics of the true utilization of sq threads")
Reviewed-by: Gabriel Krisman Bertazi <krisman@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Jens Axboe
2025-10-21 07:16:08 -06:00
parent 4ec703ec0c
commit 8ac9b0d33e
3 changed files with 23 additions and 18 deletions

View File

@@ -59,7 +59,6 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
{
struct io_overflow_cqe *ocqe;
struct io_rings *r = ctx->rings;
struct rusage sq_usage;
unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
unsigned int sq_head = READ_ONCE(r->sq.head);
unsigned int sq_tail = READ_ONCE(r->sq.tail);
@@ -152,14 +151,15 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
* thread termination.
*/
if (tsk) {
u64 usec;
get_task_struct(tsk);
rcu_read_unlock();
getrusage(tsk, RUSAGE_SELF, &sq_usage);
usec = io_sq_cpu_usec(tsk);
put_task_struct(tsk);
sq_pid = sq->task_pid;
sq_cpu = sq->sq_cpu;
sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000
+ sq_usage.ru_stime.tv_usec);
sq_total_time = usec;
sq_work_time = sq->work_time;
} else {
rcu_read_unlock();

View File

@@ -11,6 +11,7 @@
#include <linux/audit.h>
#include <linux/security.h>
#include <linux/cpuset.h>
#include <linux/sched/cputime.h>
#include <linux/io_uring.h>
#include <uapi/linux/io_uring.h>
@@ -169,6 +170,20 @@ static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
return READ_ONCE(sqd->state);
}
/*
 * Report the stime that task_cputime_adjusted() attributes to @tsk, scaled
 * down by 1000 (presumably nanoseconds -> microseconds, per the function
 * name; confirm against task_cputime_adjusted()).
 */
u64 io_sq_cpu_usec(struct task_struct *tsk)
{
	u64 user_time, sys_time;

	/* utime has to be fetched along with stime, but is not used here */
	task_cputime_adjusted(tsk, &user_time, &sys_time);

	/* do_div() leaves the quotient in its first argument */
	do_div(sys_time, 1000);

	return sys_time;
}
/*
 * Add to @sqd->work_time the difference between the current task's
 * io_sq_cpu_usec() reading and the earlier reading passed in as @usec.
 */
static void io_sq_update_worktime(struct io_sq_data *sqd, u64 usec)
{
	u64 delta = io_sq_cpu_usec(current) - usec;

	sqd->work_time += delta;
}
static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
{
unsigned int to_submit;
@@ -255,26 +270,15 @@ static bool io_sq_tw_pending(struct llist_node *retry_list)
return retry_list || !llist_empty(&tctx->task_list);
}
static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start)
{
struct rusage end;
getrusage(current, RUSAGE_SELF, &end);
end.ru_stime.tv_sec -= start->ru_stime.tv_sec;
end.ru_stime.tv_usec -= start->ru_stime.tv_usec;
sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000;
}
static int io_sq_thread(void *data)
{
struct llist_node *retry_list = NULL;
struct io_sq_data *sqd = data;
struct io_ring_ctx *ctx;
struct rusage start;
unsigned long timeout = 0;
char buf[TASK_COMM_LEN] = {};
DEFINE_WAIT(wait);
u64 start;
/* offload context creation failed, just exit */
if (!current->io_uring) {
@@ -317,7 +321,7 @@ static int io_sq_thread(void *data)
}
cap_entries = !list_is_singular(&sqd->ctx_list);
getrusage(current, RUSAGE_SELF, &start);
start = io_sq_cpu_usec(current);
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
int ret = __io_sq_thread(ctx, cap_entries);
@@ -333,7 +337,7 @@ static int io_sq_thread(void *data)
if (sqt_spin || !time_after(jiffies, timeout)) {
if (sqt_spin) {
io_sq_update_worktime(sqd, &start);
io_sq_update_worktime(sqd, start);
timeout = jiffies + sqd->sq_thread_idle;
}
if (unlikely(need_resched())) {

View File

@@ -29,6 +29,7 @@ void io_sq_thread_unpark(struct io_sq_data *sqd);
void io_put_sq_data(struct io_sq_data *sqd);
void io_sqpoll_wait_sq(struct io_ring_ctx *ctx);
int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask);
u64 io_sq_cpu_usec(struct task_struct *tsk);
static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd)
{