mirror of
https://github.com/torvalds/linux.git
synced 2025-11-30 23:16:01 +07:00
sched: Make migrate_{en,dis}able() inline
Currently, migrate_enable and migrate_disable are global functions, which
can make them become hotspots in some cases. Take BPF for example: the
calls to migrate_enable and migrate_disable in the BPF trampoline can
introduce significant overhead, and the following is the 'perf top' of
FENTRY's benchmark (./tools/testing/selftests/bpf/bench trig-fentry):
54.63% bpf_prog_2dcccf652aac1793_bench_trigger_fentry [k]
bpf_prog_2dcccf652aac1793_bench_trigger_fentry
10.43% [kernel] [k] migrate_enable
10.07% bpf_trampoline_6442517037 [k] bpf_trampoline_6442517037
8.06% [kernel] [k] __bpf_prog_exit_recur
4.11% libc.so.6 [.] syscall
2.15% [kernel] [k] entry_SYSCALL_64
1.48% [kernel] [k] memchr_inv
1.32% [kernel] [k] fput
1.16% [kernel] [k] _copy_to_user
0.73% [kernel] [k] bpf_prog_test_run_raw_tp
So in this commit, we make migrate_enable/migrate_disable inline to obtain
better performance. The struct rq is defined internally in
kernel/sched/sched.h, and the field "nr_pinned" is accessed in
migrate_enable/migrate_disable, which makes it hard to make them inline.
Alexei Starovoitov suggests to generate the offset of "nr_pinned" in [1],
so we can define the migrate_enable/migrate_disable in
include/linux/sched.h and access "this_rq()->nr_pinned" with
"(void *)this_rq() + RQ_nr_pinned".
The offset of "nr_pinned" is generated in include/generated/rq-offsets.h
by kernel/sched/rq-offsets.c.
Generally speaking, we move the definition of migrate_enable and
migrate_disable to include/linux/sched.h from kernel/sched/core.c. The
call to __set_cpus_allowed_ptr() is left in ___migrate_enable().
The "struct rq" is not available in include/linux/sched.h, so we can't
access the "runqueues" with this_cpu_ptr(), as the compilation will fail
in this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr():
typeof((ptr) + 0)
So we introduce this_rq_raw() and access the runqueues with
arch_raw_cpu_ptr()/PERCPU_PTR() directly.
The variable "runqueues" is not visible to kernel modules, and exporting
it is not a good idea. As Peter Zijlstra advised in [2], we define and
export migrate_enable/migrate_disable in kernel/sched/core.c too, and use
them for the modules.
Before this patch, the performance of BPF FENTRY is:
fentry : 113.030 ± 0.149M/s
fentry : 112.501 ± 0.187M/s
fentry : 112.828 ± 0.267M/s
fentry : 115.287 ± 0.241M/s
After this patch, the performance of BPF FENTRY increases to:
fentry : 143.644 ± 0.670M/s
fentry : 149.764 ± 0.362M/s
fentry : 149.642 ± 0.156M/s
fentry : 145.263 ± 0.221M/s
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/bpf/CAADnVQ+5sEDKHdsJY5ZsfGDO_1SEhhQWHrt2SMBG5SYyQ+jt7w@mail.gmail.com/ [1]
Link: https://lore.kernel.org/all/20250819123214.GH4067720@noisy.programming.kicks-ass.net/ [2]
This commit is contained in:
committed by
Peter Zijlstra
parent
88a90315a9
commit
378b770819
13
Kbuild
13
Kbuild
@@ -34,13 +34,24 @@ arch/$(SRCARCH)/kernel/asm-offsets.s: $(timeconst-file) $(bounds-file)
|
||||
$(offsets-file): arch/$(SRCARCH)/kernel/asm-offsets.s FORCE
|
||||
$(call filechk,offsets,__ASM_OFFSETS_H__)
|
||||
|
||||
# Generate rq-offsets.h
|
||||
|
||||
rq-offsets-file := include/generated/rq-offsets.h
|
||||
|
||||
targets += kernel/sched/rq-offsets.s
|
||||
|
||||
kernel/sched/rq-offsets.s: $(offsets-file)
|
||||
|
||||
$(rq-offsets-file): kernel/sched/rq-offsets.s FORCE
|
||||
$(call filechk,offsets,__RQ_OFFSETS_H__)
|
||||
|
||||
# Check for missing system calls
|
||||
|
||||
quiet_cmd_syscalls = CALL $<
|
||||
cmd_syscalls = $(CONFIG_SHELL) $< $(CC) $(c_flags) $(missing_syscalls_flags)
|
||||
|
||||
PHONY += missing-syscalls
|
||||
missing-syscalls: scripts/checksyscalls.sh $(offsets-file)
|
||||
missing-syscalls: scripts/checksyscalls.sh $(rq-offsets-file)
|
||||
$(call cmd,syscalls)
|
||||
|
||||
# Check the manual modification of atomic headers
|
||||
|
||||
@@ -424,8 +424,6 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
|
||||
* work-conserving schedulers.
|
||||
*
|
||||
*/
|
||||
extern void migrate_disable(void);
|
||||
extern void migrate_enable(void);
|
||||
|
||||
/**
|
||||
* preempt_disable_nested - Disable preemption inside a normally preempt disabled section
|
||||
@@ -471,7 +469,6 @@ static __always_inline void preempt_enable_nested(void)
|
||||
|
||||
DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable())
|
||||
DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace())
|
||||
DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
|
||||
|
||||
#ifdef CONFIG_PREEMPT_DYNAMIC
|
||||
|
||||
|
||||
@@ -49,6 +49,9 @@
|
||||
#include <linux/tracepoint-defs.h>
|
||||
#include <linux/unwind_deferred_types.h>
|
||||
#include <asm/kmap_size.h>
|
||||
#ifndef COMPILE_OFFSETS
|
||||
#include <generated/rq-offsets.h>
|
||||
#endif
|
||||
|
||||
/* task_struct member predeclarations (sorted alphabetically): */
|
||||
struct audit_context;
|
||||
@@ -2317,4 +2320,114 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo
|
||||
#define alloc_tag_restore(_tag, _old) do {} while (0)
|
||||
#endif
|
||||
|
||||
#ifndef MODULE
|
||||
#ifndef COMPILE_OFFSETS
|
||||
|
||||
extern void ___migrate_enable(void);
|
||||
|
||||
struct rq;
|
||||
DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
|
||||
|
||||
/*
|
||||
* The "struct rq" is not available here, so we can't access the
|
||||
* "runqueues" with this_cpu_ptr(), as the compilation will fail in
|
||||
* this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr():
|
||||
* typeof((ptr) + 0)
|
||||
*
|
||||
* So use arch_raw_cpu_ptr()/PERCPU_PTR() directly here.
|
||||
*/
|
||||
#ifdef CONFIG_SMP
|
||||
#define this_rq_raw() arch_raw_cpu_ptr(&runqueues)
|
||||
#else
|
||||
#define this_rq_raw() PERCPU_PTR(&runqueues)
|
||||
#endif
|
||||
#define this_rq_pinned() (*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned))
|
||||
|
||||
static inline void __migrate_enable(void)
|
||||
{
|
||||
struct task_struct *p = current;
|
||||
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
/*
|
||||
* Check both overflow from migrate_disable() and superfluous
|
||||
* migrate_enable().
|
||||
*/
|
||||
if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
|
||||
return;
|
||||
#endif
|
||||
|
||||
if (p->migration_disabled > 1) {
|
||||
p->migration_disabled--;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure stop_task runs either before or after this, and that
|
||||
* __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
|
||||
*/
|
||||
guard(preempt)();
|
||||
if (unlikely(p->cpus_ptr != &p->cpus_mask))
|
||||
___migrate_enable();
|
||||
/*
|
||||
* Mustn't clear migration_disabled() until cpus_ptr points back at the
|
||||
* regular cpus_mask, otherwise things that race (eg.
|
||||
* select_fallback_rq) get confused.
|
||||
*/
|
||||
barrier();
|
||||
p->migration_disabled = 0;
|
||||
this_rq_pinned()--;
|
||||
}
|
||||
|
||||
static inline void __migrate_disable(void)
|
||||
{
|
||||
struct task_struct *p = current;
|
||||
|
||||
if (p->migration_disabled) {
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
/*
|
||||
*Warn about overflow half-way through the range.
|
||||
*/
|
||||
WARN_ON_ONCE((s16)p->migration_disabled < 0);
|
||||
#endif
|
||||
p->migration_disabled++;
|
||||
return;
|
||||
}
|
||||
|
||||
guard(preempt)();
|
||||
this_rq_pinned()++;
|
||||
p->migration_disabled = 1;
|
||||
}
|
||||
#else /* !COMPILE_OFFSETS */
|
||||
static inline void __migrate_disable(void) { }
|
||||
static inline void __migrate_enable(void) { }
|
||||
#endif /* !COMPILE_OFFSETS */
|
||||
|
||||
/*
|
||||
* So that it is possible to not export the runqueues variable, define and
|
||||
* export migrate_enable/migrate_disable in kernel/sched/core.c too, and use
|
||||
* them for the modules. The macro "INSTANTIATE_EXPORTED_MIGRATE_DISABLE" will
|
||||
* be defined in kernel/sched/core.c.
|
||||
*/
|
||||
#ifndef INSTANTIATE_EXPORTED_MIGRATE_DISABLE
|
||||
static inline void migrate_disable(void)
|
||||
{
|
||||
__migrate_disable();
|
||||
}
|
||||
|
||||
static inline void migrate_enable(void)
|
||||
{
|
||||
__migrate_enable();
|
||||
}
|
||||
#else /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */
|
||||
extern void migrate_disable(void);
|
||||
extern void migrate_enable(void);
|
||||
#endif /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */
|
||||
|
||||
#else /* MODULE */
|
||||
extern void migrate_disable(void);
|
||||
extern void migrate_enable(void);
|
||||
#endif /* MODULE */
|
||||
|
||||
DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
|
||||
|
||||
#endif
|
||||
|
||||
@@ -23855,6 +23855,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
|
||||
BTF_SET_START(btf_id_deny)
|
||||
BTF_ID_UNUSED
|
||||
#ifdef CONFIG_SMP
|
||||
BTF_ID(func, ___migrate_enable)
|
||||
BTF_ID(func, migrate_disable)
|
||||
BTF_ID(func, migrate_enable)
|
||||
#endif
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
* Copyright (C) 1991-2002 Linus Torvalds
|
||||
* Copyright (C) 1998-2024 Ingo Molnar, Red Hat
|
||||
*/
|
||||
#define INSTANTIATE_EXPORTED_MIGRATE_DISABLE
|
||||
#include <linux/sched.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/hrtimer_api.h>
|
||||
#include <linux/ktime_api.h>
|
||||
@@ -2381,28 +2383,7 @@ static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
|
||||
__do_set_cpus_allowed(p, &ac);
|
||||
}
|
||||
|
||||
void migrate_disable(void)
|
||||
{
|
||||
struct task_struct *p = current;
|
||||
|
||||
if (p->migration_disabled) {
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
/*
|
||||
*Warn about overflow half-way through the range.
|
||||
*/
|
||||
WARN_ON_ONCE((s16)p->migration_disabled < 0);
|
||||
#endif
|
||||
p->migration_disabled++;
|
||||
return;
|
||||
}
|
||||
|
||||
guard(preempt)();
|
||||
this_rq()->nr_pinned++;
|
||||
p->migration_disabled = 1;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(migrate_disable);
|
||||
|
||||
void migrate_enable(void)
|
||||
void ___migrate_enable(void)
|
||||
{
|
||||
struct task_struct *p = current;
|
||||
struct affinity_context ac = {
|
||||
@@ -2410,35 +2391,19 @@ void migrate_enable(void)
|
||||
.flags = SCA_MIGRATE_ENABLE,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
/*
|
||||
* Check both overflow from migrate_disable() and superfluous
|
||||
* migrate_enable().
|
||||
*/
|
||||
if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
|
||||
return;
|
||||
#endif
|
||||
__set_cpus_allowed_ptr(p, &ac);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(___migrate_enable);
|
||||
|
||||
if (p->migration_disabled > 1) {
|
||||
p->migration_disabled--;
|
||||
return;
|
||||
}
|
||||
void migrate_disable(void)
|
||||
{
|
||||
__migrate_disable();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(migrate_disable);
|
||||
|
||||
/*
|
||||
* Ensure stop_task runs either before or after this, and that
|
||||
* __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
|
||||
*/
|
||||
guard(preempt)();
|
||||
if (p->cpus_ptr != &p->cpus_mask)
|
||||
__set_cpus_allowed_ptr(p, &ac);
|
||||
/*
|
||||
* Mustn't clear migration_disabled() until cpus_ptr points back at the
|
||||
* regular cpus_mask, otherwise things that race (eg.
|
||||
* select_fallback_rq) get confused.
|
||||
*/
|
||||
barrier();
|
||||
p->migration_disabled = 0;
|
||||
this_rq()->nr_pinned--;
|
||||
void migrate_enable(void)
|
||||
{
|
||||
__migrate_enable();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(migrate_enable);
|
||||
|
||||
|
||||
12
kernel/sched/rq-offsets.c
Normal file
12
kernel/sched/rq-offsets.c
Normal file
@@ -0,0 +1,12 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#define COMPILE_OFFSETS
|
||||
#include <linux/kbuild.h>
|
||||
#include <linux/types.h>
|
||||
#include "sched.h"
|
||||
|
||||
int main(void)
|
||||
{
|
||||
DEFINE(RQ_nr_pinned, offsetof(struct rq, nr_pinned));
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user