softirq: Allow dropping the softirq-BKL lock on PREEMPT_RT

Softirqs are preemptible on PREEMPT_RT, but there is synchronisation
between individual sections which disable bottom halves. This in turn
means that a forced-threaded interrupt cannot preempt another
forced-threaded interrupt. Instead it will PI-boost the other handler
and wait for its completion.
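
For illustration, this is roughly how the core runs a forced-threaded
handler inside a BH-disabled section (a simplified sketch of
irq_forced_thread_fn() in kernel/irq/manage.c, not the verbatim code):

	/* Sketch: the wrapper, not the device handler, disables BH. */
	static irqreturn_t forced_thread_fn(struct irq_desc *desc,
					    struct irqaction *action)
	{
		irqreturn_t ret;

		local_bh_disable();	/* On PREEMPT_RT: takes softirq_ctrl.lock */
		ret = action->thread_fn(action->irq, action->dev_id);
		local_bh_enable();	/* Releases it, runs pending softirqs */
		return ret;
	}

A second forced-threaded handler on the same CPU therefore blocks on
softirq_ctrl.lock and PI-boosts the current owner instead of preempting
it.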

This is required because code within a softirq section is assumed to be
non-preemptible and may expect exclusive access to per-CPU resources
such as variables or pinned timers.

Code with such expectations has been identified and updated to use
local_lock_nested_bh() to protect the per-CPU resource. This means the
softirq lock can be removed.
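
The conversion pattern looks roughly like this (struct and function
names are made up for illustration; local_lock_nested_bh() must be
called from within a BH-disabled section):

	struct foo_pcpu {
		local_lock_t	lock;
		u64		count;
	};
	static DEFINE_PER_CPU(struct foo_pcpu, foo_pcpu) = {
		.lock = INIT_LOCAL_LOCK(lock),
	};

	static void foo_update(void)
	{
		/* Runs in BH context, e.g. NAPI poll or a tasklet. */
		local_lock_nested_bh(&foo_pcpu.lock);
		__this_cpu_inc(foo_pcpu.count);
		local_unlock_nested_bh(&foo_pcpu.lock);
	}

On !PREEMPT_RT this is only a lockdep annotation; on PREEMPT_RT it
takes a per-CPU spinlock which serialises users of this one resource
instead of all bottom halves on the CPU.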

Disable the softirq synchronisation, but add a new config switch
CONFIG_PREEMPT_RT_NEEDS_BH_LOCK which allows re-enabling the
synchronised behaviour in case issues show up which have not been
detected yet.

The softirq_ctrl.cnt accounting remains so that the NOHZ code can tell
whether softirqs are currently being handled.
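
That counter feeds local_bh_blocked(), which the idle path uses to
suppress false positive softirq-pending warnings. As a sketch, the
PREEMPT_RT variant of that existing helper is essentially:

	bool local_bh_blocked(void)
	{
		return __this_cpu_read(softirq_ctrl.cnt) != 0;
	}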

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt

@@ -103,6 +103,19 @@ config PREEMPT_RT
 	  Select this if you are building a kernel for systems which
 	  require real-time guarantees.
 
+config PREEMPT_RT_NEEDS_BH_LOCK
+	bool "Enforce softirq synchronisation on PREEMPT_RT"
+	depends on PREEMPT_RT
+	help
+	  Enforce synchronisation across the softirq context. On PREEMPT_RT
+	  the softirq is preemptible. This enforces the same per-CPU BKL
+	  semantic that non-PREEMPT_RT builds have. This should not be needed
+	  because per-CPU locks were added to avoid the per-CPU BKL.
+
+	  This switch provides the old behaviour for testing reasons. Select
+	  this if you suspect an error with preemptible softirq and want to
+	  test the old, synchronised behaviour.
+
 config PREEMPT_COUNT
 	bool

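For testing, the legacy behaviour can then be restored with a .config
fragment such as:

	CONFIG_PREEMPT_RT=y
	CONFIG_PREEMPT_RT_NEEDS_BH_LOCK=y
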
--- a/kernel/softirq.c
+++ b/kernel/softirq.c

@@ -165,7 +165,11 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 	/* First entry of a task into a BH disabled section? */
 	if (!current->softirq_disable_cnt) {
 		if (preemptible()) {
-			local_lock(&softirq_ctrl.lock);
+			if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK))
+				local_lock(&softirq_ctrl.lock);
+			else
+				migrate_disable();
+
 			/* Required to meet the RCU bottomhalf requirements. */
 			rcu_read_lock();
 		} else {
@@ -177,17 +181,34 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 	 * Track the per CPU softirq disabled state. On RT this is per CPU
 	 * state to allow preemption of bottom half disabled sections.
 	 */
-	newcnt = __this_cpu_add_return(softirq_ctrl.cnt, cnt);
-	/*
-	 * Reflect the result in the task state to prevent recursion on the
-	 * local lock and to make softirq_count() & al work.
-	 */
-	current->softirq_disable_cnt = newcnt;
+	if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) {
+		newcnt = this_cpu_add_return(softirq_ctrl.cnt, cnt);
+		/*
+		 * Reflect the result in the task state to prevent recursion on the
+		 * local lock and to make softirq_count() & al work.
+		 */
+		current->softirq_disable_cnt = newcnt;
 
-	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
-		raw_local_irq_save(flags);
-		lockdep_softirqs_off(ip);
-		raw_local_irq_restore(flags);
+		if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && newcnt == cnt) {
+			raw_local_irq_save(flags);
+			lockdep_softirqs_off(ip);
+			raw_local_irq_restore(flags);
+		}
+	} else {
+		bool sirq_dis = false;
+
+		if (!current->softirq_disable_cnt)
+			sirq_dis = true;
+
+		this_cpu_add(softirq_ctrl.cnt, cnt);
+		current->softirq_disable_cnt += cnt;
+		WARN_ON_ONCE(current->softirq_disable_cnt < 0);
+
+		if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && sirq_dis) {
+			raw_local_irq_save(flags);
+			lockdep_softirqs_off(ip);
+			raw_local_irq_restore(flags);
+		}
 	}
 }
 EXPORT_SYMBOL(__local_bh_disable_ip);
@@ -195,23 +216,42 @@ EXPORT_SYMBOL(__local_bh_disable_ip);
 static void __local_bh_enable(unsigned int cnt, bool unlock)
 {
 	unsigned long flags;
+	bool sirq_en = false;
 	int newcnt;
 
-	DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
-			    this_cpu_read(softirq_ctrl.cnt));
+	if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) {
+		DEBUG_LOCKS_WARN_ON(current->softirq_disable_cnt !=
+				    this_cpu_read(softirq_ctrl.cnt));
+		if (softirq_count() == cnt)
+			sirq_en = true;
+	} else {
+		if (current->softirq_disable_cnt == cnt)
+			sirq_en = true;
+	}
 
-	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && softirq_count() == cnt) {
+	if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS) && sirq_en) {
 		raw_local_irq_save(flags);
 		lockdep_softirqs_on(_RET_IP_);
 		raw_local_irq_restore(flags);
 	}
 
-	newcnt = __this_cpu_sub_return(softirq_ctrl.cnt, cnt);
-	current->softirq_disable_cnt = newcnt;
+	if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK)) {
+		newcnt = this_cpu_sub_return(softirq_ctrl.cnt, cnt);
+		current->softirq_disable_cnt = newcnt;
 
-	if (!newcnt && unlock) {
-		rcu_read_unlock();
-		local_unlock(&softirq_ctrl.lock);
+		if (!newcnt && unlock) {
+			rcu_read_unlock();
+			local_unlock(&softirq_ctrl.lock);
+		}
+	} else {
+		current->softirq_disable_cnt -= cnt;
+		this_cpu_sub(softirq_ctrl.cnt, cnt);
+		if (unlock && !current->softirq_disable_cnt) {
+			migrate_enable();
+			rcu_read_unlock();
+		} else {
+			WARN_ON_ONCE(current->softirq_disable_cnt < 0);
+		}
 	}
 }
@@ -228,7 +268,10 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
 	lock_map_release(&bh_lock_map);
 
 	local_irq_save(flags);
-	curcnt = __this_cpu_read(softirq_ctrl.cnt);
+	if (IS_ENABLED(CONFIG_PREEMPT_RT_NEEDS_BH_LOCK))
+		curcnt = this_cpu_read(softirq_ctrl.cnt);
+	else
+		curcnt = current->softirq_disable_cnt;
 
 	/*
 	 * If this is not reenabling soft interrupts, no point in trying to