memcg: skip cgroup_file_notify if spinning is not allowed

Generally memcg charging is allowed from all the contexts including NMI
where even spinning on spinlock can cause locking issues.  However one
call chain was missed during the addition of memcg charging from any
context support.  That is try_charge_memcg() -> memcg_memory_event() ->
cgroup_file_notify().

The possible function call tree under cgroup_file_notify() can acquire
many different spin locks in spinning mode.  Some of them are
cgroup_file_kn_lock, kernfs_notify_lock, pool_workqeue's lock.  So, let's
just skip cgroup_file_notify() from memcg charging if the context does not
allow spinning.

Alternative approach was also explored where instead of skipping
cgroup_file_notify(), we defer the memcg event processing to irq_work [1].
However it adds complexity and it was decided to keep things simple until
we need more memcg events with !allow_spinning requirement.

Link: https://lore.kernel.org/all/5qi2llyzf7gklncflo6gxoozljbm4h3tpnuv4u4ej4ztysvi6f@x44v7nz2wdzd/ [1]
Link: https://lkml.kernel.org/r/20250922220203.261714-1-shakeel.butt@linux.dev
Fixes: 3ac4638a73 ("memcg: make memcg_rstat_updated nmi safe")
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Michal Hocko <mhocko@suse.com>
Closes: https://lore.kernel.org/all/20250905061919.439648-1-yepeilin@google.com/
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Peilin Ye <yepeilin@google.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Tejun Heo <tj@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Shakeel Butt
2025-09-22 15:02:03 -07:00
committed by Andrew Morton
parent 7a405dbb0f
commit fcc0669c5a
2 changed files with 23 additions and 10 deletions

View File

@@ -1001,22 +1001,28 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
count_memcg_events_mm(mm, idx, 1);
}
static inline void memcg_memory_event(struct mem_cgroup *memcg,
enum memcg_memory_event event)
static inline void __memcg_memory_event(struct mem_cgroup *memcg,
enum memcg_memory_event event,
bool allow_spinning)
{
bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX ||
event == MEMCG_SWAP_FAIL;
/* For now only MEMCG_MAX can happen with !allow_spinning context. */
VM_WARN_ON_ONCE(!allow_spinning && event != MEMCG_MAX);
atomic_long_inc(&memcg->memory_events_local[event]);
if (!swap_event)
if (!swap_event && allow_spinning)
cgroup_file_notify(&memcg->events_local_file);
do {
atomic_long_inc(&memcg->memory_events[event]);
if (swap_event)
cgroup_file_notify(&memcg->swap_events_file);
else
cgroup_file_notify(&memcg->events_file);
if (allow_spinning) {
if (swap_event)
cgroup_file_notify(&memcg->swap_events_file);
else
cgroup_file_notify(&memcg->events_file);
}
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
break;
@@ -1026,6 +1032,12 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg,
!mem_cgroup_is_root(memcg));
}
static inline void memcg_memory_event(struct mem_cgroup *memcg,
enum memcg_memory_event event)
{
__memcg_memory_event(memcg, event, true);
}
static inline void memcg_memory_event_mm(struct mm_struct *mm,
enum memcg_memory_event event)
{

View File

@@ -2307,12 +2307,13 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
bool drained = false;
bool raised_max_event = false;
unsigned long pflags;
bool allow_spinning = gfpflags_allow_spinning(gfp_mask);
retry:
if (consume_stock(memcg, nr_pages))
return 0;
if (!gfpflags_allow_spinning(gfp_mask))
if (!allow_spinning)
/* Avoid the refill and flush of the older stock */
batch = nr_pages;
@@ -2348,7 +2349,7 @@ retry:
if (!gfpflags_allow_blocking(gfp_mask))
goto nomem;
memcg_memory_event(mem_over_limit, MEMCG_MAX);
__memcg_memory_event(mem_over_limit, MEMCG_MAX, allow_spinning);
raised_max_event = true;
psi_memstall_enter(&pflags);
@@ -2415,7 +2416,7 @@ force:
* a MEMCG_MAX event.
*/
if (!raised_max_event)
memcg_memory_event(mem_over_limit, MEMCG_MAX);
__memcg_memory_event(mem_over_limit, MEMCG_MAX, allow_spinning);
/*
* The allocation either can't fail or will lead to more memory