mirror of
https://github.com/torvalds/linux.git
synced 2025-11-30 23:16:01 +07:00
memcg: convert memcg->socket_pressure to u64
memcg->socket_pressure is initialised with jiffies when the memcg is
created.
Once vmpressure detects that the cgroup is under memory pressure, the
field is updated with jiffies + HZ to signal the fact to the socket layer
and suppress memory allocation for one second.
Otherwise, the field is not updated.
mem_cgroup_under_socket_pressure() uses time_before() to check if jiffies
is less than memcg->socket_pressure, and this has a bug on 32-bit kernel.
if (time_before(jiffies, memcg->socket_pressure))
return true;
As time_before() casts the final result to long, the acceptable delta
between two timestamps is 2 ^ (BITS_PER_LONG - 1).
On 32-bit kernel with CONFIG_HZ=1000, this is about 24 days.
>>> (2 ** 31) / 1000 / 60 / 60 / 24
24.855134814814818
Once 24 days have passed since the last update of socket_pressure,
mem_cgroup_under_socket_pressure() starts to lie until the next 24 days
pass.
We don't need to worry about this on 64-bit machines unless they serve for
300 million years.
>>> (2 ** 63) / 1000 / 60 / 60 / 24 / 365
292471208.6775361
Let's convert memcg->socket_pressure to u64.
Performance testing:
I don't have a real 32-bit machine so this is a result on QEMU, but
with/without the u64 jiffies patch, the time spent in
mem_cgroup_under_socket_pressure() was 1~5us and I didn't see any
measurable delta.
no patch applied:
iperf3 273 [000] 137.296248:
probe:mem_cgroup_under_socket_pressure: (c13660d0)
c13660d1 mem_cgroup_under_socket_pressure+0x1
([kernel.kallsyms])
iperf3 273 [000] 137.296249:
probe:mem_cgroup_under_socket_pressure__return: (c13660d0 <- c1d8fd7f)
iperf3 273 [000] 137.296251:
probe:mem_cgroup_under_socket_pressure: (c13660d0)
c13660d1 mem_cgroup_under_socket_pressure+0x1
([kernel.kallsyms])
iperf3 273 [000] 137.296253:
probe:mem_cgroup_under_socket_pressure__return: (c13660d0 <- c1d8fd7f)
u64 jiffies patch applied:
iperf3 308 [001] 330.669370:
probe:mem_cgroup_under_socket_pressure: (c12ddba0)
c12ddba1 mem_cgroup_under_socket_pressure+0x1
([kernel.kallsyms])
iperf3 308 [001] 330.669371:
probe:mem_cgroup_under_socket_pressure__return: (c12ddba0 <- c1ce98bf)
iperf3 308 [001] 330.669382:
probe:mem_cgroup_under_socket_pressure: (c12ddba0)
c12ddba1 mem_cgroup_under_socket_pressure+0x1
([kernel.kallsyms])
iperf3 308 [001] 330.669384:
probe:mem_cgroup_under_socket_pressure__return: (c12ddba0 <- c1ce98bf)
So the u64 approach is good enough.
Link: https://lkml.kernel.org/r/20250717194645.1096500-1-kuniyu@google.com
Fixes: 8e8ae64524 ("mm: memcontrol: hook up vmpressure to socket pressure")
Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com>
Reported-by: Neal Cardwell <ncardwell@google.com>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric Dumazet <ncardwell@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
committed by
Andrew Morton
parent
a9e056de66
commit
378bdb9740
@@ -251,8 +251,10 @@ struct mem_cgroup {
|
||||
* that this indicator should NOT be used in legacy cgroup mode
|
||||
* where socket memory is accounted/charged separately.
|
||||
*/
|
||||
unsigned long socket_pressure;
|
||||
|
||||
u64 socket_pressure;
|
||||
#if BITS_PER_LONG < 64
|
||||
seqlock_t socket_pressure_seqlock;
|
||||
#endif
|
||||
int kmemcg_id;
|
||||
/*
|
||||
* memcg->objcg is wiped out as a part of the objcg reparenting
|
||||
@@ -1602,6 +1604,42 @@ extern struct static_key_false memcg_sockets_enabled_key;
|
||||
#define mem_cgroup_sockets_enabled static_branch_unlikely(&memcg_sockets_enabled_key)
|
||||
void mem_cgroup_sk_alloc(struct sock *sk);
|
||||
void mem_cgroup_sk_free(struct sock *sk);
|
||||
|
||||
#if BITS_PER_LONG < 64
|
||||
/*
 * mem_cgroup_set_socket_pressure - mark @memcg as under socket pressure
 * (variant compiled when BITS_PER_LONG < 64).
 *
 * Stores get_jiffies_64() + HZ into memcg->socket_pressure, i.e. a
 * deadline one second from now. The store is made inside the write side
 * of memcg->socket_pressure_seqlock with the IRQ state saved and restored
 * around it.
 * NOTE(review): presumably the seqlock is needed because a u64 store is
 * not a single access when long is narrower than 64 bits - confirm.
 */
static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg)
|
||||
{
|
||||
/* Deadline is computed before entering the write section. */
u64 val = get_jiffies_64() + HZ;
|
||||
unsigned long flags;
|
||||
|
||||
write_seqlock_irqsave(&memcg->socket_pressure_seqlock, flags);
|
||||
memcg->socket_pressure = val;
|
||||
write_sequnlock_irqrestore(&memcg->socket_pressure_seqlock, flags);
|
||||
}
|
||||
|
||||
/*
 * mem_cgroup_get_socket_pressure - read @memcg's socket pressure deadline
 * (variant compiled when BITS_PER_LONG < 64).
 *
 * Reads memcg->socket_pressure under the read side of
 * memcg->socket_pressure_seqlock and repeats the read for as long as
 * read_seqretry() reports that a retry is needed.
 *
 * Returns the u64 value last read from memcg->socket_pressure.
 */
static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg)
|
||||
{
|
||||
unsigned int seq;
|
||||
u64 val;
|
||||
|
||||
do {
|
||||
seq = read_seqbegin(&memcg->socket_pressure_seqlock);
|
||||
val = memcg->socket_pressure;
|
||||
/* Loop again if the sequence check asks for a retry. */
} while (read_seqretry(&memcg->socket_pressure_seqlock, seq));
|
||||
|
||||
return val;
|
||||
}
|
||||
#else
|
||||
/*
 * mem_cgroup_set_socket_pressure - mark @memcg as under socket pressure
 * (variant compiled when BITS_PER_LONG is not below 64).
 *
 * Stores jiffies + HZ, a deadline one second from now, into
 * memcg->socket_pressure with WRITE_ONCE(); no seqlock is taken in this
 * variant.
 */
static inline void mem_cgroup_set_socket_pressure(struct mem_cgroup *memcg)
|
||||
{
|
||||
WRITE_ONCE(memcg->socket_pressure, jiffies + HZ);
|
||||
}
|
||||
|
||||
/*
 * mem_cgroup_get_socket_pressure - read @memcg's socket pressure deadline
 * (variant compiled when BITS_PER_LONG is not below 64).
 *
 * Returns memcg->socket_pressure as read with READ_ONCE(); no seqlock is
 * taken in this variant.
 */
static inline u64 mem_cgroup_get_socket_pressure(struct mem_cgroup *memcg)
|
||||
{
|
||||
return READ_ONCE(memcg->socket_pressure);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
|
||||
{
|
||||
#ifdef CONFIG_MEMCG_V1
|
||||
@@ -1609,7 +1647,7 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
|
||||
return !!memcg->tcpmem_pressure;
|
||||
#endif /* CONFIG_MEMCG_V1 */
|
||||
do {
|
||||
if (time_before(jiffies, READ_ONCE(memcg->socket_pressure)))
|
||||
if (time_before64(get_jiffies_64(), mem_cgroup_get_socket_pressure(memcg)))
|
||||
return true;
|
||||
} while ((memcg = parent_mem_cgroup(memcg)));
|
||||
return false;
|
||||
|
||||
@@ -3754,7 +3754,10 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
|
||||
INIT_LIST_HEAD(&memcg->memory_peaks);
|
||||
INIT_LIST_HEAD(&memcg->swap_peaks);
|
||||
spin_lock_init(&memcg->peaks_lock);
|
||||
memcg->socket_pressure = jiffies;
|
||||
memcg->socket_pressure = get_jiffies_64();
|
||||
#if BITS_PER_LONG < 64
|
||||
seqlock_init(&memcg->socket_pressure_seqlock);
|
||||
#endif
|
||||
memcg1_memcg_init(memcg);
|
||||
memcg->kmemcg_id = -1;
|
||||
INIT_LIST_HEAD(&memcg->objcg_list);
|
||||
|
||||
@@ -316,7 +316,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
|
||||
* asserted for a second in which subsequent
|
||||
* pressure events can occur.
|
||||
*/
|
||||
WRITE_ONCE(memcg->socket_pressure, jiffies + HZ);
|
||||
mem_cgroup_set_socket_pressure(memcg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user