mm/hwpoison: decouple hwpoison_filter from mm/memory-failure.c

mm/memory-failure.c defines and uses hwpoison_filter_* parameters but the
values of those parameters can only be modified via mm/hwpoison-inject.c
from userspace.  They have a potentially different life time.  Decouple
those parameters from mm/memory-failure.c to fix this broken layering.

Link: https://lkml.kernel.org/r/20250904062258.3336092-1-linmiaohe@huawei.com
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Suggested-by: Michal Hocko <mhocko@suse.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Miaohe Lin
2025-09-04 14:22:58 +08:00
committed by Andrew Morton
parent a488ba3124
commit 5ce1dbfdd8
5 changed files with 113 additions and 96 deletions

View File

@@ -256,6 +256,7 @@ u64 stable_page_flags(const struct page *page)
return u;
}
EXPORT_SYMBOL_GPL(stable_page_flags);
/* /proc/kpageflags - an array exposing page flags
*

View File

@@ -7,8 +7,96 @@
#include <linux/swap.h>
#include <linux/pagemap.h>
#include <linux/hugetlb.h>
#include <linux/page-flags.h>
#include <linux/memcontrol.h>
#include "internal.h"
static u32 hwpoison_filter_enable;
static u32 hwpoison_filter_dev_major = ~0U;
static u32 hwpoison_filter_dev_minor = ~0U;
static u64 hwpoison_filter_flags_mask;
static u64 hwpoison_filter_flags_value;
static int hwpoison_filter_dev(struct page *p)
{
struct folio *folio = page_folio(p);
struct address_space *mapping;
dev_t dev;
if (hwpoison_filter_dev_major == ~0U &&
hwpoison_filter_dev_minor == ~0U)
return 0;
mapping = folio_mapping(folio);
if (mapping == NULL || mapping->host == NULL)
return -EINVAL;
dev = mapping->host->i_sb->s_dev;
if (hwpoison_filter_dev_major != ~0U &&
hwpoison_filter_dev_major != MAJOR(dev))
return -EINVAL;
if (hwpoison_filter_dev_minor != ~0U &&
hwpoison_filter_dev_minor != MINOR(dev))
return -EINVAL;
return 0;
}
static int hwpoison_filter_flags(struct page *p)
{
if (!hwpoison_filter_flags_mask)
return 0;
if ((stable_page_flags(p) & hwpoison_filter_flags_mask) ==
hwpoison_filter_flags_value)
return 0;
else
return -EINVAL;
}
/*
* This allows stress tests to limit test scope to a collection of tasks
* by putting them under some memcg. This prevents killing unrelated/important
* processes such as /sbin/init. Note that the target task may share clean
* pages with init (eg. libc text), which is harmless. If the target task
* share _dirty_ pages with another task B, the test scheme must make sure B
* is also included in the memcg. At last, due to race conditions this filter
* can only guarantee that the page either belongs to the memcg tasks, or is
* a freed page.
*/
#ifdef CONFIG_MEMCG
static u64 hwpoison_filter_memcg;
static int hwpoison_filter_task(struct page *p)
{
if (!hwpoison_filter_memcg)
return 0;
if (page_cgroup_ino(p) != hwpoison_filter_memcg)
return -EINVAL;
return 0;
}
#else
static int hwpoison_filter_task(struct page *p) { return 0; }
#endif
static int hwpoison_filter(struct page *p)
{
if (!hwpoison_filter_enable)
return 0;
if (hwpoison_filter_dev(p))
return -EINVAL;
if (hwpoison_filter_flags(p))
return -EINVAL;
if (hwpoison_filter_task(p))
return -EINVAL;
return 0;
}
static struct dentry *hwpoison_dir;
static int hwpoison_inject(void *data, u64 val)
@@ -67,6 +155,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(unpoison_fops, NULL, hwpoison_unpoison, "%lli\n");
static void __exit pfn_inject_exit(void)
{
hwpoison_filter_enable = 0;
hwpoison_filter_unregister();
debugfs_remove_recursive(hwpoison_dir);
}
@@ -105,6 +194,8 @@ static int __init pfn_inject_init(void)
&hwpoison_filter_memcg);
#endif
hwpoison_filter_register(hwpoison_filter);
return 0;
}

View File

@@ -1228,14 +1228,10 @@ static inline bool node_reclaim_enabled(void)
#ifdef CONFIG_MEMORY_FAILURE
int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill);
void shake_folio(struct folio *folio);
extern int hwpoison_filter(struct page *p);
typedef int hwpoison_filter_func_t(struct page *p);
void hwpoison_filter_register(hwpoison_filter_func_t *filter);
void hwpoison_filter_unregister(void);
extern u32 hwpoison_filter_dev_major;
extern u32 hwpoison_filter_dev_minor;
extern u64 hwpoison_filter_flags_mask;
extern u64 hwpoison_filter_flags_value;
extern u64 hwpoison_filter_memcg;
extern u32 hwpoison_filter_enable;
#define MAGIC_HWPOISON 0x48575053U /* HWPS */
void SetPageHWPoisonTakenOff(struct page *page);
void ClearPageHWPoisonTakenOff(struct page *page);

View File

@@ -287,6 +287,7 @@ ino_t page_cgroup_ino(struct page *page)
rcu_read_unlock();
return ino;
}
EXPORT_SYMBOL_GPL(page_cgroup_ino);
/* Subset of node_stat_item for memcg stats */
static const unsigned int memcg_node_stat_items[] = {

View File

@@ -212,106 +212,34 @@ static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, boo
return true;
}
#if IS_ENABLED(CONFIG_HWPOISON_INJECT)
static hwpoison_filter_func_t __rcu *hwpoison_filter_func __read_mostly;
u32 hwpoison_filter_enable = 0;
u32 hwpoison_filter_dev_major = ~0U;
u32 hwpoison_filter_dev_minor = ~0U;
u64 hwpoison_filter_flags_mask;
u64 hwpoison_filter_flags_value;
EXPORT_SYMBOL_GPL(hwpoison_filter_enable);
EXPORT_SYMBOL_GPL(hwpoison_filter_dev_major);
EXPORT_SYMBOL_GPL(hwpoison_filter_dev_minor);
EXPORT_SYMBOL_GPL(hwpoison_filter_flags_mask);
EXPORT_SYMBOL_GPL(hwpoison_filter_flags_value);
static int hwpoison_filter_dev(struct page *p)
void hwpoison_filter_register(hwpoison_filter_func_t *filter)
{
struct folio *folio = page_folio(p);
struct address_space *mapping;
dev_t dev;
if (hwpoison_filter_dev_major == ~0U &&
hwpoison_filter_dev_minor == ~0U)
return 0;
mapping = folio_mapping(folio);
if (mapping == NULL || mapping->host == NULL)
return -EINVAL;
dev = mapping->host->i_sb->s_dev;
if (hwpoison_filter_dev_major != ~0U &&
hwpoison_filter_dev_major != MAJOR(dev))
return -EINVAL;
if (hwpoison_filter_dev_minor != ~0U &&
hwpoison_filter_dev_minor != MINOR(dev))
return -EINVAL;
return 0;
rcu_assign_pointer(hwpoison_filter_func, filter);
}
EXPORT_SYMBOL_GPL(hwpoison_filter_register);
static int hwpoison_filter_flags(struct page *p)
void hwpoison_filter_unregister(void)
{
if (!hwpoison_filter_flags_mask)
return 0;
if ((stable_page_flags(p) & hwpoison_filter_flags_mask) ==
hwpoison_filter_flags_value)
return 0;
else
return -EINVAL;
RCU_INIT_POINTER(hwpoison_filter_func, NULL);
synchronize_rcu();
}
EXPORT_SYMBOL_GPL(hwpoison_filter_unregister);
/*
* This allows stress tests to limit test scope to a collection of tasks
* by putting them under some memcg. This prevents killing unrelated/important
* processes such as /sbin/init. Note that the target task may share clean
* pages with init (eg. libc text), which is harmless. If the target task
* share _dirty_ pages with another task B, the test scheme must make sure B
* is also included in the memcg. At last, due to race conditions this filter
* can only guarantee that the page either belongs to the memcg tasks, or is
* a freed page.
*/
#ifdef CONFIG_MEMCG
u64 hwpoison_filter_memcg;
EXPORT_SYMBOL_GPL(hwpoison_filter_memcg);
static int hwpoison_filter_task(struct page *p)
static int hwpoison_filter(struct page *p)
{
if (!hwpoison_filter_memcg)
return 0;
int ret = 0;
hwpoison_filter_func_t *filter;
if (page_cgroup_ino(p) != hwpoison_filter_memcg)
return -EINVAL;
rcu_read_lock();
filter = rcu_dereference(hwpoison_filter_func);
if (filter)
ret = filter(p);
rcu_read_unlock();
return 0;
return ret;
}
#else
static int hwpoison_filter_task(struct page *p) { return 0; }
#endif
int hwpoison_filter(struct page *p)
{
if (!hwpoison_filter_enable)
return 0;
if (hwpoison_filter_dev(p))
return -EINVAL;
if (hwpoison_filter_flags(p))
return -EINVAL;
if (hwpoison_filter_task(p))
return -EINVAL;
return 0;
}
EXPORT_SYMBOL_GPL(hwpoison_filter);
#else
int hwpoison_filter(struct page *p)
{
return 0;
}
#endif
/*
* Kill all processes that have a poisoned page mapped and then isolate