Merge tag 'locking-core-2025-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar:
"Futexes:
- Add support for task local hash maps (Sebastian Andrzej Siewior,
Peter Zijlstra)
- Implement the FUTEX2_NUMA ABI, which extends the futex interface to
be NUMA-aware: on NUMA-aware futexes, a second u32 word containing the
NUMA node is added after the u32 futex value word (Peter Zijlstra)
- Implement the FUTEX2_MPOL ABI, which extends the futex interface to
be mempolicy-aware as well, further refining futex node mappings and
lookups (Peter Zijlstra)
Locking primitives:
- Misc cleanups (Andy Shevchenko, Borislav Petkov, Colin Ian King,
Ingo Molnar, Nam Cao, Peter Zijlstra)
Lockdep:
- Prevent abuse of lockdep subclasses (Waiman Long)
- Add number of dynamic keys to /proc/lockdep_stats (Waiman Long)
Plus misc cleanups and fixes"
* tag 'locking-core-2025-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (35 commits)
selftests/futex: Fix spelling mistake "unitiliazed" -> "uninitialized"
futex: Correct the kernedoc return value for futex_wait_setup().
tools headers: Synchronize prctl.h ABI header
futex: Use RCU_INIT_POINTER() in futex_mm_init().
selftests/futex: Use TAP output in futex_numa_mpol
selftests/futex: Use TAP output in futex_priv_hash
futex: Fix kernel-doc comments
futex: Relax the rcu_assign_pointer() assignment of mm->futex_phash in futex_mm_init()
futex: Fix outdated comment in struct restart_block
locking/lockdep: Add number of dynamic keys to /proc/lockdep_stats
locking/lockdep: Prevent abuse of lockdep subclass
locking/lockdep: Move hlock_equal() to the respective #ifdeffery
futex,selftests: Add another FUTEX2_NUMA selftest
selftests/futex: Add futex_numa_mpol
selftests/futex: Add futex_priv_hash
selftests/futex: Build without headers nonsense
tools/perf: Allow to select the number of hash buckets
tools headers: Synchronize prctl.h ABI header
futex: Implement FUTEX2_MPOL
futex: Implement FUTEX2_NUMA
...
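As a rough illustration of the FUTEX2_NUMA ABI described above: when FUTEX2_NUMA is passed, the futex value is followed by a second u32 holding the NUMA node, the doubled word must be naturally aligned, and the special value -1 (FUTEX_NO_NODE) lets the kernel pick a node and write it back. A minimal, hedged user-side sketch in C — the struct and helper names are illustrative only, not part of the ABI:

    #include <stdint.h>

    #define FUTEX_NO_NODE (-1)          /* mirrors the new uapi constant */

    /*
     * Hypothetical view of a NUMA-aware 32-bit futex: value first, node
     * second. With FUTEX2_NUMA the effective futex size doubles, so the
     * pair must be 8-byte aligned.
     */
    struct numa_futex32 {
            uint32_t value;             /* the futex value itself */
            int32_t  node;              /* NUMA node, or FUTEX_NO_NODE */
    } __attribute__((aligned(8)));

    static void numa_futex_init(struct numa_futex32 *f, uint32_t val)
    {
            f->value = val;
            f->node  = FUTEX_NO_NODE;   /* kernel fills in a node on first use */
    }
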
@@ -4,11 +4,11 @@
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/mm_types.h>
|
||||
|
||||
#include <uapi/linux/futex.h>
|
||||
|
||||
struct inode;
|
||||
struct mm_struct;
|
||||
struct task_struct;
|
||||
|
||||
/*
|
||||
@@ -34,6 +34,7 @@ union futex_key {
|
||||
u64 i_seq;
|
||||
unsigned long pgoff;
|
||||
unsigned int offset;
|
||||
/* unsigned int node; */
|
||||
} shared;
|
||||
struct {
|
||||
union {
|
||||
@@ -42,11 +43,13 @@ union futex_key {
|
||||
};
|
||||
unsigned long address;
|
||||
unsigned int offset;
|
||||
/* unsigned int node; */
|
||||
} private;
|
||||
struct {
|
||||
u64 ptr;
|
||||
unsigned long word;
|
||||
unsigned int offset;
|
||||
unsigned int node; /* NOT hashed! */
|
||||
} both;
|
||||
};
|
||||
|
||||
@@ -77,7 +80,25 @@ void futex_exec_release(struct task_struct *tsk);
|
||||
|
||||
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
|
||||
u32 __user *uaddr2, u32 val2, u32 val3);
|
||||
#else
|
||||
int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4);
|
||||
|
||||
#ifdef CONFIG_FUTEX_PRIVATE_HASH
|
||||
int futex_hash_allocate_default(void);
|
||||
void futex_hash_free(struct mm_struct *mm);
|
||||
|
||||
static inline void futex_mm_init(struct mm_struct *mm)
|
||||
{
|
||||
RCU_INIT_POINTER(mm->futex_phash, NULL);
|
||||
mutex_init(&mm->futex_hash_lock);
|
||||
}
|
||||
|
||||
#else /* !CONFIG_FUTEX_PRIVATE_HASH */
|
||||
static inline int futex_hash_allocate_default(void) { return 0; }
|
||||
static inline void futex_hash_free(struct mm_struct *mm) { }
|
||||
static inline void futex_mm_init(struct mm_struct *mm) { }
|
||||
#endif /* CONFIG_FUTEX_PRIVATE_HASH */
|
||||
|
||||
#else /* !CONFIG_FUTEX */
|
||||
static inline void futex_init_task(struct task_struct *tsk) { }
|
||||
static inline void futex_exit_recursive(struct task_struct *tsk) { }
|
||||
static inline void futex_exit_release(struct task_struct *tsk) { }
|
||||
@@ -88,6 +109,17 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val,
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
static inline int futex_hash_allocate_default(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline void futex_hash_free(struct mm_struct *mm) { }
|
||||
static inline void futex_mm_init(struct mm_struct *mm) { }
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -31,6 +31,7 @@
|
||||
#define INIT_PASID 0
|
||||
|
||||
struct address_space;
|
||||
struct futex_private_hash;
|
||||
struct mem_cgroup;
|
||||
|
||||
/*
|
||||
@@ -1031,7 +1032,11 @@ struct mm_struct {
|
||||
*/
|
||||
seqcount_t mm_lock_seq;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_FUTEX_PRIVATE_HASH
|
||||
struct mutex futex_hash_lock;
|
||||
struct futex_private_hash __rcu *futex_phash;
|
||||
struct futex_private_hash *futex_phash_new;
|
||||
#endif
|
||||
|
||||
unsigned long hiwater_rss; /* High-watermark of RSS usage */
|
||||
unsigned long hiwater_vm; /* High-water virtual memory usage */
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/tracepoint-defs.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/cleanup.h>
|
||||
|
||||
#define MMAP_LOCK_INITIALIZER(name) \
|
||||
.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),
|
||||
@@ -211,6 +212,9 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
|
||||
up_read(&mm->mmap_lock);
|
||||
}
|
||||
|
||||
DEFINE_GUARD(mmap_read_lock, struct mm_struct *,
|
||||
mmap_read_lock(_T), mmap_read_unlock(_T))
|
||||
|
||||
static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
|
||||
{
|
||||
__mmap_lock_trace_released(mm, false);
|
||||
|
||||
@@ -30,7 +30,11 @@ static inline void rcuref_init(rcuref_t *ref, unsigned int cnt)
|
||||
* rcuref_read - Read the number of held reference counts of a rcuref
|
||||
* @ref: Pointer to the reference count
|
||||
*
|
||||
* Return: The number of held references (0 ... N)
|
||||
* Return: The number of held references (0 ... N). The value 0 does not
|
||||
* indicate that it is safe to schedule the object, protected by this reference
|
||||
* counter, for deconstruction.
|
||||
* If you want to know if the reference counter has been marked DEAD (as
|
||||
* signaled by rcuref_put()) please use rcuref_is_dead().
|
||||
*/
|
||||
static inline unsigned int rcuref_read(rcuref_t *ref)
|
||||
{
|
||||
@@ -40,6 +44,22 @@ static inline unsigned int rcuref_read(rcuref_t *ref)
|
||||
return c >= RCUREF_RELEASED ? 0 : c + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* rcuref_is_dead - Check if the rcuref has been already marked dead
|
||||
* @ref: Pointer to the reference count
|
||||
*
|
||||
* Return: True if the object has been marked DEAD. This signals that a previous
|
||||
* invocation of rcuref_put() returned true on this reference counter meaning
|
||||
* the protected object can safely be scheduled for deconstruction.
|
||||
* Otherwise, returns false.
|
||||
*/
|
||||
static inline bool rcuref_is_dead(rcuref_t *ref)
|
||||
{
|
||||
unsigned int c = atomic_read(&ref->refcnt);
|
||||
|
||||
return (c >= RCUREF_RELEASED) && (c < RCUREF_NOREF);
|
||||
}
|
||||
|
||||
extern __must_check bool rcuref_get_slowpath(rcuref_t *ref);
|
||||
|
||||
/**
|
||||
|
||||
@@ -26,7 +26,7 @@ struct restart_block {
|
||||
unsigned long arch_data;
|
||||
long (*fn)(struct restart_block *);
|
||||
union {
|
||||
/* For futex_wait and futex_wait_requeue_pi */
|
||||
/* For futex_wait() */
|
||||
struct {
|
||||
u32 __user *uaddr;
|
||||
u32 val;
|
||||
|
||||
@@ -169,8 +169,13 @@ void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_m
|
||||
int node, const void *caller) __alloc_size(1);
|
||||
#define __vmalloc_node(...) alloc_hooks(__vmalloc_node_noprof(__VA_ARGS__))
|
||||
|
||||
void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
|
||||
#define vmalloc_huge(...) alloc_hooks(vmalloc_huge_noprof(__VA_ARGS__))
|
||||
void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node) __alloc_size(1);
|
||||
#define vmalloc_huge_node(...) alloc_hooks(vmalloc_huge_node_noprof(__VA_ARGS__))
|
||||
|
||||
static inline void *vmalloc_huge(unsigned long size, gfp_t gfp_mask)
|
||||
{
|
||||
return vmalloc_huge_node(size, gfp_mask, NUMA_NO_NODE);
|
||||
}
|
||||
|
||||
extern void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
|
||||
#define __vmalloc_array(...) alloc_hooks(__vmalloc_array_noprof(__VA_ARGS__))
|
||||
|
||||
@@ -63,7 +63,7 @@
|
||||
#define FUTEX2_SIZE_U32 0x02
|
||||
#define FUTEX2_SIZE_U64 0x03
|
||||
#define FUTEX2_NUMA 0x04
|
||||
/* 0x08 */
|
||||
#define FUTEX2_MPOL 0x08
|
||||
/* 0x10 */
|
||||
/* 0x20 */
|
||||
/* 0x40 */
|
||||
@@ -74,6 +74,13 @@
|
||||
/* do not use */
|
||||
#define FUTEX_32 FUTEX2_SIZE_U32 /* historical accident :-( */
|
||||
|
||||
/*
|
||||
* When FUTEX2_NUMA doubles the futex word, the second word is a node value.
|
||||
* The special value -1 indicates no-node. This is the same value as
|
||||
* NUMA_NO_NODE, except that value is not ABI, this is.
|
||||
*/
|
||||
#define FUTEX_NO_NODE (-1)
|
||||
|
||||
/*
|
||||
* Max numbers of elements in a futex_waitv array
|
||||
*/
|
||||
|
||||
@@ -364,4 +364,11 @@ struct prctl_mm_map {
|
||||
# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
|
||||
# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
|
||||
|
||||
/* FUTEX hash management */
|
||||
#define PR_FUTEX_HASH 78
|
||||
# define PR_FUTEX_HASH_SET_SLOTS 1
|
||||
# define FH_FLAG_IMMUTABLE (1ULL << 0)
|
||||
# define PR_FUTEX_HASH_GET_SLOTS 2
|
||||
# define PR_FUTEX_HASH_GET_IMMUTABLE 3
|
||||
|
||||
#endif /* _LINUX_PRCTL_H */
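For context, a hedged usage sketch of the PR_FUTEX_HASH prctl() interface defined above (error handling kept minimal; per futex_hash_allocate() later in this diff, the slot count must be 0 or a power of two greater than 1, and FH_FLAG_IMMUTABLE pins the chosen hash — exact argument checking may differ):

    #include <stdio.h>
    #include <sys/prctl.h>

    /*
     * Constants from the prctl.h hunk above, repeated here only in case the
     * installed headers predate this change.
     */
    #ifndef PR_FUTEX_HASH
    # define PR_FUTEX_HASH               78
    # define PR_FUTEX_HASH_SET_SLOTS      1
    # define FH_FLAG_IMMUTABLE           (1ULL << 0)
    # define PR_FUTEX_HASH_GET_SLOTS      2
    # define PR_FUTEX_HASH_GET_IMMUTABLE  3
    #endif

    int main(void)
    {
            /*
             * Ask for a private futex hash with 16 buckets; passing
             * FH_FLAG_IMMUTABLE instead of 0 would make it immutable.
             */
            if (prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, 16, 0) != 0)
                    perror("PR_FUTEX_HASH_SET_SLOTS");

            /* A return of 0 means no private hash is installed for this process. */
            printf("slots: %d\n",
                   (int)prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS, 0, 0));
            printf("immutable: %d\n",
                   (int)prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE, 0, 0));
            return 0;
    }
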
init/Kconfig
@@ -1687,6 +1687,16 @@ config FUTEX_PI
|
||||
depends on FUTEX && RT_MUTEXES
|
||||
default y
|
||||
|
||||
config FUTEX_PRIVATE_HASH
|
||||
bool
|
||||
depends on FUTEX && !BASE_SMALL && MMU
|
||||
default y
|
||||
|
||||
config FUTEX_MPOL
|
||||
bool
|
||||
depends on FUTEX && NUMA
|
||||
default y
|
||||
|
||||
config EPOLL
|
||||
bool "Enable eventpoll support" if EXPERT
|
||||
default y
|
||||
|
||||
@@ -273,7 +273,6 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
|
||||
struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct io_futex_data *ifd = NULL;
|
||||
struct futex_hash_bucket *hb;
|
||||
int ret;
|
||||
|
||||
if (!iof->futex_mask) {
|
||||
@@ -295,12 +294,11 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags)
|
||||
ifd->req = req;
|
||||
|
||||
ret = futex_wait_setup(iof->uaddr, iof->futex_val, iof->futex_flags,
|
||||
&ifd->q, &hb);
|
||||
&ifd->q, NULL, NULL);
|
||||
if (!ret) {
|
||||
hlist_add_head(&req->hash_node, &ctx->futex_list);
|
||||
io_ring_submit_unlock(ctx, issue_flags);
|
||||
|
||||
futex_queue(&ifd->q, hb, NULL);
|
||||
return IOU_ISSUE_SKIP_COMPLETE;
|
||||
}
|
||||
|
||||
|
||||
@@ -1306,6 +1306,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
|
||||
RCU_INIT_POINTER(mm->exe_file, NULL);
|
||||
mmu_notifier_subscriptions_init(mm);
|
||||
init_tlb_flush_pending(mm);
|
||||
futex_mm_init(mm);
|
||||
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !defined(CONFIG_SPLIT_PMD_PTLOCKS)
|
||||
mm->pmd_huge_pte = NULL;
|
||||
#endif
|
||||
@@ -1388,6 +1389,7 @@ static inline void __mmput(struct mm_struct *mm)
|
||||
if (mm->binfmt)
|
||||
module_put(mm->binfmt->module);
|
||||
lru_gen_del_mm(mm);
|
||||
futex_hash_free(mm);
|
||||
mmdrop(mm);
|
||||
}
|
||||
|
||||
@@ -2153,6 +2155,13 @@ static void rv_task_fork(struct task_struct *p)
|
||||
#define rv_task_fork(p) do {} while (0)
|
||||
#endif
|
||||
|
||||
static bool need_futex_hash_allocate_default(u64 clone_flags)
|
||||
{
|
||||
if ((clone_flags & (CLONE_THREAD | CLONE_VM)) != (CLONE_THREAD | CLONE_VM))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* This creates a new process as a copy of the old one,
|
||||
* but does not actually start it yet.
|
||||
@@ -2533,6 +2542,21 @@ __latent_entropy struct task_struct *copy_process(
|
||||
if (retval)
|
||||
goto bad_fork_cancel_cgroup;
|
||||
|
||||
/*
|
||||
* Allocate a default futex hash for the user process once the first
|
||||
* thread spawns.
|
||||
*/
|
||||
if (need_futex_hash_allocate_default(clone_flags)) {
|
||||
retval = futex_hash_allocate_default();
|
||||
if (retval)
|
||||
goto bad_fork_core_free;
|
||||
/*
|
||||
* If we fail beyond this point we don't free the allocated
|
||||
* futex hash map. We assume that another thread will be created
|
||||
* and makes use of it. The hash map will be freed once the main
|
||||
* thread terminates.
|
||||
*/
|
||||
}
|
||||
/*
|
||||
* From this point on we must avoid any synchronous user-space
|
||||
* communication until we take the tasklist-lock. In particular, we do
|
||||
|
||||
@@ -36,9 +36,15 @@
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/plist.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/fault-inject.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/prctl.h>
|
||||
#include <linux/rcuref.h>
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/mmap_lock.h>
|
||||
|
||||
#include "futex.h"
|
||||
#include "../locking/rtmutex_common.h"
|
||||
@@ -49,12 +55,24 @@
|
||||
* reside in the same cacheline.
|
||||
*/
|
||||
static struct {
|
||||
struct futex_hash_bucket *queues;
|
||||
unsigned long hashmask;
|
||||
unsigned int hashshift;
|
||||
struct futex_hash_bucket *queues[MAX_NUMNODES];
|
||||
} __futex_data __read_mostly __aligned(2*sizeof(long));
|
||||
#define futex_queues (__futex_data.queues)
|
||||
#define futex_hashmask (__futex_data.hashmask)
|
||||
|
||||
#define futex_hashmask (__futex_data.hashmask)
|
||||
#define futex_hashshift (__futex_data.hashshift)
|
||||
#define futex_queues (__futex_data.queues)
|
||||
|
||||
struct futex_private_hash {
|
||||
rcuref_t users;
|
||||
unsigned int hash_mask;
|
||||
struct rcu_head rcu;
|
||||
void *mm;
|
||||
bool custom;
|
||||
bool immutable;
|
||||
struct futex_hash_bucket queues[];
|
||||
};
|
||||
|
||||
/*
|
||||
* Fault injections for futexes.
|
||||
@@ -107,21 +125,328 @@ late_initcall(fail_futex_debugfs);
|
||||
|
||||
#endif /* CONFIG_FAIL_FUTEX */
|
||||
|
||||
/**
|
||||
* futex_hash - Return the hash bucket in the global hash
|
||||
* @key: Pointer to the futex key for which the hash is calculated
|
||||
*
|
||||
* We hash on the keys returned from get_futex_key (see below) and return the
|
||||
* corresponding hash bucket in the global hash.
|
||||
*/
|
||||
struct futex_hash_bucket *futex_hash(union futex_key *key)
|
||||
{
|
||||
u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
|
||||
key->both.offset);
|
||||
static struct futex_hash_bucket *
|
||||
__futex_hash(union futex_key *key, struct futex_private_hash *fph);
|
||||
|
||||
return &futex_queues[hash & futex_hashmask];
|
||||
#ifdef CONFIG_FUTEX_PRIVATE_HASH
|
||||
static inline bool futex_key_is_private(union futex_key *key)
|
||||
{
|
||||
/*
|
||||
* Relies on get_futex_key() to set either bit for shared
|
||||
* futexes -- see comment with union futex_key.
|
||||
*/
|
||||
return !(key->both.offset & (FUT_OFF_INODE | FUT_OFF_MMSHARED));
|
||||
}
|
||||
|
||||
bool futex_private_hash_get(struct futex_private_hash *fph)
|
||||
{
|
||||
if (fph->immutable)
|
||||
return true;
|
||||
return rcuref_get(&fph->users);
|
||||
}
|
||||
|
||||
void futex_private_hash_put(struct futex_private_hash *fph)
|
||||
{
|
||||
/* Ignore return value, last put is verified via rcuref_is_dead() */
|
||||
if (fph->immutable)
|
||||
return;
|
||||
if (rcuref_put(&fph->users))
|
||||
wake_up_var(fph->mm);
|
||||
}
|
||||
|
||||
/**
|
||||
* futex_hash_get - Get an additional reference for the local hash.
|
||||
* @hb: ptr to the private local hash.
|
||||
*
|
||||
* Obtain an additional reference for the already obtained hash bucket. The
|
||||
* caller must already own an reference.
|
||||
*/
|
||||
void futex_hash_get(struct futex_hash_bucket *hb)
|
||||
{
|
||||
struct futex_private_hash *fph = hb->priv;
|
||||
|
||||
if (!fph)
|
||||
return;
|
||||
WARN_ON_ONCE(!futex_private_hash_get(fph));
|
||||
}
|
||||
|
||||
void futex_hash_put(struct futex_hash_bucket *hb)
|
||||
{
|
||||
struct futex_private_hash *fph = hb->priv;
|
||||
|
||||
if (!fph)
|
||||
return;
|
||||
futex_private_hash_put(fph);
|
||||
}
|
||||
|
||||
static struct futex_hash_bucket *
|
||||
__futex_hash_private(union futex_key *key, struct futex_private_hash *fph)
|
||||
{
|
||||
u32 hash;
|
||||
|
||||
if (!futex_key_is_private(key))
|
||||
return NULL;
|
||||
|
||||
if (!fph)
|
||||
fph = rcu_dereference(key->private.mm->futex_phash);
|
||||
if (!fph || !fph->hash_mask)
|
||||
return NULL;
|
||||
|
||||
hash = jhash2((void *)&key->private.address,
|
||||
sizeof(key->private.address) / 4,
|
||||
key->both.offset);
|
||||
return &fph->queues[hash & fph->hash_mask];
|
||||
}
|
||||
|
||||
static void futex_rehash_private(struct futex_private_hash *old,
|
||||
struct futex_private_hash *new)
|
||||
{
|
||||
struct futex_hash_bucket *hb_old, *hb_new;
|
||||
unsigned int slots = old->hash_mask + 1;
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < slots; i++) {
|
||||
struct futex_q *this, *tmp;
|
||||
|
||||
hb_old = &old->queues[i];
|
||||
|
||||
spin_lock(&hb_old->lock);
|
||||
plist_for_each_entry_safe(this, tmp, &hb_old->chain, list) {
|
||||
|
||||
plist_del(&this->list, &hb_old->chain);
|
||||
futex_hb_waiters_dec(hb_old);
|
||||
|
||||
WARN_ON_ONCE(this->lock_ptr != &hb_old->lock);
|
||||
|
||||
hb_new = __futex_hash(&this->key, new);
|
||||
futex_hb_waiters_inc(hb_new);
|
||||
/*
|
||||
* The new pointer isn't published yet but an already
|
||||
* moved user can be unqueued due to timeout or signal.
|
||||
*/
|
||||
spin_lock_nested(&hb_new->lock, SINGLE_DEPTH_NESTING);
|
||||
plist_add(&this->list, &hb_new->chain);
|
||||
this->lock_ptr = &hb_new->lock;
|
||||
spin_unlock(&hb_new->lock);
|
||||
}
|
||||
spin_unlock(&hb_old->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static bool __futex_pivot_hash(struct mm_struct *mm,
|
||||
struct futex_private_hash *new)
|
||||
{
|
||||
struct futex_private_hash *fph;
|
||||
|
||||
WARN_ON_ONCE(mm->futex_phash_new);
|
||||
|
||||
fph = rcu_dereference_protected(mm->futex_phash,
|
||||
lockdep_is_held(&mm->futex_hash_lock));
|
||||
if (fph) {
|
||||
if (!rcuref_is_dead(&fph->users)) {
|
||||
mm->futex_phash_new = new;
|
||||
return false;
|
||||
}
|
||||
|
||||
futex_rehash_private(fph, new);
|
||||
}
|
||||
rcu_assign_pointer(mm->futex_phash, new);
|
||||
kvfree_rcu(fph, rcu);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void futex_pivot_hash(struct mm_struct *mm)
|
||||
{
|
||||
scoped_guard(mutex, &mm->futex_hash_lock) {
|
||||
struct futex_private_hash *fph;
|
||||
|
||||
fph = mm->futex_phash_new;
|
||||
if (fph) {
|
||||
mm->futex_phash_new = NULL;
|
||||
__futex_pivot_hash(mm, fph);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct futex_private_hash *futex_private_hash(void)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
/*
|
||||
* Ideally we don't loop. If there is a replacement in progress
|
||||
* then a new private hash is already prepared and a reference can't be
|
||||
* obtained once the last user dropped it's.
|
||||
* In that case we block on mm_struct::futex_hash_lock and either have
|
||||
* to perform the replacement or wait while someone else is doing the
|
||||
* job. Eitherway, on the second iteration we acquire a reference on the
|
||||
* new private hash or loop again because a new replacement has been
|
||||
* requested.
|
||||
*/
|
||||
again:
|
||||
scoped_guard(rcu) {
|
||||
struct futex_private_hash *fph;
|
||||
|
||||
fph = rcu_dereference(mm->futex_phash);
|
||||
if (!fph)
|
||||
return NULL;
|
||||
|
||||
if (fph->immutable)
|
||||
return fph;
|
||||
if (rcuref_get(&fph->users))
|
||||
return fph;
|
||||
}
|
||||
futex_pivot_hash(mm);
|
||||
goto again;
|
||||
}
|
||||
|
||||
struct futex_hash_bucket *futex_hash(union futex_key *key)
|
||||
{
|
||||
struct futex_private_hash *fph;
|
||||
struct futex_hash_bucket *hb;
|
||||
|
||||
again:
|
||||
scoped_guard(rcu) {
|
||||
hb = __futex_hash(key, NULL);
|
||||
fph = hb->priv;
|
||||
|
||||
if (!fph || futex_private_hash_get(fph))
|
||||
return hb;
|
||||
}
|
||||
futex_pivot_hash(key->private.mm);
|
||||
goto again;
|
||||
}
|
||||
|
||||
#else /* !CONFIG_FUTEX_PRIVATE_HASH */
|
||||
|
||||
static struct futex_hash_bucket *
|
||||
__futex_hash_private(union futex_key *key, struct futex_private_hash *fph)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct futex_hash_bucket *futex_hash(union futex_key *key)
|
||||
{
|
||||
return __futex_hash(key, NULL);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_FUTEX_PRIVATE_HASH */
|
||||
|
||||
#ifdef CONFIG_FUTEX_MPOL
|
||||
|
||||
static int __futex_key_to_node(struct mm_struct *mm, unsigned long addr)
|
||||
{
|
||||
struct vm_area_struct *vma = vma_lookup(mm, addr);
|
||||
struct mempolicy *mpol;
|
||||
int node = FUTEX_NO_NODE;
|
||||
|
||||
if (!vma)
|
||||
return FUTEX_NO_NODE;
|
||||
|
||||
mpol = vma_policy(vma);
|
||||
if (!mpol)
|
||||
return FUTEX_NO_NODE;
|
||||
|
||||
switch (mpol->mode) {
|
||||
case MPOL_PREFERRED:
|
||||
node = first_node(mpol->nodes);
|
||||
break;
|
||||
case MPOL_PREFERRED_MANY:
|
||||
case MPOL_BIND:
|
||||
if (mpol->home_node != NUMA_NO_NODE)
|
||||
node = mpol->home_node;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
static int futex_key_to_node_opt(struct mm_struct *mm, unsigned long addr)
|
||||
{
|
||||
int seq, node;
|
||||
|
||||
guard(rcu)();
|
||||
|
||||
if (!mmap_lock_speculate_try_begin(mm, &seq))
|
||||
return -EBUSY;
|
||||
|
||||
node = __futex_key_to_node(mm, addr);
|
||||
|
||||
if (mmap_lock_speculate_retry(mm, seq))
|
||||
return -EAGAIN;
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
static int futex_mpol(struct mm_struct *mm, unsigned long addr)
|
||||
{
|
||||
int node;
|
||||
|
||||
node = futex_key_to_node_opt(mm, addr);
|
||||
if (node >= FUTEX_NO_NODE)
|
||||
return node;
|
||||
|
||||
guard(mmap_read_lock)(mm);
|
||||
return __futex_key_to_node(mm, addr);
|
||||
}
|
||||
|
||||
#else /* !CONFIG_FUTEX_MPOL */
|
||||
|
||||
static int futex_mpol(struct mm_struct *mm, unsigned long addr)
|
||||
{
|
||||
return FUTEX_NO_NODE;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_FUTEX_MPOL */
|
||||
|
||||
/**
|
||||
* __futex_hash - Return the hash bucket
|
||||
* @key: Pointer to the futex key for which the hash is calculated
|
||||
* @fph: Pointer to private hash if known
|
||||
*
|
||||
* We hash on the keys returned from get_futex_key (see below) and return the
|
||||
* corresponding hash bucket.
|
||||
* If the FUTEX is PROCESS_PRIVATE then a per-process hash bucket (from the
|
||||
* private hash) is returned if existing. Otherwise a hash bucket from the
|
||||
* global hash is returned.
|
||||
*/
|
||||
static struct futex_hash_bucket *
|
||||
__futex_hash(union futex_key *key, struct futex_private_hash *fph)
|
||||
{
|
||||
int node = key->both.node;
|
||||
u32 hash;
|
||||
|
||||
if (node == FUTEX_NO_NODE) {
|
||||
struct futex_hash_bucket *hb;
|
||||
|
||||
hb = __futex_hash_private(key, fph);
|
||||
if (hb)
|
||||
return hb;
|
||||
}
|
||||
|
||||
hash = jhash2((u32 *)key,
|
||||
offsetof(typeof(*key), both.offset) / sizeof(u32),
|
||||
key->both.offset);
|
||||
|
||||
if (node == FUTEX_NO_NODE) {
|
||||
/*
|
||||
* In case of !FLAGS_NUMA, use some unused hash bits to pick a
|
||||
* node -- this ensures regular futexes are interleaved across
|
||||
* the nodes and avoids having to allocate multiple
|
||||
* hash-tables.
|
||||
*
|
||||
* NOTE: this isn't perfectly uniform, but it is fast and
|
||||
* handles sparse node masks.
|
||||
*/
|
||||
node = (hash >> futex_hashshift) % nr_node_ids;
|
||||
if (!node_possible(node)) {
|
||||
node = find_next_bit_wrap(node_possible_map.bits,
|
||||
nr_node_ids, node);
|
||||
}
|
||||
}
|
||||
|
||||
return &futex_queues[node][hash & futex_hashmask];
|
||||
}
|
||||
|
||||
/**
|
||||
* futex_setup_timer - set up the sleeping hrtimer.
|
||||
@@ -227,25 +552,60 @@ int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
|
||||
struct page *page;
|
||||
struct folio *folio;
|
||||
struct address_space *mapping;
|
||||
int err, ro = 0;
|
||||
int node, err, size, ro = 0;
|
||||
bool node_updated = false;
|
||||
bool fshared;
|
||||
|
||||
fshared = flags & FLAGS_SHARED;
|
||||
size = futex_size(flags);
|
||||
if (flags & FLAGS_NUMA)
|
||||
size *= 2;
|
||||
|
||||
/*
|
||||
* The futex address must be "naturally" aligned.
|
||||
*/
|
||||
key->both.offset = address % PAGE_SIZE;
|
||||
if (unlikely((address % sizeof(u32)) != 0))
|
||||
if (unlikely((address % size) != 0))
|
||||
return -EINVAL;
|
||||
address -= key->both.offset;
|
||||
|
||||
if (unlikely(!access_ok(uaddr, sizeof(u32))))
|
||||
if (unlikely(!access_ok(uaddr, size)))
|
||||
return -EFAULT;
|
||||
|
||||
if (unlikely(should_fail_futex(fshared)))
|
||||
return -EFAULT;
|
||||
|
||||
node = FUTEX_NO_NODE;
|
||||
|
||||
if (flags & FLAGS_NUMA) {
|
||||
u32 __user *naddr = (void *)uaddr + size / 2;
|
||||
|
||||
if (futex_get_value(&node, naddr))
|
||||
return -EFAULT;
|
||||
|
||||
if (node != FUTEX_NO_NODE &&
|
||||
(node >= MAX_NUMNODES || !node_possible(node)))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (node == FUTEX_NO_NODE && (flags & FLAGS_MPOL)) {
|
||||
node = futex_mpol(mm, address);
|
||||
node_updated = true;
|
||||
}
|
||||
|
||||
if (flags & FLAGS_NUMA) {
|
||||
u32 __user *naddr = (void *)uaddr + size / 2;
|
||||
|
||||
if (node == FUTEX_NO_NODE) {
|
||||
node = numa_node_id();
|
||||
node_updated = true;
|
||||
}
|
||||
if (node_updated && futex_put_value(node, naddr))
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
key->both.node = node;
|
||||
|
||||
/*
|
||||
* PROCESS_PRIVATE futexes are fast.
|
||||
* As the mm cannot disappear under us and the 'key' only needs
|
||||
@@ -502,13 +862,9 @@ void __futex_unqueue(struct futex_q *q)
|
||||
}
|
||||
|
||||
/* The key must be already stored in q->key. */
|
||||
struct futex_hash_bucket *futex_q_lock(struct futex_q *q)
|
||||
void futex_q_lock(struct futex_q *q, struct futex_hash_bucket *hb)
|
||||
__acquires(&hb->lock)
|
||||
{
|
||||
struct futex_hash_bucket *hb;
|
||||
|
||||
hb = futex_hash(&q->key);
|
||||
|
||||
/*
|
||||
* Increment the counter before taking the lock so that
|
||||
* a potential waker won't miss a to-be-slept task that is
|
||||
@@ -522,14 +878,13 @@ struct futex_hash_bucket *futex_q_lock(struct futex_q *q)
|
||||
q->lock_ptr = &hb->lock;
|
||||
|
||||
spin_lock(&hb->lock);
|
||||
return hb;
|
||||
}
|
||||
|
||||
void futex_q_unlock(struct futex_hash_bucket *hb)
|
||||
__releases(&hb->lock)
|
||||
{
|
||||
spin_unlock(&hb->lock);
|
||||
futex_hb_waiters_dec(hb);
|
||||
spin_unlock(&hb->lock);
|
||||
}
|
||||
|
||||
void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb,
|
||||
@@ -568,6 +923,8 @@ int futex_unqueue(struct futex_q *q)
|
||||
spinlock_t *lock_ptr;
|
||||
int ret = 0;
|
||||
|
||||
/* RCU so lock_ptr is not going away during locking. */
|
||||
guard(rcu)();
|
||||
/* In the common case we don't take the spinlock, which is nice. */
|
||||
retry:
|
||||
/*
|
||||
@@ -606,6 +963,24 @@ retry:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void futex_q_lockptr_lock(struct futex_q *q)
|
||||
{
|
||||
spinlock_t *lock_ptr;
|
||||
|
||||
/*
|
||||
* See futex_unqueue() why lock_ptr can change.
|
||||
*/
|
||||
guard(rcu)();
|
||||
retry:
|
||||
lock_ptr = READ_ONCE(q->lock_ptr);
|
||||
spin_lock(lock_ptr);
|
||||
|
||||
if (unlikely(lock_ptr != q->lock_ptr)) {
|
||||
spin_unlock(lock_ptr);
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* PI futexes can not be requeued and must remove themselves from the hash
|
||||
* bucket. The hash bucket lock (i.e. lock_ptr) is held.
|
||||
@@ -949,9 +1324,19 @@ static void exit_pi_state_list(struct task_struct *curr)
|
||||
{
|
||||
struct list_head *next, *head = &curr->pi_state_list;
|
||||
struct futex_pi_state *pi_state;
|
||||
struct futex_hash_bucket *hb;
|
||||
union futex_key key = FUTEX_KEY_INIT;
|
||||
|
||||
/*
|
||||
* The mutex mm_struct::futex_hash_lock might be acquired.
|
||||
*/
|
||||
might_sleep();
|
||||
/*
|
||||
* Ensure the hash remains stable (no resize) during the while loop
|
||||
* below. The hb pointer is acquired under the pi_lock so we can't block
|
||||
* on the mutex.
|
||||
*/
|
||||
WARN_ON(curr != current);
|
||||
guard(private_hash)();
|
||||
/*
|
||||
* We are a ZOMBIE and nobody can enqueue itself on
|
||||
* pi_state_list anymore, but we have to be careful
|
||||
@@ -962,50 +1347,52 @@ static void exit_pi_state_list(struct task_struct *curr)
|
||||
next = head->next;
|
||||
pi_state = list_entry(next, struct futex_pi_state, list);
|
||||
key = pi_state->key;
|
||||
hb = futex_hash(&key);
|
||||
if (1) {
|
||||
CLASS(hb, hb)(&key);
|
||||
|
||||
/*
|
||||
* We can race against put_pi_state() removing itself from the
|
||||
* list (a waiter going away). put_pi_state() will first
|
||||
* decrement the reference count and then modify the list, so
|
||||
* its possible to see the list entry but fail this reference
|
||||
* acquire.
|
||||
*
|
||||
* In that case; drop the locks to let put_pi_state() make
|
||||
* progress and retry the loop.
|
||||
*/
|
||||
if (!refcount_inc_not_zero(&pi_state->refcount)) {
|
||||
/*
|
||||
* We can race against put_pi_state() removing itself from the
|
||||
* list (a waiter going away). put_pi_state() will first
|
||||
* decrement the reference count and then modify the list, so
|
||||
* its possible to see the list entry but fail this reference
|
||||
* acquire.
|
||||
*
|
||||
* In that case; drop the locks to let put_pi_state() make
|
||||
* progress and retry the loop.
|
||||
*/
|
||||
if (!refcount_inc_not_zero(&pi_state->refcount)) {
|
||||
raw_spin_unlock_irq(&curr->pi_lock);
|
||||
cpu_relax();
|
||||
raw_spin_lock_irq(&curr->pi_lock);
|
||||
continue;
|
||||
}
|
||||
raw_spin_unlock_irq(&curr->pi_lock);
|
||||
cpu_relax();
|
||||
raw_spin_lock_irq(&curr->pi_lock);
|
||||
continue;
|
||||
}
|
||||
raw_spin_unlock_irq(&curr->pi_lock);
|
||||
|
||||
spin_lock(&hb->lock);
|
||||
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
|
||||
raw_spin_lock(&curr->pi_lock);
|
||||
/*
|
||||
* We dropped the pi-lock, so re-check whether this
|
||||
* task still owns the PI-state:
|
||||
*/
|
||||
if (head->next != next) {
|
||||
/* retain curr->pi_lock for the loop invariant */
|
||||
raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
|
||||
spin_lock(&hb->lock);
|
||||
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
|
||||
raw_spin_lock(&curr->pi_lock);
|
||||
/*
|
||||
* We dropped the pi-lock, so re-check whether this
|
||||
* task still owns the PI-state:
|
||||
*/
|
||||
if (head->next != next) {
|
||||
/* retain curr->pi_lock for the loop invariant */
|
||||
raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
|
||||
spin_unlock(&hb->lock);
|
||||
put_pi_state(pi_state);
|
||||
continue;
|
||||
}
|
||||
|
||||
WARN_ON(pi_state->owner != curr);
|
||||
WARN_ON(list_empty(&pi_state->list));
|
||||
list_del_init(&pi_state->list);
|
||||
pi_state->owner = NULL;
|
||||
|
||||
raw_spin_unlock(&curr->pi_lock);
|
||||
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
|
||||
spin_unlock(&hb->lock);
|
||||
put_pi_state(pi_state);
|
||||
continue;
|
||||
}
|
||||
|
||||
WARN_ON(pi_state->owner != curr);
|
||||
WARN_ON(list_empty(&pi_state->list));
|
||||
list_del_init(&pi_state->list);
|
||||
pi_state->owner = NULL;
|
||||
|
||||
raw_spin_unlock(&curr->pi_lock);
|
||||
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
|
||||
spin_unlock(&hb->lock);
|
||||
|
||||
rt_mutex_futex_unlock(&pi_state->pi_mutex);
|
||||
put_pi_state(pi_state);
|
||||
|
||||
@@ -1125,30 +1512,304 @@ void futex_exit_release(struct task_struct *tsk)
|
||||
futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
|
||||
}
|
||||
|
||||
static void futex_hash_bucket_init(struct futex_hash_bucket *fhb,
|
||||
struct futex_private_hash *fph)
|
||||
{
|
||||
#ifdef CONFIG_FUTEX_PRIVATE_HASH
|
||||
fhb->priv = fph;
|
||||
#endif
|
||||
atomic_set(&fhb->waiters, 0);
|
||||
plist_head_init(&fhb->chain);
|
||||
spin_lock_init(&fhb->lock);
|
||||
}
|
||||
|
||||
#define FH_CUSTOM 0x01
|
||||
#define FH_IMMUTABLE 0x02
|
||||
|
||||
#ifdef CONFIG_FUTEX_PRIVATE_HASH
|
||||
void futex_hash_free(struct mm_struct *mm)
|
||||
{
|
||||
struct futex_private_hash *fph;
|
||||
|
||||
kvfree(mm->futex_phash_new);
|
||||
fph = rcu_dereference_raw(mm->futex_phash);
|
||||
if (fph) {
|
||||
WARN_ON_ONCE(rcuref_read(&fph->users) > 1);
|
||||
kvfree(fph);
|
||||
}
|
||||
}
|
||||
|
||||
static bool futex_pivot_pending(struct mm_struct *mm)
|
||||
{
|
||||
struct futex_private_hash *fph;
|
||||
|
||||
guard(rcu)();
|
||||
|
||||
if (!mm->futex_phash_new)
|
||||
return true;
|
||||
|
||||
fph = rcu_dereference(mm->futex_phash);
|
||||
return rcuref_is_dead(&fph->users);
|
||||
}
|
||||
|
||||
static bool futex_hash_less(struct futex_private_hash *a,
|
||||
struct futex_private_hash *b)
|
||||
{
|
||||
/* user provided always wins */
|
||||
if (!a->custom && b->custom)
|
||||
return true;
|
||||
if (a->custom && !b->custom)
|
||||
return false;
|
||||
|
||||
/* zero-sized hash wins */
|
||||
if (!b->hash_mask)
|
||||
return true;
|
||||
if (!a->hash_mask)
|
||||
return false;
|
||||
|
||||
/* keep the biggest */
|
||||
if (a->hash_mask < b->hash_mask)
|
||||
return true;
|
||||
if (a->hash_mask > b->hash_mask)
|
||||
return false;
|
||||
|
||||
return false; /* equal */
|
||||
}
|
||||
|
||||
static int futex_hash_allocate(unsigned int hash_slots, unsigned int flags)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct futex_private_hash *fph;
|
||||
bool custom = flags & FH_CUSTOM;
|
||||
int i;
|
||||
|
||||
if (hash_slots && (hash_slots == 1 || !is_power_of_2(hash_slots)))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Once we've disabled the global hash there is no way back.
|
||||
*/
|
||||
scoped_guard(rcu) {
|
||||
fph = rcu_dereference(mm->futex_phash);
|
||||
if (fph && (!fph->hash_mask || fph->immutable)) {
|
||||
if (custom)
|
||||
return -EBUSY;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
fph = kvzalloc(struct_size(fph, queues, hash_slots), GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
|
||||
if (!fph)
|
||||
return -ENOMEM;
|
||||
|
||||
rcuref_init(&fph->users, 1);
|
||||
fph->hash_mask = hash_slots ? hash_slots - 1 : 0;
|
||||
fph->custom = custom;
|
||||
fph->immutable = !!(flags & FH_IMMUTABLE);
|
||||
fph->mm = mm;
|
||||
|
||||
for (i = 0; i < hash_slots; i++)
|
||||
futex_hash_bucket_init(&fph->queues[i], fph);
|
||||
|
||||
if (custom) {
|
||||
/*
|
||||
* Only let prctl() wait / retry; don't unduly delay clone().
|
||||
*/
|
||||
again:
|
||||
wait_var_event(mm, futex_pivot_pending(mm));
|
||||
}
|
||||
|
||||
scoped_guard(mutex, &mm->futex_hash_lock) {
|
||||
struct futex_private_hash *free __free(kvfree) = NULL;
|
||||
struct futex_private_hash *cur, *new;
|
||||
|
||||
cur = rcu_dereference_protected(mm->futex_phash,
|
||||
lockdep_is_held(&mm->futex_hash_lock));
|
||||
new = mm->futex_phash_new;
|
||||
mm->futex_phash_new = NULL;
|
||||
|
||||
if (fph) {
|
||||
if (cur && !new) {
|
||||
/*
|
||||
* If we have an existing hash, but do not yet have
|
||||
* allocated a replacement hash, drop the initial
|
||||
* reference on the existing hash.
|
||||
*/
|
||||
futex_private_hash_put(cur);
|
||||
}
|
||||
|
||||
if (new) {
|
||||
/*
|
||||
* Two updates raced; throw out the lesser one.
|
||||
*/
|
||||
if (futex_hash_less(new, fph)) {
|
||||
free = new;
|
||||
new = fph;
|
||||
} else {
|
||||
free = fph;
|
||||
}
|
||||
} else {
|
||||
new = fph;
|
||||
}
|
||||
fph = NULL;
|
||||
}
|
||||
|
||||
if (new) {
|
||||
/*
|
||||
* Will set mm->futex_phash_new on failure;
|
||||
* futex_private_hash_get() will try again.
|
||||
*/
|
||||
if (!__futex_pivot_hash(mm, new) && custom)
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int futex_hash_allocate_default(void)
|
||||
{
|
||||
unsigned int threads, buckets, current_buckets = 0;
|
||||
struct futex_private_hash *fph;
|
||||
|
||||
if (!current->mm)
|
||||
return 0;
|
||||
|
||||
scoped_guard(rcu) {
|
||||
threads = min_t(unsigned int,
|
||||
get_nr_threads(current),
|
||||
num_online_cpus());
|
||||
|
||||
fph = rcu_dereference(current->mm->futex_phash);
|
||||
if (fph) {
|
||||
if (fph->custom)
|
||||
return 0;
|
||||
|
||||
current_buckets = fph->hash_mask + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The default allocation will remain within
|
||||
* 16 <= threads * 4 <= global hash size
|
||||
*/
|
||||
buckets = roundup_pow_of_two(4 * threads);
|
||||
buckets = clamp(buckets, 16, futex_hashmask + 1);
|
||||
|
||||
if (current_buckets >= buckets)
|
||||
return 0;
|
||||
|
||||
return futex_hash_allocate(buckets, 0);
|
||||
}
|
||||
|
||||
static int futex_hash_get_slots(void)
|
||||
{
|
||||
struct futex_private_hash *fph;
|
||||
|
||||
guard(rcu)();
|
||||
fph = rcu_dereference(current->mm->futex_phash);
|
||||
if (fph && fph->hash_mask)
|
||||
return fph->hash_mask + 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int futex_hash_get_immutable(void)
|
||||
{
|
||||
struct futex_private_hash *fph;
|
||||
|
||||
guard(rcu)();
|
||||
fph = rcu_dereference(current->mm->futex_phash);
|
||||
if (fph && fph->immutable)
|
||||
return 1;
|
||||
if (fph && !fph->hash_mask)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static int futex_hash_allocate(unsigned int hash_slots, unsigned int flags)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int futex_hash_get_slots(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int futex_hash_get_immutable(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4)
|
||||
{
|
||||
unsigned int flags = FH_CUSTOM;
|
||||
int ret;
|
||||
|
||||
switch (arg2) {
|
||||
case PR_FUTEX_HASH_SET_SLOTS:
|
||||
if (arg4 & ~FH_FLAG_IMMUTABLE)
|
||||
return -EINVAL;
|
||||
if (arg4 & FH_FLAG_IMMUTABLE)
|
||||
flags |= FH_IMMUTABLE;
|
||||
ret = futex_hash_allocate(arg3, flags);
|
||||
break;
|
||||
|
||||
case PR_FUTEX_HASH_GET_SLOTS:
|
||||
ret = futex_hash_get_slots();
|
||||
break;
|
||||
|
||||
case PR_FUTEX_HASH_GET_IMMUTABLE:
|
||||
ret = futex_hash_get_immutable();
|
||||
break;
|
||||
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __init futex_init(void)
|
||||
{
|
||||
unsigned long hashsize, i;
|
||||
unsigned int futex_shift;
|
||||
unsigned int order, n;
|
||||
unsigned long size;
|
||||
|
||||
#ifdef CONFIG_BASE_SMALL
|
||||
hashsize = 16;
|
||||
#else
|
||||
hashsize = roundup_pow_of_two(256 * num_possible_cpus());
|
||||
hashsize = 256 * num_possible_cpus();
|
||||
hashsize /= num_possible_nodes();
|
||||
hashsize = max(4, hashsize);
|
||||
hashsize = roundup_pow_of_two(hashsize);
|
||||
#endif
|
||||
futex_hashshift = ilog2(hashsize);
|
||||
size = sizeof(struct futex_hash_bucket) * hashsize;
|
||||
order = get_order(size);
|
||||
|
||||
futex_queues = alloc_large_system_hash("futex", sizeof(*futex_queues),
|
||||
hashsize, 0, 0,
|
||||
&futex_shift, NULL,
|
||||
hashsize, hashsize);
|
||||
hashsize = 1UL << futex_shift;
|
||||
for_each_node(n) {
|
||||
struct futex_hash_bucket *table;
|
||||
|
||||
for (i = 0; i < hashsize; i++) {
|
||||
atomic_set(&futex_queues[i].waiters, 0);
|
||||
plist_head_init(&futex_queues[i].chain);
|
||||
spin_lock_init(&futex_queues[i].lock);
|
||||
if (order > MAX_PAGE_ORDER)
|
||||
table = vmalloc_huge_node(size, GFP_KERNEL, n);
|
||||
else
|
||||
table = alloc_pages_exact_nid(n, size, GFP_KERNEL);
|
||||
|
||||
BUG_ON(!table);
|
||||
|
||||
for (i = 0; i < hashsize; i++)
|
||||
futex_hash_bucket_init(&table[i], NULL);
|
||||
|
||||
futex_queues[n] = table;
|
||||
}
|
||||
|
||||
futex_hashmask = hashsize - 1;
|
||||
pr_info("futex hash table entries: %lu (%lu bytes on %d NUMA nodes, total %lu KiB, %s).\n",
|
||||
hashsize, size, num_possible_nodes(), size * num_possible_nodes() / 1024,
|
||||
order > MAX_PAGE_ORDER ? "vmalloc" : "linear");
|
||||
return 0;
|
||||
}
|
||||
core_initcall(futex_init);
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <linux/sched/wake_q.h>
|
||||
#include <linux/compat.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/cleanup.h>
|
||||
|
||||
#ifdef CONFIG_PREEMPT_RT
|
||||
#include <linux/rcuwait.h>
|
||||
@@ -38,6 +39,7 @@
|
||||
#define FLAGS_HAS_TIMEOUT 0x0040
|
||||
#define FLAGS_NUMA 0x0080
|
||||
#define FLAGS_STRICT 0x0100
|
||||
#define FLAGS_MPOL 0x0200
|
||||
|
||||
/* FUTEX_ to FLAGS_ */
|
||||
static inline unsigned int futex_to_flags(unsigned int op)
|
||||
@@ -53,7 +55,7 @@ static inline unsigned int futex_to_flags(unsigned int op)
|
||||
return flags;
|
||||
}
|
||||
|
||||
#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_PRIVATE)
|
||||
#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_NUMA | FUTEX2_MPOL | FUTEX2_PRIVATE)
|
||||
|
||||
/* FUTEX2_ to FLAGS_ */
|
||||
static inline unsigned int futex2_to_flags(unsigned int flags2)
|
||||
@@ -66,6 +68,9 @@ static inline unsigned int futex2_to_flags(unsigned int flags2)
|
||||
if (flags2 & FUTEX2_NUMA)
|
||||
flags |= FLAGS_NUMA;
|
||||
|
||||
if (flags2 & FUTEX2_MPOL)
|
||||
flags |= FLAGS_MPOL;
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
@@ -86,6 +91,19 @@ static inline bool futex_flags_valid(unsigned int flags)
|
||||
if ((flags & FLAGS_SIZE_MASK) != FLAGS_SIZE_32)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Must be able to represent both FUTEX_NO_NODE and every valid nodeid
|
||||
* in a futex word.
|
||||
*/
|
||||
if (flags & FLAGS_NUMA) {
|
||||
int bits = 8 * futex_size(flags);
|
||||
u64 max = ~0ULL;
|
||||
|
||||
max >>= 64 - bits;
|
||||
if (nr_node_ids >= max)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -117,6 +135,7 @@ struct futex_hash_bucket {
|
||||
atomic_t waiters;
|
||||
spinlock_t lock;
|
||||
struct plist_head chain;
|
||||
struct futex_private_hash *priv;
|
||||
} ____cacheline_aligned_in_smp;
|
||||
|
||||
/*
|
||||
@@ -156,6 +175,7 @@ typedef void (futex_wake_fn)(struct wake_q_head *wake_q, struct futex_q *q);
|
||||
* @requeue_pi_key: the requeue_pi target futex key
|
||||
* @bitset: bitset for the optional bitmasked wakeup
|
||||
* @requeue_state: State field for futex_requeue_pi()
|
||||
* @drop_hb_ref: Waiter should drop the extra hash bucket reference if true
|
||||
* @requeue_wait: RCU wait for futex_requeue_pi() (RT only)
|
||||
*
|
||||
* We use this hashed waitqueue, instead of a normal wait_queue_entry_t, so
|
||||
@@ -182,6 +202,7 @@ struct futex_q {
|
||||
union futex_key *requeue_pi_key;
|
||||
u32 bitset;
|
||||
atomic_t requeue_state;
|
||||
bool drop_hb_ref;
|
||||
#ifdef CONFIG_PREEMPT_RT
|
||||
struct rcuwait requeue_wait;
|
||||
#endif
|
||||
@@ -196,12 +217,35 @@ enum futex_access {
|
||||
|
||||
extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
|
||||
enum futex_access rw);
|
||||
|
||||
extern void futex_q_lockptr_lock(struct futex_q *q);
|
||||
extern struct hrtimer_sleeper *
|
||||
futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
|
||||
int flags, u64 range_ns);
|
||||
|
||||
extern struct futex_hash_bucket *futex_hash(union futex_key *key);
|
||||
#ifdef CONFIG_FUTEX_PRIVATE_HASH
|
||||
extern void futex_hash_get(struct futex_hash_bucket *hb);
|
||||
extern void futex_hash_put(struct futex_hash_bucket *hb);
|
||||
|
||||
extern struct futex_private_hash *futex_private_hash(void);
|
||||
extern bool futex_private_hash_get(struct futex_private_hash *fph);
|
||||
extern void futex_private_hash_put(struct futex_private_hash *fph);
|
||||
|
||||
#else /* !CONFIG_FUTEX_PRIVATE_HASH */
|
||||
static inline void futex_hash_get(struct futex_hash_bucket *hb) { }
|
||||
static inline void futex_hash_put(struct futex_hash_bucket *hb) { }
|
||||
static inline struct futex_private_hash *futex_private_hash(void) { return NULL; }
|
||||
static inline bool futex_private_hash_get(void) { return false; }
|
||||
static inline void futex_private_hash_put(struct futex_private_hash *fph) { }
|
||||
#endif
|
||||
|
||||
DEFINE_CLASS(hb, struct futex_hash_bucket *,
|
||||
if (_T) futex_hash_put(_T),
|
||||
futex_hash(key), union futex_key *key);
|
||||
|
||||
DEFINE_CLASS(private_hash, struct futex_private_hash *,
|
||||
if (_T) futex_private_hash_put(_T),
|
||||
futex_private_hash(), void);
|
||||
|
||||
/**
|
||||
* futex_match - Check whether two futex keys are equal
|
||||
@@ -219,9 +263,9 @@ static inline int futex_match(union futex_key *key1, union futex_key *key2)
|
||||
}
|
||||
|
||||
extern int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
|
||||
struct futex_q *q, struct futex_hash_bucket **hb);
|
||||
extern void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
|
||||
struct hrtimer_sleeper *timeout);
|
||||
struct futex_q *q, union futex_key *key2,
|
||||
struct task_struct *task);
|
||||
extern void futex_do_wait(struct futex_q *q, struct hrtimer_sleeper *timeout);
|
||||
extern bool __futex_wake_mark(struct futex_q *q);
|
||||
extern void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q);
|
||||
|
||||
@@ -256,7 +300,7 @@ static inline int futex_cmpxchg_value_locked(u32 *curval, u32 __user *uaddr, u32
|
||||
* This looks a bit overkill, but generally just results in a couple
|
||||
* of instructions.
|
||||
*/
|
||||
static __always_inline int futex_read_inatomic(u32 *dest, u32 __user *from)
|
||||
static __always_inline int futex_get_value(u32 *dest, u32 __user *from)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
@@ -273,12 +317,26 @@ Efault:
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
static __always_inline int futex_put_value(u32 val, u32 __user *to)
|
||||
{
|
||||
if (can_do_masked_user_access())
|
||||
to = masked_user_access_begin(to);
|
||||
else if (!user_read_access_begin(to, sizeof(*to)))
|
||||
return -EFAULT;
|
||||
unsafe_put_user(val, to, Efault);
|
||||
user_read_access_end();
|
||||
return 0;
|
||||
Efault:
|
||||
user_read_access_end();
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
static inline int futex_get_value_locked(u32 *dest, u32 __user *from)
|
||||
{
|
||||
int ret;
|
||||
|
||||
pagefault_disable();
|
||||
ret = futex_read_inatomic(dest, from);
|
||||
ret = futex_get_value(dest, from);
|
||||
pagefault_enable();
|
||||
|
||||
return ret;
|
||||
@@ -354,7 +412,7 @@ static inline int futex_hb_waiters_pending(struct futex_hash_bucket *hb)
|
||||
#endif
|
||||
}
|
||||
|
||||
extern struct futex_hash_bucket *futex_q_lock(struct futex_q *q);
|
||||
extern void futex_q_lock(struct futex_q *q, struct futex_hash_bucket *hb);
|
||||
extern void futex_q_unlock(struct futex_hash_bucket *hb);
|
||||
|
||||
|
||||
|
||||
@@ -806,7 +806,7 @@ handle_err:
|
||||
break;
|
||||
}
|
||||
|
||||
spin_lock(q->lock_ptr);
|
||||
futex_q_lockptr_lock(q);
|
||||
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
|
||||
|
||||
/*
|
||||
@@ -920,7 +920,6 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
|
||||
struct hrtimer_sleeper timeout, *to;
|
||||
struct task_struct *exiting = NULL;
|
||||
struct rt_mutex_waiter rt_waiter;
|
||||
struct futex_hash_bucket *hb;
|
||||
struct futex_q q = futex_q_init;
|
||||
DEFINE_WAKE_Q(wake_q);
|
||||
int res, ret;
|
||||
@@ -939,151 +938,183 @@ retry:
|
||||
goto out;
|
||||
|
||||
retry_private:
|
||||
hb = futex_q_lock(&q);
|
||||
if (1) {
|
||||
CLASS(hb, hb)(&q.key);
|
||||
|
||||
ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
|
||||
&exiting, 0);
|
||||
if (unlikely(ret)) {
|
||||
/*
|
||||
* Atomic work succeeded and we got the lock,
|
||||
* or failed. Either way, we do _not_ block.
|
||||
*/
|
||||
switch (ret) {
|
||||
case 1:
|
||||
/* We got the lock. */
|
||||
ret = 0;
|
||||
goto out_unlock_put_key;
|
||||
case -EFAULT:
|
||||
goto uaddr_faulted;
|
||||
case -EBUSY:
|
||||
case -EAGAIN:
|
||||
futex_q_lock(&q, hb);
|
||||
|
||||
ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current,
|
||||
&exiting, 0);
|
||||
if (unlikely(ret)) {
|
||||
/*
|
||||
* Two reasons for this:
|
||||
* - EBUSY: Task is exiting and we just wait for the
|
||||
* exit to complete.
|
||||
* - EAGAIN: The user space value changed.
|
||||
* Atomic work succeeded and we got the lock,
|
||||
* or failed. Either way, we do _not_ block.
|
||||
*/
|
||||
futex_q_unlock(hb);
|
||||
/*
|
||||
* Handle the case where the owner is in the middle of
|
||||
* exiting. Wait for the exit to complete otherwise
|
||||
* this task might loop forever, aka. live lock.
|
||||
*/
|
||||
wait_for_owner_exiting(ret, exiting);
|
||||
cond_resched();
|
||||
goto retry;
|
||||
default:
|
||||
goto out_unlock_put_key;
|
||||
switch (ret) {
|
||||
case 1:
|
||||
/* We got the lock. */
|
||||
ret = 0;
|
||||
goto out_unlock_put_key;
|
||||
case -EFAULT:
|
||||
goto uaddr_faulted;
|
||||
case -EBUSY:
|
||||
case -EAGAIN:
|
||||
/*
|
||||
* Two reasons for this:
|
||||
* - EBUSY: Task is exiting and we just wait for the
|
||||
* exit to complete.
|
||||
* - EAGAIN: The user space value changed.
|
||||
*/
|
||||
futex_q_unlock(hb);
|
||||
/*
|
||||
* Handle the case where the owner is in the middle of
|
||||
* exiting. Wait for the exit to complete otherwise
|
||||
* this task might loop forever, aka. live lock.
|
||||
*/
|
||||
wait_for_owner_exiting(ret, exiting);
|
||||
cond_resched();
|
||||
goto retry;
|
||||
default:
|
||||
goto out_unlock_put_key;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
WARN_ON(!q.pi_state);
|
||||
WARN_ON(!q.pi_state);
|
||||
|
||||
/*
|
||||
* Only actually queue now that the atomic ops are done:
|
||||
*/
|
||||
__futex_queue(&q, hb, current);
|
||||
/*
|
||||
* Only actually queue now that the atomic ops are done:
|
||||
*/
|
||||
__futex_queue(&q, hb, current);
|
||||
|
||||
if (trylock) {
|
||||
ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
|
||||
/* Fixup the trylock return value: */
|
||||
ret = ret ? 0 : -EWOULDBLOCK;
|
||||
goto no_block;
|
||||
}
|
||||
if (trylock) {
|
||||
ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex);
|
||||
/* Fixup the trylock return value: */
|
||||
ret = ret ? 0 : -EWOULDBLOCK;
|
||||
goto no_block;
|
||||
}
|
||||
|
||||
/*
|
||||
* Must be done before we enqueue the waiter, here is unfortunately
|
||||
* under the hb lock, but that *should* work because it does nothing.
|
||||
*/
|
||||
rt_mutex_pre_schedule();
|
||||
/*
|
||||
* Caution; releasing @hb in-scope. The hb->lock is still locked
|
||||
* while the reference is dropped. The reference can not be dropped
|
||||
* after the unlock because if a user initiated resize is in progress
|
||||
* then we might need to wake him. This can not be done after the
|
||||
* rt_mutex_pre_schedule() invocation. The hb will remain valid because
|
||||
* the thread, performing resize, will block on hb->lock during
|
||||
* the requeue.
|
||||
*/
|
||||
futex_hash_put(no_free_ptr(hb));
|
||||
/*
|
||||
* Must be done before we enqueue the waiter, here is unfortunately
|
||||
* under the hb lock, but that *should* work because it does nothing.
|
||||
*/
|
||||
rt_mutex_pre_schedule();
|
||||
|
||||
rt_mutex_init_waiter(&rt_waiter);
|
||||
rt_mutex_init_waiter(&rt_waiter);
|
||||
|
||||
/*
|
||||
* On PREEMPT_RT, when hb->lock becomes an rt_mutex, we must not
|
||||
* hold it while doing rt_mutex_start_proxy(), because then it will
|
||||
* include hb->lock in the blocking chain, even through we'll not in
|
||||
* fact hold it while blocking. This will lead it to report -EDEADLK
|
||||
* and BUG when futex_unlock_pi() interleaves with this.
|
||||
*
|
||||
* Therefore acquire wait_lock while holding hb->lock, but drop the
|
||||
* latter before calling __rt_mutex_start_proxy_lock(). This
|
||||
* interleaves with futex_unlock_pi() -- which does a similar lock
|
||||
* handoff -- such that the latter can observe the futex_q::pi_state
|
||||
* before __rt_mutex_start_proxy_lock() is done.
|
||||
*/
|
||||
raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
|
||||
spin_unlock(q.lock_ptr);
|
||||
/*
|
||||
* __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
|
||||
* such that futex_unlock_pi() is guaranteed to observe the waiter when
|
||||
* it sees the futex_q::pi_state.
|
||||
*/
|
||||
ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current, &wake_q);
|
||||
raw_spin_unlock_irq_wake(&q.pi_state->pi_mutex.wait_lock, &wake_q);
|
||||
/*
|
||||
* On PREEMPT_RT, when hb->lock becomes an rt_mutex, we must not
* hold it while doing rt_mutex_start_proxy(), because then it will
* include hb->lock in the blocking chain, even through we'll not in
* fact hold it while blocking. This will lead it to report -EDEADLK
* and BUG when futex_unlock_pi() interleaves with this.
*
* Therefore acquire wait_lock while holding hb->lock, but drop the
* latter before calling __rt_mutex_start_proxy_lock(). This
* interleaves with futex_unlock_pi() -- which does a similar lock
* handoff -- such that the latter can observe the futex_q::pi_state
* before __rt_mutex_start_proxy_lock() is done.
*/
raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
spin_unlock(q.lock_ptr);
/*
* __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
* such that futex_unlock_pi() is guaranteed to observe the waiter when
* it sees the futex_q::pi_state.
*/
ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current, &wake_q);
raw_spin_unlock_irq_wake(&q.pi_state->pi_mutex.wait_lock, &wake_q);

if (ret) {
if (ret == 1)
ret = 0;
goto cleanup;
}

if (unlikely(to))
hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);

ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);

cleanup:
/*
* If we failed to acquire the lock (deadlock/signal/timeout), we must
* unwind the above, however we canont lock hb->lock because
* rt_mutex already has a waiter enqueued and hb->lock can itself try
* and enqueue an rt_waiter through rtlock.
*
* Doing the cleanup without holding hb->lock can cause inconsistent
* state between hb and pi_state, but only in the direction of not
* seeing a waiter that is leaving.
*
* See futex_unlock_pi(), it deals with this inconsistency.
*
* There be dragons here, since we must deal with the inconsistency on
* the way out (here), it is impossible to detect/warn about the race
* the other way around (missing an incoming waiter).
*
* What could possibly go wrong...
*/
if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
ret = 0;

/*
* Now that the rt_waiter has been dequeued, it is safe to use
* spinlock/rtlock (which might enqueue its own rt_waiter) and fix up
* the
*/
spin_lock(q.lock_ptr);
futex_q_lockptr_lock(&q);
/*
* Waiter is unqueued.
*/
rt_mutex_post_schedule();

no_block:
/*
* Fixup the pi_state owner and possibly acquire the lock if we
* haven't already.
*/
res = fixup_pi_owner(uaddr, &q, !ret);
/*
* If fixup_pi_owner() returned an error, propagate that. If it acquired
* the lock, clear our -ETIMEDOUT or -EINTR.
*/
if (res)
ret = (res < 0) ? res : 0;

futex_unqueue_pi(&q);
spin_unlock(q.lock_ptr);
if (q.drop_hb_ref) {
CLASS(hb, hb)(&q.key);
/* Additional reference from futex_unlock_pi() */
futex_hash_put(hb);
}
goto out;

out_unlock_put_key:
futex_q_unlock(hb);
goto out;

uaddr_faulted:
futex_q_unlock(hb);

ret = fault_in_user_writeable(uaddr);
if (ret)
goto out;

if (!(flags & FLAGS_SHARED))
goto retry_private;

goto retry;
}

out:
if (to) {
@@ -1091,18 +1122,6 @@ out:
destroy_hrtimer_on_stack(&to->timer);
}
return ret != -EINTR ? ret : -ERESTARTNOINTR;

uaddr_faulted:
futex_q_unlock(hb);

ret = fault_in_user_writeable(uaddr);
if (ret)
goto out;

if (!(flags & FLAGS_SHARED))
goto retry_private;

goto retry;
}
|
||||
/*
@@ -1114,7 +1133,6 @@ int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
{
u32 curval, uval, vpid = task_pid_vnr(current);
union futex_key key = FUTEX_KEY_INIT;
struct futex_hash_bucket *hb;
struct futex_q *top_waiter;
int ret;

@@ -1134,7 +1152,7 @@ retry:
if (ret)
return ret;

hb = futex_hash(&key);
CLASS(hb, hb)(&key);
spin_lock(&hb->lock);
retry_hb:

@@ -1187,6 +1205,12 @@ retry_hb:
*/
rt_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
if (!rt_waiter) {
/*
* Acquire a reference for the leaving waiter to ensure
* valid futex_q::lock_ptr.
*/
futex_hash_get(hb);
top_waiter->drop_hb_ref = true;
__futex_unqueue(top_waiter);
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
goto retry_hb;
|
||||
@@ -87,6 +87,11 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
futex_hb_waiters_inc(hb2);
plist_add(&q->list, &hb2->chain);
q->lock_ptr = &hb2->lock;
/*
* hb1 and hb2 belong to the same futex_hash_bucket_private
* because if we managed get a reference on hb1 then it can't be
* replaced. Therefore we avoid put(hb1)+get(hb2) here.
*/
}
q->key = *key2;
}
@@ -231,7 +236,12 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,

WARN_ON(!q->rt_waiter);
q->rt_waiter = NULL;

/*
* Acquire a reference for the waiter to ensure valid
* futex_q::lock_ptr.
*/
futex_hash_get(hb);
q->drop_hb_ref = true;
q->lock_ptr = &hb->lock;

/* Signal locked state to the waiter */
@@ -371,7 +381,6 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
int task_count = 0, ret;
struct futex_pi_state *pi_state = NULL;
struct futex_hash_bucket *hb1, *hb2;
struct futex_q *this, *next;
DEFINE_WAKE_Q(wake_q);
||||
|
||||
@@ -443,240 +452,242 @@ retry:
|
||||
if (requeue_pi && futex_match(&key1, &key2))
|
||||
return -EINVAL;
|
||||
|
||||
hb1 = futex_hash(&key1);
|
||||
hb2 = futex_hash(&key2);
|
||||
|
||||
retry_private:
|
||||
futex_hb_waiters_inc(hb2);
|
||||
double_lock_hb(hb1, hb2);
|
||||
if (1) {
|
||||
CLASS(hb, hb1)(&key1);
|
||||
CLASS(hb, hb2)(&key2);
|
||||
|
||||
if (likely(cmpval != NULL)) {
|
||||
u32 curval;
|
||||
futex_hb_waiters_inc(hb2);
|
||||
double_lock_hb(hb1, hb2);
|
||||
|
||||
ret = futex_get_value_locked(&curval, uaddr1);
|
||||
if (likely(cmpval != NULL)) {
|
||||
u32 curval;
|
||||
|
||||
if (unlikely(ret)) {
|
||||
double_unlock_hb(hb1, hb2);
|
||||
futex_hb_waiters_dec(hb2);
|
||||
ret = futex_get_value_locked(&curval, uaddr1);
|
||||
|
||||
ret = get_user(curval, uaddr1);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (unlikely(ret)) {
|
||||
futex_hb_waiters_dec(hb2);
|
||||
double_unlock_hb(hb1, hb2);
|
||||
|
||||
if (!(flags1 & FLAGS_SHARED))
|
||||
goto retry_private;
|
||||
ret = get_user(curval, uaddr1);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
goto retry;
|
||||
}
|
||||
if (curval != *cmpval) {
|
||||
ret = -EAGAIN;
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
if (!(flags1 & FLAGS_SHARED))
|
||||
goto retry_private;
|
||||
|
||||
if (requeue_pi) {
|
||||
struct task_struct *exiting = NULL;
|
||||
|
||||
/*
|
||||
* Attempt to acquire uaddr2 and wake the top waiter. If we
|
||||
* intend to requeue waiters, force setting the FUTEX_WAITERS
|
||||
* bit. We force this here where we are able to easily handle
|
||||
* faults rather in the requeue loop below.
|
||||
*
|
||||
* Updates topwaiter::requeue_state if a top waiter exists.
|
||||
*/
|
||||
ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
|
||||
&key2, &pi_state,
|
||||
&exiting, nr_requeue);
|
||||
|
||||
/*
|
||||
* At this point the top_waiter has either taken uaddr2 or
|
||||
* is waiting on it. In both cases pi_state has been
|
||||
* established and an initial refcount on it. In case of an
|
||||
* error there's nothing.
|
||||
*
|
||||
* The top waiter's requeue_state is up to date:
|
||||
*
|
||||
* - If the lock was acquired atomically (ret == 1), then
|
||||
* the state is Q_REQUEUE_PI_LOCKED.
|
||||
*
|
||||
* The top waiter has been dequeued and woken up and can
|
||||
* return to user space immediately. The kernel/user
|
||||
* space state is consistent. In case that there must be
|
||||
* more waiters requeued the WAITERS bit in the user
|
||||
* space futex is set so the top waiter task has to go
|
||||
* into the syscall slowpath to unlock the futex. This
|
||||
* will block until this requeue operation has been
|
||||
* completed and the hash bucket locks have been
|
||||
* dropped.
|
||||
*
|
||||
* - If the trylock failed with an error (ret < 0) then
|
||||
* the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
|
||||
* happened", or Q_REQUEUE_PI_IGNORE when there was an
|
||||
* interleaved early wakeup.
|
||||
*
|
||||
* - If the trylock did not succeed (ret == 0) then the
|
||||
* state is either Q_REQUEUE_PI_IN_PROGRESS or
|
||||
* Q_REQUEUE_PI_WAIT if an early wakeup interleaved.
|
||||
* This will be cleaned up in the loop below, which
|
||||
* cannot fail because futex_proxy_trylock_atomic() did
|
||||
* the same sanity checks for requeue_pi as the loop
|
||||
* below does.
|
||||
*/
|
||||
switch (ret) {
|
||||
case 0:
|
||||
/* We hold a reference on the pi state. */
|
||||
break;
|
||||
|
||||
case 1:
|
||||
/*
|
||||
* futex_proxy_trylock_atomic() acquired the user space
|
||||
* futex. Adjust task_count.
|
||||
*/
|
||||
task_count++;
|
||||
ret = 0;
|
||||
break;
|
||||
|
||||
/*
|
||||
* If the above failed, then pi_state is NULL and
|
||||
* waiter::requeue_state is correct.
|
||||
*/
|
||||
case -EFAULT:
|
||||
double_unlock_hb(hb1, hb2);
|
||||
futex_hb_waiters_dec(hb2);
|
||||
ret = fault_in_user_writeable(uaddr2);
|
||||
if (!ret)
|
||||
goto retry;
|
||||
return ret;
|
||||
case -EBUSY:
|
||||
case -EAGAIN:
|
||||
/*
|
||||
* Two reasons for this:
|
||||
* - EBUSY: Owner is exiting and we just wait for the
|
||||
* exit to complete.
|
||||
* - EAGAIN: The user space value changed.
|
||||
*/
|
||||
double_unlock_hb(hb1, hb2);
|
||||
futex_hb_waiters_dec(hb2);
|
||||
/*
|
||||
* Handle the case where the owner is in the middle of
|
||||
* exiting. Wait for the exit to complete otherwise
|
||||
* this task might loop forever, aka. live lock.
|
||||
*/
|
||||
wait_for_owner_exiting(ret, exiting);
|
||||
cond_resched();
|
||||
goto retry;
|
||||
default:
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
plist_for_each_entry_safe(this, next, &hb1->chain, list) {
|
||||
if (task_count - nr_wake >= nr_requeue)
|
||||
break;
|
||||
|
||||
if (!futex_match(&this->key, &key1))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
|
||||
* be paired with each other and no other futex ops.
|
||||
*
|
||||
* We should never be requeueing a futex_q with a pi_state,
|
||||
* which is awaiting a futex_unlock_pi().
|
||||
*/
|
||||
if ((requeue_pi && !this->rt_waiter) ||
|
||||
(!requeue_pi && this->rt_waiter) ||
|
||||
this->pi_state) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
if (curval != *cmpval) {
|
||||
ret = -EAGAIN;
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
/* Plain futexes just wake or requeue and are done */
|
||||
if (!requeue_pi) {
|
||||
if (++task_count <= nr_wake)
|
||||
this->wake(&wake_q, this);
|
||||
else
|
||||
if (requeue_pi) {
|
||||
struct task_struct *exiting = NULL;
|
||||
|
||||
/*
|
||||
* Attempt to acquire uaddr2 and wake the top waiter. If we
|
||||
* intend to requeue waiters, force setting the FUTEX_WAITERS
|
||||
* bit. We force this here where we are able to easily handle
|
||||
* faults rather in the requeue loop below.
|
||||
*
|
||||
* Updates topwaiter::requeue_state if a top waiter exists.
|
||||
*/
|
||||
ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
|
||||
&key2, &pi_state,
|
||||
&exiting, nr_requeue);
|
||||
|
||||
/*
|
||||
* At this point the top_waiter has either taken uaddr2 or
|
||||
* is waiting on it. In both cases pi_state has been
|
||||
* established and an initial refcount on it. In case of an
|
||||
* error there's nothing.
|
||||
*
|
||||
* The top waiter's requeue_state is up to date:
|
||||
*
|
||||
* - If the lock was acquired atomically (ret == 1), then
|
||||
* the state is Q_REQUEUE_PI_LOCKED.
|
||||
*
|
||||
* The top waiter has been dequeued and woken up and can
|
||||
* return to user space immediately. The kernel/user
|
||||
* space state is consistent. In case that there must be
|
||||
* more waiters requeued the WAITERS bit in the user
|
||||
* space futex is set so the top waiter task has to go
|
||||
* into the syscall slowpath to unlock the futex. This
|
||||
* will block until this requeue operation has been
|
||||
* completed and the hash bucket locks have been
|
||||
* dropped.
|
||||
*
|
||||
* - If the trylock failed with an error (ret < 0) then
|
||||
* the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
|
||||
* happened", or Q_REQUEUE_PI_IGNORE when there was an
|
||||
* interleaved early wakeup.
|
||||
*
|
||||
* - If the trylock did not succeed (ret == 0) then the
|
||||
* state is either Q_REQUEUE_PI_IN_PROGRESS or
|
||||
* Q_REQUEUE_PI_WAIT if an early wakeup interleaved.
|
||||
* This will be cleaned up in the loop below, which
|
||||
* cannot fail because futex_proxy_trylock_atomic() did
|
||||
* the same sanity checks for requeue_pi as the loop
|
||||
* below does.
|
||||
*/
|
||||
switch (ret) {
|
||||
case 0:
|
||||
/* We hold a reference on the pi state. */
|
||||
break;
|
||||
|
||||
case 1:
|
||||
/*
|
||||
* futex_proxy_trylock_atomic() acquired the user space
|
||||
* futex. Adjust task_count.
|
||||
*/
|
||||
task_count++;
|
||||
ret = 0;
|
||||
break;
|
||||
|
||||
/*
|
||||
* If the above failed, then pi_state is NULL and
|
||||
* waiter::requeue_state is correct.
|
||||
*/
|
||||
case -EFAULT:
|
||||
futex_hb_waiters_dec(hb2);
|
||||
double_unlock_hb(hb1, hb2);
|
||||
ret = fault_in_user_writeable(uaddr2);
|
||||
if (!ret)
|
||||
goto retry;
|
||||
return ret;
|
||||
case -EBUSY:
|
||||
case -EAGAIN:
|
||||
/*
|
||||
* Two reasons for this:
|
||||
* - EBUSY: Owner is exiting and we just wait for the
|
||||
* exit to complete.
|
||||
* - EAGAIN: The user space value changed.
|
||||
*/
|
||||
futex_hb_waiters_dec(hb2);
|
||||
double_unlock_hb(hb1, hb2);
|
||||
/*
|
||||
* Handle the case where the owner is in the middle of
|
||||
* exiting. Wait for the exit to complete otherwise
|
||||
* this task might loop forever, aka. live lock.
|
||||
*/
|
||||
wait_for_owner_exiting(ret, exiting);
|
||||
cond_resched();
|
||||
goto retry;
|
||||
default:
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
plist_for_each_entry_safe(this, next, &hb1->chain, list) {
|
||||
if (task_count - nr_wake >= nr_requeue)
|
||||
break;
|
||||
|
||||
if (!futex_match(&this->key, &key1))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always
|
||||
* be paired with each other and no other futex ops.
|
||||
*
|
||||
* We should never be requeueing a futex_q with a pi_state,
|
||||
* which is awaiting a futex_unlock_pi().
|
||||
*/
|
||||
if ((requeue_pi && !this->rt_waiter) ||
|
||||
(!requeue_pi && this->rt_waiter) ||
|
||||
this->pi_state) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Plain futexes just wake or requeue and are done */
|
||||
if (!requeue_pi) {
|
||||
if (++task_count <= nr_wake)
|
||||
this->wake(&wake_q, this);
|
||||
else
|
||||
requeue_futex(this, hb1, hb2, &key2);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Ensure we requeue to the expected futex for requeue_pi. */
|
||||
if (!futex_match(this->requeue_pi_key, &key2)) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Requeue nr_requeue waiters and possibly one more in the case
|
||||
* of requeue_pi if we couldn't acquire the lock atomically.
|
||||
*
|
||||
* Prepare the waiter to take the rt_mutex. Take a refcount
|
||||
* on the pi_state and store the pointer in the futex_q
|
||||
* object of the waiter.
|
||||
*/
|
||||
get_pi_state(pi_state);
|
||||
|
||||
/* Don't requeue when the waiter is already on the way out. */
|
||||
if (!futex_requeue_pi_prepare(this, pi_state)) {
|
||||
/*
|
||||
* Early woken waiter signaled that it is on the
|
||||
* way out. Drop the pi_state reference and try the
|
||||
* next waiter. @this->pi_state is still NULL.
|
||||
*/
|
||||
put_pi_state(pi_state);
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
|
||||
this->rt_waiter,
|
||||
this->task);
|
||||
|
||||
if (ret == 1) {
|
||||
/*
|
||||
* We got the lock. We do neither drop the refcount
|
||||
* on pi_state nor clear this->pi_state because the
|
||||
* waiter needs the pi_state for cleaning up the
|
||||
* user space value. It will drop the refcount
|
||||
* after doing so. this::requeue_state is updated
|
||||
* in the wakeup as well.
|
||||
*/
|
||||
requeue_pi_wake_futex(this, &key2, hb2);
|
||||
task_count++;
|
||||
} else if (!ret) {
|
||||
/* Waiter is queued, move it to hb2 */
|
||||
requeue_futex(this, hb1, hb2, &key2);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Ensure we requeue to the expected futex for requeue_pi. */
|
||||
if (!futex_match(this->requeue_pi_key, &key2)) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
futex_requeue_pi_complete(this, 0);
|
||||
task_count++;
|
||||
} else {
|
||||
/*
|
||||
* rt_mutex_start_proxy_lock() detected a potential
|
||||
* deadlock when we tried to queue that waiter.
|
||||
* Drop the pi_state reference which we took above
|
||||
* and remove the pointer to the state from the
|
||||
* waiters futex_q object.
|
||||
*/
|
||||
this->pi_state = NULL;
|
||||
put_pi_state(pi_state);
|
||||
futex_requeue_pi_complete(this, ret);
|
||||
/*
|
||||
* We stop queueing more waiters and let user space
|
||||
* deal with the mess.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Requeue nr_requeue waiters and possibly one more in the case
|
||||
* of requeue_pi if we couldn't acquire the lock atomically.
|
||||
*
|
||||
* Prepare the waiter to take the rt_mutex. Take a refcount
|
||||
* on the pi_state and store the pointer in the futex_q
|
||||
* object of the waiter.
|
||||
* We took an extra initial reference to the pi_state in
|
||||
* futex_proxy_trylock_atomic(). We need to drop it here again.
|
||||
*/
|
||||
get_pi_state(pi_state);
|
||||
|
||||
/* Don't requeue when the waiter is already on the way out. */
|
||||
if (!futex_requeue_pi_prepare(this, pi_state)) {
|
||||
/*
|
||||
* Early woken waiter signaled that it is on the
|
||||
* way out. Drop the pi_state reference and try the
|
||||
* next waiter. @this->pi_state is still NULL.
|
||||
*/
|
||||
put_pi_state(pi_state);
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
|
||||
this->rt_waiter,
|
||||
this->task);
|
||||
|
||||
if (ret == 1) {
|
||||
/*
|
||||
* We got the lock. We do neither drop the refcount
|
||||
* on pi_state nor clear this->pi_state because the
|
||||
* waiter needs the pi_state for cleaning up the
|
||||
* user space value. It will drop the refcount
|
||||
* after doing so. this::requeue_state is updated
|
||||
* in the wakeup as well.
|
||||
*/
|
||||
requeue_pi_wake_futex(this, &key2, hb2);
|
||||
task_count++;
|
||||
} else if (!ret) {
|
||||
/* Waiter is queued, move it to hb2 */
|
||||
requeue_futex(this, hb1, hb2, &key2);
|
||||
futex_requeue_pi_complete(this, 0);
|
||||
task_count++;
|
||||
} else {
|
||||
/*
|
||||
* rt_mutex_start_proxy_lock() detected a potential
|
||||
* deadlock when we tried to queue that waiter.
|
||||
* Drop the pi_state reference which we took above
|
||||
* and remove the pointer to the state from the
|
||||
* waiters futex_q object.
|
||||
*/
|
||||
this->pi_state = NULL;
|
||||
put_pi_state(pi_state);
|
||||
futex_requeue_pi_complete(this, ret);
|
||||
/*
|
||||
* We stop queueing more waiters and let user space
|
||||
* deal with the mess.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We took an extra initial reference to the pi_state in
|
||||
* futex_proxy_trylock_atomic(). We need to drop it here again.
|
||||
*/
|
||||
put_pi_state(pi_state);
|
||||
put_pi_state(pi_state);
|
||||
|
||||
out_unlock:
|
||||
double_unlock_hb(hb1, hb2);
|
||||
futex_hb_waiters_dec(hb2);
|
||||
double_unlock_hb(hb1, hb2);
|
||||
}
|
||||
wake_up_q(&wake_q);
|
||||
futex_hb_waiters_dec(hb2);
|
||||
return ret ? ret : task_count;
|
||||
}
|
||||
|
||||
@@ -769,7 +780,6 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
|
||||
{
|
||||
struct hrtimer_sleeper timeout, *to;
|
||||
struct rt_mutex_waiter rt_waiter;
|
||||
struct futex_hash_bucket *hb;
|
||||
union futex_key key2 = FUTEX_KEY_INIT;
|
||||
struct futex_q q = futex_q_init;
|
||||
struct rt_mutex_base *pi_mutex;
|
||||
@@ -805,35 +815,28 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
|
||||
* Prepare to wait on uaddr. On success, it holds hb->lock and q
|
||||
* is initialized.
|
||||
*/
|
||||
ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
|
||||
ret = futex_wait_setup(uaddr, val, flags, &q, &key2, current);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* The check above which compares uaddrs is not sufficient for
|
||||
* shared futexes. We need to compare the keys:
|
||||
*/
|
||||
if (futex_match(&q.key, &key2)) {
|
||||
futex_q_unlock(hb);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Queue the futex_q, drop the hb lock, wait for wakeup. */
|
||||
futex_wait_queue(hb, &q, to);
|
||||
futex_do_wait(&q, to);
|
||||
|
||||
switch (futex_requeue_pi_wakeup_sync(&q)) {
|
||||
case Q_REQUEUE_PI_IGNORE:
|
||||
/* The waiter is still on uaddr1 */
|
||||
spin_lock(&hb->lock);
|
||||
ret = handle_early_requeue_pi_wakeup(hb, &q, to);
|
||||
spin_unlock(&hb->lock);
|
||||
{
|
||||
CLASS(hb, hb)(&q.key);
|
||||
/* The waiter is still on uaddr1 */
|
||||
spin_lock(&hb->lock);
|
||||
ret = handle_early_requeue_pi_wakeup(hb, &q, to);
|
||||
spin_unlock(&hb->lock);
|
||||
}
|
||||
break;
|
||||
|
||||
case Q_REQUEUE_PI_LOCKED:
|
||||
/* The requeue acquired the lock */
|
||||
if (q.pi_state && (q.pi_state->owner != current)) {
|
||||
spin_lock(q.lock_ptr);
|
||||
futex_q_lockptr_lock(&q);
|
||||
ret = fixup_pi_owner(uaddr2, &q, true);
|
||||
/*
|
||||
* Drop the reference to the pi state which the
|
||||
@@ -860,7 +863,7 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
|
||||
if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
|
||||
ret = 0;
|
||||
|
||||
spin_lock(q.lock_ptr);
|
||||
futex_q_lockptr_lock(&q);
|
||||
debug_rt_mutex_free_waiter(&rt_waiter);
|
||||
/*
|
||||
* Fixup the pi_state owner and possibly acquire the lock if we
|
||||
@@ -892,6 +895,11 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
if (q.drop_hb_ref) {
|
||||
CLASS(hb, hb)(&q.key);
|
||||
/* Additional reference from requeue_pi_wake_futex() */
|
||||
futex_hash_put(hb);
|
||||
}
|
||||
|
||||
out:
|
||||
if (to) {
|
||||
|
||||
@@ -154,7 +154,6 @@ void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q)
*/
int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
{
struct futex_hash_bucket *hb;
struct futex_q *this, *next;
union futex_key key = FUTEX_KEY_INIT;
DEFINE_WAKE_Q(wake_q);
@@ -170,7 +169,7 @@ int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
if ((flags & FLAGS_STRICT) && !nr_wake)
return 0;

hb = futex_hash(&key);
CLASS(hb, hb)(&key);

/* Make sure we really have tasks to wakeup */
if (!futex_hb_waiters_pending(hb))
@@ -253,7 +252,6 @@ int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
|
||||
int nr_wake, int nr_wake2, int op)
|
||||
{
|
||||
union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
|
||||
struct futex_hash_bucket *hb1, *hb2;
|
||||
struct futex_q *this, *next;
|
||||
int ret, op_ret;
|
||||
DEFINE_WAKE_Q(wake_q);
|
||||
@@ -266,67 +264,69 @@ retry:
|
||||
if (unlikely(ret != 0))
|
||||
return ret;
|
||||
|
||||
hb1 = futex_hash(&key1);
|
||||
hb2 = futex_hash(&key2);
|
||||
|
||||
retry_private:
|
||||
double_lock_hb(hb1, hb2);
|
||||
op_ret = futex_atomic_op_inuser(op, uaddr2);
|
||||
if (unlikely(op_ret < 0)) {
|
||||
double_unlock_hb(hb1, hb2);
|
||||
if (1) {
|
||||
CLASS(hb, hb1)(&key1);
|
||||
CLASS(hb, hb2)(&key2);
|
||||
|
||||
if (!IS_ENABLED(CONFIG_MMU) ||
|
||||
unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
|
||||
/*
|
||||
* we don't get EFAULT from MMU faults if we don't have
|
||||
* an MMU, but we might get them from range checking
|
||||
*/
|
||||
ret = op_ret;
|
||||
return ret;
|
||||
}
|
||||
double_lock_hb(hb1, hb2);
|
||||
op_ret = futex_atomic_op_inuser(op, uaddr2);
|
||||
if (unlikely(op_ret < 0)) {
|
||||
double_unlock_hb(hb1, hb2);
|
||||
|
||||
if (op_ret == -EFAULT) {
|
||||
ret = fault_in_user_writeable(uaddr2);
|
||||
if (ret)
|
||||
if (!IS_ENABLED(CONFIG_MMU) ||
|
||||
unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) {
|
||||
/*
|
||||
* we don't get EFAULT from MMU faults if we don't have
|
||||
* an MMU, but we might get them from range checking
|
||||
*/
|
||||
ret = op_ret;
|
||||
return ret;
|
||||
}
|
||||
|
||||
cond_resched();
|
||||
if (!(flags & FLAGS_SHARED))
|
||||
goto retry_private;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
plist_for_each_entry_safe(this, next, &hb1->chain, list) {
|
||||
if (futex_match (&this->key, &key1)) {
|
||||
if (this->pi_state || this->rt_waiter) {
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
this->wake(&wake_q, this);
|
||||
if (++ret >= nr_wake)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (op_ret > 0) {
|
||||
op_ret = 0;
|
||||
plist_for_each_entry_safe(this, next, &hb2->chain, list) {
|
||||
if (futex_match (&this->key, &key2)) {
|
||||
if (op_ret == -EFAULT) {
|
||||
ret = fault_in_user_writeable(uaddr2);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
cond_resched();
|
||||
if (!(flags & FLAGS_SHARED))
|
||||
goto retry_private;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
plist_for_each_entry_safe(this, next, &hb1->chain, list) {
|
||||
if (futex_match(&this->key, &key1)) {
|
||||
if (this->pi_state || this->rt_waiter) {
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
this->wake(&wake_q, this);
|
||||
if (++op_ret >= nr_wake2)
|
||||
if (++ret >= nr_wake)
|
||||
break;
|
||||
}
|
||||
}
|
||||
ret += op_ret;
|
||||
}
|
||||
|
||||
if (op_ret > 0) {
|
||||
op_ret = 0;
|
||||
plist_for_each_entry_safe(this, next, &hb2->chain, list) {
|
||||
if (futex_match(&this->key, &key2)) {
|
||||
if (this->pi_state || this->rt_waiter) {
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
this->wake(&wake_q, this);
|
||||
if (++op_ret >= nr_wake2)
|
||||
break;
|
||||
}
|
||||
}
|
||||
ret += op_ret;
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
double_unlock_hb(hb1, hb2);
|
||||
double_unlock_hb(hb1, hb2);
|
||||
}
|
||||
wake_up_q(&wake_q);
|
||||
return ret;
|
||||
}
|
||||
@@ -334,23 +334,12 @@ out_unlock:
static long futex_wait_restart(struct restart_block *restart);

/**
* futex_wait_queue() - futex_queue() and wait for wakeup, timeout, or signal
* @hb: the futex hash bucket, must be locked by the caller
* futex_do_wait() - wait for wakeup, timeout, or signal
* @q: the futex_q to queue up on
* @timeout: the prepared hrtimer_sleeper, or null for no timeout
*/
void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
struct hrtimer_sleeper *timeout)
void futex_do_wait(struct futex_q *q, struct hrtimer_sleeper *timeout)
{
/*
* The task state is guaranteed to be set before another task can
* wake it. set_current_state() is implemented using smp_store_mb() and
* futex_queue() calls spin_unlock() upon completion, both serializing
* access to the hash list and forcing another memory barrier.
*/
set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
futex_queue(q, hb, current);

/* Arm the timer */
if (timeout)
hrtimer_sleeper_start_expires(timeout, HRTIMER_MODE_ABS);
@@ -412,11 +401,16 @@ int futex_unqueue_multiple(struct futex_vector *v, int count)
|
||||
*/
|
||||
int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
|
||||
{
|
||||
struct futex_hash_bucket *hb;
|
||||
bool retry = false;
|
||||
int ret, i;
|
||||
u32 uval;
|
||||
|
||||
/*
|
||||
* Make sure to have a reference on the private_hash such that we
|
||||
* don't block on rehash after changing the task state below.
|
||||
*/
|
||||
guard(private_hash)();
|
||||
|
||||
/*
|
||||
* Enqueuing multiple futexes is tricky, because we need to enqueue
|
||||
* each futex on the list before dealing with the next one to avoid
|
||||
@@ -451,20 +445,24 @@ retry:
|
||||
struct futex_q *q = &vs[i].q;
|
||||
u32 val = vs[i].w.val;
|
||||
|
||||
hb = futex_q_lock(q);
|
||||
ret = futex_get_value_locked(&uval, uaddr);
|
||||
if (1) {
|
||||
CLASS(hb, hb)(&q->key);
|
||||
|
||||
if (!ret && uval == val) {
|
||||
/*
|
||||
* The bucket lock can't be held while dealing with the
|
||||
* next futex. Queue each futex at this moment so hb can
|
||||
* be unlocked.
|
||||
*/
|
||||
futex_queue(q, hb, current);
|
||||
continue;
|
||||
futex_q_lock(q, hb);
|
||||
ret = futex_get_value_locked(&uval, uaddr);
|
||||
|
||||
if (!ret && uval == val) {
|
||||
/*
|
||||
* The bucket lock can't be held while dealing with the
|
||||
* next futex. Queue each futex at this moment so hb can
|
||||
* be unlocked.
|
||||
*/
|
||||
futex_queue(q, hb, current);
|
||||
continue;
|
||||
}
|
||||
|
||||
futex_q_unlock(hb);
|
||||
}
|
||||
|
||||
futex_q_unlock(hb);
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
/*
|
||||
@@ -578,7 +576,8 @@ int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
* @val: the expected value
* @flags: futex flags (FLAGS_SHARED, etc.)
* @q: the associated futex_q
* @hb: storage for hash_bucket pointer to be returned to caller
* @key2: the second futex_key if used for requeue PI
* @task: Task queueing this futex
*
* Setup the futex_q and locate the hash_bucket. Get the futex value and
* compare it with the expected value. Handle atomic faults internally.
@@ -586,10 +585,12 @@ int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
*
* Return:
* - 0 - uaddr contains val and hb has been locked;
* - <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
* - <0 - On error and the hb is unlocked. A possible reason: the uaddr can not
* be read, does not contain the expected value or is not properly aligned.
*/
int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
struct futex_q *q, struct futex_hash_bucket **hb)
struct futex_q *q, union futex_key *key2,
struct task_struct *task)
{
u32 uval;
int ret;
@@ -618,26 +619,45 @@ retry:
|
||||
return ret;
|
||||
|
||||
retry_private:
|
||||
*hb = futex_q_lock(q);
|
||||
if (1) {
|
||||
CLASS(hb, hb)(&q->key);
|
||||
|
||||
ret = futex_get_value_locked(&uval, uaddr);
|
||||
futex_q_lock(q, hb);
|
||||
|
||||
if (ret) {
|
||||
futex_q_unlock(*hb);
|
||||
ret = futex_get_value_locked(&uval, uaddr);
|
||||
|
||||
ret = get_user(uval, uaddr);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (ret) {
|
||||
futex_q_unlock(hb);
|
||||
|
||||
if (!(flags & FLAGS_SHARED))
|
||||
goto retry_private;
|
||||
ret = get_user(uval, uaddr);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
goto retry;
|
||||
}
|
||||
if (!(flags & FLAGS_SHARED))
|
||||
goto retry_private;
|
||||
|
||||
if (uval != val) {
|
||||
futex_q_unlock(*hb);
|
||||
ret = -EWOULDBLOCK;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (uval != val) {
|
||||
futex_q_unlock(hb);
|
||||
return -EWOULDBLOCK;
|
||||
}
|
||||
|
||||
if (key2 && futex_match(&q->key, key2)) {
|
||||
futex_q_unlock(hb);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* The task state is guaranteed to be set before another task can
|
||||
* wake it. set_current_state() is implemented using smp_store_mb() and
|
||||
* futex_queue() calls spin_unlock() upon completion, both serializing
|
||||
* access to the hash list and forcing another memory barrier.
|
||||
*/
|
||||
if (task == current)
|
||||
set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
|
||||
futex_queue(q, hb, task);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@@ -647,7 +667,6 @@ int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
struct hrtimer_sleeper *to, u32 bitset)
{
struct futex_q q = futex_q_init;
struct futex_hash_bucket *hb;
int ret;

if (!bitset)
@@ -660,12 +679,12 @@ retry:
* Prepare to wait on uaddr. On success, it holds hb->lock and q
* is initialized.
*/
ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
ret = futex_wait_setup(uaddr, val, flags, &q, NULL, current);
if (ret)
return ret;

/* futex_queue and wait for wakeup, timeout, or a signal. */
futex_wait_queue(hb, &q, to);
futex_do_wait(&q, to);

/* If we were woken (and unqueued), we succeeded, whatever. */
if (!futex_unqueue(&q))
|
||||
@@ -219,6 +219,7 @@ static DECLARE_BITMAP(list_entries_in_use, MAX_LOCKDEP_ENTRIES);
|
||||
static struct hlist_head lock_keys_hash[KEYHASH_SIZE];
|
||||
unsigned long nr_lock_classes;
|
||||
unsigned long nr_zapped_classes;
|
||||
unsigned long nr_dynamic_keys;
|
||||
unsigned long max_lock_class_idx;
|
||||
struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
|
||||
DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS);
|
||||
@@ -1238,6 +1239,7 @@ void lockdep_register_key(struct lock_class_key *key)
|
||||
goto out_unlock;
|
||||
}
|
||||
hlist_add_head_rcu(&key->hash_entry, hash_head);
|
||||
nr_dynamic_keys++;
|
||||
out_unlock:
|
||||
graph_unlock();
|
||||
restore_irqs:
|
||||
@@ -1976,41 +1978,6 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
|
||||
print_circular_bug_entry(entry, depth);
|
||||
}
|
||||
|
||||
/*
|
||||
* We are about to add A -> B into the dependency graph, and in __bfs() a
|
||||
* strong dependency path A -> .. -> B is found: hlock_class equals
|
||||
* entry->class.
|
||||
*
|
||||
* If A -> .. -> B can replace A -> B in any __bfs() search (means the former
|
||||
* is _stronger_ than or equal to the latter), we consider A -> B as redundant.
|
||||
* For example if A -> .. -> B is -(EN)-> (i.e. A -(E*)-> .. -(*N)-> B), and A
|
||||
* -> B is -(ER)-> or -(EN)->, then we don't need to add A -> B into the
|
||||
* dependency graph, as any strong path ..-> A -> B ->.. we can get with
|
||||
* having dependency A -> B, we could already get a equivalent path ..-> A ->
|
||||
* .. -> B -> .. with A -> .. -> B. Therefore A -> B is redundant.
|
||||
*
|
||||
* We need to make sure both the start and the end of A -> .. -> B is not
|
||||
* weaker than A -> B. For the start part, please see the comment in
|
||||
* check_redundant(). For the end part, we need:
|
||||
*
|
||||
* Either
|
||||
*
|
||||
* a) A -> B is -(*R)-> (everything is not weaker than that)
|
||||
*
|
||||
* or
|
||||
*
|
||||
* b) A -> .. -> B is -(*N)-> (nothing is stronger than this)
|
||||
*
|
||||
*/
|
||||
static inline bool hlock_equal(struct lock_list *entry, void *data)
|
||||
{
|
||||
struct held_lock *hlock = (struct held_lock *)data;
|
||||
|
||||
return hlock_class(hlock) == entry->class && /* Found A -> .. -> B */
|
||||
(hlock->read == 2 || /* A -> B is -(*R)-> */
|
||||
!entry->only_xr); /* A -> .. -> B is -(*N)-> */
|
||||
}
|
||||
|
||||
/*
|
||||
* We are about to add B -> A into the dependency graph, and in __bfs() a
|
||||
* strong dependency path A -> .. -> B is found: hlock_class equals
|
||||
@@ -2915,6 +2882,41 @@ static inline bool usage_skip(struct lock_list *entry, void *mask)
|
||||
#endif /* CONFIG_TRACE_IRQFLAGS */
|
||||
|
||||
#ifdef CONFIG_LOCKDEP_SMALL
|
||||
/*
|
||||
* We are about to add A -> B into the dependency graph, and in __bfs() a
|
||||
* strong dependency path A -> .. -> B is found: hlock_class equals
|
||||
* entry->class.
|
||||
*
|
||||
* If A -> .. -> B can replace A -> B in any __bfs() search (means the former
|
||||
* is _stronger_ than or equal to the latter), we consider A -> B as redundant.
|
||||
* For example if A -> .. -> B is -(EN)-> (i.e. A -(E*)-> .. -(*N)-> B), and A
|
||||
* -> B is -(ER)-> or -(EN)->, then we don't need to add A -> B into the
|
||||
* dependency graph, as any strong path ..-> A -> B ->.. we can get with
|
||||
* having dependency A -> B, we could already get a equivalent path ..-> A ->
|
||||
* .. -> B -> .. with A -> .. -> B. Therefore A -> B is redundant.
|
||||
*
|
||||
* We need to make sure both the start and the end of A -> .. -> B is not
|
||||
* weaker than A -> B. For the start part, please see the comment in
|
||||
* check_redundant(). For the end part, we need:
|
||||
*
|
||||
* Either
|
||||
*
|
||||
* a) A -> B is -(*R)-> (everything is not weaker than that)
|
||||
*
|
||||
* or
|
||||
*
|
||||
* b) A -> .. -> B is -(*N)-> (nothing is stronger than this)
|
||||
*
|
||||
*/
|
||||
static inline bool hlock_equal(struct lock_list *entry, void *data)
|
||||
{
|
||||
struct held_lock *hlock = (struct held_lock *)data;
|
||||
|
||||
return hlock_class(hlock) == entry->class && /* Found A -> .. -> B */
|
||||
(hlock->read == 2 || /* A -> B is -(*R)-> */
|
||||
!entry->only_xr); /* A -> .. -> B is -(*N)-> */
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that the dependency graph starting at <src> can lead to
|
||||
* <target> or not. If it can, <src> -> <target> dependency is already
|
||||
@@ -5101,6 +5103,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
|
||||
lockevent_inc(lockdep_nocheck);
|
||||
}
|
||||
|
||||
if (DEBUG_LOCKS_WARN_ON(subclass >= MAX_LOCKDEP_SUBCLASSES))
|
||||
return 0;
|
||||
|
||||
if (subclass < NR_LOCKDEP_CACHING_CLASSES)
|
||||
class = lock->class_cache[subclass];
|
||||
/*
|
||||
@@ -6606,6 +6611,7 @@ void lockdep_unregister_key(struct lock_class_key *key)
|
||||
pf = get_pending_free();
|
||||
__lockdep_free_key_range(pf, key, 1);
|
||||
need_callback = prepare_call_rcu_zapped(pf);
|
||||
nr_dynamic_keys--;
|
||||
}
|
||||
lockdep_unlock();
|
||||
raw_local_irq_restore(flags);
|
||||
|
||||
@@ -138,6 +138,7 @@ extern unsigned long nr_lock_classes;
|
||||
extern unsigned long nr_zapped_classes;
|
||||
extern unsigned long nr_zapped_lock_chains;
|
||||
extern unsigned long nr_list_entries;
|
||||
extern unsigned long nr_dynamic_keys;
|
||||
long lockdep_next_lockchain(long i);
|
||||
unsigned long lock_chain_count(void);
|
||||
extern unsigned long nr_stack_trace_entries;
|
||||
|
||||
@@ -286,6 +286,8 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
|
||||
#endif
|
||||
seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
|
||||
nr_lock_classes, MAX_LOCKDEP_KEYS);
|
||||
seq_printf(m, " dynamic-keys: %11lu\n",
|
||||
nr_dynamic_keys);
|
||||
seq_printf(m, " direct dependencies: %11lu [max: %lu]\n",
|
||||
nr_list_entries, MAX_LOCKDEP_ENTRIES);
|
||||
seq_printf(m, " indirect dependencies: %11lu\n",
|
||||
|
||||
@@ -52,6 +52,7 @@
|
||||
#include <linux/user_namespace.h>
|
||||
#include <linux/time_namespace.h>
|
||||
#include <linux/binfmts.h>
|
||||
#include <linux/futex.h>
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/autogroup.h>
|
||||
@@ -2820,6 +2821,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
|
||||
return -EINVAL;
|
||||
error = posixtimer_create_prctl(arg2);
|
||||
break;
|
||||
case PR_FUTEX_HASH:
|
||||
error = futex_hash_prctl(arg2, arg3, arg4);
|
||||
break;
|
||||
default:
|
||||
trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5);
|
||||
error = -EINVAL;
|
||||
|
||||
18  mm/nommu.c
@@ -200,7 +200,23 @@ void *vmalloc_noprof(unsigned long size)
}
EXPORT_SYMBOL(vmalloc_noprof);

void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __weak __alias(__vmalloc_noprof);
/*
* vmalloc_huge_node - allocate virtually contiguous memory, on a node
*
* @size: allocation size
* @gfp_mask: flags for the page level allocator
* @node: node to use for allocation or NUMA_NO_NODE
*
* Allocate enough pages to cover @size from the page level
* allocator and map them into contiguous kernel virtual space.
*
* Due to NOMMU implications the node argument and HUGE page attribute is
* ignored.
*/
void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node)
{
return __vmalloc_noprof(size, gfp_mask);
}

/*
* vzalloc - allocate virtually contiguous memory with zero fill

11  mm/vmalloc.c
@@ -3944,9 +3944,10 @@ void *vmalloc_noprof(unsigned long size)
EXPORT_SYMBOL(vmalloc_noprof);

/**
* vmalloc_huge - allocate virtually contiguous memory, allow huge pages
* vmalloc_huge_node - allocate virtually contiguous memory, allow huge pages
* @size: allocation size
* @gfp_mask: flags for the page level allocator
* @node: node to use for allocation or NUMA_NO_NODE
*
* Allocate enough pages to cover @size from the page level
* allocator and map them into contiguous kernel virtual space.
@@ -3955,13 +3956,13 @@ EXPORT_SYMBOL(vmalloc_noprof);
*
* Return: pointer to the allocated memory or %NULL on error
*/
void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask)
void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node)
{
return __vmalloc_node_range_noprof(size, 1, VMALLOC_START, VMALLOC_END,
gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
NUMA_NO_NODE, __builtin_return_address(0));
gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
node, __builtin_return_address(0));
}
EXPORT_SYMBOL_GPL(vmalloc_huge_noprof);
EXPORT_SYMBOL_GPL(vmalloc_huge_node_noprof);

/**
* vzalloc - allocate virtually contiguous memory with zero fill
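As a quick illustration of the renamed API (not part of the commit): a caller that wants a hugepage-friendly allocation placed on a specific NUMA node passes the node instead of NUMA_NO_NODE. The sketch below assumes the usual alloc_hooks wrapper vmalloc_huge_node() exists for vmalloc_huge_node_noprof() (that wrapper is not shown in the hunks above), and numa_buf_alloc() is a hypothetical helper used only for illustration.

#include <linux/gfp.h>
#include <linux/numa.h>
#include <linux/vmalloc.h>

/* Illustrative only: allocate a buffer on @node that may use huge mappings. */
static void *numa_buf_alloc(unsigned long size, int node)
{
	/* node == NUMA_NO_NODE lets the allocator pick any node. */
	return vmalloc_huge_node(size, GFP_KERNEL, node);
}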
|
||||
|
||||
@@ -230,7 +230,7 @@ struct prctl_mm_map {
# define PR_PAC_APDBKEY (1UL << 3)
# define PR_PAC_APGAKEY (1UL << 4)

/* Tagged user address controls for arm64 */
/* Tagged user address controls for arm64 and RISC-V */
#define PR_SET_TAGGED_ADDR_CTRL 55
#define PR_GET_TAGGED_ADDR_CTRL 56
# define PR_TAGGED_ADDR_ENABLE (1UL << 0)
@@ -244,6 +244,9 @@ struct prctl_mm_map {
# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT)
/* Unused; kept only for source compatibility */
# define PR_MTE_TCF_SHIFT 1
/* RISC-V pointer masking tag length */
# define PR_PMLEN_SHIFT 24
# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT)

/* Control reclaim behavior when allocating memory */
#define PR_SET_IO_FLUSHER 57
@@ -328,4 +331,44 @@ struct prctl_mm_map {
# define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */
# define PR_PPC_DEXCR_CTRL_MASK 0x1f

/*
* Get the current shadow stack configuration for the current thread,
* this will be the value configured via PR_SET_SHADOW_STACK_STATUS.
*/
#define PR_GET_SHADOW_STACK_STATUS 74

/*
* Set the current shadow stack configuration. Enabling the shadow
* stack will cause a shadow stack to be allocated for the thread.
*/
#define PR_SET_SHADOW_STACK_STATUS 75
# define PR_SHADOW_STACK_ENABLE (1UL << 0)
# define PR_SHADOW_STACK_WRITE (1UL << 1)
# define PR_SHADOW_STACK_PUSH (1UL << 2)

/*
* Prevent further changes to the specified shadow stack
* configuration. All bits may be locked via this call, including
* undefined bits.
*/
#define PR_LOCK_SHADOW_STACK_STATUS 76

/*
* Controls the mode of timer_create() for CRIU restore operations.
* Enabling this allows CRIU to restore timers with explicit IDs.
*
* Don't use for normal operations as the result might be undefined.
*/
#define PR_TIMER_CREATE_RESTORE_IDS 77
# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0
# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
# define PR_TIMER_CREATE_RESTORE_IDS_GET 2

/* FUTEX hash management */
#define PR_FUTEX_HASH 78
# define PR_FUTEX_HASH_SET_SLOTS 1
# define FH_FLAG_IMMUTABLE (1ULL << 0)
# define PR_FUTEX_HASH_GET_SLOTS 2
# define PR_FUTEX_HASH_GET_IMMUTABLE 3

#endif /* _LINUX_PRCTL_H */
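For context, the new PR_FUTEX_HASH operations are driven from user space the same way the perf-bench code further down does it. A minimal sketch, assuming kernel headers that already carry the definitions above; the bucket count of 16 is an arbitrary example value and error handling is reduced to perror():

#include <stdio.h>
#include <linux/prctl.h>
#include <sys/prctl.h>

int main(void)
{
	/* Ask for 16 private futex hash buckets for this process. */
	if (prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, 16, 0UL) != 0) {
		perror("PR_FUTEX_HASH_SET_SLOTS");
		return 1;
	}

	/* Read back how many buckets are currently in use. */
	printf("futex hash buckets: %d\n",
	       prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS));
	return 0;
}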
|
||||
|
||||
@@ -3,6 +3,7 @@ perf-bench-y += sched-pipe.o
|
||||
perf-bench-y += sched-seccomp-notify.o
|
||||
perf-bench-y += syscall.o
|
||||
perf-bench-y += mem-functions.o
|
||||
perf-bench-y += futex.o
|
||||
perf-bench-y += futex-hash.o
|
||||
perf-bench-y += futex-wake.o
|
||||
perf-bench-y += futex-wake-parallel.o
|
||||
|
||||
@@ -18,9 +18,11 @@
|
||||
#include <stdlib.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/prctl.h>
|
||||
#include <linux/zalloc.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/prctl.h>
|
||||
#include <perf/cpumap.h>
|
||||
|
||||
#include "../util/mutex.h"
|
||||
@@ -50,9 +52,12 @@ struct worker {
|
||||
static struct bench_futex_parameters params = {
|
||||
.nfutexes = 1024,
|
||||
.runtime = 10,
|
||||
.nbuckets = -1,
|
||||
};
|
||||
|
||||
static const struct option options[] = {
|
||||
OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"),
|
||||
OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"),
|
||||
OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"),
|
||||
OPT_UINTEGER('r', "runtime", ¶ms.runtime, "Specify runtime (in seconds)"),
|
||||
OPT_UINTEGER('f', "futexes", ¶ms.nfutexes, "Specify amount of futexes per threads"),
|
||||
@@ -118,6 +123,7 @@ static void print_summary(void)
|
||||
printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
|
||||
!params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
|
||||
(int)bench__runtime.tv_sec);
|
||||
futex_print_nbuckets(¶ms);
|
||||
}
|
||||
|
||||
int bench_futex_hash(int argc, const char **argv)
|
||||
@@ -161,6 +167,7 @@ int bench_futex_hash(int argc, const char **argv)
|
||||
|
||||
if (!params.fshared)
|
||||
futex_flag = FUTEX_PRIVATE_FLAG;
|
||||
futex_set_nbuckets_param(¶ms);
|
||||
|
||||
printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
|
||||
getpid(), params.nthreads, params.nfutexes, params.fshared ? "shared":"private", params.runtime);
|
||||
|
||||
@@ -41,10 +41,13 @@ static struct stats throughput_stats;
|
||||
static struct cond thread_parent, thread_worker;
|
||||
|
||||
static struct bench_futex_parameters params = {
|
||||
.nbuckets = -1,
|
||||
.runtime = 10,
|
||||
};
|
||||
|
||||
static const struct option options[] = {
|
||||
OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"),
|
||||
OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"),
|
||||
OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"),
|
||||
OPT_UINTEGER('r', "runtime", ¶ms.runtime, "Specify runtime (in seconds)"),
|
||||
OPT_BOOLEAN( 'M', "multi", ¶ms.multi, "Use multiple futexes"),
|
||||
@@ -67,6 +70,7 @@ static void print_summary(void)
|
||||
printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
|
||||
!params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
|
||||
(int)bench__runtime.tv_sec);
|
||||
futex_print_nbuckets(¶ms);
|
||||
}
|
||||
|
||||
static void toggle_done(int sig __maybe_unused,
|
||||
@@ -203,6 +207,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
|
||||
mutex_init(&thread_lock);
|
||||
cond_init(&thread_parent);
|
||||
cond_init(&thread_worker);
|
||||
futex_set_nbuckets_param(¶ms);
|
||||
|
||||
threads_starting = params.nthreads;
|
||||
gettimeofday(&bench__start, NULL);
|
||||
|
||||
@@ -42,6 +42,7 @@ static unsigned int threads_starting;
|
||||
static int futex_flag = 0;
|
||||
|
||||
static struct bench_futex_parameters params = {
|
||||
.nbuckets = -1,
|
||||
/*
|
||||
* How many tasks to requeue at a time.
|
||||
* Default to 1 in order to make the kernel work more.
|
||||
@@ -50,6 +51,8 @@ static struct bench_futex_parameters params = {
|
||||
};
|
||||
|
||||
static const struct option options[] = {
|
||||
OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"),
|
||||
OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"),
|
||||
OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"),
|
||||
OPT_UINTEGER('q', "nrequeue", ¶ms.nrequeue, "Specify amount of threads to requeue at once"),
|
||||
OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"),
|
||||
@@ -77,6 +80,7 @@ static void print_summary(void)
|
||||
params.nthreads,
|
||||
requeuetime_avg / USEC_PER_MSEC,
|
||||
rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
|
||||
futex_print_nbuckets(¶ms);
|
||||
}
|
||||
|
||||
static void *workerfn(void *arg __maybe_unused)
|
||||
@@ -204,6 +208,8 @@ int bench_futex_requeue(int argc, const char **argv)
|
||||
if (params.broadcast)
|
||||
params.nrequeue = params.nthreads;
|
||||
|
||||
futex_set_nbuckets_param(¶ms);
|
||||
|
||||
printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %s%p), "
|
||||
"%d at a time.\n\n", getpid(), params.nthreads,
|
||||
params.fshared ? "shared":"private", &futex1,
|
||||
|
||||
@@ -57,9 +57,13 @@ static struct stats waketime_stats, wakeup_stats;
|
||||
static unsigned int threads_starting;
|
||||
static int futex_flag = 0;
|
||||
|
||||
static struct bench_futex_parameters params;
|
||||
static struct bench_futex_parameters params = {
|
||||
.nbuckets = -1,
|
||||
};
|
||||
|
||||
static const struct option options[] = {
|
||||
OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"),
|
||||
OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"),
|
||||
OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"),
|
||||
OPT_UINTEGER('w', "nwakers", ¶ms.nwakes, "Specify amount of waking threads"),
|
||||
OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"),
|
||||
@@ -218,6 +222,7 @@ static void print_summary(void)
|
||||
params.nthreads,
|
||||
waketime_avg / USEC_PER_MSEC,
|
||||
rel_stddev_stats(waketime_stddev, waketime_avg));
|
||||
futex_print_nbuckets(¶ms);
|
||||
}
|
||||
|
||||
|
||||
@@ -291,6 +296,8 @@ int bench_futex_wake_parallel(int argc, const char **argv)
|
||||
if (!params.fshared)
|
||||
futex_flag = FUTEX_PRIVATE_FLAG;
|
||||
|
||||
futex_set_nbuckets_param(¶ms);
|
||||
|
||||
printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
|
||||
"futex %p), %d threads waking up %d at a time.\n\n",
|
||||
getpid(), params.nthreads, params.fshared ? "shared":"private",
|
||||
|
||||
@@ -42,6 +42,7 @@ static unsigned int threads_starting;
|
||||
static int futex_flag = 0;
|
||||
|
||||
static struct bench_futex_parameters params = {
|
||||
.nbuckets = -1,
|
||||
/*
|
||||
* How many wakeups to do at a time.
|
||||
* Default to 1 in order to make the kernel work more.
|
||||
@@ -50,6 +51,8 @@ static struct bench_futex_parameters params = {
|
||||
};
|
||||
|
||||
static const struct option options[] = {
|
||||
OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"),
|
||||
OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"),
|
||||
OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"),
|
||||
OPT_UINTEGER('w', "nwakes", ¶ms.nwakes, "Specify amount of threads to wake at once"),
|
||||
OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"),
|
||||
@@ -93,6 +96,7 @@ static void print_summary(void)
|
||||
params.nthreads,
|
||||
waketime_avg / USEC_PER_MSEC,
|
||||
rel_stddev_stats(waketime_stddev, waketime_avg));
|
||||
futex_print_nbuckets(¶ms);
|
||||
}
|
||||
|
||||
static void block_threads(pthread_t *w, struct perf_cpu_map *cpu)
|
||||
|
||||
67
tools/perf/bench/futex.c
Normal file
@@ -0,0 +1,67 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <err.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <linux/prctl.h>
|
||||
#include <sys/prctl.h>
|
||||
|
||||
#include "futex.h"
|
||||
|
||||
void futex_set_nbuckets_param(struct bench_futex_parameters *params)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
if (params->nbuckets < 0)
|
||||
return;
|
||||
|
||||
flags = params->buckets_immutable ? FH_FLAG_IMMUTABLE : 0;
|
||||
ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, params->nbuckets, flags);
|
||||
if (ret) {
|
||||
printf("Requesting %d hash buckets failed: %d/%m\n",
|
||||
params->nbuckets, ret);
|
||||
err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)");
|
||||
}
|
||||
}
|
||||
|
||||
void futex_print_nbuckets(struct bench_futex_parameters *params)
|
||||
{
|
||||
char *futex_hash_mode;
|
||||
int ret;
|
||||
|
||||
ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS);
|
||||
if (params->nbuckets >= 0) {
|
||||
if (ret != params->nbuckets) {
|
||||
if (ret < 0) {
|
||||
printf("Can't query number of buckets: %m\n");
|
||||
err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)");
|
||||
}
|
||||
printf("Requested number of hash buckets does not currently used.\n");
|
||||
printf("Requested: %d in usage: %d\n", params->nbuckets, ret);
|
||||
err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)");
|
||||
}
|
||||
if (params->nbuckets == 0) {
|
||||
ret = asprintf(&futex_hash_mode, "Futex hashing: global hash");
|
||||
} else {
|
||||
ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE);
|
||||
if (ret < 0) {
|
||||
printf("Can't check if the hash is immutable: %m\n");
|
||||
err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)");
|
||||
}
|
||||
ret = asprintf(&futex_hash_mode, "Futex hashing: %d hash buckets %s",
|
||||
params->nbuckets,
|
||||
ret == 1 ? "(immutable)" : "");
|
||||
}
|
||||
} else {
|
||||
if (ret <= 0) {
|
||||
ret = asprintf(&futex_hash_mode, "Futex hashing: global hash");
|
||||
} else {
|
||||
ret = asprintf(&futex_hash_mode, "Futex hashing: auto resized to %d buckets",
|
||||
ret);
|
||||
}
|
||||
}
|
||||
if (ret < 0)
|
||||
err(EXIT_FAILURE, "ENOMEM, futex_hash_mode");
|
||||
printf("%s\n", futex_hash_mode);
|
||||
free(futex_hash_mode);
|
||||
}
|
||||
@@ -25,6 +25,8 @@ struct bench_futex_parameters {
|
||||
unsigned int nfutexes;
|
||||
unsigned int nwakes;
|
||||
unsigned int nrequeue;
|
||||
int nbuckets;
|
||||
bool buckets_immutable;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -143,4 +145,7 @@ futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2,
|
||||
val, opflags);
|
||||
}
|
||||
|
||||
void futex_set_nbuckets_param(struct bench_futex_parameters *params);
|
||||
void futex_print_nbuckets(struct bench_futex_parameters *params);
|
||||
|
||||
#endif /* _FUTEX_H */
|
||||
|
||||
@@ -1,11 +1,13 @@
# SPDX-License-Identifier: GPL-2.0-only
futex_numa_mpol
futex_priv_hash
futex_requeue
futex_requeue_pi
futex_requeue_pi_mismatched_ops
futex_requeue_pi_signal_restart
futex_wait
futex_wait_private_mapped_file
futex_wait_timeout
futex_wait_uninitialized_heap
futex_wait_wouldblock
futex_wait
futex_requeue
futex_waitv

@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
INCLUDES := -I../include -I../../ $(KHDR_INCLUDES)
CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread $(INCLUDES) $(KHDR_INCLUDES)
LDLIBS := -lpthread -lrt
LDLIBS := -lpthread -lrt -lnuma

LOCAL_HDRS := \
../include/futextest.h \
@@ -17,7 +17,10 @@ TEST_GEN_PROGS := \
futex_wait_private_mapped_file \
futex_wait \
futex_requeue \
futex_waitv
futex_priv_hash \
futex_numa_mpol \
futex_waitv \
futex_numa

TEST_PROGS := run.sh
|
||||
|
||||
262
tools/testing/selftests/futex/functional/futex_numa.c
Normal file
@@ -0,0 +1,262 @@
// SPDX-License-Identifier: GPL-2.0

#include <pthread.h>
#include <sys/shm.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <stdbool.h>
#include <time.h>
#include <assert.h>
#include "logging.h"
#include "futextest.h"
#include "futex2test.h"

typedef u_int32_t u32;
typedef int32_t s32;
typedef u_int64_t u64;

static unsigned int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE);
static int fnode = FUTEX_NO_NODE;

/* fairly stupid test-and-set lock with a waiter flag */

#define N_LOCK		0x0000001
#define N_WAITERS	0x0001000

struct futex_numa_32 {
	union {
		u64 full;
		struct {
			u32 val;
			u32 node;
		};
	};
};

void futex_numa_32_lock(struct futex_numa_32 *lock)
{
	for (;;) {
		struct futex_numa_32 new, old = {
			.full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED),
		};

		for (;;) {
			new = old;
			if (old.val == 0) {
				/* no waiter, no lock -> first lock, set no-node */
				new.node = fnode;
			}
			if (old.val & N_LOCK) {
				/* contention, set waiter */
				new.val |= N_WAITERS;
			}
			new.val |= N_LOCK;

			/* nothing changed, ready to block */
			if (old.full == new.full)
				break;

			/*
			 * Use u64 cmpxchg to set the futex value and node in a
			 * consistent manner.
			 */
			if (__atomic_compare_exchange_n(&lock->full,
							&old.full, new.full,
							/* .weak */ false,
							__ATOMIC_ACQUIRE,
							__ATOMIC_RELAXED)) {

				/* if we just set N_LOCK, we own it */
				if (!(old.val & N_LOCK))
					return;

				/* go block */
				break;
			}
		}

		futex2_wait(lock, new.val, fflags, NULL, 0);
	}
}

void futex_numa_32_unlock(struct futex_numa_32 *lock)
{
	u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE);
	assert((s32)val >= 0);
	if (val & N_WAITERS) {
		int woken = futex2_wake(lock, 1, fflags);
		assert(val == N_WAITERS);
		if (!woken) {
			__atomic_compare_exchange_n(&lock->val, &val, 0U,
						    false, __ATOMIC_RELAXED,
						    __ATOMIC_RELAXED);
		}
	}
}

static long nanos = 50000;

struct thread_args {
	pthread_t tid;
	volatile int *done;
	struct futex_numa_32 *lock;
	int val;
	int *val1, *val2;
	int node;
};

static void *threadfn(void *_arg)
{
	struct thread_args *args = _arg;
	struct timespec ts = {
		.tv_nsec = nanos,
	};
	int node;

	while (!*args->done) {

		futex_numa_32_lock(args->lock);
		args->val++;

		assert(*args->val1 == *args->val2);
		(*args->val1)++;
		nanosleep(&ts, NULL);
		(*args->val2)++;

		node = args->lock->node;
		futex_numa_32_unlock(args->lock);

		if (node != args->node) {
			args->node = node;
			printf("node: %d\n", node);
		}

		nanosleep(&ts, NULL);
	}

	return NULL;
}

static void *contendfn(void *_arg)
{
	struct thread_args *args = _arg;

	while (!*args->done) {
		/*
		 * futex2_wait() will take hb-lock, verify *var == val and
		 * queue/abort. By knowingly setting val 'wrong' this will
		 * abort and thereby generate hb-lock contention.
		 */
		futex2_wait(&args->lock->val, ~0U, fflags, NULL, 0);
		args->val++;
	}

	return NULL;
}

static volatile int done = 0;
static struct futex_numa_32 lock = { .val = 0, };
static int val1, val2;

int main(int argc, char *argv[])
{
	struct thread_args *tas[512], *cas[512];
	int c, t, threads = 2, contenders = 0;
	int sleeps = 10;
	int total = 0;

	while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) {
		switch (c) {
		case 'c':
			contenders = atoi(optarg);
			break;
		case 't':
			threads = atoi(optarg);
			break;
		case 's':
			sleeps = atoi(optarg);
			break;
		case 'n':
			nanos = atoi(optarg);
			break;
		case 'N':
			fflags |= FUTEX2_NUMA;
			if (optarg)
				fnode = atoi(optarg);
			break;
		default:
			exit(1);
			break;
		}
	}

	for (t = 0; t < contenders; t++) {
		struct thread_args *args = calloc(1, sizeof(*args));
		if (!args) {
			perror("thread_args");
			exit(-1);
		}

		args->done = &done;
		args->lock = &lock;
		args->val1 = &val1;
		args->val2 = &val2;
		args->node = -1;

		if (pthread_create(&args->tid, NULL, contendfn, args)) {
			perror("pthread_create");
			exit(-1);
		}

		cas[t] = args;
	}

	for (t = 0; t < threads; t++) {
		struct thread_args *args = calloc(1, sizeof(*args));
		if (!args) {
			perror("thread_args");
			exit(-1);
		}

		args->done = &done;
		args->lock = &lock;
		args->val1 = &val1;
		args->val2 = &val2;
		args->node = -1;

		if (pthread_create(&args->tid, NULL, threadfn, args)) {
			perror("pthread_create");
			exit(-1);
		}

		tas[t] = args;
	}

	sleep(sleeps);

	done = true;

	for (t = 0; t < threads; t++) {
		struct thread_args *args = tas[t];

		pthread_join(args->tid, NULL);
		total += args->val;
		// printf("tval: %d\n", args->val);
	}
	printf("total: %d\n", total);

	if (contenders) {
		total = 0;
		for (t = 0; t < contenders; t++) {
			struct thread_args *args = cas[t];

			pthread_join(args->tid, NULL);
			total += args->val;
			// printf("tval: %d\n", args->val);
		}
		printf("contenders: %d\n", total);
	}

	return 0;
}

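For readers new to the FUTEX2_NUMA ABI exercised above: the kernel expects the 32-bit futex word to be immediately followed by a second u32 holding the NUMA node id, and FUTEX_NO_NODE (-1) lets the kernel fill that word in on first use. A minimal waiter/waker sketch using struct futex_numa_32 and the futex2_wait()/futex2_wake() wrappers from this patch; the waiter() and waker() functions are illustrative only.

static struct futex_numa_32 f = { .val = 0, .node = FUTEX_NO_NODE };
static const unsigned int nflags = FUTEX2_SIZE_U32 | FUTEX2_PRIVATE | FUTEX2_NUMA;

static void waiter(void)
{
	/* Block while f.val is still 0; the kernel records the node on first use. */
	futex2_wait(&f, 0, nflags, NULL, 0);
}

static void waker(void)
{
	/* Publish the new value, then wake one waiter queued on f's node. */
	__atomic_store_n(&f.val, 1, __ATOMIC_RELEASE);
	futex2_wake(&f, 1, nflags);
}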
231	tools/testing/selftests/futex/functional/futex_numa_mpol.c	Normal file
@@ -0,0 +1,231 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2025 Sebastian Andrzej Siewior <bigeasy@linutronix.de>
 */

#define _GNU_SOURCE

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <numa.h>
#include <numaif.h>

#include <linux/futex.h>
#include <sys/mman.h>

#include "logging.h"
#include "futextest.h"
#include "futex2test.h"

#define MAX_THREADS	64

static pthread_barrier_t barrier_main;
static pthread_t threads[MAX_THREADS];

struct thread_args {
	void *futex_ptr;
	unsigned int flags;
	int result;
};

static struct thread_args thread_args[MAX_THREADS];

#ifndef FUTEX_NO_NODE
#define FUTEX_NO_NODE (-1)
#endif

#ifndef FUTEX2_MPOL
#define FUTEX2_MPOL	0x08
#endif

static void *thread_lock_fn(void *arg)
{
	struct thread_args *args = arg;
	int ret;

	pthread_barrier_wait(&barrier_main);
	ret = futex2_wait(args->futex_ptr, 0, args->flags, NULL, 0);
	args->result = ret;
	return NULL;
}

static void create_max_threads(void *futex_ptr)
{
	int i, ret;

	for (i = 0; i < MAX_THREADS; i++) {
		thread_args[i].futex_ptr = futex_ptr;
		thread_args[i].flags = FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA;
		thread_args[i].result = 0;
		ret = pthread_create(&threads[i], NULL, thread_lock_fn, &thread_args[i]);
		if (ret)
			ksft_exit_fail_msg("pthread_create failed\n");
	}
}

static void join_max_threads(void)
{
	int i, ret;

	for (i = 0; i < MAX_THREADS; i++) {
		ret = pthread_join(threads[i], NULL);
		if (ret)
			ksft_exit_fail_msg("pthread_join failed for thread %d\n", i);
	}
}

static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flags)
{
	int to_wake, ret, i, need_exit = 0;

	pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1);
	create_max_threads(futex_ptr);
	pthread_barrier_wait(&barrier_main);
	to_wake = MAX_THREADS;

	do {
		ret = futex2_wake(futex_ptr, to_wake, futex_flags);
		if (must_fail) {
			if (ret < 0)
				break;
			ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n",
					   to_wake, futex_flags);
		}
		if (ret < 0) {
			ksft_exit_fail_msg("Failed futex2_wake(%d, 0x%x): %m\n",
					   to_wake, futex_flags);
		}
		if (!ret)
			usleep(50);
		to_wake -= ret;

	} while (to_wake);
	join_max_threads();

	for (i = 0; i < MAX_THREADS; i++) {
		if (must_fail && thread_args[i].result != -1) {
			ksft_print_msg("Thread %d should fail but succeeded (%d)\n",
				       i, thread_args[i].result);
			need_exit = 1;
		}
		if (!must_fail && thread_args[i].result != 0) {
			ksft_print_msg("Thread %d failed (%d)\n", i, thread_args[i].result);
			need_exit = 1;
		}
	}
	if (need_exit)
		ksft_exit_fail_msg("Aborting due to earlier errors.\n");
}

static void test_futex(void *futex_ptr, int must_fail)
{
	__test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA);
}

static void test_futex_mpol(void *futex_ptr, int must_fail)
{
	__test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL);
}

static void usage(char *prog)
{
	printf("Usage: %s\n", prog);
	printf("  -c    Use color\n");
	printf("  -h    Display this help message\n");
	printf("  -v L  Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
	       VQUIET, VCRITICAL, VINFO);
}

int main(int argc, char *argv[])
{
	struct futex32_numa *futex_numa;
	int mem_size, i;
	void *futex_ptr;
	char c;

	while ((c = getopt(argc, argv, "chv:")) != -1) {
		switch (c) {
		case 'c':
			log_color(1);
			break;
		case 'h':
			usage(basename(argv[0]));
			exit(0);
			break;
		case 'v':
			log_verbosity(atoi(optarg));
			break;
		default:
			usage(basename(argv[0]));
			exit(1);
		}
	}

	ksft_print_header();
	ksft_set_plan(1);

	mem_size = sysconf(_SC_PAGE_SIZE);
	futex_ptr = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
	if (futex_ptr == MAP_FAILED)
		ksft_exit_fail_msg("mmap() for %d bytes failed\n", mem_size);

	futex_numa = futex_ptr;

	ksft_print_msg("Regular test\n");
	futex_numa->futex = 0;
	futex_numa->numa = FUTEX_NO_NODE;
	test_futex(futex_ptr, 0);

	if (futex_numa->numa == FUTEX_NO_NODE)
		ksft_exit_fail_msg("NUMA node is left uninitialized\n");

	ksft_print_msg("Memory too small\n");
	test_futex(futex_ptr + mem_size - 4, 1);

	ksft_print_msg("Memory out of range\n");
	test_futex(futex_ptr + mem_size, 1);

	futex_numa->numa = FUTEX_NO_NODE;
	mprotect(futex_ptr, mem_size, PROT_READ);
	ksft_print_msg("Memory, RO\n");
	test_futex(futex_ptr, 1);

	mprotect(futex_ptr, mem_size, PROT_NONE);
	ksft_print_msg("Memory, no access\n");
	test_futex(futex_ptr, 1);

	mprotect(futex_ptr, mem_size, PROT_READ | PROT_WRITE);
	ksft_print_msg("Memory back to RW\n");
	test_futex(futex_ptr, 0);

	/* MPOL test. Does not work as expected */
	for (i = 0; i < 4; i++) {
		unsigned long nodemask;
		int ret;

		nodemask = 1 << i;
		ret = mbind(futex_ptr, mem_size, MPOL_BIND, &nodemask,
			    sizeof(nodemask) * 8, 0);
		if (ret == 0) {
			ksft_print_msg("Node %d test\n", i);
			futex_numa->futex = 0;
			futex_numa->numa = FUTEX_NO_NODE;

			ret = futex2_wake(futex_ptr, 0, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL);
			if (ret < 0)
				ksft_test_result_fail("Failed to wake 0 with MPOL: %m\n");
			if (0)
				test_futex_mpol(futex_numa, 0);
			if (futex_numa->numa != i) {
				ksft_test_result_fail("Returned NUMA node is %d expected %d\n",
						      futex_numa->numa, i);
			}
		}
	}
	ksft_test_result_pass("NUMA MPOL tests passed\n");
	ksft_finished();
	return 0;
}

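Condensing the MPOL loop above into a stand-alone fragment: when the node word is FUTEX_NO_NODE and FUTEX2_MPOL is passed, the kernel consults the memory policy of the futex's mapping to pick (and record) the node, so the usual pattern is to mbind() the page first and then issue the futex2 call with both flags set. This is only a sketch under those assumptions; wake_on_node() is a name invented for this example and the node is assumed to exist.

#include <stdio.h>
#include <numaif.h>
#include <sys/mman.h>

static void wake_on_node(void *futex_ptr, size_t mem_size, int node)
{
	unsigned long nodemask = 1UL << node;

	/* Bind the page holding the futex word to the requested node. */
	if (mbind(futex_ptr, mem_size, MPOL_BIND, &nodemask, sizeof(nodemask) * 8, 0)) {
		perror("mbind");
		return;
	}
	/* Wake with FUTEX2_MPOL so the node is derived from the mempolicy. */
	futex2_wake(futex_ptr, 1,
		    FUTEX2_SIZE_U32 | FUTEX2_PRIVATE | FUTEX2_NUMA | FUTEX2_MPOL);
}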
292	tools/testing/selftests/futex/functional/futex_priv_hash.c	Normal file
@@ -0,0 +1,292 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2025 Sebastian Andrzej Siewior <bigeasy@linutronix.de>
 */

#define _GNU_SOURCE

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include <linux/prctl.h>
#include <sys/prctl.h>

#include "logging.h"

#define MAX_THREADS	64

static pthread_barrier_t barrier_main;
static pthread_mutex_t global_lock;
static pthread_t threads[MAX_THREADS];
static int counter;

#ifndef PR_FUTEX_HASH
#define PR_FUTEX_HASH			78
# define PR_FUTEX_HASH_SET_SLOTS	1
# define FH_FLAG_IMMUTABLE		(1ULL << 0)
# define PR_FUTEX_HASH_GET_SLOTS	2
# define PR_FUTEX_HASH_GET_IMMUTABLE	3
#endif

static int futex_hash_slots_set(unsigned int slots, int flags)
{
	return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, flags);
}

static int futex_hash_slots_get(void)
{
	return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS);
}

static int futex_hash_immutable_get(void)
{
	return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE);
}

static void futex_hash_slots_set_verify(int slots)
{
	int ret;

	ret = futex_hash_slots_set(slots, 0);
	if (ret != 0) {
		ksft_test_result_fail("Failed to set slots to %d: %m\n", slots);
		ksft_finished();
	}
	ret = futex_hash_slots_get();
	if (ret != slots) {
		ksft_test_result_fail("Set %d slots but PR_FUTEX_HASH_GET_SLOTS returns: %d, %m\n",
				      slots, ret);
		ksft_finished();
	}
	ksft_test_result_pass("SET and GET slots %d passed\n", slots);
}

static void futex_hash_slots_set_must_fail(int slots, int flags)
{
	int ret;

	ret = futex_hash_slots_set(slots, flags);
	ksft_test_result(ret < 0, "futex_hash_slots_set(%d, %d)\n",
			 slots, flags);
}

static void *thread_return_fn(void *arg)
{
	return NULL;
}

static void *thread_lock_fn(void *arg)
{
	pthread_barrier_wait(&barrier_main);

	pthread_mutex_lock(&global_lock);
	counter++;
	usleep(20);
	pthread_mutex_unlock(&global_lock);
	return NULL;
}

static void create_max_threads(void *(*thread_fn)(void *))
{
	int i, ret;

	for (i = 0; i < MAX_THREADS; i++) {
		ret = pthread_create(&threads[i], NULL, thread_fn, NULL);
		if (ret)
			ksft_exit_fail_msg("pthread_create failed: %m\n");
	}
}

static void join_max_threads(void)
{
	int i, ret;

	for (i = 0; i < MAX_THREADS; i++) {
		ret = pthread_join(threads[i], NULL);
		if (ret)
			ksft_exit_fail_msg("pthread_join failed for thread %d\n", i);
	}
}

static void usage(char *prog)
{
	printf("Usage: %s\n", prog);
	printf("  -c    Use color\n");
	printf("  -g    Test the global hash instead of the local immutable one\n");
	printf("  -h    Display this help message\n");
	printf("  -v L  Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
	       VQUIET, VCRITICAL, VINFO);
}

static const char *test_msg_auto_create = "Automatic hash bucket init on thread creation.\n";
static const char *test_msg_auto_inc = "Automatic increase with more than 16 CPUs\n";

int main(int argc, char *argv[])
{
	int futex_slots1, futex_slotsn, online_cpus;
	pthread_mutexattr_t mutex_attr_pi;
	int use_global_hash = 0;
	int ret;
	char c;

	while ((c = getopt(argc, argv, "cghv:")) != -1) {
		switch (c) {
		case 'c':
			log_color(1);
			break;
		case 'g':
			use_global_hash = 1;
			break;
		case 'h':
			usage(basename(argv[0]));
			exit(0);
			break;
		case 'v':
			log_verbosity(atoi(optarg));
			break;
		default:
			usage(basename(argv[0]));
			exit(1);
		}
	}

	ksft_print_header();
	ksft_set_plan(22);

	ret = pthread_mutexattr_init(&mutex_attr_pi);
	ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT);
	ret |= pthread_mutex_init(&global_lock, &mutex_attr_pi);
	if (ret != 0) {
		ksft_exit_fail_msg("Failed to initialize pthread mutex.\n");
	}
	/* First thread, expect to be 0, not yet initialized */
	ret = futex_hash_slots_get();
	if (ret != 0)
		ksft_exit_fail_msg("futex_hash_slots_get() failed: %d, %m\n", ret);

	ret = futex_hash_immutable_get();
	if (ret != 0)
		ksft_exit_fail_msg("futex_hash_immutable_get() failed: %d, %m\n", ret);

	ksft_test_result_pass("Basic get slots and immutable status.\n");
	ret = pthread_create(&threads[0], NULL, thread_return_fn, NULL);
	if (ret != 0)
		ksft_exit_fail_msg("pthread_create() failed: %d, %m\n", ret);

	ret = pthread_join(threads[0], NULL);
	if (ret != 0)
		ksft_exit_fail_msg("pthread_join() failed: %d, %m\n", ret);

	/* First thread, has to initialize the private hash */
	futex_slots1 = futex_hash_slots_get();
	if (futex_slots1 <= 0) {
		ksft_print_msg("Current hash buckets: %d\n", futex_slots1);
		ksft_exit_fail_msg(test_msg_auto_create);
	}

	ksft_test_result_pass(test_msg_auto_create);

	online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
	ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1);
	if (ret != 0)
		ksft_exit_fail_msg("pthread_barrier_init failed: %m.\n");

	ret = pthread_mutex_lock(&global_lock);
	if (ret != 0)
		ksft_exit_fail_msg("pthread_mutex_lock failed: %m.\n");

	counter = 0;
	create_max_threads(thread_lock_fn);
	pthread_barrier_wait(&barrier_main);

	/*
	 * The current default size of hash buckets is 16. The auto increase
	 * works only if more than 16 CPUs are available.
	 */
	ksft_print_msg("Online CPUs: %d\n", online_cpus);
	if (online_cpus > 16) {
		futex_slotsn = futex_hash_slots_get();
		if (futex_slotsn < 0 || futex_slots1 == futex_slotsn) {
			ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n",
				       futex_slots1, futex_slotsn);
			ksft_exit_fail_msg(test_msg_auto_inc);
		}
		ksft_test_result_pass(test_msg_auto_inc);
	} else {
		ksft_test_result_skip(test_msg_auto_inc);
	}
	ret = pthread_mutex_unlock(&global_lock);

	/* Once the user changes it, it has to be what is set */
	futex_hash_slots_set_verify(2);
	futex_hash_slots_set_verify(4);
	futex_hash_slots_set_verify(8);
	futex_hash_slots_set_verify(32);
	futex_hash_slots_set_verify(16);

	ret = futex_hash_slots_set(15, 0);
	ksft_test_result(ret < 0, "Use 15 slots\n");

	futex_hash_slots_set_verify(2);
	join_max_threads();
	ksft_test_result(counter == MAX_THREADS, "Created and waited for %d of %d threads\n",
			 counter, MAX_THREADS);
	counter = 0;
	/* Once the user sets something, auto resize must be disabled */
	ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS);

	create_max_threads(thread_lock_fn);
	join_max_threads();

	ret = futex_hash_slots_get();
	ksft_test_result(ret == 2, "No more auto-resize after manual setting, got %d\n",
			 ret);

	futex_hash_slots_set_must_fail(1 << 29, 0);

	/*
	 * Once the private hash has been made immutable or the global hash
	 * has been requested, this request cannot be undone.
	 */
	if (use_global_hash) {
		ret = futex_hash_slots_set(0, 0);
		ksft_test_result(ret == 0, "Global hash request\n");
	} else {
		ret = futex_hash_slots_set(4, FH_FLAG_IMMUTABLE);
		ksft_test_result(ret == 0, "Immutable resize to 4\n");
	}
	if (ret != 0)
		goto out;

	futex_hash_slots_set_must_fail(4, 0);
	futex_hash_slots_set_must_fail(4, FH_FLAG_IMMUTABLE);
	futex_hash_slots_set_must_fail(8, 0);
	futex_hash_slots_set_must_fail(8, FH_FLAG_IMMUTABLE);
	futex_hash_slots_set_must_fail(0, FH_FLAG_IMMUTABLE);
	futex_hash_slots_set_must_fail(6, FH_FLAG_IMMUTABLE);

	ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS);
	if (ret != 0) {
		ksft_exit_fail_msg("pthread_barrier_init failed: %m\n");
		return 1;
	}
	create_max_threads(thread_lock_fn);
	join_max_threads();

	ret = futex_hash_slots_get();
	if (use_global_hash) {
		ksft_test_result(ret == 0, "Continue to use global hash\n");
	} else {
		ksft_test_result(ret == 4, "Continue to use the 4 hash buckets\n");
	}

	ret = futex_hash_immutable_get();
	ksft_test_result(ret == 1, "Hash reports to be immutable\n");

out:
	ksft_finished();
	return 0;
}

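The behaviour being verified above, reduced to its essence: a process can pin its private futex hash to a fixed, immutable number of buckets (or opt into the global hash with slots == 0), after which further PR_FUTEX_HASH_SET_SLOTS calls fail. A minimal sketch, assuming the PR_FUTEX_HASH fallback defines shown in the test; pin_futex_hash() is a name local to this example.

static void pin_futex_hash(unsigned int slots)
{
	/* Resize once and make the result immutable; later resizes must fail. */
	if (prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, FH_FLAG_IMMUTABLE) < 0) {
		perror("PR_FUTEX_HASH_SET_SLOTS");
		return;
	}
	printf("immutable: %d\n", prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE));
}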
@@ -82,3 +82,10 @@ echo

echo
./futex_waitv $COLOR

echo
./futex_priv_hash $COLOR
./futex_priv_hash -g $COLOR

echo
./futex_numa_mpol $COLOR

@@ -8,6 +8,53 @@

#define u64_to_ptr(x) ((void *)(uintptr_t)(x))

#ifndef __NR_futex_waitv
#define __NR_futex_waitv 449
struct futex_waitv {
	__u64 val;
	__u64 uaddr;
	__u32 flags;
	__u32 __reserved;
};
#endif

#ifndef __NR_futex_wake
#define __NR_futex_wake 454
#endif

#ifndef __NR_futex_wait
#define __NR_futex_wait 455
#endif

#ifndef FUTEX2_SIZE_U32
#define FUTEX2_SIZE_U32 0x02
#endif

#ifndef FUTEX2_NUMA
#define FUTEX2_NUMA 0x04
#endif

#ifndef FUTEX2_MPOL
#define FUTEX2_MPOL 0x08
#endif

#ifndef FUTEX2_PRIVATE
#define FUTEX2_PRIVATE FUTEX_PRIVATE_FLAG
#endif

#ifndef FUTEX_NO_NODE
#define FUTEX_NO_NODE (-1)
#endif

#ifndef FUTEX_32
#define FUTEX_32 FUTEX2_SIZE_U32
#endif

struct futex32_numa {
	futex_t futex;
	futex_t numa;
};

/**
 * futex_waitv - Wait at multiple futexes, wake on any
 * @waiters: Array of waiters
@@ -20,3 +67,26 @@ static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned lon
{
	return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid);
}

/*
 * futex2_wait() - block on uaddr with optional timeout
 * @val: Expected value
 * @flags: FUTEX2 flags
 * @timeout: Relative timeout
 * @clockid: Clock id for the timeout
 */
static inline int futex2_wait(void *uaddr, long val, unsigned int flags,
			      struct timespec *timeout, clockid_t clockid)
{
	return syscall(__NR_futex_wait, uaddr, val, ~0U, flags, timeout, clockid);
}

/*
 * futex2_wake() - Wake a number of futexes
 * @nr: Number of threads to wake at most
 * @flags: FUTEX2 flags
 */
static inline int futex2_wake(void *uaddr, int nr, unsigned int flags)
{
	return syscall(__NR_futex_wake, uaddr, ~0U, nr, flags);
}

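A small usage sketch for the two wrappers above, mirroring the wake loop in futex_numa_mpol.c: wake up to 'expected' waiters and retry until they have all been woken, since a waiter that has not yet queued itself is not counted. wake_all() is a name invented for this example.

#include <stdio.h>
#include <unistd.h>

static void wake_all(void *uaddr, int expected, unsigned int flags)
{
	while (expected > 0) {
		int woken = futex2_wake(uaddr, expected, flags);

		if (woken < 0) {
			perror("futex2_wake");
			return;
		}
		if (!woken)
			usleep(50);	/* waiters may not have queued yet */
		expected -= woken;
	}
}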