RDMA/umem: Get rid of per_mm->notifier_count
This is intrinsically racy and the scheme is simply unnecessary. New MR
registration can wait for any ongoing invalidation to fully complete.

        CPU0                                CPU1
                                     if (atomic_read())
  if (atomic_dec_and_test() &&
      !list_empty())
    { /* not taken */ }
                                     list_add()

This leaves the new UMEM in a kind of purgatory until another invalidate
rolls through.

Instead, hold the read side of the umem_rwsem across the paired start/end
and get rid of the racy 'deferred add' approach.

Since all umems in the rbt are always ready to go, also get rid of the
mn_counters_active stuff.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
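As a rough illustration of the locking scheme the patch moves to, here is a minimal userspace sketch (not the kernel code): a pthread rwlock stands in for umem_rwsem, the function names mirror the patch, and the bodies are simplified placeholders.

/*
 * Illustrative userspace sketch only: the invalidate start/end pair holds
 * the read side of an rwsem, and MR registration takes the write side, so
 * registration simply waits for any in-flight invalidation instead of
 * being parked on a deferred-add list.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_rwlock_t umem_rwsem = PTHREAD_RWLOCK_INITIALIZER;

/* Stand-in for invalidate_range_start(): take the read lock and keep it. */
static void invalidate_range_start(void)
{
        pthread_rwlock_rdlock(&umem_rwsem);
        printf("invalidate start: walking interval tree under read lock\n");
}

/* Stand-in for invalidate_range_end(): drop the lock taken in start(). */
static void invalidate_range_end(void)
{
        printf("invalidate end: completing notifiers, releasing read lock\n");
        pthread_rwlock_unlock(&umem_rwsem);
}

/* Stand-in for add_umem_to_per_mm(): the write lock cannot be acquired
 * while a start/end pair is in flight, so every umem in the tree is always
 * fully accounted and no "purgatory" list is needed. */
static void *register_new_umem(void *arg)
{
        (void)arg;
        pthread_rwlock_wrlock(&umem_rwsem);
        printf("registration: inserting umem into interval tree\n");
        pthread_rwlock_unlock(&umem_rwsem);
        return NULL;
}

int main(void)
{
        pthread_t reg;

        invalidate_range_start();
        pthread_create(&reg, NULL, register_new_umem, NULL); /* blocks on wrlock */
        sleep(1);               /* registration waits while invalidation runs */
        invalidate_range_end(); /* read lock dropped; registration proceeds */
        pthread_join(reg, NULL);
        return 0;
}

With the read side held across the whole start/end pair, registration's down_write naturally waits out any in-flight invalidation, which is exactly why the deferred-add purgatory and the mn_counters_active tracking become unnecessary.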
This commit is contained in:
parent f27a0d50a4
commit ca748c39ea
2 changed files with 18 additions and 110 deletions
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -80,83 +80,29 @@ INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
 static void ib_umem_notifier_start_account(struct ib_umem_odp *umem_odp)
 {
 	mutex_lock(&umem_odp->umem_mutex);
-
-	/* Only update private counters for this umem if it has them.
-	 * Otherwise skip it. All page faults will be delayed for this umem. */
-	if (umem_odp->mn_counters_active) {
-		int notifiers_count = umem_odp->notifiers_count++;
-
-		if (notifiers_count == 0)
-			/* Initialize the completion object for waiting on
-			 * notifiers. Since notifier_count is zero, no one
-			 * should be waiting right now. */
-			reinit_completion(&umem_odp->notifier_completion);
-	}
+	if (umem_odp->notifiers_count++ == 0)
+		/*
+		 * Initialize the completion object for waiting on
+		 * notifiers. Since notifier_count is zero, no one should be
+		 * waiting right now.
+		 */
+		reinit_completion(&umem_odp->notifier_completion);
 	mutex_unlock(&umem_odp->umem_mutex);
 }
 
 static void ib_umem_notifier_end_account(struct ib_umem_odp *umem_odp)
 {
 	mutex_lock(&umem_odp->umem_mutex);
-
-	/* Only update private counters for this umem if it has them.
-	 * Otherwise skip it. All page faults will be delayed for this umem. */
-	if (umem_odp->mn_counters_active) {
-		/*
-		 * This sequence increase will notify the QP page fault that
-		 * the page that is going to be mapped in the spte could have
-		 * been freed.
-		 */
-		++umem_odp->notifiers_seq;
-		if (--umem_odp->notifiers_count == 0)
-			complete_all(&umem_odp->notifier_completion);
-	}
+	/*
+	 * This sequence increase will notify the QP page fault that the page
+	 * that is going to be mapped in the spte could have been freed.
+	 */
+	++umem_odp->notifiers_seq;
+	if (--umem_odp->notifiers_count == 0)
+		complete_all(&umem_odp->notifier_completion);
 	mutex_unlock(&umem_odp->umem_mutex);
 }
 
-/* Account for a new mmu notifier in an ib_ucontext. */
-static void
-ib_ucontext_notifier_start_account(struct ib_ucontext_per_mm *per_mm)
-{
-	atomic_inc(&per_mm->notifier_count);
-}
-
-/* Account for a terminating mmu notifier in an ib_ucontext.
- *
- * Must be called with the ib_ucontext->umem_rwsem semaphore unlocked, since
- * the function takes the semaphore itself. */
-static void ib_ucontext_notifier_end_account(struct ib_ucontext_per_mm *per_mm)
-{
-	int zero_notifiers = atomic_dec_and_test(&per_mm->notifier_count);
-
-	if (zero_notifiers &&
-	    !list_empty(&per_mm->no_private_counters)) {
-		/* No currently running mmu notifiers. Now is the chance to
-		 * add private accounting to all previously added umems. */
-		struct ib_umem_odp *odp_data, *next;
-
-		/* Prevent concurrent mmu notifiers from working on the
-		 * no_private_counters list. */
-		down_write(&per_mm->umem_rwsem);
-
-		/* Read the notifier_count again, with the umem_rwsem
-		 * semaphore taken for write. */
-		if (!atomic_read(&per_mm->notifier_count)) {
-			list_for_each_entry_safe(odp_data, next,
-						 &per_mm->no_private_counters,
-						 no_private_counters) {
-				mutex_lock(&odp_data->umem_mutex);
-				odp_data->mn_counters_active = true;
-				list_del(&odp_data->no_private_counters);
-				complete_all(&odp_data->notifier_completion);
-				mutex_unlock(&odp_data->umem_mutex);
-			}
-		}
-
-		up_write(&per_mm->umem_rwsem);
-	}
-}
-
 static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
 					       u64 start, u64 end, void *cookie)
 {
@@ -186,7 +132,6 @@ static void ib_umem_notifier_release(struct mmu_notifier *mn,
 	if (!per_mm->context->invalidate_range)
 		return;
 
-	ib_ucontext_notifier_start_account(per_mm);
 	down_read(&per_mm->umem_rwsem);
 	rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, 0,
 				      ULLONG_MAX,
@@ -231,14 +176,9 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	else if (!down_read_trylock(&per_mm->umem_rwsem))
 		return -EAGAIN;
 
-	ib_ucontext_notifier_start_account(per_mm);
-	ret = rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start,
-					    end,
-					    invalidate_range_start_trampoline,
-					    blockable, NULL);
-	up_read(&per_mm->umem_rwsem);
-
-	return ret;
+	return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start, end,
+					     invalidate_range_start_trampoline,
+					     blockable, NULL);
 }
 
 static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start,
@@ -259,17 +199,10 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
 	if (!per_mm->context->invalidate_range)
 		return;
 
-	/*
-	 * TODO: we currently bail out if there is any sleepable work to be done
-	 * in ib_umem_notifier_invalidate_range_start so we shouldn't really block
-	 * here. But this is ugly and fragile.
-	 */
-	down_read(&per_mm->umem_rwsem);
 	rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start,
 				      end,
 				      invalidate_range_end_trampoline, true, NULL);
 	up_read(&per_mm->umem_rwsem);
-	ib_ucontext_notifier_end_account(per_mm);
 }
 
 static const struct mmu_notifier_ops ib_umem_notifiers = {
@@ -287,12 +220,6 @@ static void add_umem_to_per_mm(struct ib_umem_odp *umem_odp)
 	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
 		rbt_ib_umem_insert(&umem_odp->interval_tree,
 				   &per_mm->umem_tree);
-
-	if (likely(!atomic_read(&per_mm->notifier_count)))
-		umem_odp->mn_counters_active = true;
-	else
-		list_add(&umem_odp->no_private_counters,
-			 &per_mm->no_private_counters);
 	up_write(&per_mm->umem_rwsem);
 }
 
@@ -305,10 +232,7 @@ static void remove_umem_from_per_mm(struct ib_umem_odp *umem_odp)
 	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
 		rbt_ib_umem_remove(&umem_odp->interval_tree,
 				   &per_mm->umem_tree);
-	if (!umem_odp->mn_counters_active) {
-		list_del(&umem_odp->no_private_counters);
-		complete_all(&umem_odp->notifier_completion);
-	}
+	complete_all(&umem_odp->notifier_completion);
 
 	up_write(&per_mm->umem_rwsem);
 }
@@ -327,7 +251,6 @@ static struct ib_ucontext_per_mm *alloc_per_mm(struct ib_ucontext *ctx,
 	per_mm->mm = mm;
 	per_mm->umem_tree = RB_ROOT_CACHED;
 	init_rwsem(&per_mm->umem_rwsem);
-	INIT_LIST_HEAD(&per_mm->no_private_counters);
 
 	rcu_read_lock();
 	per_mm->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
--- a/include/rdma/ib_umem_odp.h
+++ b/include/rdma/ib_umem_odp.h
@@ -67,15 +67,9 @@ struct ib_umem_odp {
 	struct mutex		umem_mutex;
 	void			*private; /* for the HW driver to use. */
 
-	/* When false, use the notifier counter in the ucontext struct. */
-	bool mn_counters_active;
 	int notifiers_seq;
 	int notifiers_count;
 
-	/* A linked list of umems that don't have private mmu notifier
-	 * counters yet. */
-	struct list_head no_private_counters;
-
 	/* Tree tracking */
 	struct umem_odp_node	interval_tree;
 
@@ -99,11 +93,8 @@ struct ib_ucontext_per_mm {
 	struct rb_root_cached umem_tree;
 	/* Protects umem_tree */
 	struct rw_semaphore umem_rwsem;
-	atomic_t notifier_count;
 
 	struct mmu_notifier mn;
-	/* A list of umems that don't have private mmu notifier counters yet. */
-	struct list_head no_private_counters;
 	unsigned int odp_mrs_count;
 
 	struct list_head ucontext_list;
@@ -162,12 +153,6 @@ static inline int ib_umem_mmu_notifier_retry(struct ib_umem_odp *umem_odp,
 	 * and the ucontext umem_mutex semaphore locked for read).
 	 */
 
-	/* Do not allow page faults while the new ib_umem hasn't seen a state
-	 * with zero notifiers yet, and doesn't have its own valid set of
-	 * private counters. */
-	if (!umem_odp->mn_counters_active)
-		return 1;
-
 	if (unlikely(umem_odp->notifiers_count))
 		return 1;
 	if (umem_odp->notifiers_seq != mmu_seq)