percpu_ref: reduce memory footprint of percpu_ref in fast path

'struct percpu_ref' is often embedded into a user structure, and the
instance is usually referenced in the fast path; however, only
'percpu_count_ptr' is actually needed in the fast path.

So move the other fields into a new structure, 'percpu_ref_data', and
allocate it dynamically via kzalloc(). The memory footprint of
'percpu_ref' in the fast path is then reduced considerably, making it
suitable for placement in a hot cacheline of the user structure.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Tested-by: Veronika Kabatova <vkabatov@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Ming Lei 2020-10-01 23:48:41 +08:00 committed by Jens Axboe
parent cf785af193
commit 2b0d3d3e4f
3 changed files with 123 additions and 62 deletions

View file

@ -92,18 +92,30 @@ enum {
PERCPU_REF_ALLOW_REINIT = 1 << 2,
};
struct percpu_ref {
struct percpu_ref_data {
atomic_long_t count;
/*
* The low bit of the pointer indicates whether the ref is in percpu
* mode; if set, then get/put will manipulate the atomic_t.
*/
unsigned long percpu_count_ptr;
percpu_ref_func_t *release;
percpu_ref_func_t *confirm_switch;
bool force_atomic:1;
bool allow_reinit:1;
struct rcu_head rcu;
struct percpu_ref *ref;
};
struct percpu_ref {
/*
* Fast-path word: the percpu counter pointer and its mode flags share
* this single field.
*
* The low bit of the pointer indicates whether the ref is in percpu
* mode; if set, then get/put will manipulate the atomic_t.
* The get/put helpers also test __PERCPU_REF_DEAD against this word.
*/
unsigned long percpu_count_ptr;
/*
* 'percpu_ref' is often embedded into user structure, and only
* 'percpu_count_ptr' is required in fast path, move other fields
* into 'percpu_ref_data', so we can reduce memory footprint in
* fast path.
*
* The atomic count and the release callback are reached through
* this pointer (ref->data->count, ref->data->release) when the ref
* is not operating on the percpu counter.
*/
struct percpu_ref_data *data;
};
int __must_check percpu_ref_init(struct percpu_ref *ref,
@ -118,6 +130,7 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
percpu_ref_func_t *confirm_kill);
void percpu_ref_resurrect(struct percpu_ref *ref);
void percpu_ref_reinit(struct percpu_ref *ref);
bool percpu_ref_is_zero(struct percpu_ref *ref);
/**
* percpu_ref_kill - drop the initial ref
@ -191,7 +204,7 @@ static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr)
if (__ref_is_percpu(ref, &percpu_count))
this_cpu_add(*percpu_count, nr);
else
atomic_long_add(nr, &ref->count);
atomic_long_add(nr, &ref->data->count);
rcu_read_unlock();
}
@ -231,7 +244,7 @@ static inline bool percpu_ref_tryget_many(struct percpu_ref *ref,
this_cpu_add(*percpu_count, nr);
ret = true;
} else {
ret = atomic_long_add_unless(&ref->count, nr, 0);
ret = atomic_long_add_unless(&ref->data->count, nr, 0);
}
rcu_read_unlock();
@ -279,7 +292,7 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
this_cpu_inc(*percpu_count);
ret = true;
} else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) {
ret = atomic_long_inc_not_zero(&ref->count);
ret = atomic_long_inc_not_zero(&ref->data->count);
}
rcu_read_unlock();
@ -305,8 +318,8 @@ static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr)
if (__ref_is_percpu(ref, &percpu_count))
this_cpu_sub(*percpu_count, nr);
else if (unlikely(atomic_long_sub_and_test(nr, &ref->count)))
ref->release(ref);
else if (unlikely(atomic_long_sub_and_test(nr, &ref->data->count)))
ref->data->release(ref);
rcu_read_unlock();
}
@ -339,21 +352,4 @@ static inline bool percpu_ref_is_dying(struct percpu_ref *ref)
return ref->percpu_count_ptr & __PERCPU_REF_DEAD;
}
/**
* percpu_ref_is_zero - test whether a percpu refcount reached zero
* @ref: percpu_ref to test
*
* Returns %true if @ref reached zero.
*
* This function is safe to call as long as @ref is between init and exit.
*/
static inline bool percpu_ref_is_zero(struct percpu_ref *ref)
{
unsigned long __percpu *percpu_count;
if (__ref_is_percpu(ref, &percpu_count))
return false;
return !atomic_long_read(&ref->count);
}
#endif