Mirror of https://github.com/Fishwaldo/linux-bl808.git (synced 2025-06-07 15:15:29 +00:00)
drm/amdgpu: signal fences directly in amdgpu_fence_process
Because of the scheduler we need to signal all fences immediately anyway, so try to avoid the waitqueue overhead.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
parent f09c2be4d4
commit 4a7d74f176

2 changed files with 31 additions and 68 deletions
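Before the diff, a brief note on the mechanism: the fence driver keeps a power-of-two array of fence pointers per ring, indexed by sequence number masked with num_fences_mask. The processing path claims the range of newly completed sequence numbers with an atomic cmpxchg on last_seq and then signals each slot directly, so the old waitqueue and its wake-up callback become unnecessary. The following standalone C sketch models that scheme in userspace; the toy_* names, the simplified toy_fence type, and the hard-coded ring size are illustrative assumptions, not driver code (the real logic is in amdgpu_fence_process() in the diff below).

/*
 * Standalone sketch (not kernel code) of the signaling scheme this patch
 * introduces: a power-of-two ring of fence slots indexed by sequence number,
 * where the processing path claims the range of newly completed sequence
 * numbers with a compare-and-swap on last_seq and then signals each slot
 * directly, instead of waking a waitqueue.
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NUM_HW_SUBMISSION 4                     /* must be a power of two */

struct toy_fence {
        uint64_t seq;
        int signaled;
};

struct toy_fence_driver {
        _Atomic uint64_t last_seq;              /* last sequence number signaled */
        uint64_t sync_seq;                      /* last sequence number emitted */
        unsigned num_fences_mask;               /* NUM_HW_SUBMISSION - 1 */
        struct toy_fence *fences[NUM_HW_SUBMISSION];
};

/* Emit: take the next sequence number and park the fence in its slot. */
static struct toy_fence *toy_fence_emit(struct toy_fence_driver *drv)
{
        struct toy_fence *f = calloc(1, sizeof(*f));

        f->seq = ++drv->sync_seq;
        /* Exactly one in-flight fence per slot, so the slot must be free. */
        assert(drv->fences[f->seq & drv->num_fences_mask] == NULL);
        drv->fences[f->seq & drv->num_fences_mask] = f;
        return f;
}

/*
 * Process: hw_seq stands in for the sequence number the hardware wrote back
 * (amdgpu_fence_read() in the real driver).  Claim the range
 * (last_seq, hw_seq] with a cmpxchg, then signal every slot in it.
 */
static void toy_fence_process(struct toy_fence_driver *drv, uint64_t hw_seq)
{
        uint64_t last_seq;

        do {
                last_seq = atomic_load(&drv->last_seq);
                if (hw_seq <= last_seq || hw_seq > drv->sync_seq)
                        return;                 /* nothing new, or bogus readback */
        } while (!atomic_compare_exchange_weak(&drv->last_seq, &last_seq, hw_seq));

        while (last_seq != hw_seq) {
                unsigned idx = ++last_seq & drv->num_fences_mask;
                struct toy_fence *f = drv->fences[idx];

                /* Only the thread that won the cmpxchg walks this range. */
                assert(f && !f->signaled);
                drv->fences[idx] = NULL;
                f->signaled = 1;
                printf("signaled fence seq %llu\n", (unsigned long long)f->seq);
                free(f);
        }
}

int main(void)
{
        struct toy_fence_driver drv = { .num_fences_mask = NUM_HW_SUBMISSION - 1 };

        for (int i = 0; i < 3; i++)
                toy_fence_emit(&drv);
        toy_fence_process(&drv, 2);             /* hardware reports seq 2 reached */
        toy_fence_process(&drv, 3);
        return 0;
}

Because the slot count is a power of two, seq & num_fences_mask is a cheap modulo, and because only the thread that wins the cmpxchg walks the claimed range, each slot is signaled by exactly one thread.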
drivers/gpu/drm/amd/amdgpu/amdgpu.h

@@ -392,8 +392,8 @@ struct amdgpu_fence_driver {
 	struct amdgpu_irq_src *irq_src;
 	unsigned irq_type;
 	struct timer_list fallback_timer;
-	wait_queue_head_t fence_queue;
 	unsigned num_fences_mask;
+	spinlock_t lock;
 	struct fence **fences;
 };
 
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c

@@ -53,8 +53,6 @@ struct amdgpu_fence {
 	/* RB, DMA, etc. */
 	struct amdgpu_ring *ring;
 	uint64_t seq;
-
-	wait_queue_t fence_wake;
 };
 
 static struct kmem_cache *amdgpu_fence_slab;
@@ -124,7 +122,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_fence *fence;
-	struct fence *old, **ptr;
+	struct fence **ptr;
 	unsigned idx;
 
 	fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
@@ -134,7 +132,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
 	fence->seq = ++ring->fence_drv.sync_seq;
 	fence->ring = ring;
 	fence_init(&fence->base, &amdgpu_fence_ops,
-		   &ring->fence_drv.fence_queue.lock,
+		   &ring->fence_drv.lock,
		   adev->fence_context + ring->idx,
		   fence->seq);
 	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
@@ -145,13 +143,10 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
 	/* This function can't be called concurrently anyway, otherwise
 	 * emitting the fence would mess up the hardware ring buffer.
 	 */
-	old = rcu_dereference_protected(*ptr, 1);
+	BUG_ON(rcu_dereference_protected(*ptr, 1));
 
 	rcu_assign_pointer(*ptr, fence_get(&fence->base));
 
-	BUG_ON(old && !fence_is_signaled(old));
-	fence_put(old);
-
 	*f = &fence->base;
 
 	return 0;
@@ -181,11 +176,12 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
  */
 void amdgpu_fence_process(struct amdgpu_ring *ring)
 {
+	struct amdgpu_fence_driver *drv = &ring->fence_drv;
 	uint64_t seq, last_seq, last_emitted;
-	bool wake = false;
+	int r;
 
-	last_seq = atomic64_read(&ring->fence_drv.last_seq);
 	do {
+		last_seq = atomic64_read(&ring->fence_drv.last_seq);
 		last_emitted = ring->fence_drv.sync_seq;
 		seq = amdgpu_fence_read(ring);
 		seq |= last_seq & 0xffffffff00000000LL;
@@ -195,22 +191,32 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
 		}
 
 		if (seq <= last_seq || seq > last_emitted)
-			break;
+			return;
 
-		/* If we loop over we don't want to return without
-		 * checking if a fence is signaled as it means that the
-		 * seq we just read is different from the previous on.
-		 */
-		wake = true;
-		last_seq = seq;
-
-	} while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
+	} while (atomic64_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
 
 	if (seq < last_emitted)
 		amdgpu_fence_schedule_fallback(ring);
 
-	if (wake)
-		wake_up_all(&ring->fence_drv.fence_queue);
+	while (last_seq != seq) {
+		struct fence *fence, **ptr;
+
+		ptr = &drv->fences[++last_seq & drv->num_fences_mask];
+
+		/* There is always exactly one thread signaling this fence slot */
+		fence = rcu_dereference_protected(*ptr, 1);
+		rcu_assign_pointer(*ptr, NULL);
+
+		BUG_ON(!fence);
+
+		r = fence_signal(fence);
+		if (!r)
+			FENCE_TRACE(fence, "signaled from irq context\n");
+		else
+			BUG();
+
+		fence_put(fence);
+	}
 }
 
 /**
@@ -356,8 +362,8 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
 	setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
		    (unsigned long)ring);
 
-	init_waitqueue_head(&ring->fence_drv.fence_queue);
 	ring->fence_drv.num_fences_mask = num_hw_submission - 1;
+	spin_lock_init(&ring->fence_drv.lock);
 	ring->fence_drv.fences = kcalloc(num_hw_submission, sizeof(void *),
					 GFP_KERNEL);
 	if (!ring->fence_drv.fences)
@@ -436,7 +442,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
 			/* no need to trigger GPU reset as we are unloading */
 			amdgpu_fence_driver_force_completion(adev);
 		}
-		wake_up_all(&ring->fence_drv.fence_queue);
 		amdgpu_irq_put(adev, ring->fence_drv.irq_src,
			       ring->fence_drv.irq_type);
 		amd_sched_fini(&ring->sched);
@@ -568,42 +573,6 @@ static bool amdgpu_fence_is_signaled(struct fence *f)
 	return false;
 }
 
-/**
- * amdgpu_fence_check_signaled - callback from fence_queue
- *
- * this function is called with fence_queue lock held, which is also used
- * for the fence locking itself, so unlocked variants are used for
- * fence_signal, and remove_wait_queue.
- */
-static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
-{
-	struct amdgpu_fence *fence;
-	struct amdgpu_device *adev;
-	u64 seq;
-	int ret;
-
-	fence = container_of(wait, struct amdgpu_fence, fence_wake);
-	adev = fence->ring->adev;
-
-	/*
-	 * We cannot use amdgpu_fence_process here because we're already
-	 * in the waitqueue, in a call from wake_up_all.
-	 */
-	seq = atomic64_read(&fence->ring->fence_drv.last_seq);
-	if (seq >= fence->seq) {
-		ret = fence_signal_locked(&fence->base);
-		if (!ret)
-			FENCE_TRACE(&fence->base, "signaled from irq context\n");
-		else
-			FENCE_TRACE(&fence->base, "was already signaled\n");
-
-		__remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
-		fence_put(&fence->base);
-	} else
-		FENCE_TRACE(&fence->base, "pending\n");
-	return 0;
-}
-
 /**
  * amdgpu_fence_enable_signaling - enable signalling on fence
  * @fence: fence
@@ -617,17 +586,11 @@ static bool amdgpu_fence_enable_signaling(struct fence *f)
 	struct amdgpu_fence *fence = to_amdgpu_fence(f);
 	struct amdgpu_ring *ring = fence->ring;
 
-	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
-		return false;
-
-	fence->fence_wake.flags = 0;
-	fence->fence_wake.private = NULL;
-	fence->fence_wake.func = amdgpu_fence_check_signaled;
-	__add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
-	fence_get(f);
 	if (!timer_pending(&ring->fence_drv.fallback_timer))
 		amdgpu_fence_schedule_fallback(ring);
 
 	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
 
 	return true;
 }