From e2fb6e0a7a2194f5ef1fada737217aad71e29e4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 9 Jan 2019 15:36:29 +0100 Subject: [PATCH 01/49] drm/amdgpu: cleanup amdgpu_ih_process a bit more MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the callback and call the dispatcher directly. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 6 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 48 +++++++++---------------- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h | 2 +- 4 files changed, 21 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index d0a5db777b6d..1c50be3ab8a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -140,9 +140,7 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) * Interrupt hander (VI), walk the IH ring. * Returns irq process return code. */ -int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, - void (*callback)(struct amdgpu_device *adev, - struct amdgpu_ih_ring *ih)) +int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { u32 wptr; @@ -162,7 +160,7 @@ restart_ih: rmb(); while (ih->rptr != wptr) { - callback(adev, ih); + amdgpu_irq_dispatch(adev, ih); ih->rptr &= ih->ptr_mask; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 1ccb1831382a..113a1ba13d4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -69,8 +69,6 @@ struct amdgpu_ih_funcs { int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, unsigned ring_size, bool use_bus_addr); void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); -int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, - void (*callback)(struct amdgpu_device *adev, - struct amdgpu_ih_ring *ih)); +int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 8bfb3dab46f7..af4c3b1af322 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -130,29 +130,6 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev) spin_unlock_irqrestore(&adev->irq.lock, irqflags); } -/** - * amdgpu_irq_callback - callback from the IH ring - * - * @adev: amdgpu device pointer - * @ih: amdgpu ih ring - * - * Callback from IH ring processing to handle the entry at the current position - * and advance the read pointer. 
- */ -static void amdgpu_irq_callback(struct amdgpu_device *adev, - struct amdgpu_ih_ring *ih) -{ - u32 ring_index = ih->rptr >> 2; - struct amdgpu_iv_entry entry; - - entry.iv_entry = (const uint32_t *)&ih->ring[ring_index]; - amdgpu_ih_decode_iv(adev, &entry); - - trace_amdgpu_iv(ih - &adev->irq.ih, &entry); - - amdgpu_irq_dispatch(adev, &entry); -} - /** * amdgpu_irq_handler - IRQ handler * @@ -170,7 +147,7 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg) struct amdgpu_device *adev = dev->dev_private; irqreturn_t ret; - ret = amdgpu_ih_process(adev, &adev->irq.ih, amdgpu_irq_callback); + ret = amdgpu_ih_process(adev, &adev->irq.ih); if (ret == IRQ_HANDLED) pm_runtime_mark_last_busy(dev->dev); return ret; @@ -188,7 +165,7 @@ static void amdgpu_irq_handle_ih1(struct work_struct *work) struct amdgpu_device *adev = container_of(work, struct amdgpu_device, irq.ih1_work); - amdgpu_ih_process(adev, &adev->irq.ih1, amdgpu_irq_callback); + amdgpu_ih_process(adev, &adev->irq.ih1); } /** @@ -203,7 +180,7 @@ static void amdgpu_irq_handle_ih2(struct work_struct *work) struct amdgpu_device *adev = container_of(work, struct amdgpu_device, irq.ih2_work); - amdgpu_ih_process(adev, &adev->irq.ih2, amdgpu_irq_callback); + amdgpu_ih_process(adev, &adev->irq.ih2); } /** @@ -394,14 +371,23 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev, * Dispatches IRQ to IP blocks. */ void amdgpu_irq_dispatch(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry) + struct amdgpu_ih_ring *ih) { - unsigned client_id = entry->client_id; - unsigned src_id = entry->src_id; + u32 ring_index = ih->rptr >> 2; + struct amdgpu_iv_entry entry; + unsigned client_id, src_id; struct amdgpu_irq_src *src; bool handled = false; int r; + entry.iv_entry = (const uint32_t *)&ih->ring[ring_index]; + amdgpu_ih_decode_iv(adev, &entry); + + trace_amdgpu_iv(ih - &adev->irq.ih, &entry); + + client_id = entry.client_id; + src_id = entry.src_id; + if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) { DRM_DEBUG("Invalid client_id in IV: %d\n", client_id); @@ -416,7 +402,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, client_id, src_id); } else if ((src = adev->irq.client[client_id].sources[src_id])) { - r = src->funcs->process(adev, src, entry); + r = src->funcs->process(adev, src, &entry); if (r < 0) DRM_ERROR("error processing interrupt (%d)\n", r); else if (r) @@ -428,7 +414,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, /* Send it to amdkfd as well if it isn't already handled */ if (!handled) - amdgpu_amdkfd_interrupt(adev, entry->iv_entry); + amdgpu_amdkfd_interrupt(adev, entry.iv_entry); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index c27decfda494..c718e94a55c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -108,7 +108,7 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned client_id, unsigned src_id, struct amdgpu_irq_src *source); void amdgpu_irq_dispatch(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry); + struct amdgpu_ih_ring *ih); int amdgpu_irq_update(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type); int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src, From 2c11ee6ae5533821cf2c64972d6cb5aa4ab4a46e Mon Sep 17 00:00:00 2001 From: wentalou Date: Wed, 30 Jan 2019 11:13:01 +0800 Subject: [PATCH 02/49] drm/amdgpu: tighten gpu_recover in mailbox_flr to avoid duplicate recover in sriov sriov's gpu_recover inside xgpu_ai_mailbox_flr_work would 
cause duplicate recover in TDR. TDR's gpu_recover would be triggered by amdgpu_job_timedout, that could avoid vk-cts failure by unexpected recover. Signed-off-by: Wentao Lou Acked-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index b11a1c17a7f2..73851ebb3833 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -266,7 +266,8 @@ flr_done: } /* Trigger recovery for world switch failure if no TDR */ - if (amdgpu_device_should_recover_gpu(adev)) + if (amdgpu_device_should_recover_gpu(adev) + && amdgpu_lockup_timeout == MAX_SCHEDULE_TIMEOUT) amdgpu_device_gpu_recover(adev, NULL); } From e8e32426b1271d3387208c3a6eaf7c166bea9975 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 4 Feb 2019 17:46:35 -0500 Subject: [PATCH 03/49] drm/amdgpu: Add helper to wait for BO fences using a sync object MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Creates a temporary sync object to wait for the BO reservation. This generalizes amdgpu_vm_wait_pd. Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 24 +++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 30 +++------------------- 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index fd9c4beeaaa4..ec9e45004bff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1284,6 +1284,30 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, reservation_object_add_excl_fence(resv, fence); } +/** + * amdgpu_sync_wait_resv - Wait for BO reservation fences + * + * @bo: buffer object + * @owner: fence owner + * @intr: Whether the wait is interruptible + * + * Returns: + * 0 on success, errno otherwise. 
+ */ +int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct amdgpu_sync sync; + int r; + + amdgpu_sync_create(&sync); + amdgpu_sync_resv(adev, &sync, bo->tbo.resv, owner, false); + r = amdgpu_sync_wait(&sync, intr); + amdgpu_sync_free(&sync); + + return r; +} + /** * amdgpu_bo_gpu_offset - return GPU offset of bo * @bo: amdgpu object for which we query the offset diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 9291c2f837e9..220a6a7b1bc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -266,6 +266,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared); +int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr); u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); int amdgpu_bo_validate(struct amdgpu_bo *bo); int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 75481cf3348f..3f33286c3cd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1332,31 +1332,6 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, } } - -/** - * amdgpu_vm_wait_pd - Wait for PT BOs to be free. - * - * @adev: amdgpu_device pointer - * @vm: related vm - * @owner: fence owner - * - * Returns: - * 0 on success, errno otherwise. - */ -static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, - void *owner) -{ - struct amdgpu_sync sync; - int r; - - amdgpu_sync_create(&sync); - amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false); - r = amdgpu_sync_wait(&sync, true); - amdgpu_sync_free(&sync); - - return r; -} - /** * amdgpu_vm_update_func - helper to call update function * @@ -1451,7 +1426,8 @@ restart: params.adev = adev; if (vm->use_cpu_for_update) { - r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); + r = amdgpu_bo_sync_wait(vm->root.base.bo, + AMDGPU_FENCE_OWNER_VM, true); if (unlikely(r)) return r; @@ -1784,7 +1760,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, /* Wait for PT BOs to be idle. PTs share the same resv. object * as the root PD BO */ - r = amdgpu_vm_wait_pd(adev, vm, owner); + r = amdgpu_bo_sync_wait(vm->root.base.bo, owner, true); if (unlikely(r)) return r; From c60cd590cb7da0d7dbb423727bb67350182a371c Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 4 Feb 2019 17:53:05 -0500 Subject: [PATCH 04/49] drm/amdgpu: Replace ttm_bo_wait with amdgpu_bo_sync_wait MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fence_owner logic in amdgpu_sync_wait will allow waiting without having to temporarily remove eviction fences. 
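For illustration, a minimal sketch of how the new helper is meant to be called (not part of the patch; the real call sites appear in the next two patches, and example_validate_and_wait is a made-up name):

    /* Wait for the BO's reservation fences while skipping fences owned
     * by the given owner.  Unlike ttm_bo_wait(), this lets KFD code
     * wait for moves/clears without waiting on (and thereby triggering)
     * its own eviction fence.
     */
    static int example_validate_and_wait(struct amdgpu_bo *bo)
    {
            /* ... validate / move the BO as before ... */

            return amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
    }
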
Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d7b10d79f1de..44a1581f6b8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -355,7 +355,7 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, if (ret) goto validate_fail; - ttm_bo_wait(&bo->tbo, false, false); + amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false); amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count); } @@ -1002,7 +1002,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, pr_err("validate_pt_pd_bos() failed\n"); goto validate_pd_fail; } - ret = ttm_bo_wait(&vm->root.base.bo->tbo, false, false); + amdgpu_bo_sync_wait(vm->root.base.bo, AMDGPU_FENCE_OWNER_KFD, false); if (ret) goto wait_pd_fail; amdgpu_bo_fence(vm->root.base.bo, From 8db588d5d35e6ab0f8fa404c0d3eade6bbd272bc Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 7 Feb 2019 12:08:14 -0500 Subject: [PATCH 05/49] drm/amdgpu: Avoid setting off KFD eviction fences in amdgpu_vm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use FENCE_OWNER_KFD to synchronize PT/PD initialization and clearing of page table entries. This avoids triggering KFD eviction fences on the PD reservation objects of compute VMs. Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 3f33286c3cd5..6dda415bb3ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -828,7 +828,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, WARN_ON(job->ibs[0].length_dw > 64); r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv, - AMDGPU_FENCE_OWNER_UNDEFINED, false); + AMDGPU_FENCE_OWNER_KFD, false); if (r) goto error_free; @@ -1748,9 +1748,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, params.adev = adev; params.vm = vm; - /* sync to everything on unmapping */ + /* sync to everything except eviction fences on unmapping */ if (!(flags & AMDGPU_PTE_VALID)) - owner = AMDGPU_FENCE_OWNER_UNDEFINED; + owner = AMDGPU_FENCE_OWNER_KFD; if (vm->use_cpu_for_update) { /* params.src is used as flag to indicate system Memory */ From 2d086fded1021d6b482dfc705bab8f085d10a496 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 4 Feb 2019 18:17:26 -0500 Subject: [PATCH 06/49] drm/amdgpu: Simplify eviction fence handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Temporarily removing eviction fences to avoid triggering them by accident is no longer necessary due to the fence_owner logic in amdgpu_sync_resv. As a result the ef_list usage of amdgpu_amdkfd_remove_eviction_fence and amdgpu_amdkfd_add_eviction_fence are no longer needed. 
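Sketched below is what a typical call site looks like once the temporary remove/re-add dance is gone (illustrative only; example_prepare_pts is a made-up name, the real hunks follow):

    /* Before this series, code touching PDs/PTs had to do:
     *
     *   amdgpu_amdkfd_remove_eviction_fence(pd, ef, NULL, NULL);
     *   ... allocate/clear/validate page tables ...
     *   amdgpu_bo_fence(pd, &ef->base, true);   // re-add the fence
     *
     * With the waits keyed on AMDGPU_FENCE_OWNER_KFD, the eviction
     * fence is never collected into the sync object, so the
     * remove/re-add pair can simply be dropped:
     */
    static int example_prepare_pts(struct amdgpu_device *adev,
                                   struct amdgpu_vm *vm)
    {
            /* ... allocate/clear/validate page tables ... */

            return amdgpu_bo_sync_wait(vm->root.base.bo,
                                       AMDGPU_FENCE_OWNER_KFD, true);
    }
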
Signed-off-by: Felix Kuehling Acked-by: Christian König Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 134 ++---------------- 1 file changed, 11 insertions(+), 123 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 44a1581f6b8f..1921dec3df7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -204,38 +204,25 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) } -/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's +/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's * reservation object. * * @bo: [IN] Remove eviction fence(s) from this BO - * @ef: [IN] If ef is specified, then this eviction fence is removed if it + * @ef: [IN] This eviction fence is removed if it * is present in the shared list. - * @ef_list: [OUT] Returns list of eviction fences. These fences are removed - * from BO's reservation object shared list. - * @ef_count: [OUT] Number of fences in ef_list. * - * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be - * called to restore the eviction fences and to avoid memory leak. This is - * useful for shared BOs. * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held. */ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, - struct amdgpu_amdkfd_fence *ef, - struct amdgpu_amdkfd_fence ***ef_list, - unsigned int *ef_count) + struct amdgpu_amdkfd_fence *ef) { struct reservation_object *resv = bo->tbo.resv; struct reservation_object_list *old, *new; unsigned int i, j, k; - if (!ef && !ef_list) + if (!ef) return -EINVAL; - if (ef_list) { - *ef_list = NULL; - *ef_count = 0; - } - old = reservation_object_get_list(resv); if (!old) return 0; @@ -254,8 +241,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, f = rcu_dereference_protected(old->shared[i], reservation_object_held(resv)); - if ((ef && f->context == ef->base.context) || - (!ef && to_amdgpu_amdkfd_fence(f))) + if (f->context == ef->base.context) RCU_INIT_POINTER(new->shared[--j], f); else RCU_INIT_POINTER(new->shared[k++], f); @@ -263,21 +249,6 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, new->shared_max = old->shared_max; new->shared_count = k; - if (!ef) { - unsigned int count = old->shared_count - j; - - /* Alloc memory for count number of eviction fence pointers. - * Fill the ef_list array and ef_count - */ - *ef_list = kcalloc(count, sizeof(**ef_list), GFP_KERNEL); - *ef_count = count; - - if (!*ef_list) { - kfree(new); - return -ENOMEM; - } - } - /* Install the new fence list, seqcount provides the barriers */ preempt_disable(); write_seqcount_begin(&resv->seq); @@ -291,46 +262,13 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, f = rcu_dereference_protected(new->shared[i], reservation_object_held(resv)); - if (!ef) - (*ef_list)[k++] = to_amdgpu_amdkfd_fence(f); - else - dma_fence_put(f); + dma_fence_put(f); } kfree_rcu(old, rcu); return 0; } -/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's - * reservation object. - * - * @bo: [IN] Add eviction fences to this BO - * @ef_list: [IN] List of eviction fences to be added - * @ef_count: [IN] Number of fences in ef_list. - * - * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this - * function. 
- */ -static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo, - struct amdgpu_amdkfd_fence **ef_list, - unsigned int ef_count) -{ - int i; - - if (!ef_list || !ef_count) - return; - - for (i = 0; i < ef_count; i++) { - amdgpu_bo_fence(bo, &ef_list[i]->base, true); - /* Re-adding the fence takes an additional reference. Drop that - * reference. - */ - dma_fence_put(&ef_list[i]->base); - } - - kfree(ef_list); -} - static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, bool wait) { @@ -346,18 +284,8 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (ret) goto validate_fail; - if (wait) { - struct amdgpu_amdkfd_fence **ef_list; - unsigned int ef_count; - - ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list, - &ef_count); - if (ret) - goto validate_fail; - + if (wait) amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false); - amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count); - } validate_fail: return ret; @@ -444,7 +372,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, { int ret; struct kfd_bo_va_list *bo_va_entry; - struct amdgpu_bo *pd = vm->root.base.bo; struct amdgpu_bo *bo = mem->bo; uint64_t va = mem->va; struct list_head *list_bo_va = &mem->bo_va_list; @@ -484,14 +411,8 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, *p_bo_va_entry = bo_va_entry; /* Allocate new page tables if needed and validate - * them. Clearing of new page tables and validate need to wait - * on move fences. We don't want that to trigger the eviction - * fence, so remove it temporarily. + * them. */ - amdgpu_amdkfd_remove_eviction_fence(pd, - vm->process_info->eviction_fence, - NULL, NULL); - ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo)); if (ret) { pr_err("Failed to allocate pts, err=%d\n", ret); @@ -504,13 +425,9 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, goto err_alloc_pts; } - /* Add the eviction fence back */ - amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); - return 0; err_alloc_pts: - amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va); list_del(&bo_va_entry->bo_list); err_vmadd: @@ -809,24 +726,11 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, { struct amdgpu_bo_va *bo_va = entry->bo_va; struct amdgpu_vm *vm = bo_va->base.vm; - struct amdgpu_bo *pd = vm->root.base.bo; - /* Remove eviction fence from PD (and thereby from PTs too as - * they share the resv. object). Otherwise during PT update - * job (see amdgpu_vm_bo_update_mapping), eviction fence would - * get added to job->sync object and job execution would - * trigger the eviction fence. 
- */ - amdgpu_amdkfd_remove_eviction_fence(pd, - vm->process_info->eviction_fence, - NULL, NULL); amdgpu_vm_bo_unmap(adev, bo_va, entry->va); amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); - /* Add the eviction fence back */ - amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); - amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); return 0; @@ -1389,8 +1293,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( * attached */ amdgpu_amdkfd_remove_eviction_fence(mem->bo, - process_info->eviction_fence, - NULL, NULL); + process_info->eviction_fence); pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va, mem->va + bo_size * (1 + mem->aql_queue)); @@ -1617,8 +1520,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( if (mem->mapped_to_gpu_memory == 0 && !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count) amdgpu_amdkfd_remove_eviction_fence(mem->bo, - process_info->eviction_fence, - NULL, NULL); + process_info->eviction_fence); unreserve_out: unreserve_bo_and_vms(&ctx, false, false); @@ -1679,7 +1581,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, } amdgpu_amdkfd_remove_eviction_fence( - bo, mem->process_info->eviction_fence, NULL, NULL); + bo, mem->process_info->eviction_fence); list_del_init(&mem->validate_list.head); if (size) @@ -1945,16 +1847,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) amdgpu_sync_create(&sync); - /* Avoid triggering eviction fences when unmapping invalid - * userptr BOs (waits for all fences, doesn't use - * FENCE_OWNER_VM) - */ - list_for_each_entry(peer_vm, &process_info->vm_list_head, - vm_list_node) - amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo, - process_info->eviction_fence, - NULL, NULL); - ret = process_validate_vms(process_info); if (ret) goto unreserve_out; @@ -2015,10 +1907,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) ret = process_update_pds(process_info, &sync); unreserve_out: - list_for_each_entry(peer_vm, &process_info->vm_list_head, - vm_list_node) - amdgpu_bo_fence(peer_vm->root.base.bo, - &process_info->eviction_fence->base, true); ttm_eu_backoff_reservation(&ticket, &resv_list); amdgpu_sync_wait(&sync, false); amdgpu_sync_free(&sync); From 74b9b3ea0cb355d668eb0309d1ab57f403d52c69 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Tue, 5 Feb 2019 15:17:40 -0500 Subject: [PATCH 07/49] drm/amdgpu: Fix bugs in setting CP RB/MEC DOORBELL_RANGE registers CP_RB_DOORBELL_RANGE_LOWER/UPPER and CP_MEC_DOORBELL_RANGE_LOWER/UPPER are used for waking up an idle scheduler and for power gating support. Usually the first few doorbells in pci doorbell bar are used for RB and all leftover for MEC. This patch fixes the incorrect settings. Theoretically, gfx ring doorbells should come before all MEC doorbells to be consistent with the design. However, since the doorbell allocations are agreed by all and we are not free to change them, also considering the kernel MEC ring doorbells which are before gfx ring doorbells are not used often, we compromise by leaving the doorbell allocations unchanged. 
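To make the new doorbell layout easier to follow, here is a small illustrative helper (not part of the patch; example_mec_range_lower is a made-up name, and the +1/+2 offsets are taken from the hunks below):

    /* With a single GFX ring, the RB doorbell range collapses to that
     * one doorbell (LOWER == UPPER), and the MEC range starts right
     * after it: +1 on gfx v8, +2 on gfx v9 (presumably because SOC15
     * doorbells are 64-bit, i.e. two dwords per doorbell).  The MEC
     * range upper bound is left at the register's full-range mask, so
     * every doorbell after the GFX RB is usable by MEC.
     */
    static u32 example_mec_range_lower(struct amdgpu_device *adev, bool soc15)
    {
            u32 gfx_db = adev->gfx.gfx_ring[0].doorbell_index;

            return soc15 ? gfx_db + 2 : gfx_db + 1;
    }
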
Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 19 +++++++++++++++---- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 19 ++++++++++++++----- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index b8e50a34bdb3..41cf9d0224d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4223,8 +4223,8 @@ static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu adev->doorbell_index.gfx_ring0); WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); - WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, - CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); + /* There is only one GFX queue */ + WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, tmp); } static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) @@ -4646,8 +4646,19 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) { if (adev->asic_type > CHIP_TONGA) { - WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2); - WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2); + /* The first few doorbells in pci doorbell bar are for GFX RB + * rings and all the leftover for MEC. + * So CP_MEC_DOORBELL_RANGE_LOWER should be set one index after + * CP_RB_DOORBELL_RANGE_UPPER, as we assume there is only one + * GFX RB rings. + */ + u32 tmp = REG_SET_FIELD(0, CP_MEC_DOORBELL_RANGE_LOWER, + DOORBELL_RANGE_LOWER, + adev->gfx.gfx_ring[0].doorbell_index + 1); + + WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, tmp); + WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, + CP_MEC_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); } /* enable doorbells */ WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 5533f6e4f4a4..36f417fd6ba9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2631,8 +2631,8 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) DOORBELL_RANGE_LOWER, ring->doorbell_index); WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); - WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, - CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); + /* There is only one GFX queue */ + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, tmp); /* start the ring */ @@ -2995,10 +2995,19 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) /* enable the doorbell if requested */ if (ring->use_doorbell) { - WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, - (adev->doorbell_index.kiq * 2) << 2); + /* The first few doorbells in pci doorbell bar are for GFX RB + * rings and all the leftover for MEC. + * So CP_MEC_DOORBELL_RANGE_LOWER should be set one index after + * CP_RB_DOORBELL_RANGE_UPPER, as we assume there is only one + * GFX RB rings. 
+ */ + u32 tmp = REG_SET_FIELD(0, CP_MEC_DOORBELL_RANGE_LOWER, + DOORBELL_RANGE_LOWER, + adev->gfx.gfx_ring[0].doorbell_index + 2); + + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, tmp); WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, - (adev->doorbell_index.userqueue_end * 2) << 2); + CP_MEC_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); } WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, From c0d9271ecbd891cdeb0fad1edcdd99ee717a655f Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Fri, 1 Feb 2019 18:36:21 -0500 Subject: [PATCH 08/49] drm/amdgpu: Delete user queue doorbell variables They are no longer used, so delete them to avoid confusion. Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 8 -------- drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 2 -- drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 2 -- 3 files changed, 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 1cfec06f81d4..43546500ec26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -48,8 +48,6 @@ struct amdgpu_doorbell_index { uint32_t mec_ring5; uint32_t mec_ring6; uint32_t mec_ring7; - uint32_t userqueue_start; - uint32_t userqueue_end; uint32_t gfx_ring0; uint32_t sdma_engine[8]; uint32_t ih; @@ -111,8 +109,6 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT AMDGPU_VEGA20_DOORBELL_MEC_RING5 = 0x008, AMDGPU_VEGA20_DOORBELL_MEC_RING6 = 0x009, AMDGPU_VEGA20_DOORBELL_MEC_RING7 = 0x00A, - AMDGPU_VEGA20_DOORBELL_USERQUEUE_START = 0x00B, - AMDGPU_VEGA20_DOORBELL_USERQUEUE_END = 0x08A, AMDGPU_VEGA20_DOORBELL_GFX_RING0 = 0x08B, /* SDMA:256~335*/ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0 = 0x100, @@ -176,10 +172,6 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT AMDGPU_DOORBELL64_MEC_RING6 = 0x09, AMDGPU_DOORBELL64_MEC_RING7 = 0x0a, - /* User queue doorbell range (128 doorbells) */ - AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b, - AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a, - /* Graphics engine */ AMDGPU_DOORBELL64_GFX_RING0 = 0x8b, diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index 4b5d60ea3e78..62f49c895314 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -67,8 +67,6 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.mec_ring5 = AMDGPU_DOORBELL64_MEC_RING5; adev->doorbell_index.mec_ring6 = AMDGPU_DOORBELL64_MEC_RING6; adev->doorbell_index.mec_ring7 = AMDGPU_DOORBELL64_MEC_RING7; - adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL64_USERQUEUE_START; - adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL64_USERQUEUE_END; adev->doorbell_index.gfx_ring0 = AMDGPU_DOORBELL64_GFX_RING0; adev->doorbell_index.sdma_engine[0] = AMDGPU_DOORBELL64_sDMA_ENGINE0; adev->doorbell_index.sdma_engine[1] = AMDGPU_DOORBELL64_sDMA_ENGINE1; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index 53716c593b2b..1271e1702ad4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -65,8 +65,6 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.mec_ring5 = AMDGPU_VEGA20_DOORBELL_MEC_RING5; adev->doorbell_index.mec_ring6 = AMDGPU_VEGA20_DOORBELL_MEC_RING6; adev->doorbell_index.mec_ring7 = AMDGPU_VEGA20_DOORBELL_MEC_RING7; - 
adev->doorbell_index.userqueue_start = AMDGPU_VEGA20_DOORBELL_USERQUEUE_START; - adev->doorbell_index.userqueue_end = AMDGPU_VEGA20_DOORBELL_USERQUEUE_END; adev->doorbell_index.gfx_ring0 = AMDGPU_VEGA20_DOORBELL_GFX_RING0; adev->doorbell_index.sdma_engine[0] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0; adev->doorbell_index.sdma_engine[1] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1; From f7c8930d9e8b188caa53705883373c49f95fd284 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Wed, 13 Feb 2019 13:53:45 -0500 Subject: [PATCH 09/49] drm/amd/display: Fix deadlock with display during hanged ring recovery. When ring hang happens amdgpu_dm_commit_planes during flip is holding the BO reserved and then stack waiting for fences to signal in reservation_object_wait_timeout_rcu (which won't signal because there was a hnag). Then when we try to shutdown display block during reset recovery from drm_atomic_helper_suspend we also try to reserve the BO from dm_plane_helper_cleanup_fb ending in deadlock. Also remove useless WARN_ON Signed-off-by: Andrey Grodzovsky Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ad31d7b9912f..c7b66fa4a13e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4698,14 +4698,21 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, */ abo = gem_to_amdgpu_bo(fb->obj[0]); r = amdgpu_bo_reserve(abo, true); - if (unlikely(r != 0)) { + if (unlikely(r != 0)) DRM_ERROR("failed to reserve buffer before flip\n"); - WARN_ON(1); - } - /* Wait for all fences on this FB */ - WARN_ON(reservation_object_wait_timeout_rcu(abo->tbo.resv, true, false, - MAX_SCHEDULE_TIMEOUT) < 0); + /* + * Wait for all fences on this FB. Do limited wait to avoid + * deadlock during GPU reset when this fence will not signal + * but we hold reservation lock for the BO. + */ + r = reservation_object_wait_timeout_rcu(abo->tbo.resv, + true, false, + msecs_to_jiffies(5000)); + if (unlikely(r == 0)) + DRM_ERROR("Waiting for fences timed out."); + + amdgpu_bo_get_tiling_flags(abo, &tiling_flags); From 4d3d228e102cce18a1de6ff66761543267882947 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 13 Feb 2019 15:07:06 -0500 Subject: [PATCH 10/49] drm/amdgpu/powerplay: declare firmware for CI cards Missing firmware declaration caused firmware requirement to not be noted by the module and may cause firmware to not be available in initrd. 
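The fix is just the missing declarations for the blobs the CI code already loads (the same lines appear in the hunk below); with them in place the requirement shows up in the module metadata, so initramfs tooling can pick it up:

    /* As added below; `modinfo amdgpu` then lists these as firmware:
     * entries, which is what initramfs generators key off.
     */
    MODULE_FIRMWARE("amdgpu/bonaire_smc.bin");
    MODULE_FIRMWARE("amdgpu/bonaire_k_smc.bin");
    MODULE_FIRMWARE("amdgpu/hawaii_smc.bin");
    MODULE_FIRMWARE("amdgpu/hawaii_k_smc.bin");
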
Fixes: bc4b539e385088 "drm/amdgpu: remove old CI DPM implementation" Reviewed-by: James Zhu Tested-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c index a6edd5df33b0..4240aeec9000 100644 --- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c +++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c @@ -29,6 +29,10 @@ #include #include "smumgr.h" +MODULE_FIRMWARE("amdgpu/bonaire_smc.bin"); +MODULE_FIRMWARE("amdgpu/bonaire_k_smc.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_smc.bin"); +MODULE_FIRMWARE("amdgpu/hawaii_k_smc.bin"); MODULE_FIRMWARE("amdgpu/topaz_smc.bin"); MODULE_FIRMWARE("amdgpu/topaz_k_smc.bin"); MODULE_FIRMWARE("amdgpu/tonga_smc.bin"); From 9b49c19766a854feefa7fe5288c1915658a939f5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 12 Feb 2019 14:05:08 +0000 Subject: [PATCH 11/49] drm/amdgpu: fix several indentation issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are several statements that are incorrectly indented. Fix these. Reviewed-by: Christian König Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +- drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 2 +- drivers/gpu/drm/amd/amdgpu/si.c | 2 +- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c | 2 +- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index bc62bf41b7e9..b65e18101108 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -207,7 +207,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) if (!r) { acpi_status = amdgpu_acpi_init(adev); if (acpi_status) - dev_dbg(&dev->pdev->dev, + dev_dbg(&dev->pdev->dev, "Error during ACPI methods call\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index db443ec53d3a..bea32f076b91 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2980,7 +2980,7 @@ static int dce_v6_0_pageflip_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - unsigned long flags; + unsigned long flags; unsigned crtc_id; struct amdgpu_crtc *amdgpu_crtc; struct amdgpu_flip_work *works; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index e347b407bd03..727370ccd854 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -32,7 +32,7 @@ static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev) { - u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); + u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 79c1a9bbcc21..9d8df68893b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1436,7 +1436,7 @@ static int si_common_early_init(void *handle) AMD_CG_SUPPORT_UVD_MGCG | AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_MGCG; - adev->pg_flags = 0; + adev->pg_flags = 0; adev->external_rev_id = 
(adev->rev_id == 0) ? 1 : (adev->rev_id == 1) ? 5 : 6; break; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c index d138ddae563d..58f5589aaf12 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c @@ -1211,7 +1211,7 @@ int smu7_power_control_set_level(struct pp_hwmgr *hwmgr) hwmgr->platform_descriptor.TDPAdjustment : (-1 * hwmgr->platform_descriptor.TDPAdjustment); - if (hwmgr->chip_id > CHIP_TONGA) + if (hwmgr->chip_id > CHIP_TONGA) target_tdp = ((100 + adjust_percent) * (int)(cac_table->usTDP * 256)) / 100; else target_tdp = ((100 + adjust_percent) * (int)(cac_table->usConfigurableTDP * 256)) / 100; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 0769b1ec562b..aad79affb081 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -3456,7 +3456,7 @@ static int vega20_apply_clocks_adjust_rules(struct pp_hwmgr *hwmgr) disable_mclk_switching = ((1 < hwmgr->display_config->num_display) && !hwmgr->display_config->multi_monitor_in_sync) || vblank_too_short; - latency = hwmgr->display_config->dce_tolerable_mclk_in_active_latency; + latency = hwmgr->display_config->dce_tolerable_mclk_in_active_latency; /* gfxclk */ dpm_table = &(data->dpm_table.gfx_table); From 7a5e0d9ab6d9564f5f82d7a16355118aadd25ef8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 11 Feb 2019 16:49:47 -0500 Subject: [PATCH 12/49] drm/amdgpu: don't clamp debugfs register access to the BAR size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This prevents us from accessing extended registers in tools like umr. The register access functions already check if the offset is beyond the BAR size and use the indirect accessors with locking so this is safe. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index dd9a4fb9ce39..4ae3ff9a1d4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -158,9 +158,6 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, while (size) { uint32_t value; - if (*pos > adev->rmmio_size) - goto end; - if (read) { value = RREG32(*pos >> 2); r = put_user(value, (uint32_t *)buf); From 1decbf6bb0b4dc56c9da6c5e57b994ebfc2be3aa Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Wed, 30 Jan 2019 02:53:19 +0100 Subject: [PATCH 13/49] drm/sched: Fix entities with 0 rqs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some blocks in amdgpu can have 0 rqs. Job creation already fails with -ENOENT when entity->rq is NULL, so jobs cannot be pushed. Without a rq there is no scheduler to pop jobs, and rq selection already does the right thing with a list of length 0. So the operations we need to fix are: - Creation, do not set rq to rq_list[0] if the list can have length 0. - Do not flush any jobs when there is no rq. - On entity destruction handle the rq = NULL case. - on set_priority, do not try to change the rq if it is NULL. 
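A compact sketch of the pattern the hunks below apply throughout sched_entity.c (illustrative; example_flush is a made-up name):

    /* Every path that used to assume entity->rq != NULL now handles the
     * zero-rq case explicitly: creation leaves rq NULL when the list is
     * empty, flush/fini have nothing to wait on or park, and
     * set_priority leaves the (nonexistent) current rq alone.
     */
    static long example_flush(struct drm_sched_entity *entity, long timeout)
    {
            if (!entity->rq)        /* entity created with num_rq_list == 0 */
                    return 0;       /* no scheduler, nothing to flush */

            /* ... normal flush path using entity->rq->sched ... */
            return timeout;
    }
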
Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/scheduler/sched_entity.c | 39 ++++++++++++++++-------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 4463d3826ecb..8e31b6628d09 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -52,12 +52,12 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, { int i; - if (!(entity && rq_list && num_rq_list > 0 && rq_list[0])) + if (!(entity && rq_list && (num_rq_list == 0 || rq_list[0]))) return -EINVAL; memset(entity, 0, sizeof(struct drm_sched_entity)); INIT_LIST_HEAD(&entity->list); - entity->rq = rq_list[0]; + entity->rq = NULL; entity->guilty = guilty; entity->num_rq_list = num_rq_list; entity->rq_list = kcalloc(num_rq_list, sizeof(struct drm_sched_rq *), @@ -67,6 +67,10 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, for (i = 0; i < num_rq_list; ++i) entity->rq_list[i] = rq_list[i]; + + if (num_rq_list) + entity->rq = rq_list[0]; + entity->last_scheduled = NULL; spin_lock_init(&entity->rq_lock); @@ -165,6 +169,9 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout) struct task_struct *last_user; long ret = timeout; + if (!entity->rq) + return 0; + sched = entity->rq->sched; /** * The client will not queue more IBs during this fini, consume existing @@ -264,20 +271,24 @@ static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity) */ void drm_sched_entity_fini(struct drm_sched_entity *entity) { - struct drm_gpu_scheduler *sched; + struct drm_gpu_scheduler *sched = NULL; - sched = entity->rq->sched; - drm_sched_rq_remove_entity(entity->rq, entity); + if (entity->rq) { + sched = entity->rq->sched; + drm_sched_rq_remove_entity(entity->rq, entity); + } /* Consumption of existing IBs wasn't completed. Forcefully * remove them here. */ if (spsc_queue_peek(&entity->job_queue)) { - /* Park the kernel for a moment to make sure it isn't processing - * our enity. - */ - kthread_park(sched->thread); - kthread_unpark(sched->thread); + if (sched) { + /* Park the kernel for a moment to make sure it isn't processing + * our enity. + */ + kthread_park(sched->thread); + kthread_unpark(sched->thread); + } if (entity->dependency) { dma_fence_remove_callback(entity->dependency, &entity->cb); @@ -362,9 +373,11 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity, for (i = 0; i < entity->num_rq_list; ++i) drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority); - drm_sched_rq_remove_entity(entity->rq, entity); - drm_sched_entity_set_rq_priority(&entity->rq, priority); - drm_sched_rq_add_entity(entity->rq, entity); + if (entity->rq) { + drm_sched_rq_remove_entity(entity->rq, entity); + drm_sched_entity_set_rq_priority(&entity->rq, priority); + drm_sched_rq_add_entity(entity->rq, entity); + } spin_unlock(&entity->rq_lock); } From 2a84e48e9712ea8591a10dd59d59ccab3d54efd6 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Wed, 30 Jan 2019 02:53:20 +0100 Subject: [PATCH 14/49] drm/amdgpu: Only add rqs for initialized rings. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I don't see another way to figure out if a ring is initialized if the hardware block might not be initialized. Entities have been fixed up to handle num_rqs = 0. 
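Taken together with the previous patch, the intended flow is roughly the fragment below (variable names match the hunk that follows):

    /* Only rings that were actually brought up contribute a run queue;
     * ring->adev serves as the "was this ring initialized" marker
     * (presumably because it is only set during ring init).  num_rqs
     * can legitimately end up 0 here, which the previous patch taught
     * drm_sched_entity_init() and friends to accept.
     */
    unsigned int num_rqs = 0;

    for (j = 0; j < num_rings; ++j) {
            if (!rings[j]->adev)
                    continue;

            rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
    }
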
Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index d85184b5b35c..7b526593eb77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -124,6 +124,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS]; unsigned num_rings; + unsigned num_rqs = 0; switch (i) { case AMDGPU_HW_IP_GFX: @@ -166,12 +167,16 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, break; } - for (j = 0; j < num_rings; ++j) - rqs[j] = &rings[j]->sched.sched_rq[priority]; + for (j = 0; j < num_rings; ++j) { + if (!rings[j]->adev) + continue; + + rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority]; + } for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) r = drm_sched_entity_init(&ctx->entities[i][j].entity, - rqs, num_rings, &ctx->guilty); + rqs, num_rqs, &ctx->guilty); if (r) goto error_cleanup_entities; } From 021830d24ba55a578f602979274965344c8e6284 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Wed, 30 Jan 2019 02:53:21 +0100 Subject: [PATCH 15/49] drm/amdgpu: Check if fd really is an amdgpu fd. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we interpret the file private data as drm & amdgpu data while it might not be, possibly allowing one to get memory corruption. Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 16 ++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 10 +++++++--- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9efa681d0878..8d0d7f3dd5fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -411,6 +411,8 @@ struct amdgpu_fpriv { struct amdgpu_ctx_mgr ctx_mgr; }; +int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); + int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, struct amdgpu_ib *ib); void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 7f3aa7b7e1d8..637f927e3675 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1178,6 +1178,22 @@ static const struct file_operations amdgpu_driver_kms_fops = { #endif }; +int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv) +{ + struct drm_file *file; + + if (!filp) + return -EINVAL; + + if (filp->f_op != &amdgpu_driver_kms_fops) { + return -EINVAL; + } + + file = filp->private_data; + *fpriv = file->driver_priv; + return 0; +} + static bool amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, bool in_vblank_irq, int *vpos, int *hpos, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 1cafe8d83a4d..0b70410488b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -54,16 +54,20 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev, enum drm_sched_priority 
priority) { struct file *filp = fget(fd); - struct drm_file *file; struct amdgpu_fpriv *fpriv; struct amdgpu_ctx *ctx; uint32_t id; + int r; if (!filp) return -EINVAL; - file = filp->private_data; - fpriv = file->driver_priv; + r = amdgpu_file_to_fpriv(filp, &fpriv); + if (r) { + fput(filp); + return r; + } + idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id) amdgpu_ctx_priority_override(ctx, priority); From b5bb37eddb63b16b7ab959598d108b1c444be77d Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Wed, 30 Jan 2019 02:53:22 +0100 Subject: [PATCH 16/49] drm/amdgpu: Add command to override the context priority. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Given a master fd we can then override the priority of the context in another fd. Using these overrides was recommended by Christian instead of trying to submit from a master fd, and I am adding a way to override a single context instead of the entire process so we can only upgrade a single Vulkan queue and not effectively the entire process. Reused the flags field as it was checked to be 0 anyways, so nothing used it. This is source-incompatible (due to the name change), but ABI compatible. Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 41 ++++++++++++++++++++++- include/uapi/drm/amdgpu_drm.h | 3 +- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 0b70410488b6..0767a93e4d91 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -76,6 +76,39 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev, return 0; } +static int amdgpu_sched_context_priority_override(struct amdgpu_device *adev, + int fd, + unsigned ctx_id, + enum drm_sched_priority priority) +{ + struct file *filp = fget(fd); + struct amdgpu_fpriv *fpriv; + struct amdgpu_ctx *ctx; + int r; + + if (!filp) + return -EINVAL; + + r = amdgpu_file_to_fpriv(filp, &fpriv); + if (r) { + fput(filp); + return r; + } + + ctx = amdgpu_ctx_get(fpriv, ctx_id); + + if (!ctx) { + fput(filp); + return -EINVAL; + } + + amdgpu_ctx_priority_override(ctx, priority); + amdgpu_ctx_put(ctx); + fput(filp); + + return 0; +} + int amdgpu_sched_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { @@ -85,7 +118,7 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data, int r; priority = amdgpu_to_sched_priority(args->in.priority); - if (args->in.flags || priority == DRM_SCHED_PRIORITY_INVALID) + if (priority == DRM_SCHED_PRIORITY_INVALID) return -EINVAL; switch (args->in.op) { @@ -94,6 +127,12 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data, args->in.fd, priority); break; + case AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE: + r = amdgpu_sched_context_priority_override(adev, + args->in.fd, + args->in.ctx_id, + priority); + break; default: DRM_ERROR("Invalid sched op specified: %d\n", args->in.op); r = -EINVAL; diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 2acbc8bc465b..4a53f6cfa034 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -272,13 +272,14 @@ union drm_amdgpu_vm { /* sched ioctl */ #define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE 1 +#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE 2 struct drm_amdgpu_sched_in { /* AMDGPU_SCHED_OP_* */ __u32 op; __u32 fd; __s32 priority; - __u32 flags; + 
__u32 ctx_id; }; union drm_amdgpu_sched { From 478168e1cbd0b3df52ca223e1d7af3c257588166 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 14 Feb 2019 15:54:57 -0500 Subject: [PATCH 17/49] drm/amdgpu: remove some old unused dpm helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Carried over from radeon, but no longer used. Reviewed-by: Michel Dänzer Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c | 88 ------------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h | 9 --- 2 files changed, 97 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c index 1c4595562f8f..344967df3137 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.c @@ -184,61 +184,6 @@ u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev) return vrefresh; } -void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b, - u32 *p, u32 *u) -{ - u32 b_c = 0; - u32 i_c; - u32 tmp; - - i_c = (i * r_c) / 100; - tmp = i_c >> p_b; - - while (tmp) { - b_c++; - tmp >>= 1; - } - - *u = (b_c + 1) / 2; - *p = i_c / (1 << (2 * (*u))); -} - -int amdgpu_calculate_at(u32 t, u32 h, u32 fh, u32 fl, u32 *tl, u32 *th) -{ - u32 k, a, ah, al; - u32 t1; - - if ((fl == 0) || (fh == 0) || (fl > fh)) - return -EINVAL; - - k = (100 * fh) / fl; - t1 = (t * (k - 100)); - a = (1000 * (100 * h + t1)) / (10000 + (t1 / 100)); - a = (a + 5) / 10; - ah = ((a * t) + 5000) / 10000; - al = a - ah; - - *th = t - ah; - *tl = t + al; - - return 0; -} - -bool amdgpu_is_uvd_state(u32 class, u32 class2) -{ - if (class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE) - return true; - if (class & ATOM_PPLIB_CLASSIFICATION_HD2STATE) - return true; - if (class & ATOM_PPLIB_CLASSIFICATION_HDSTATE) - return true; - if (class & ATOM_PPLIB_CLASSIFICATION_SDSTATE) - return true; - if (class2 & ATOM_PPLIB_CLASSIFICATION2_MVC) - return true; - return false; -} - bool amdgpu_is_internal_thermal_sensor(enum amdgpu_int_thermal_type sensor) { switch (sensor) { @@ -949,39 +894,6 @@ enum amdgpu_pcie_gen amdgpu_get_pcie_gen_support(struct amdgpu_device *adev, return AMDGPU_PCIE_GEN1; } -u16 amdgpu_get_pcie_lane_support(struct amdgpu_device *adev, - u16 asic_lanes, - u16 default_lanes) -{ - switch (asic_lanes) { - case 0: - default: - return default_lanes; - case 1: - return 1; - case 2: - return 2; - case 4: - return 4; - case 8: - return 8; - case 12: - return 12; - case 16: - return 16; - } -} - -u8 amdgpu_encode_pci_lane_width(u32 lanes) -{ - u8 encoded_lanes[] = { 0, 1, 2, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 6 }; - - if (lanes > 16) - return 0; - - return encoded_lanes[lanes]; -} - struct amd_vce_state* amdgpu_get_vce_clock_state(void *handle, u32 idx) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index 2f61e9edb1c1..e871e022c129 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -486,10 +486,6 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev, u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev); u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev); void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev); -bool amdgpu_is_uvd_state(u32 class, u32 class2); -void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b, - u32 *p, u32 *u); -int amdgpu_calculate_at(u32 t, u32 h, u32 fh, u32 fl, u32 *tl, u32 *th); bool amdgpu_is_internal_thermal_sensor(enum amdgpu_int_thermal_type 
sensor); @@ -505,11 +501,6 @@ enum amdgpu_pcie_gen amdgpu_get_pcie_gen_support(struct amdgpu_device *adev, enum amdgpu_pcie_gen asic_gen, enum amdgpu_pcie_gen default_gen); -u16 amdgpu_get_pcie_lane_support(struct amdgpu_device *adev, - u16 asic_lanes, - u16 default_lanes); -u8 amdgpu_encode_pci_lane_width(u32 lanes); - struct amd_vce_state* amdgpu_get_vce_clock_state(void *handle, u32 idx); From 7452394310a9eac283fdbb31261b19e50fa3e546 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Tue, 15 Jan 2019 18:51:27 -0500 Subject: [PATCH 18/49] drm/amdkfd: Move a constant definition around The similar definitions should be consecutive. Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 12b66330fc6d..e5ebcca7f031 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -97,17 +97,18 @@ #define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2) #define KFD_CWSR_TMA_OFFSET PAGE_SIZE +#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \ + (KFD_MAX_NUM_OF_PROCESSES * \ + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) + +#define KFD_KERNEL_QUEUE_SIZE 2048 + /* * Kernel module parameter to specify maximum number of supported queues per * device */ extern int max_num_of_queues_per_device; -#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \ - (KFD_MAX_NUM_OF_PROCESSES * \ - KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) - -#define KFD_KERNEL_QUEUE_SIZE 2048 /* Kernel module parameter to specify the scheduling policy */ extern int sched_policy; From 828845b7c86c5338f6ca02aaaaf4b525718f31b2 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 13 Feb 2019 13:13:50 -0500 Subject: [PATCH 19/49] drm/amdgpu: Add first_non_cp and last_non_cp in amdgpu_doorbell_index They will be used to inform KFD the doorbell range not usable for CP. 
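For context, a sketch of how these two fields are meant to flow through to KFD (the actual hand-off happens in the next patch; the Vega20 values come from the enum below):

    /* On Vega20 the non-CP window is 0x100 (SDMA engine 0) through
     * 0x18F (VCE ring 6/7), i.e. the indices routed to SDMA/IH/VCN.
     * amdgpu passes the two bounds to KFD, which must keep CP user
     * queue doorbells outside that window.
     */
    gpu_resources.non_cp_doorbells_start = adev->doorbell_index.first_non_cp;
    gpu_resources.non_cp_doorbells_end   = adev->doorbell_index.last_non_cp;
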
Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 9 +++++++++ drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 4 ++++ 3 files changed, 17 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 43546500ec26..5587fac671bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -69,6 +69,8 @@ struct amdgpu_doorbell_index { uint32_t vce_ring6_7; } uvd_vce; }; + uint32_t first_non_cp; + uint32_t last_non_cp; uint32_t max_assignment; /* Per engine SDMA doorbell size in dword */ uint32_t sdma_doorbell_range; @@ -139,6 +141,10 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3 = 0x18D, AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5 = 0x18E, AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7 = 0x18F, + + AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0, + AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7, + AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x18F, AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF } AMDGPU_VEGA20_DOORBELL_ASSIGNMENT; @@ -214,6 +220,9 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE, AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF, + AMDGPU_DOORBELL64_FIRST_NON_CP = AMDGPU_DOORBELL64_sDMA_ENGINE0, + AMDGPU_DOORBELL64_LAST_NON_CP = AMDGPU_DOORBELL64_VCE_RING6_7, + AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF, AMDGPU_DOORBELL64_INVALID = 0xFFFF } AMDGPU_DOORBELL64_ASSIGNMENT; diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index 62f49c895314..5e9e53143a8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -79,6 +79,10 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_DOORBELL64_VCE_RING2_3; adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_DOORBELL64_VCE_RING4_5; adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_DOORBELL64_VCE_RING6_7; + + adev->doorbell_index.first_non_cp = AMDGPU_DOORBELL64_FIRST_NON_CP; + adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL64_LAST_NON_CP; + /* In unit of dword doorbell */ adev->doorbell_index.max_assignment = AMDGPU_DOORBELL64_MAX_ASSIGNMENT << 1; adev->doorbell_index.sdma_doorbell_range = 4; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index 1271e1702ad4..fb6398e38be9 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -83,6 +83,10 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3; adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5; adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7; + + adev->doorbell_index.first_non_cp = AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP; + adev->doorbell_index.last_non_cp = AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP; + adev->doorbell_index.max_assignment = AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT << 1; adev->doorbell_index.sdma_doorbell_range = 20; } From 1f86805adc3432e92e7d87e1ff5da9826ef56eab Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 13 Feb 2019 13:15:05 -0500 Subject: [PATCH 20/49] drm/amdkfd: Fix bugs regarding CP queue doorbell mask on SOC15 
Reserved doorbells for SDMA IH and VCN were not properly masked out when allocating doorbells for CP user queues. This patch fixed that. Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 16 ++++++++++++---- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 11 +++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 14 +++++++++----- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 15 ++++++--------- 4 files changed, 38 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index e957e42c539a..30e2b371578e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -196,11 +196,19 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) gpu_resources.sdma_doorbell[1][i+1] = adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1); } - /* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for - * SDMA, IH and VCN. So don't use them for the CP. + + /* Since SOC15, BIF starts to statically use the + * lower 12 bits of doorbell addresses for routing + * based on settings in registers like + * SDMA0_DOORBELL_RANGE etc.. + * In order to route a doorbell to CP engine, the lower + * 12 bits of its address has to be outside the range + * set for SDMA, VCN, and IH blocks. */ - gpu_resources.reserved_doorbell_mask = 0x1e0; - gpu_resources.reserved_doorbell_val = 0x0e0; + gpu_resources.non_cp_doorbells_start = + adev->doorbell_index.first_non_cp; + gpu_resources.non_cp_doorbells_end = + adev->doorbell_index.last_non_cp; kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index e5ebcca7f031..0eeee3c6d6dc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -103,6 +103,17 @@ #define KFD_KERNEL_QUEUE_SIZE 2048 +/* + * 512 = 0x200 + * The doorbell index distance between SDMA RLC (2*i) and (2*i+1) in the + * same SDMA engine on SOC15, which has 8-byte doorbells for SDMA. + * 512 8-byte doorbell distance (i.e. one page away) ensures that SDMA RLC + * (2*i+1) doorbells (in terms of the lower 12 bit address) lie exactly in + * the OFFSET and SIZE set in registers like BIF_SDMA0_DOORBELL_RANGE. + */ +#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512 + + /* * Kernel module parameter to specify maximum number of supported queues per * device diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 80b36e860a0a..4bdae78bab8e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -607,13 +607,17 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd, if (!qpd->doorbell_bitmap) return -ENOMEM; - /* Mask out any reserved doorbells */ - for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++) - if ((dev->shared_resources.reserved_doorbell_mask & i) == - dev->shared_resources.reserved_doorbell_val) { + /* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. 
*/ + for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) { + if (i >= dev->shared_resources.non_cp_doorbells_start + && i <= dev->shared_resources.non_cp_doorbells_end) { set_bit(i, qpd->doorbell_bitmap); - pr_debug("reserved doorbell 0x%03x\n", i); + set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET, + qpd->doorbell_bitmap); + pr_debug("reserved doorbell 0x%03x and 0x%03x\n", i, + i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET); } + } return 0; } diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 83d960110d23..0b6b34f4e5a1 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -140,17 +140,14 @@ struct kgd2kfd_shared_resources { /* Doorbell assignments (SOC15 and later chips only). Only * specific doorbells are routed to each SDMA engine. Others * are routed to IH and VCN. They are not usable by the CP. - * - * Any doorbell number D that satisfies the following condition - * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val - * - * KFD currently uses 1024 (= 0x3ff) doorbells per process. If - * doorbells 0x0e0-0x0ff and 0x2e0-0x2ff are reserved, that means - * mask would be set to 0x1e0 and val set to 0x0e0. */ unsigned int sdma_doorbell[2][8]; - unsigned int reserved_doorbell_mask; - unsigned int reserved_doorbell_val; + + /* From SOC15 onward, the doorbell index range not usable for CP + * queues. + */ + uint32_t non_cp_doorbells_start; + uint32_t non_cp_doorbells_end; /* Base address of doorbell aperture. */ phys_addr_t doorbell_physical_address; From 234441dd49bcd917d0acd23290d1c49f332a816b Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Wed, 9 Jan 2019 23:31:14 -0500 Subject: [PATCH 21/49] drm/amdkfd: Optimize out sdma doorbell array in kgd2kfd_shared_resources We can directly calculate sdma doorbell indexes in the process doorbell pages through the doorbell_index structure in amdgpu_device, so no need to cache them in kgd2kfd_shared_resources any more. This alleviates the adaptation needs when new SDMA configurations are introduced. 
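As a minimal sketch of the arithmetic the KFD side switches to (the real code is in allocate_doorbell() below): the doorbell index for an SDMA queue is the engine's base index, plus one KFD_QUEUE_DOORBELL_MIRROR_OFFSET (512 doorbells) for odd queue ids, plus the queue pair index. The standalone helper and the sample engine bases are illustrative only.

  #include <stdint.h>
  #include <stdio.h>

  #define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512

  /* engine base + 512 for odd queue ids + the queue pair index */
  static uint32_t sdma_doorbell_id(const uint32_t *sdma_doorbell_idx,
                                   uint32_t engine_id, uint32_t queue_id)
  {
          return sdma_doorbell_idx[engine_id] +
                 (queue_id & 1) * KFD_QUEUE_DOORBELL_MIRROR_OFFSET +
                 (queue_id >> 1);
  }

  int main(void)
  {
          uint32_t base[2] = { 0x100, 0x10a }; /* hypothetical engine bases */

          printf("engine 0, queue 0 -> 0x%x\n",
                 (unsigned)sdma_doorbell_id(base, 0, 0));
          printf("engine 0, queue 1 -> 0x%x\n",
                 (unsigned)sdma_doorbell_id(base, 0, 1));
          return 0;
  }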
Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 41 +++++-------------- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 16 +++++--- .../gpu/drm/amd/include/kgd_kfd_interface.h | 4 +- 3 files changed, 23 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 30e2b371578e..fe1d7368c1e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -131,7 +131,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { - int i, n; + int i; int last_valid_bit; if (adev->kfd.dev) { @@ -142,7 +142,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) .gpuvm_size = min(adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT, AMDGPU_GMC_HOLE_START), - .drm_render_minor = adev->ddev->render->index + .drm_render_minor = adev->ddev->render->index, + .sdma_doorbell_idx = adev->doorbell_index.sdma_engine, + }; /* this is going to have a few of the MSBs set that we need to @@ -172,31 +174,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) &gpu_resources.doorbell_aperture_size, &gpu_resources.doorbell_start_offset); - if (adev->asic_type < CHIP_VEGA10) { - kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); - return; - } - - n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8; - - for (i = 0; i < n; i += 2) { - /* On SOC15 the BIF is involved in routing - * doorbells using the low 12 bits of the - * address. Communicate the assignments to - * KFD. KFD uses two doorbell pages per - * process in case of 64-bit doorbells so we - * can use each doorbell assignment twice. - */ - gpu_resources.sdma_doorbell[0][i] = - adev->doorbell_index.sdma_engine[0] + (i >> 1); - gpu_resources.sdma_doorbell[0][i+1] = - adev->doorbell_index.sdma_engine[0] + 0x200 + (i >> 1); - gpu_resources.sdma_doorbell[1][i] = - adev->doorbell_index.sdma_engine[1] + (i >> 1); - gpu_resources.sdma_doorbell[1][i+1] = - adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1); - } - /* Since SOC15, BIF starts to statically use the * lower 12 bits of doorbell addresses for routing * based on settings in registers like @@ -205,10 +182,12 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) * 12 bits of its address has to be outside the range * set for SDMA, VCN, and IH blocks. */ - gpu_resources.non_cp_doorbells_start = - adev->doorbell_index.first_non_cp; - gpu_resources.non_cp_doorbells_end = - adev->doorbell_index.last_non_cp; + if (adev->asic_type >= CHIP_VEGA10) { + gpu_resources.non_cp_doorbells_start = + adev->doorbell_index.first_non_cp; + gpu_resources.non_cp_doorbells_end = + adev->doorbell_index.last_non_cp; + } kgd2kfd_device_init(adev->kfd.dev, &gpu_resources); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 8372556b52eb..c6c9530e704e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -134,12 +134,18 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) */ q->doorbell_id = q->properties.queue_id; } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - /* For SDMA queues on SOC15, use static doorbell - * assignments based on the engine and queue. 
+ /* For SDMA queues on SOC15 with 8-byte doorbell, use static + * doorbell assignments based on the engine and queue id. + * The doobell index distance between RLC (2*i) and (2*i+1) + * for a SDMA engine is 512. */ - q->doorbell_id = dev->shared_resources.sdma_doorbell - [q->properties.sdma_engine_id] - [q->properties.sdma_queue_id]; + uint32_t *idx_offset = + dev->shared_resources.sdma_doorbell_idx; + + q->doorbell_id = idx_offset[q->properties.sdma_engine_id] + + (q->properties.sdma_queue_id & 1) + * KFD_QUEUE_DOORBELL_MIRROR_OFFSET + + (q->properties.sdma_queue_id >> 1); } else { /* For CP queues on SOC15 reserve a free doorbell ID */ unsigned int found; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 0b6b34f4e5a1..5f3c10ebff08 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -137,11 +137,11 @@ struct kgd2kfd_shared_resources { /* Bit n == 1 means Queue n is available for KFD */ DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES); - /* Doorbell assignments (SOC15 and later chips only). Only + /* SDMA doorbell assignments (SOC15 and later chips only). Only * specific doorbells are routed to each SDMA engine. Others * are routed to IH and VCN. They are not usable by the CP. */ - unsigned int sdma_doorbell[2][8]; + uint32_t *sdma_doorbell_idx; /* From SOC15 onward, the doorbell index range not usable for CP * queues. From 7ac65f42769f93d74c0b68ef1b054e01b971dba1 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 15 Feb 2019 10:50:23 -0600 Subject: [PATCH 22/49] drm/amd/display/dc/bios_parser2: Mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index a1c56f29cfeb..fd5266a58297 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -265,6 +265,7 @@ static struct atom_display_object_path_v2 *get_bios_object( && id.enum_id == obj_id.enum_id) return &bp->object_info_tbl.v1_4->display_path[i]; } + /* fall through */ case OBJECT_TYPE_CONNECTOR: case OBJECT_TYPE_GENERIC: /* Both Generic and Connector Object ID @@ -277,6 +278,7 @@ static struct atom_display_object_path_v2 *get_bios_object( && id.enum_id == obj_id.enum_id) return &bp->object_info_tbl.v1_4->display_path[i]; } + /* fall through */ default: return NULL; } From 8328691d3b882d6b34d0928633efd4dbc1249c9d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 15 Feb 2019 11:10:31 -0600 Subject: [PATCH 23/49] drm/radeon/si_dpm: Mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. 
Silva Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/si_dpm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 0a785ef0ab66..c9f6cb77e857 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -5762,10 +5762,12 @@ static void si_request_link_speed_change_before_state_change(struct radeon_devic si_pi->force_pcie_gen = RADEON_PCIE_GEN2; if (current_link_speed == RADEON_PCIE_GEN2) break; + /* fall through */ case RADEON_PCIE_GEN2: if (radeon_acpi_pcie_performance_request(rdev, PCIE_PERF_REQ_PECI_GEN2, false) == 0) break; #endif + /* fall through */ default: si_pi->force_pcie_gen = si_get_current_pcie_speed(rdev); break; From c81e5efeec48bd9c8dab08554d0209e5ea6cca1c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 15 Feb 2019 10:54:42 -0600 Subject: [PATCH 24/49] drm/amd/display/dce_mem_input: Mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Warning level 3 was used: -Wimplicit-fallthrough=3 Notice that, in this particular case, the code comment is modified in accordance with what GCC is expecting to find. This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c index 85686d917636..a24a2bda8656 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c @@ -479,7 +479,7 @@ static void program_grph_pixel_format( case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F: sign = 1; floating = 1; - /* no break */ + /* fall through */ case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: /* shouldn't this get float too? */ case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616: grph_depth = 3; From e38545908afb324f32790a8ef903f6a0a6df60bb Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 15 Feb 2019 10:57:17 -0600 Subject: [PATCH 25/49] drm/amd/powerplay/smu7_hwmgr: Mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index c8f5c00dd1e7..48187acac59e 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -3681,10 +3681,12 @@ static int smu7_request_link_speed_change_before_state_change( data->force_pcie_gen = PP_PCIEGen2; if (current_link_speed == PP_PCIEGen2) break; + /* fall through */ case PP_PCIEGen2: if (0 == amdgpu_acpi_pcie_performance_request(hwmgr->adev, PCIE_PERF_REQ_GEN2, false)) break; #endif + /* fall through */ default: data->force_pcie_gen = smu7_get_current_pcie_speed(hwmgr); break; From ecdadace81f2140f1de469c0d0b74a34ebbd896e Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. 
Silva" Date: Fri, 15 Feb 2019 11:08:18 -0600 Subject: [PATCH 26/49] drm/radeon/ci_dpm: Mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/ci_dpm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index a97294ac96d5..a12439266bb0 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -4869,10 +4869,12 @@ static void ci_request_link_speed_change_before_state_change(struct radeon_devic pi->force_pcie_gen = RADEON_PCIE_GEN2; if (current_link_speed == RADEON_PCIE_GEN2) break; + /* fall through */ case RADEON_PCIE_GEN2: if (radeon_acpi_pcie_performance_request(rdev, PCIE_PERF_REQ_PECI_GEN2, false) == 0) break; #endif + /* fall through */ default: pi->force_pcie_gen = ci_get_current_pcie_speed(rdev); break; From 2f54a0412f31c1312a105d6c02adba9b2ae41c64 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 15 Feb 2019 10:46:43 -0600 Subject: [PATCH 27/49] drm/amdgpu/si_dpm: Mark expected switch fall-throughs In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si_dpm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index da58040fdbdc..41e01a7f57a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -6216,10 +6216,12 @@ static void si_request_link_speed_change_before_state_change(struct amdgpu_devic si_pi->force_pcie_gen = AMDGPU_PCIE_GEN2; if (current_link_speed == AMDGPU_PCIE_GEN2) break; + /* fall through */ case AMDGPU_PCIE_GEN2: if (amdgpu_acpi_pcie_performance_request(adev, PCIE_PERF_REQ_PECI_GEN2, false) == 0) break; #endif + /* fall through */ default: si_pi->force_pcie_gen = si_get_current_pcie_speed(adev); break; From cc5034a5d293dd620484d1d836aa16c6764a1c8c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 15 Feb 2019 14:29:26 -0600 Subject: [PATCH 28/49] drm/radeon/evergreen_cs: fix missing break in switch statement Add missing break statement in order to prevent the code from falling through to case CB_TARGET_MASK. This bug was found thanks to the ongoing efforts to enable -Wimplicit-fallthrough. Fixes: dd220a00e8bd ("drm/radeon/kms: add support for streamout v7") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. 
Silva Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/evergreen_cs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index f471537c852f..1e14c6921454 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -1299,6 +1299,7 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) return -EINVAL; } ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff); + break; case CB_TARGET_MASK: track->cb_target_mask = radeon_get_ib_value(p, idx); track->cb_dirty = true; From e02c80d60c606ea9810565421e4e70c3996e3df7 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Tue, 19 Feb 2019 11:21:41 -0500 Subject: [PATCH 29/49] Revert "drm/amdgpu: Delete user queue doorbell variables" This reverts commit 9006c6bd9059cb9807fa863bafc1d776222cb61b. Signed-off-by: Yong Zhao Acked-by: Alex Deucher Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 2 ++ drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c | 2 ++ 3 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 5587fac671bb..68959b923f89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -48,6 +48,8 @@ struct amdgpu_doorbell_index { uint32_t mec_ring5; uint32_t mec_ring6; uint32_t mec_ring7; + uint32_t userqueue_start; + uint32_t userqueue_end; uint32_t gfx_ring0; uint32_t sdma_engine[8]; uint32_t ih; @@ -111,6 +113,8 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT AMDGPU_VEGA20_DOORBELL_MEC_RING5 = 0x008, AMDGPU_VEGA20_DOORBELL_MEC_RING6 = 0x009, AMDGPU_VEGA20_DOORBELL_MEC_RING7 = 0x00A, + AMDGPU_VEGA20_DOORBELL_USERQUEUE_START = 0x00B, + AMDGPU_VEGA20_DOORBELL_USERQUEUE_END = 0x08A, AMDGPU_VEGA20_DOORBELL_GFX_RING0 = 0x08B, /* SDMA:256~335*/ AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0 = 0x100, @@ -178,6 +182,10 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT AMDGPU_DOORBELL64_MEC_RING6 = 0x09, AMDGPU_DOORBELL64_MEC_RING7 = 0x0a, + /* User queue doorbell range (128 doorbells) */ + AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b, + AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a, + /* Graphics engine */ AMDGPU_DOORBELL64_GFX_RING0 = 0x8b, diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index 5e9e53143a8e..a8e92638a2e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -67,6 +67,8 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.mec_ring5 = AMDGPU_DOORBELL64_MEC_RING5; adev->doorbell_index.mec_ring6 = AMDGPU_DOORBELL64_MEC_RING6; adev->doorbell_index.mec_ring7 = AMDGPU_DOORBELL64_MEC_RING7; + adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL64_USERQUEUE_START; + adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL64_USERQUEUE_END; adev->doorbell_index.gfx_ring0 = AMDGPU_DOORBELL64_GFX_RING0; adev->doorbell_index.sdma_engine[0] = AMDGPU_DOORBELL64_sDMA_ENGINE0; adev->doorbell_index.sdma_engine[1] = AMDGPU_DOORBELL64_sDMA_ENGINE1; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index fb6398e38be9..0db84386252a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -65,6 +65,8 @@ void vega20_doorbell_index_init(struct 
amdgpu_device *adev) adev->doorbell_index.mec_ring5 = AMDGPU_VEGA20_DOORBELL_MEC_RING5; adev->doorbell_index.mec_ring6 = AMDGPU_VEGA20_DOORBELL_MEC_RING6; adev->doorbell_index.mec_ring7 = AMDGPU_VEGA20_DOORBELL_MEC_RING7; + adev->doorbell_index.userqueue_start = AMDGPU_VEGA20_DOORBELL_USERQUEUE_START; + adev->doorbell_index.userqueue_end = AMDGPU_VEGA20_DOORBELL_USERQUEUE_END; adev->doorbell_index.gfx_ring0 = AMDGPU_VEGA20_DOORBELL_GFX_RING0; adev->doorbell_index.sdma_engine[0] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0; adev->doorbell_index.sdma_engine[1] = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1; From a4a3ad3523359923dc93dfbbb13a89f2b8d5589a Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Tue, 19 Feb 2019 11:21:51 -0500 Subject: [PATCH 30/49] Revert "drm/amdgpu: Fix bugs in setting CP RB/MEC DOORBELL_RANGE registers" The original change caused a regression, so revert it until the new fix is ready. BUG: https://bugs.freedesktop.org/show_bug.cgi?id=109650 This reverts commit 764c85fef41722db0f21558c6c2fb38bee172d19. Signed-off-by: Yong Zhao Acked-by: Alex Deucher Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 19 ++++--------------- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 19 +++++-------------- 2 files changed, 9 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 41cf9d0224d9..b8e50a34bdb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4223,8 +4223,8 @@ static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu adev->doorbell_index.gfx_ring0); WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); - /* There is only one GFX queue */ - WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, tmp); + WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, + CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); } static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) @@ -4646,19 +4646,8 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) { if (adev->asic_type > CHIP_TONGA) { - /* The first few doorbells in pci doorbell bar are for GFX RB - * rings and all the leftover for MEC. - * So CP_MEC_DOORBELL_RANGE_LOWER should be set one index after - * CP_RB_DOORBELL_RANGE_UPPER, as we assume there is only one - * GFX RB rings. 
- */ - u32 tmp = REG_SET_FIELD(0, CP_MEC_DOORBELL_RANGE_LOWER, - DOORBELL_RANGE_LOWER, - adev->gfx.gfx_ring[0].doorbell_index + 1); - - WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, tmp); - WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, - CP_MEC_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); + WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2); + WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2); } /* enable doorbells */ WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 36f417fd6ba9..5533f6e4f4a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2631,8 +2631,8 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) DOORBELL_RANGE_LOWER, ring->doorbell_index); WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); - /* There is only one GFX queue */ - WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, tmp); + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, + CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); /* start the ring */ @@ -2995,19 +2995,10 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) /* enable the doorbell if requested */ if (ring->use_doorbell) { - /* The first few doorbells in pci doorbell bar are for GFX RB - * rings and all the leftover for MEC. - * So CP_MEC_DOORBELL_RANGE_LOWER should be set one index after - * CP_RB_DOORBELL_RANGE_UPPER, as we assume there is only one - * GFX RB rings. - */ - u32 tmp = REG_SET_FIELD(0, CP_MEC_DOORBELL_RANGE_LOWER, - DOORBELL_RANGE_LOWER, - adev->gfx.gfx_ring[0].doorbell_index + 2); - - WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, tmp); + WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, + (adev->doorbell_index.kiq * 2) << 2); WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, - CP_MEC_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); + (adev->doorbell_index.userqueue_end * 2) << 2); } WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, From 4dfdd0ee8529a9283bc3503919719ae69a41914e Mon Sep 17 00:00:00 2001 From: Josip Pavic Date: Tue, 29 Jan 2019 14:15:03 -0500 Subject: [PATCH 31/49] drm/amd/display: send pipe set command to dmcu when stream unblanks [Why] When stream is blanked, pipe set command is sent to dmcu to notify it that the abm pipe is disabled. When stream is unblanked, no notification is made to dmcu that the abm pipe has been enabled, resulting in abm not being enabled in the firmware. [How] When stream is unblanked, send a pipe set command to dmcu. 
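A condensed sketch of the resulting flow, using placeholder types rather than the real DC structures (the actual changes are in dce_abm.c and dcn10_hw_sequencer.c below): on unblank, the ABM pipe-set message is sent for the stream's timing generator instance before the ABM level is programmed, while the blank path keeps using the immediate-disable message.

  #include <stdbool.h>
  #include <stdio.h>

  /* Placeholder stand-ins for struct abm and its funcs table. */
  struct abm;
  struct abm_funcs {
          bool (*set_pipe)(struct abm *abm, unsigned int controller_id);
          bool (*set_abm_level)(struct abm *abm, unsigned int level);
          bool (*set_abm_immediate_disable)(struct abm *abm);
  };
  struct abm { const struct abm_funcs *funcs; };

  /* Condensed shape of dcn10_blank_pixel_data() after this change. */
  static void blank_pixel_data(struct abm *abm, unsigned int tg_inst,
                               unsigned int abm_level, bool blank)
  {
          if (!blank) {
                  /* new: tell DMCU which pipe ABM runs on, then set level */
                  abm->funcs->set_pipe(abm, tg_inst + 1);
                  abm->funcs->set_abm_level(abm, abm_level);
          } else {
                  abm->funcs->set_abm_immediate_disable(abm);
          }
  }

  static bool stub_set_pipe(struct abm *abm, unsigned int id)
  { (void)abm; printf("MCP_ABM_PIPE_SET, controller %u\n", id); return true; }
  static bool stub_set_level(struct abm *abm, unsigned int level)
  { (void)abm; printf("ABM level %u\n", level); return true; }
  static bool stub_disable(struct abm *abm)
  { (void)abm; printf("MCP_DISABLE_ABM_IMMEDIATELY\n"); return true; }

  int main(void)
  {
          const struct abm_funcs funcs = {
                  .set_pipe = stub_set_pipe,
                  .set_abm_level = stub_set_level,
                  .set_abm_immediate_disable = stub_disable,
          };
          struct abm abm = { &funcs };

          blank_pixel_data(&abm, 0, 128, false); /* unblank: pipe set, then level */
          blank_pixel_data(&abm, 0, 128, true);  /* blank: immediate disable */
          return 0;
  }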
Signed-off-by: Josip Pavic Reviewed-by: Anthony Koo Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_abm.c | 32 ++++++++++--------- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 4 ++- drivers/gpu/drm/amd/display/dc/inc/hw/abm.h | 1 + 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c index 01e56f1a9f34..a740bc3418a1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c @@ -175,7 +175,6 @@ static void dmcu_set_backlight_level( uint32_t controller_id) { unsigned int backlight_8_bit = 0; - uint32_t rampingBoundary = 0xFFFF; uint32_t s2; if (backlight_pwm_u16_16 & 0x10000) @@ -185,17 +184,6 @@ static void dmcu_set_backlight_level( // Take MSB of fractional part since backlight is not max backlight_8_bit = (backlight_pwm_u16_16 >> 8) & 0xFF; - /* set ramping boundary */ - REG_WRITE(MASTER_COMM_DATA_REG1, rampingBoundary); - - /* setDMCUParam_Pipe */ - REG_UPDATE_2(MASTER_COMM_CMD_REG, - MASTER_COMM_CMD_REG_BYTE0, MCP_ABM_PIPE_SET, - MASTER_COMM_CMD_REG_BYTE1, controller_id); - - /* notifyDMCUMsg */ - REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1); - /* waitDMCUReadyForCmd */ REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, 1, 80000); @@ -305,21 +293,34 @@ static bool dce_abm_set_level(struct abm *abm, uint32_t level) return true; } -static bool dce_abm_immediate_disable(struct abm *abm) +static bool dce_abm_set_pipe(struct abm *abm, uint32_t controller_id) { struct dce_abm *abm_dce = TO_DCE_ABM(abm); + uint32_t rampingBoundary = 0xFFFF; REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, 1, 80000); - /* setDMCUParam_ABMLevel */ + /* set ramping boundary */ + REG_WRITE(MASTER_COMM_DATA_REG1, rampingBoundary); + + /* setDMCUParam_Pipe */ REG_UPDATE_2(MASTER_COMM_CMD_REG, MASTER_COMM_CMD_REG_BYTE0, MCP_ABM_PIPE_SET, - MASTER_COMM_CMD_REG_BYTE1, MCP_DISABLE_ABM_IMMEDIATELY); + MASTER_COMM_CMD_REG_BYTE1, controller_id); /* notifyDMCUMsg */ REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1); + return true; +} + +static bool dce_abm_immediate_disable(struct abm *abm) +{ + struct dce_abm *abm_dce = TO_DCE_ABM(abm); + + dce_abm_set_pipe(abm, MCP_DISABLE_ABM_IMMEDIATELY); + abm->stored_backlight_registers.BL_PWM_CNTL = REG_READ(BL_PWM_CNTL); abm->stored_backlight_registers.BL_PWM_CNTL2 = @@ -419,6 +420,7 @@ static const struct abm_funcs dce_funcs = { .abm_init = dce_abm_init, .set_abm_level = dce_abm_set_level, .init_backlight = dce_abm_init_backlight, + .set_pipe = dce_abm_set_pipe, .set_backlight_level_pwm = dce_abm_set_backlight_level_pwm, .get_current_backlight = dce_abm_get_current_backlight, .get_target_backlight = dce_abm_get_target_backlight, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 117d9d8227f7..7f95808ad885 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2162,8 +2162,10 @@ static void dcn10_blank_pixel_data( if (!blank) { if (stream_res->tg->funcs->set_blank) stream_res->tg->funcs->set_blank(stream_res->tg, blank); - if (stream_res->abm) + if (stream_res->abm) { + stream_res->abm->funcs->set_pipe(stream_res->abm, stream_res->tg->inst + 1); stream_res->abm->funcs->set_abm_level(stream_res->abm, stream->abm_level); + } } else if (blank) { if (stream_res->abm) 
stream_res->abm->funcs->set_abm_immediate_disable(stream_res->abm); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h index abc961c0906e..86dc39a02408 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h @@ -46,6 +46,7 @@ struct abm_funcs { void (*abm_init)(struct abm *abm); bool (*set_abm_level)(struct abm *abm, unsigned int abm_level); bool (*set_abm_immediate_disable)(struct abm *abm); + bool (*set_pipe)(struct abm *abm, unsigned int controller_id); bool (*init_backlight)(struct abm *abm); /* backlight_pwm_u16_16 is unsigned 32 bit, From ce72741b53cdc70f9738c04920fefb93448f1f5e Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Sun, 20 Jan 2019 01:54:01 -0500 Subject: [PATCH 32/49] drm/amd/display: remove screen flashes on seamless boot [Why] We want boot to desktop to be seamless [How] During init pipes, avoid touching the pipes where GOP has already enabled the HW to the state we want. Signed-off-by: Anthony Koo Reviewed-by: Aric Cyr Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../display/dc/dce110/dce110_hw_sequencer.c | 10 ++++++- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 30 ++++++++++++++++++- .../gpu/drm/amd/display/include/dal_asic_id.h | 3 ++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index e1b285ea01ac..453ff071793b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1521,6 +1521,14 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) struct dc_link *edp_link = get_link_for_edp(dc); bool can_edp_fast_boot_optimize = false; bool apply_edp_fast_boot_optimization = false; + bool can_apply_seamless_boot = false; + + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i]->apply_seamless_boot_optimization) { + can_apply_seamless_boot = true; + break; + } + } if (edp_link) { /* this seems to cause blank screens on DCE8 */ @@ -1549,7 +1557,7 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) } } - if (!apply_edp_fast_boot_optimization) { + if (!apply_edp_fast_boot_optimization && !can_apply_seamless_boot) { if (edp_link_to_turnoff) { /*turn off backlight before DP_blank and encoder powered down*/ dc->hwss.edp_backlight_control(edp_link_to_turnoff, false); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 7f95808ad885..d42fade50e18 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -959,9 +959,25 @@ static void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx) static void dcn10_init_pipes(struct dc *dc, struct dc_state *context) { int i; + bool can_apply_seamless_boot = false; + + for (i = 0; i < context->stream_count; i++) { + if (context->streams[i]->apply_seamless_boot_optimization) { + can_apply_seamless_boot = true; + break; + } + } for (i = 0; i < dc->res_pool->pipe_count; i++) { struct timing_generator *tg = dc->res_pool->timing_generators[i]; + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + /* There is assumption that pipe_ctx is not mapping irregularly + * to non-preferred front end. 
If pipe_ctx->stream is not NULL, + * we will use the pipe, so don't disable + */ + if (pipe_ctx->stream != NULL) + continue; if (tg->funcs->is_tg_enabled(tg)) tg->funcs->lock(tg); @@ -975,7 +991,9 @@ static void dcn10_init_pipes(struct dc *dc, struct dc_state *context) } } - dc->res_pool->mpc->funcs->mpc_init(dc->res_pool->mpc); + /* Cannot reset the MPC mux if seamless boot */ + if (!can_apply_seamless_boot) + dc->res_pool->mpc->funcs->mpc_init(dc->res_pool->mpc); for (i = 0; i < dc->res_pool->pipe_count; i++) { struct timing_generator *tg = dc->res_pool->timing_generators[i]; @@ -983,6 +1001,16 @@ static void dcn10_init_pipes(struct dc *dc, struct dc_state *context) struct dpp *dpp = dc->res_pool->dpps[i]; struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + // W/A for issue with dc_post_update_surfaces_to_stream + hubp->power_gated = true; + + /* There is assumption that pipe_ctx is not mapping irregularly + * to non-preferred front end. If pipe_ctx->stream is not NULL, + * we will use the pipe, so don't disable + */ + if (pipe_ctx->stream != NULL) + continue; + dpp->funcs->dpp_reset(dpp); pipe_ctx->stream_res.tg = tg; diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 4f501ddcfb8d..34d6fdcb32e2 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -131,6 +131,7 @@ #define INTERNAL_REV_RAVEN_A0 0x00 /* First spin of Raven */ #define RAVEN_A0 0x01 #define RAVEN_B0 0x21 +#define PICASSO_A0 0x41 #if defined(CONFIG_DRM_AMD_DC_DCN1_01) /* DCN1_01 */ #define RAVEN2_A0 0x81 @@ -165,4 +166,6 @@ #define FAMILY_UNKNOWN 0xFF + + #endif /* __DAL_ASIC_ID_H__ */ From c19bd82f8b33f6cf505df0ff5027d4679fedaa3e Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Wed, 30 Jan 2019 11:05:38 -0500 Subject: [PATCH 33/49] drm/amd/display: Increase precision for backlight curve [Why] We are currently losing precision when we convert from 16 bit --> 8 bit --> 16 bit. [How] We shouldn't down convert unnecessarily and lose precision. Keep values at 16 bit and use directly. Signed-off-by: Anthony Koo Reviewed-by: Aric Cyr Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../amd/display/modules/power/power_helpers.c | 23 ++++--------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 3ba87b076287..038b88221c5f 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -165,18 +165,11 @@ struct iram_table_v_2_2 { }; #pragma pack(pop) -static uint16_t backlight_8_to_16(unsigned int backlight_8bit) -{ - return (uint16_t)(backlight_8bit * 0x101); -} - static void fill_backlight_transform_table(struct dmcu_iram_parameters params, struct iram_table_v_2 *table) { unsigned int i; unsigned int num_entries = NUM_BL_CURVE_SEGS; - unsigned int query_input_8bit; - unsigned int query_output_8bit; unsigned int lut_index; table->backlight_thresholds[0] = 0; @@ -194,16 +187,13 @@ static void fill_backlight_transform_table(struct dmcu_iram_parameters params, * format U4.10. 
*/ for (i = 1; i+1 < num_entries; i++) { - query_input_8bit = DIV_ROUNDUP((i * 256), num_entries); - lut_index = (params.backlight_lut_array_size - 1) * i / (num_entries - 1); ASSERT(lut_index < params.backlight_lut_array_size); - query_output_8bit = params.backlight_lut_array[lut_index] >> 8; table->backlight_thresholds[i] = - backlight_8_to_16(query_input_8bit); + cpu_to_be16(DIV_ROUNDUP((i * 65536), num_entries)); table->backlight_offsets[i] = - backlight_8_to_16(query_output_8bit); + cpu_to_be16(params.backlight_lut_array[lut_index]); } } @@ -212,8 +202,6 @@ static void fill_backlight_transform_table_v_2_2(struct dmcu_iram_parameters par { unsigned int i; unsigned int num_entries = NUM_BL_CURVE_SEGS; - unsigned int query_input_8bit; - unsigned int query_output_8bit; unsigned int lut_index; table->backlight_thresholds[0] = 0; @@ -231,16 +219,13 @@ static void fill_backlight_transform_table_v_2_2(struct dmcu_iram_parameters par * format U4.10. */ for (i = 1; i+1 < num_entries; i++) { - query_input_8bit = DIV_ROUNDUP((i * 256), num_entries); - lut_index = (params.backlight_lut_array_size - 1) * i / (num_entries - 1); ASSERT(lut_index < params.backlight_lut_array_size); - query_output_8bit = params.backlight_lut_array[lut_index] >> 8; table->backlight_thresholds[i] = - backlight_8_to_16(query_input_8bit); + cpu_to_be16(DIV_ROUNDUP((i * 65536), num_entries)); table->backlight_offsets[i] = - backlight_8_to_16(query_output_8bit); + cpu_to_be16(params.backlight_lut_array[lut_index]); } } From 240d09d070a7e082d9cb3c2a6112ce3c43bf5df5 Mon Sep 17 00:00:00 2001 From: Gary Kattan Date: Fri, 25 Jan 2019 15:04:14 -0800 Subject: [PATCH 34/49] drm/amd/display: Ungate stream before programming registers [Why] Certain tests fail after a fresh reboot. This is caused by writing to registers prior to ungating the stream we're trying to program. [How] Make sure the stream is ungated before writing to its registers. This also enables power-gating plane resources before init_hw initializes them. Additionally, this does some refactoring to move gating/ungating from enable/disable_plane functions to where stream resources are enabled/disabled. 
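A compile-only sketch of the hook placement, with placeholder types (the real declarations are added to hw_sequencer.h below): the optional disable_stream_gating hook now runs at the top of apply_single_controller_ctx_to_hw(), before any stream registers are written, and enable_stream_gating is called from the reset path; both stay NULL for DCE110 and DCN10 in this patch.

  /* Placeholder declarations; the real hooks live in
   * struct hw_sequencer_funcs in hw_sequencer.h. */
  struct dc;
  struct pipe_ctx;
  struct hw_sequencer_funcs {
          void (*disable_stream_gating)(struct dc *dc, struct pipe_ctx *pipe_ctx);
          void (*enable_stream_gating)(struct dc *dc, struct pipe_ctx *pipe_ctx);
  };

  /* Ordering this patch establishes: ungate first, program afterwards. */
  void apply_single_controller_ctx_to_hw_sketch(struct dc *dc,
                  const struct hw_sequencer_funcs *hwss,
                  struct pipe_ctx *pipe_ctx)
  {
          if (hwss->disable_stream_gating)
                  hwss->disable_stream_gating(dc, pipe_ctx);

          /* ... audio, timing and the rest of the stream programming ... */
  }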
Signed-off-by: Gary Kattan Reviewed-by: Dmytro Laktyushkin Acked-by: Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c | 6 ++++++ drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c | 8 ++++++-- drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h | 4 ++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 453ff071793b..42ee0a6eeec0 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1300,6 +1300,10 @@ static enum dc_status apply_single_controller_ctx_to_hw( struct drr_params params = {0}; unsigned int event_triggers = 0; + if (dc->hwss.disable_stream_gating) { + dc->hwss.disable_stream_gating(dc, pipe_ctx); + } + if (pipe_ctx->stream_res.audio != NULL) { struct audio_output audio_output; @@ -2684,6 +2688,8 @@ static const struct hw_sequencer_funcs dce110_funcs = { .set_static_screen_control = set_static_screen_control, .reset_hw_ctx_wrap = dce110_reset_hw_ctx_wrap, .enable_stream_timing = dce110_enable_stream_timing, + .disable_stream_gating = NULL, + .enable_stream_gating = NULL, .setup_stereo = NULL, .set_avmute = dce110_set_avmute, .wait_for_mpcc_disconnect = dce110_wait_for_mpcc_disconnect, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index d42fade50e18..d4dde1d86b72 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1165,11 +1165,13 @@ static void reset_hw_ctx_wrap( struct clock_source *old_clk = pipe_ctx_old->clock_source; reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state); + if (dc->hwss.enable_stream_gating) { + dc->hwss.enable_stream_gating(dc, pipe_ctx); + } if (old_clk) old_clk->funcs->cs_power_down(old_clk); } } - } static bool patch_address_for_sbs_tb_stereo( @@ -2786,7 +2788,9 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .edp_wait_for_hpd_ready = hwss_edp_wait_for_hpd_ready, .set_cursor_position = dcn10_set_cursor_position, .set_cursor_attribute = dcn10_set_cursor_attribute, - .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level + .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, + .disable_stream_gating = NULL, + .enable_stream_gating = NULL }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 341b4810288c..fc03320c81a1 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -68,6 +68,10 @@ struct stream_resource; struct hw_sequencer_funcs { + void (*disable_stream_gating)(struct dc *dc, struct pipe_ctx *pipe_ctx); + + void (*enable_stream_gating)(struct dc *dc, struct pipe_ctx *pipe_ctx); + void (*init_hw)(struct dc *dc); void (*init_pipes)(struct dc *dc, struct dc_state *context); From 2010840b9b4667d2bd9b45617964d3fa7fb086ea Mon Sep 17 00:00:00 2001 From: Josip Pavic Date: Wed, 30 Jan 2019 16:23:47 -0500 Subject: [PATCH 35/49] drm/amd/display: send pipe set command to dmcu when backlight is set [Why] Previously, a change removed code that would send a pipe set command to dmcu each time the backlight was set, as it was thought to be superfluous. 
However, it is possible for the backlight to be set before a valid pipe has been set, which causes DMCU to hang after a DPMS restore on some systems. [How] Send a pipe set command to DMCU prior to setting the backlight. Signed-off-by: Josip Pavic Reviewed-by: Anthony Koo Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce/dce_abm.c | 45 ++++++++++---------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c index a740bc3418a1..da96229db53a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c @@ -53,6 +53,27 @@ #define MCP_DISABLE_ABM_IMMEDIATELY 255 +static bool dce_abm_set_pipe(struct abm *abm, uint32_t controller_id) +{ + struct dce_abm *abm_dce = TO_DCE_ABM(abm); + uint32_t rampingBoundary = 0xFFFF; + + REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, + 1, 80000); + + /* set ramping boundary */ + REG_WRITE(MASTER_COMM_DATA_REG1, rampingBoundary); + + /* setDMCUParam_Pipe */ + REG_UPDATE_2(MASTER_COMM_CMD_REG, + MASTER_COMM_CMD_REG_BYTE0, MCP_ABM_PIPE_SET, + MASTER_COMM_CMD_REG_BYTE1, controller_id); + + /* notifyDMCUMsg */ + REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1); + + return true; +} static unsigned int calculate_16_bit_backlight_from_pwm(struct dce_abm *abm_dce) { @@ -184,6 +205,8 @@ static void dmcu_set_backlight_level( // Take MSB of fractional part since backlight is not max backlight_8_bit = (backlight_pwm_u16_16 >> 8) & 0xFF; + dce_abm_set_pipe(&abm_dce->base, controller_id); + /* waitDMCUReadyForCmd */ REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, 1, 80000); @@ -293,28 +316,6 @@ static bool dce_abm_set_level(struct abm *abm, uint32_t level) return true; } -static bool dce_abm_set_pipe(struct abm *abm, uint32_t controller_id) -{ - struct dce_abm *abm_dce = TO_DCE_ABM(abm); - uint32_t rampingBoundary = 0xFFFF; - - REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0, - 1, 80000); - - /* set ramping boundary */ - REG_WRITE(MASTER_COMM_DATA_REG1, rampingBoundary); - - /* setDMCUParam_Pipe */ - REG_UPDATE_2(MASTER_COMM_CMD_REG, - MASTER_COMM_CMD_REG_BYTE0, MCP_ABM_PIPE_SET, - MASTER_COMM_CMD_REG_BYTE1, controller_id); - - /* notifyDMCUMsg */ - REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1); - - return true; -} - static bool dce_abm_immediate_disable(struct abm *abm) { struct dce_abm *abm_dce = TO_DCE_ABM(abm); From 2d9e64317e3ab6e751d7bc10ad9839d3fd7a5efa Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 18 Jan 2019 13:17:55 -0500 Subject: [PATCH 36/49] drm/amd/display: Fix wrong z-order when updating overlay planes [Why] If a commit updates an overlay plane via the legacy plane IOCTL then the only plane in the state will be the overlay plane. Overlay planes need to be added first to the DC context, but in the scenario above the plane will be added last. This will result in wrong z-order during rendering. [How] If any non-cursor plane has been updated then the rest of the non-cursor planes should be added to the CRTC state. The cursor plane doesn't need to be included for stream updates and locking it will cause performance issues. It should be ignored. DC requires that the surface count passed during stream updates be the number of surfaces currently on the stream to enable fast updates. This previously wasn't the case without this patch, so this also allows this optimization to occur. 
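Condensed from the amdgpu_dm_atomic_check() hunk that follows, the added logic is: for any CRTC whose non-cursor plane set changes in this commit, pull every other non-cursor plane on that CRTC into the atomic state so DC receives the complete, correctly ordered surface list.

  drm_for_each_crtc(crtc, dev) {
          bool modified = false;

          /* Does this commit enable/disable/move a non-cursor plane on crtc? */
          for_each_oldnew_plane_in_state(state, plane, old_plane_state,
                                         new_plane_state, i) {
                  if (plane->type == DRM_PLANE_TYPE_CURSOR)
                          continue;
                  if (new_plane_state->crtc == crtc ||
                      old_plane_state->crtc == crtc) {
                          modified = true;
                          break;
                  }
          }
          if (!modified)
                  continue;

          /* Then pull every other non-cursor plane on crtc into the state. */
          drm_for_each_plane_mask(plane, state->dev, crtc->state->plane_mask) {
                  if (plane->type == DRM_PLANE_TYPE_CURSOR)
                          continue;
                  new_plane_state = drm_atomic_get_plane_state(state, plane);
                  if (IS_ERR(new_plane_state)) {
                          ret = PTR_ERR(new_plane_state);
                          goto fail;
                  }
          }
  }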
Signed-off-by: Nicholas Kazlauskas Reviewed-by: Leo Li Acked-by: Tony Cheng Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c7b66fa4a13e..409e603a3686 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5948,6 +5948,42 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, goto fail; } + /* + * Add all primary and overlay planes on the CRTC to the state + * whenever a plane is enabled to maintain correct z-ordering + * and to enable fast surface updates. + */ + drm_for_each_crtc(crtc, dev) { + bool modified = false; + + for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { + if (plane->type == DRM_PLANE_TYPE_CURSOR) + continue; + + if (new_plane_state->crtc == crtc || + old_plane_state->crtc == crtc) { + modified = true; + break; + } + } + + if (!modified) + continue; + + drm_for_each_plane_mask(plane, state->dev, crtc->state->plane_mask) { + if (plane->type == DRM_PLANE_TYPE_CURSOR) + continue; + + new_plane_state = + drm_atomic_get_plane_state(state, plane); + + if (IS_ERR(new_plane_state)) { + ret = PTR_ERR(new_plane_state); + goto fail; + } + } + } + /* Remove exiting planes if they are modified */ for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) { ret = dm_update_plane_state(dc, state, plane, From 3e4ba0cdbb244a670779793117d6042bd6c0ce64 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Mon, 21 Jan 2019 09:44:47 -0500 Subject: [PATCH 37/49] drm/amd/display: Don't expose support for DRM_FORMAT_RGB888 [Why] This format isn't supported in DC and some IGT tests fail since we expose support for it. [How] Remove it. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Harry Wentland Reviewed-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 409e603a3686..6c09b332639f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3777,7 +3777,6 @@ static const struct drm_plane_helper_funcs dm_plane_helper_funcs = { * check will succeed, and let DC implement proper check */ static const uint32_t rgb_formats[] = { - DRM_FORMAT_RGB888, DRM_FORMAT_XRGB8888, DRM_FORMAT_ARGB8888, DRM_FORMAT_RGBA8888, From 6836d23916ada07389c78d88e701f0752317f1d0 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 22 Jan 2019 14:09:34 -0500 Subject: [PATCH 38/49] drm/amd/display: Fix update type mismatches in atomic check [Why] Whenever a stream or plane is added or removed from the context the pointer will change from old to new. We set lock and validation needed in these cases. But not all of these cases match update_type from dm_determine_update_type_for_commit - an example being overlay plane updates. There are warnings for a few of these cases that should be fixed. [How] We can closer align to DC (and lock_and_validation_needed) by comparing stream and plane pointers. 
Since the old stream/old plane state is never freed until sometime after the commit tail work finishes we are guaranteed to never get back the same block of memory when we remove and create a stream or plane state in the same commit. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Leo Li Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6c09b332639f..3e593d08deaf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5791,15 +5791,14 @@ dm_determine_update_type_for_commit(struct dc *dc, old_dm_crtc_state = to_dm_crtc_state(old_crtc_state); num_plane = 0; - if (!new_dm_crtc_state->stream) { - if (!new_dm_crtc_state->stream && old_dm_crtc_state->stream) { - update_type = UPDATE_TYPE_FULL; - goto cleanup; - } - - continue; + if (new_dm_crtc_state->stream != old_dm_crtc_state->stream) { + update_type = UPDATE_TYPE_FULL; + goto cleanup; } + if (!new_dm_crtc_state->stream) + continue; + for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, j) { new_plane_crtc = new_plane_state->crtc; old_plane_crtc = old_plane_state->crtc; @@ -5809,6 +5808,11 @@ dm_determine_update_type_for_commit(struct dc *dc, if (plane->type == DRM_PLANE_TYPE_CURSOR) continue; + if (new_dm_plane_state->dc_state != old_dm_plane_state->dc_state) { + update_type = UPDATE_TYPE_FULL; + goto cleanup; + } + if (!state->allow_modeset) continue; From 80c218d56e309d46d9dc3f3d5e4396b277bdd0b5 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 25 Jan 2019 15:30:24 -0500 Subject: [PATCH 39/49] drm/amd/display: Do cursor updates after stream updates [Why] Cursor updates used to happen after vblank/flip/stream updates before the stream update refactor. They now happen before stream updates which means that they're not going to be synced with fb changes and that they're going to programmed for pipes that we're disabling within the same commit. [How] Move them after stream updates. 
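Condensed from the amdgpu_dm_commit_planes() hunk below, the reordering looks like this: cursor planes are skipped in the surface-update loop and handled only once the stream updates have been committed.

  for_each_oldnew_plane_in_state(state, plane, old_plane_state,
                                 new_plane_state, i) {
          /* cursor planes are no longer flipped here */
          if (plane->type == DRM_PLANE_TYPE_CURSOR)
                  continue;
          /* ... build and submit surface/flip updates ... */
  }

  /* ... stream updates committed under dm->dc_lock ... */

  for_each_oldnew_plane_in_state(state, plane, old_plane_state,
                                 new_plane_state, i)
          if (plane->type == DRM_PLANE_TYPE_CURSOR)
                  handle_cursor_update(plane, old_plane_state);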
Signed-off-by: Nicholas Kazlauskas Reviewed-by: David Francis Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3e593d08deaf..7579592052cc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4663,10 +4663,9 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct dc_plane_state *dc_plane; struct dm_plane_state *dm_new_plane_state = to_dm_plane_state(new_plane_state); - if (plane->type == DRM_PLANE_TYPE_CURSOR) { - handle_cursor_update(plane, old_plane_state); + /* Cursor plane is handled after stream updates */ + if (plane->type == DRM_PLANE_TYPE_CURSOR) continue; - } if (!fb || !crtc || pcrtc != crtc) continue; @@ -4866,6 +4865,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, mutex_unlock(&dm->dc_lock); } + for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) + if (plane->type == DRM_PLANE_TYPE_CURSOR) + handle_cursor_update(plane, old_plane_state); + cleanup: kfree(flip); kfree(full); From d8d2f174bcc2c26c3485c70e0c6fe22b27bce739 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Fri, 25 Jan 2019 15:23:09 -0500 Subject: [PATCH 40/49] drm/amd/display: Clear stream->mode_changed after commit [Why] The stream->mode_changed flag can persist in the following sequence of atomic commits: Commit 1: Enable CRTC0 (mode_changed = true), Enable CRTC1 (mode_changed = true) Commit 2: Disable CRTC1 (mode_changed = false) In this sequence we want to keep the exiting CRTC0 but it's not in the atomic state for the commit since it hasn't been modified. In this case the stream->mode_changed flag persists as true and we don't re-program the planes for the existing stream. [How] The flag needs to be cleared and it makes the most sense to do it within DC after the state has been committed. Nothing following dc_commit_state should think that the stream's mode has changed. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Leo Li Acked-by: Tony Cheng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 52f838442e21..8879cd4bb70c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1138,6 +1138,9 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c /* pplib is notified if disp_num changed */ dc->hwss.optimize_bandwidth(dc, context); + for (i = 0; i < context->stream_count; i++) + context->streams[i]->mode_changed = false; + dc_release_state(dc->current_state); dc->current_state = context; From d6001aed266391e05517ff03078c144d4b279d5d Mon Sep 17 00:00:00 2001 From: Yongqiang Sun Date: Fri, 25 Jan 2019 14:40:14 -0500 Subject: [PATCH 41/49] drm/amd/display: Refactor for setup periodic interrupt. [Why] Current periodic interrupt start point calc in optc is not clear. [How] 1. DM convert delta time to lines number and dc will calculate the start position as per lines number and interrupt type. 2. hwss calculates the start point as per line offset. 3. optc programs vertical interrupts register as per start point and interrupt source. 
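To illustrate steps 1 and 2 above: DM now supplies a lines offset relative to a reference point (v_update or v_sync), and the hw sequencer converts it into an absolute start line. The core arithmetic, mirrored from get_vupdate_offset_from_vsync() added later in this patch, is small enough to show as a standalone calculation; the timing numbers in main() are made up.

  #include <stdio.h>

  /* Sketch of get_vupdate_offset_from_vsync(): distance (in lines) from
   * VSYNC to the VUPDATE start position. */
  static int vupdate_offset_from_vsync(int v_total, int v_addressable,
                                       int v_border_top, int v_border_bottom,
                                       int v_front_porch, int interlaced,
                                       int vstartup_start)
  {
          int interlace_factor = interlaced ? 2 : 1;
          int vesa_sync_start = v_addressable + v_border_bottom + v_front_porch;
          int asic_blank_end = (v_total - vesa_sync_start - v_border_top) *
                               interlace_factor;

          return asic_blank_end - vstartup_start + 1;
  }

  int main(void)
  {
          /* hypothetical 1080p-style timing, vstartup_start chosen arbitrarily */
          printf("vupdate start line: %d\n",
                 vupdate_offset_from_vsync(1125, 1080, 0, 0, 4, 0, 26));
          return 0;
  }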
Signed-off-by: Yongqiang Sun Reviewed-by: Tony Cheng Acked-by: Leo Li Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 12 +- drivers/gpu/drm/amd/display/dc/dc_stream.h | 24 ++- .../display/dc/dce110/dce110_hw_sequencer.c | 6 +- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 145 +++++++++++++++++- .../amd/display/dc/dcn10/dcn10_hw_sequencer.h | 2 + .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.c | 137 +++-------------- .../gpu/drm/amd/display/dc/dcn10/dcn10_optc.h | 13 +- .../amd/display/dc/inc/hw/timing_generator.h | 23 ++- .../gpu/drm/amd/display/dc/inc/hw_sequencer.h | 8 + 9 files changed, 217 insertions(+), 153 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 8879cd4bb70c..c68fbd55db3c 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1626,13 +1626,13 @@ static void commit_planes_do_stream_update(struct dc *dc, stream_update->adjust->v_total_min, stream_update->adjust->v_total_max); - if (stream_update->periodic_vsync_config && pipe_ctx->stream_res.tg->funcs->program_vline_interrupt) - pipe_ctx->stream_res.tg->funcs->program_vline_interrupt( - pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, VLINE0, &stream->periodic_vsync_config); + if (stream_update->periodic_interrupt0 && + dc->hwss.setup_periodic_interrupt) + dc->hwss.setup_periodic_interrupt(pipe_ctx, VLINE0); - if (stream_update->enhanced_sync_config && pipe_ctx->stream_res.tg->funcs->program_vline_interrupt) - pipe_ctx->stream_res.tg->funcs->program_vline_interrupt( - pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, VLINE1, &stream->enhanced_sync_config); + if (stream_update->periodic_interrupt1 && + dc->hwss.setup_periodic_interrupt) + dc->hwss.setup_periodic_interrupt(pipe_ctx, VLINE1); if ((stream_update->hdr_static_metadata && !stream->use_dynamic_meta) || stream_update->vrr_infopacket || diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index a798694992b9..5657cb3a2ad3 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -51,9 +51,19 @@ struct freesync_context { bool dummy; }; -union vline_config { - unsigned int line_number; - unsigned long long delta_in_ns; +enum vertical_interrupt_ref_point { + START_V_UPDATE = 0, + START_V_SYNC, + INVALID_POINT + + //For now, only v_update interrupt is used. 
+ //START_V_BLANK, + //START_V_ACTIVE +}; + +struct periodic_interrupt_config { + enum vertical_interrupt_ref_point ref_point; + int lines_offset; }; @@ -106,8 +116,8 @@ struct dc_stream_state { /* DMCU info */ unsigned int abm_level; - union vline_config periodic_vsync_config; - union vline_config enhanced_sync_config; + struct periodic_interrupt_config periodic_interrupt0; + struct periodic_interrupt_config periodic_interrupt1; /* from core_stream struct */ struct dc_context *ctx; @@ -158,8 +168,8 @@ struct dc_stream_update { struct dc_info_packet *hdr_static_metadata; unsigned int *abm_level; - union vline_config *periodic_vsync_config; - union vline_config *enhanced_sync_config; + struct periodic_interrupt_config *periodic_interrupt0; + struct periodic_interrupt_config *periodic_interrupt1; struct dc_crtc_timing_adjust *adjust; struct dc_info_packet *vrr_infopacket; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 42ee0a6eeec0..5e4db3712eef 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1333,10 +1333,8 @@ static enum dc_status apply_single_controller_ctx_to_hw( if (!pipe_ctx->stream->apply_seamless_boot_optimization) dc->hwss.enable_stream_timing(pipe_ctx, context, dc); - if (pipe_ctx->stream_res.tg->funcs->program_vupdate_interrupt) - pipe_ctx->stream_res.tg->funcs->program_vupdate_interrupt( - pipe_ctx->stream_res.tg, - &stream->timing); + if (dc->hwss.setup_vupdate_interrupt) + dc->hwss.setup_vupdate_interrupt(pipe_ctx); params.vertical_total_min = stream->adjust.v_total_min; params.vertical_total_max = stream->adjust.v_total_max; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index d4dde1d86b72..8ba895c4b445 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2741,6 +2741,147 @@ static void dcn10_set_cursor_sdr_white_level(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.dpp, &opt_attr); } +/** +* apply_front_porch_workaround TODO FPGA still need? +* +* This is a workaround for a bug that has existed since R5xx and has not been +* fixed keep Front porch at minimum 2 for Interlaced mode or 1 for progressive. +*/ +static void apply_front_porch_workaround( + struct dc_crtc_timing *timing) +{ + if (timing->flags.INTERLACE == 1) { + if (timing->v_front_porch < 2) + timing->v_front_porch = 2; + } else { + if (timing->v_front_porch < 1) + timing->v_front_porch = 1; + } +} + +int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx) +{ + struct timing_generator *optc = pipe_ctx->stream_res.tg; + const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing; + struct dc_crtc_timing patched_crtc_timing; + int vesa_sync_start; + int asic_blank_end; + int interlace_factor; + int vertical_line_start; + + patched_crtc_timing = *dc_crtc_timing; + apply_front_porch_workaround(&patched_crtc_timing); + + interlace_factor = patched_crtc_timing.flags.INTERLACE ? 
2 : 1; + + vesa_sync_start = patched_crtc_timing.v_addressable + + patched_crtc_timing.v_border_bottom + + patched_crtc_timing.v_front_porch; + + asic_blank_end = (patched_crtc_timing.v_total - + vesa_sync_start - + patched_crtc_timing.v_border_top) + * interlace_factor; + + vertical_line_start = asic_blank_end - + optc->dlg_otg_param.vstartup_start + 1; + + return vertical_line_start; +} + +static void calc_vupdate_position( + struct pipe_ctx *pipe_ctx, + uint32_t *start_line, + uint32_t *end_line) +{ + const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing; + int vline_int_offset_from_vupdate = + pipe_ctx->stream->periodic_interrupt0.lines_offset; + int vupdate_offset_from_vsync = get_vupdate_offset_from_vsync(pipe_ctx); + int start_position; + + if (vline_int_offset_from_vupdate > 0) + vline_int_offset_from_vupdate--; + else if (vline_int_offset_from_vupdate < 0) + vline_int_offset_from_vupdate++; + + start_position = vline_int_offset_from_vupdate + vupdate_offset_from_vsync; + + if (start_position >= 0) + *start_line = start_position; + else + *start_line = dc_crtc_timing->v_total + start_position - 1; + + *end_line = *start_line + 2; + + if (*end_line >= dc_crtc_timing->v_total) + *end_line = 2; +} + +static void cal_vline_position( + struct pipe_ctx *pipe_ctx, + enum vline_select vline, + uint32_t *start_line, + uint32_t *end_line) +{ + enum vertical_interrupt_ref_point ref_point = INVALID_POINT; + + if (vline == VLINE0) + ref_point = pipe_ctx->stream->periodic_interrupt0.ref_point; + else if (vline == VLINE1) + ref_point = pipe_ctx->stream->periodic_interrupt1.ref_point; + + switch (ref_point) { + case START_V_UPDATE: + calc_vupdate_position( + pipe_ctx, + start_line, + end_line); + break; + case START_V_SYNC: + // Suppose to do nothing because vsync is 0; + break; + default: + ASSERT(0); + break; + } +} + +static void dcn10_setup_periodic_interrupt( + struct pipe_ctx *pipe_ctx, + enum vline_select vline) +{ + struct timing_generator *tg = pipe_ctx->stream_res.tg; + + if (vline == VLINE0) { + uint32_t start_line = 0; + uint32_t end_line = 0; + + cal_vline_position(pipe_ctx, vline, &start_line, &end_line); + + tg->funcs->setup_vertical_interrupt0(tg, start_line, end_line); + + } else if (vline == VLINE1) { + pipe_ctx->stream_res.tg->funcs->setup_vertical_interrupt1( + tg, + pipe_ctx->stream->periodic_interrupt1.lines_offset); + } +} + +static void dcn10_setup_vupdate_interrupt(struct pipe_ctx *pipe_ctx) +{ + struct timing_generator *tg = pipe_ctx->stream_res.tg; + int start_line = get_vupdate_offset_from_vsync(pipe_ctx); + + if (start_line < 0) { + ASSERT(0); + start_line = 0; + } + + if (tg->funcs->setup_vertical_interrupt2) + tg->funcs->setup_vertical_interrupt2(tg, start_line); +} + static const struct hw_sequencer_funcs dcn10_funcs = { .program_gamut_remap = program_gamut_remap, .init_hw = dcn10_init_hw, @@ -2790,7 +2931,9 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .set_cursor_attribute = dcn10_set_cursor_attribute, .set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level, .disable_stream_gating = NULL, - .enable_stream_gating = NULL + .enable_stream_gating = NULL, + .setup_periodic_interrupt = dcn10_setup_periodic_interrupt, + .setup_vupdate_interrupt = dcn10_setup_vupdate_interrupt }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h index f8eea10e4c64..6d66084df55f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h +++ 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h @@ -81,4 +81,6 @@ struct pipe_ctx *find_top_pipe_for_stream( struct dc_state *context, const struct dc_stream_state *stream); +int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx); + #endif /* __DC_HWSS_DCN10_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c index 2f78a84f0dcb..0345d51e9d6f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c @@ -92,134 +92,36 @@ static void optc1_disable_stereo(struct timing_generator *optc) OTG_3D_STRUCTURE_STEREO_SEL_OVR, 0); } -static uint32_t get_start_vline(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing) -{ - struct dc_crtc_timing patched_crtc_timing; - int vesa_sync_start; - int asic_blank_end; - int interlace_factor; - int vertical_line_start; - - patched_crtc_timing = *dc_crtc_timing; - optc1_apply_front_porch_workaround(optc, &patched_crtc_timing); - - vesa_sync_start = patched_crtc_timing.h_addressable + - patched_crtc_timing.h_border_right + - patched_crtc_timing.h_front_porch; - - asic_blank_end = patched_crtc_timing.h_total - - vesa_sync_start - - patched_crtc_timing.h_border_left; - - interlace_factor = patched_crtc_timing.flags.INTERLACE ? 2 : 1; - - vesa_sync_start = patched_crtc_timing.v_addressable + - patched_crtc_timing.v_border_bottom + - patched_crtc_timing.v_front_porch; - - asic_blank_end = (patched_crtc_timing.v_total - - vesa_sync_start - - patched_crtc_timing.v_border_top) - * interlace_factor; - - vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1; - if (vertical_line_start < 0) { - ASSERT(0); - vertical_line_start = 0; - } - - return vertical_line_start; -} - -static void calc_vline_position( +void optc1_setup_vertical_interrupt0( struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing, - unsigned long long vsync_delta, - uint32_t *start_line, - uint32_t *end_line) -{ - unsigned long long req_delta_tens_of_usec = div64_u64((vsync_delta + 9999), 10000); - unsigned long long pix_clk_hundreds_khz = div64_u64((dc_crtc_timing->pix_clk_100hz + 999), 1000); - uint32_t req_delta_lines = (uint32_t) div64_u64( - (req_delta_tens_of_usec * pix_clk_hundreds_khz + dc_crtc_timing->h_total - 1), - dc_crtc_timing->h_total); - - uint32_t vsync_line = get_start_vline(optc, dc_crtc_timing); - - if (req_delta_lines != 0) - req_delta_lines--; - - if (req_delta_lines > vsync_line) - *start_line = dc_crtc_timing->v_total - (req_delta_lines - vsync_line) - 1; - else - *start_line = vsync_line - req_delta_lines; - - *end_line = *start_line + 2; - - if (*end_line >= dc_crtc_timing->v_total) - *end_line = 2; -} - -void optc1_program_vline_interrupt( - struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing, - enum vline_select vline, - const union vline_config *vline_config) + uint32_t start_line, + uint32_t end_line) { struct optc *optc1 = DCN10TG_FROM_TG(optc); - uint32_t start_line = 0; - uint32_t end_line = 0; - switch (vline) { - case VLINE0: - calc_vline_position(optc, dc_crtc_timing, vline_config->delta_in_ns, &start_line, &end_line); - REG_SET_2(OTG_VERTICAL_INTERRUPT0_POSITION, 0, - OTG_VERTICAL_INTERRUPT0_LINE_START, start_line, - OTG_VERTICAL_INTERRUPT0_LINE_END, end_line); - break; - case VLINE1: - REG_SET(OTG_VERTICAL_INTERRUPT1_POSITION, 0, - OTG_VERTICAL_INTERRUPT1_LINE_START, vline_config->line_number); - break; - default: - break; - } + 
REG_SET_2(OTG_VERTICAL_INTERRUPT0_POSITION, 0, + OTG_VERTICAL_INTERRUPT0_LINE_START, start_line, + OTG_VERTICAL_INTERRUPT0_LINE_END, end_line); } -void optc1_program_vupdate_interrupt( +void optc1_setup_vertical_interrupt1( struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing) + uint32_t start_line) { struct optc *optc1 = DCN10TG_FROM_TG(optc); - int32_t vertical_line_start; - uint32_t asic_blank_end; - uint32_t vesa_sync_start; - struct dc_crtc_timing patched_crtc_timing; - patched_crtc_timing = *dc_crtc_timing; - optc1_apply_front_porch_workaround(optc, &patched_crtc_timing); + REG_SET(OTG_VERTICAL_INTERRUPT1_POSITION, 0, + OTG_VERTICAL_INTERRUPT1_LINE_START, start_line); +} - /* asic_h_blank_end = HsyncWidth + HbackPorch = - * vesa. usHorizontalTotal - vesa. usHorizontalSyncStart - - * vesa.h_left_border - */ - vesa_sync_start = patched_crtc_timing.h_addressable + - patched_crtc_timing.h_border_right + - patched_crtc_timing.h_front_porch; - - asic_blank_end = patched_crtc_timing.h_total - - vesa_sync_start - - patched_crtc_timing.h_border_left; - - /* Use OTG_VERTICAL_INTERRUPT2 replace VUPDATE interrupt, - * program the reg for interrupt postition. - */ - vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1; - if (vertical_line_start < 0) - vertical_line_start = 0; +void optc1_setup_vertical_interrupt2( + struct timing_generator *optc, + uint32_t start_line) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); REG_SET(OTG_VERTICAL_INTERRUPT2_POSITION, 0, - OTG_VERTICAL_INTERRUPT2_LINE_START, vertical_line_start); + OTG_VERTICAL_INTERRUPT2_LINE_START, start_line); } /** @@ -1480,8 +1382,9 @@ bool optc1_get_crc(struct timing_generator *optc, static const struct timing_generator_funcs dcn10_tg_funcs = { .validate_timing = optc1_validate_timing, .program_timing = optc1_program_timing, - .program_vline_interrupt = optc1_program_vline_interrupt, - .program_vupdate_interrupt = optc1_program_vupdate_interrupt, + .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0, + .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1, + .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2, .program_global_sync = optc1_program_global_sync, .enable_crtc = optc1_enable_crtc, .disable_crtc = optc1_disable_crtc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h index 24452f11c598..4eb9a898c237 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.h @@ -483,11 +483,16 @@ void optc1_program_timing( const struct dc_crtc_timing *dc_crtc_timing, bool use_vbios); -void optc1_program_vline_interrupt( +void optc1_setup_vertical_interrupt0( struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing, - enum vline_select vline, - const union vline_config *vline_config); + uint32_t start_line, + uint32_t end_line); +void optc1_setup_vertical_interrupt1( + struct timing_generator *optc, + uint32_t start_line); +void optc1_setup_vertical_interrupt2( + struct timing_generator *optc, + uint32_t start_line); void optc1_program_global_sync( struct timing_generator *optc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 03ae941895f3..c25f7df7b5e3 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -134,14 +134,6 @@ struct dc_crtc_timing; struct 
drr_params; -union vline_config; - - -enum vline_select { - VLINE0, - VLINE1, - VLINE2 -}; struct timing_generator_funcs { bool (*validate_timing)(struct timing_generator *tg, @@ -149,14 +141,17 @@ struct timing_generator_funcs { void (*program_timing)(struct timing_generator *tg, const struct dc_crtc_timing *timing, bool use_vbios); - void (*program_vline_interrupt)( + void (*setup_vertical_interrupt0)( struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing, - enum vline_select vline, - const union vline_config *vline_config); + uint32_t start_line, + uint32_t end_line); + void (*setup_vertical_interrupt1)( + struct timing_generator *optc, + uint32_t start_line); + void (*setup_vertical_interrupt2)( + struct timing_generator *optc, + uint32_t start_line); - void (*program_vupdate_interrupt)(struct timing_generator *optc, - const struct dc_crtc_timing *dc_crtc_timing); bool (*enable_crtc)(struct timing_generator *tg); bool (*disable_crtc)(struct timing_generator *tg); bool (*is_counter_moving)(struct timing_generator *tg); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index fc03320c81a1..7676f25216b1 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -38,6 +38,11 @@ enum pipe_gating_control { PIPE_GATING_CONTROL_INIT }; +enum vline_select { + VLINE0, + VLINE1 +}; + struct dce_hwseq_wa { bool blnd_crtc_trigger; bool DEGVIDCN10_253; @@ -224,6 +229,9 @@ struct hw_sequencer_funcs { void (*set_cursor_attribute)(struct pipe_ctx *pipe); void (*set_cursor_sdr_white_level)(struct pipe_ctx *pipe); + void (*setup_periodic_interrupt)(struct pipe_ctx *pipe_ctx, enum vline_select vline); + void (*setup_vupdate_interrupt)(struct pipe_ctx *pipe_ctx); + }; void color_space_to_black_color( From 7f5725f9802502651644e874bb9a59e66215839d Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 19 Feb 2019 12:42:32 -0600 Subject: [PATCH 42/49] drm/amd/powerplay/smu8_hwmgr: use struct_size() in kzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = kzalloc(size, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kzalloc(struct_size(instance, entry, count), GFP_KERNEL); Notice that, in this case, variable table_size is not necessary, hence it is removed. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. 
Silva Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c index 553a203ac47c..019d6a206492 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu8_hwmgr.c @@ -272,12 +272,10 @@ static int smu8_init_dynamic_state_adjustment_rule_settings( struct pp_hwmgr *hwmgr, ATOM_CLK_VOLT_CAPABILITY *disp_voltage_table) { - uint32_t table_size = - sizeof(struct phm_clock_voltage_dependency_table) + - (7 * sizeof(struct phm_clock_voltage_dependency_record)); + struct phm_clock_voltage_dependency_table *table_clk_vlt; - struct phm_clock_voltage_dependency_table *table_clk_vlt = - kzalloc(table_size, GFP_KERNEL); + table_clk_vlt = kzalloc(struct_size(table_clk_vlt, entries, 7), + GFP_KERNEL); if (NULL == table_clk_vlt) { pr_err("Can not allocate memory!\n"); From bcb35dad1d3181e3cb4e5ac067bfc4aed53c592c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 19 Feb 2019 12:55:09 -0600 Subject: [PATCH 43/49] drm/amd/powerplay/smu10_hwmgr: use struct_size() in kzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = kzalloc(size, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kzalloc(struct_size(instance, entry, count), GFP_KERNEL); Notice that, in this case, variable table_size is not necessary, hence it is removed. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index f95c5f50eb0f..b45b0d0e7726 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -139,12 +139,10 @@ static int smu10_construct_max_power_limits_table(struct pp_hwmgr *hwmgr, static int smu10_init_dynamic_state_adjustment_rule_settings( struct pp_hwmgr *hwmgr) { - uint32_t table_size = - sizeof(struct phm_clock_voltage_dependency_table) + - (7 * sizeof(struct phm_clock_voltage_dependency_record)); + struct phm_clock_voltage_dependency_table *table_clk_vlt; - struct phm_clock_voltage_dependency_table *table_clk_vlt = - kzalloc(table_size, GFP_KERNEL); + table_clk_vlt = kzalloc(struct_size(table_clk_vlt, entries, 7), + GFP_KERNEL); if (NULL == table_clk_vlt) { pr_err("Can not allocate memory!\n"); From 8466cc61da89d33441e0d7a98de1ba98697cd465 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 28 Jan 2019 13:41:58 +0100 Subject: [PATCH 44/49] drm/amdgpu: cleanup setting bulk_movable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We only need to set this to false now when BOs are removed from the LRU. 
Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6dda415bb3ee..d4c733885e87 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -332,7 +332,6 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, if (bo->tbo.resv != vm->root.base.bo->tbo.resv) return; - vm->bulk_moveable = false; if (bo->tbo.type == ttm_bo_type_kernel) amdgpu_vm_bo_relocated(base); else @@ -698,8 +697,6 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_vm_bo_base *bo_base, *tmp; int r = 0; - vm->bulk_moveable &= list_empty(&vm->evicted); - list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) { struct amdgpu_bo *bo = bo_base->bo; @@ -2758,9 +2755,6 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_vm_bo_base **base; if (bo) { - if (bo->tbo.resv == vm->root.base.bo->tbo.resv) - vm->bulk_moveable = false; - for (base = &bo_va->base.bo->vm_bo; *base; base = &(*base)->next) { if (*base != &bo_va->base) From 661b96b21c25e05b9a02178b4f19fea83b5dd774 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 30 Jan 2019 13:41:05 +0100 Subject: [PATCH 45/49] drm/amdgpu: partial revert cleanup setting bulk_movable v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We still need to set bulk_movable to false when new BOs are added or removed. v2: also set it to false on removal Signed-off-by: Christian König Tested-by: StDenis, Tom Tested-by: Przemek Socha Reviewed-by: Zhou, David(ChunMing) Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d4c733885e87..b06ffd29ccc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -332,6 +332,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, if (bo->tbo.resv != vm->root.base.bo->tbo.resv) return; + vm->bulk_moveable = false; if (bo->tbo.type == ttm_bo_type_kernel) amdgpu_vm_bo_relocated(base); else @@ -2755,6 +2756,9 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_vm_bo_base **base; if (bo) { + if (bo->tbo.resv == vm->root.base.bo->tbo.resv) + vm->bulk_moveable = false; + for (base = &bo_va->base.bo->vm_bo; *base; base = &(*base)->next) { if (*base != &bo_va->base) From 94b94438120306850289558efe1eaef0a1eec403 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 10 Feb 2019 21:05:31 -0500 Subject: [PATCH 46/49] drm/amdgpu: add missing license on baco files Trivial. Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../gpu/drm/amd/powerplay/hwmgr/vega10_baco.c | 22 +++++++++++++++++++ .../gpu/drm/amd/powerplay/hwmgr/vega20_baco.c | 22 +++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c index f94dab27f486..d5232110ec84 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c @@ -1,3 +1,25 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ #include "amdgpu.h" #include "soc15.h" #include "soc15_hw_ip.h" diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c index 0d883b358df2..edf00da8424b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c @@ -1,3 +1,25 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ #include "amdgpu.h" #include "soc15.h" #include "soc15_hw_ip.h" From 41d3ae4b9ab720e4b3a1f220f1c9a882d3c94123 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 15 Feb 2019 17:14:36 -0500 Subject: [PATCH 47/49] drm/amdgpu/powerplay: fix return codes in BACO code Use a proper return code rather than -1. 
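For context (sketch only, not part of the diff): with errno-style returns a bare -1 reads as -EPERM (EPERM == 1), so callers that forward the value report the wrong error. Returning -EINVAL keeps the usual propagation pattern meaningful; the call site below is hypothetical and only illustrates the convention:

	ret = vega10_baco_set_state(hwmgr, BACO_STATE_IN);
	if (ret)
		return ret;	/* forwards -EINVAL rather than an ambiguous -1 */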
Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c | 4 ++-- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c index d5232110ec84..7337be5602e4 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.c @@ -136,7 +136,7 @@ int vega10_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state) if (soc15_baco_program_registers(hwmgr, pre_baco_tbl, ARRAY_SIZE(pre_baco_tbl))) { if (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnterBaco)) - return -1; + return -EINVAL; if (soc15_baco_program_registers(hwmgr, enter_baco_tbl, ARRAY_SIZE(enter_baco_tbl))) @@ -154,5 +154,5 @@ int vega10_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state) } } - return -1; + return -EINVAL; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c index edf00da8424b..5e8602a79b1c 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.c @@ -89,14 +89,14 @@ int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state) if(smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_EnterBaco, 0)) - return -1; + return -EINVAL; } else if (state == BACO_STATE_OUT) { if (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ExitBaco)) - return -1; + return -EINVAL; if (!soc15_baco_program_registers(hwmgr, clean_baco_tbl, ARRAY_SIZE(clean_baco_tbl))) - return -1; + return -EINVAL; } return 0; From f1b4ac960d7ac4a6c1c7e21905ee5edda889166b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 15 Feb 2019 17:20:04 -0500 Subject: [PATCH 48/49] drm/amdgpu/powerplay: fix typo in BACO header guards s/BOCO/BACO/g Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.h | 4 ++-- drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.h index a93b1e6d1c66..f7a3ffa744b3 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_baco.h @@ -20,8 +20,8 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#ifndef __VEGA10_BOCO_H__ -#define __VEGA10_BOCO_H__ +#ifndef __VEGA10_BACO_H__ +#define __VEGA10_BACO_H__ #include "hwmgr.h" #include "common_baco.h" diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.h index c51988a9ed77..51c7f8392925 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_baco.h @@ -20,8 +20,8 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ -#ifndef __VEGA20_BOCO_H__ -#define __VEGA20_BOCO_H__ +#ifndef __VEGA20_BACO_H__ +#define __VEGA20_BACO_H__ #include "hwmgr.h" #include "common_baco.h" From 767e06a9924162ce8ca5890533932174b04471f3 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Sun, 3 Feb 2019 01:45:24 +0100 Subject: [PATCH 49/49] drm/amdgpu: Bump amdgpu version for context priority override. 
Signed-off-by: Bas Nieuwenhuizen Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 637f927e3675..7419ea8a388b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -73,9 +73,10 @@ * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation. * - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES * - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID + * - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE. */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 29 +#define KMS_DRIVER_MINOR 30 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0;
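As a closing usage note (sketch, not part of the patch): userspace that wants the new AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE operation would typically gate it on the bumped DRM version first. A minimal check via libdrm's drmGetVersion(); the helper name is hypothetical:

#include <stdbool.h>
#include <xf86drm.h>

/* Return true when the running amdgpu KMS driver reports at least 3.30. */
static bool amdgpu_has_ctx_priority_override(int drm_fd)
{
	drmVersionPtr ver = drmGetVersion(drm_fd);
	bool ok;

	if (!ver)
		return false;
	ok = ver->version_major > 3 ||
	     (ver->version_major == 3 && ver->version_minor >= 30);
	drmFreeVersion(ver);
	return ok;
}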