From 1a0f3667d876be86f31e5bdd3127064390aa9783 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 22 Apr 2020 11:42:37 +0800 Subject: [PATCH 01/88] drm/amdgpu: ignore TA ucode for SRIOV Signed-off-by: Monk Liu Acked-by: Yintian Tao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index d2d2363787ff..97c80f1d5731 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -137,6 +137,8 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) case CHIP_NAVI10: case CHIP_NAVI14: case CHIP_NAVI12: + if (amdgpu_sriov_vf(adev)) + break; snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); if (err) { From 2f5a0a91191a5a86d81f17dd86b89dcf18e2a987 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 23 Apr 2020 11:09:23 +0800 Subject: [PATCH 02/88] drm/amdgpu: skip cg/pg set for SRIOV Signed-off-by: Monk Liu Acked-by: Yintian Tao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index e1687e408e1d..97659be2cf36 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7095,6 +7095,10 @@ static int gfx_v10_0_set_powergating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; bool enable = (state == AMD_PG_STATE_GATE); + + if (amdgpu_sriov_vf(adev)) + return 0; + switch (adev->asic_type) { case CHIP_NAVI10: case CHIP_NAVI14: @@ -7115,6 +7119,9 @@ static int gfx_v10_0_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_vf(adev)) + return 0; + switch (adev->asic_type) { case CHIP_NAVI10: case CHIP_NAVI14: From c983361a724999fcf6a209b485188a9ee93a5e38 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 22 Apr 2020 12:15:23 +0800 Subject: [PATCH 03/88] drm/amdgpu: sriov is forbidden to call disable DPM Signed-off-by: Monk Liu Acked-by: Yintian Tao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 88b4e5642302..2bb1e0c6f344 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -1403,6 +1403,9 @@ failed: static int smu_stop_dpms(struct smu_context *smu) { + if (amdgpu_sriov_vf(smu->adev)) + return 0; + return smu_system_features_control(smu, false); } From c2ce6aebf09227160364e67f40d8c303875d6039 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 22 Apr 2020 19:27:44 +0800 Subject: [PATCH 04/88] drm/amdgpu: provide RREG32_SOC15_NO_KIQ, will be used later Signed-off-by: Monk Liu Acked-by: Yintian Tao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/soc15_common.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index c893c645a4b2..56d02aa690a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -35,6 +35,9 @@ #define RREG32_SOC15(ip, inst, reg) \ RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) +#define RREG32_SOC15_NO_KIQ(ip, inst, reg) \ + RREG32_NO_KIQ(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + #define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \ RREG32((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset) From b217e6f579d675f90361f1bbafba769601f0bc61 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 23 Apr 2020 14:57:40 +0800 Subject: [PATCH 05/88] drm/amdgpu: clear the messed up checking logic for ARCTURUS+ ASICS, we always support SW_SMU for bare-metal and for SRIOV one_vf_mode Signed-off-by: Monk Liu Acked-by: Yintian Tao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 2bb1e0c6f344..361a5b6cbe3b 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -571,15 +571,10 @@ bool is_support_sw_smu(struct amdgpu_device *adev) if (adev->asic_type == CHIP_VEGA20) return (amdgpu_dpm == 2) ? true : false; else if (adev->asic_type >= CHIP_ARCTURUS) { - if (amdgpu_sriov_vf(adev) && - !(adev->asic_type == CHIP_ARCTURUS && - amdgpu_sriov_is_pp_one_vf(adev))) - - return false; - else + if (amdgpu_sriov_is_pp_one_vf(adev) || !amdgpu_sriov_vf(adev)) return true; - } else - return false; + } + return false; } bool is_support_sw_smu_xgmi(struct amdgpu_device *adev) From 38748ad88a2f9673bb63dda607204bb3a9bc21a0 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 22 Apr 2020 19:28:48 +0800 Subject: [PATCH 06/88] drm/amdgpu: enable one vf mode for nv12 Signed-off-by: Monk Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 12 +++--- drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 6 ++- drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 49 ++++++++++++++++++---- 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 361a5b6cbe3b..5964d6323a13 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -347,13 +347,13 @@ int smu_get_dpm_freq_by_index(struct smu_context *smu, enum smu_clk_type clk_typ param = (uint32_t)(((clk_id & 0xffff) << 16) | (level & 0xffff)); ret = smu_send_smc_msg_with_param(smu, SMU_MSG_GetDpmFreqByIndex, - param, ¶m); + param, value); if (ret) return ret; /* BIT31: 0 - Fine grained DPM, 1 - Dicrete DPM * now, we un-support it */ - *value = param & 0x7fffffff; + *value = *value & 0x7fffffff; return ret; } @@ -535,7 +535,6 @@ int smu_update_table(struct smu_context *smu, enum smu_table_id table_index, int int table_id = smu_table_get_index(smu, table_index); uint32_t table_size; int ret = 0; - if (!table_data || table_id >= SMU_TABLE_COUNT || table_id < 0) return -EINVAL; @@ -691,7 +690,6 @@ int smu_feature_is_enabled(struct smu_context *smu, enum smu_feature_mask mask) if (smu->is_apu) return 1; - feature_id = smu_feature_get_index(smu, mask); if (feature_id < 0) return 0; @@ -1339,6 +1337,9 @@ static int smu_hw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct smu_context *smu = &adev->smu; + if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + return 0; + ret = smu_start_smc_engine(smu); if (ret) { pr_err("SMU is not ready yet!\n"); @@ -1352,9 +1353,6 @@ static int smu_hw_init(void *handle) smu_set_gfx_cgpg(&adev->smu, true); } - if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) - return 0; - if (!smu->pm_enabled) return 0; diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index c94270f7c198..2184d247a9f7 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -1817,7 +1817,8 @@ static int navi10_get_power_limit(struct smu_context *smu, int power_src; if (!smu->power_limit) { - if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) { + if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT) && + !amdgpu_sriov_vf(smu->adev)) { power_src = smu_power_get_index(smu, SMU_POWER_SOURCE_AC); if (power_src < 0) return -EINVAL; @@ -1960,6 +1961,9 @@ static int navi10_set_default_od_settings(struct smu_context *smu, bool initiali OverDriveTable_t *od_table, *boot_od_table; int ret = 0; + if (amdgpu_sriov_vf(smu->adev)) + return 0; + ret = smu_v11_0_set_default_od_settings(smu, initialize, sizeof(OverDriveTable_t)); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index a97b2964ca7c..3e1b3ed8a05e 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -57,7 +57,7 @@ static int smu_v11_0_send_msg_without_waiting(struct smu_context *smu, uint16_t msg) { struct amdgpu_device *adev = smu->adev; - WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66, msg); + WREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_66, msg); return 0; } @@ -65,7 +65,7 @@ static int smu_v11_0_read_arg(struct smu_context *smu, uint32_t *arg) { struct amdgpu_device *adev = smu->adev; - *arg = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82); + *arg = RREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_82); return 0; } @@ -75,7 +75,7 @@ static int smu_v11_0_wait_for_response(struct smu_context *smu) uint32_t cur_value, i, timeout = adev->usec_timeout * 10; for (i = 0; i < timeout; i++) { - cur_value = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90); + cur_value = RREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_90); if ((cur_value & MP1_C2PMSG_90__CONTENT_MASK) != 0) return cur_value == 0x1 ? 0 : -EIO; @@ -83,7 +83,10 @@ static int smu_v11_0_wait_for_response(struct smu_context *smu) } /* timeout means wrong logic */ - return -ETIME; + if (i == timeout) + return -ETIME; + + return RREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_90) == 0x1 ? 0 : -EIO; } int @@ -107,9 +110,9 @@ smu_v11_0_send_msg_with_param(struct smu_context *smu, goto out; } - WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); + WREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_90, 0); - WREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82, param); + WREG32_SOC15_NO_KIQ(MP1, 0, mmMP1_SMN_C2PMSG_82, param); smu_v11_0_send_msg_without_waiting(smu, (uint16_t)index); @@ -119,6 +122,7 @@ smu_v11_0_send_msg_with_param(struct smu_context *smu, smu_get_message_name(smu, msg), index, param, ret); goto out; } + if (read_arg) { ret = smu_v11_0_read_arg(smu, read_arg); if (ret) { @@ -728,8 +732,9 @@ int smu_v11_0_parse_pptable(struct smu_context *smu) struct smu_table_context *table_context = &smu->smu_table; struct smu_table *table = &table_context->tables[SMU_TABLE_PPTABLE]; + /* during TDR we need to free and alloc the pptable */ if (table_context->driver_pptable) - return -EINVAL; + kfree(table_context->driver_pptable); table_context->driver_pptable = kzalloc(table->size, GFP_KERNEL); @@ -769,6 +774,9 @@ int smu_v11_0_set_deep_sleep_dcefclk(struct smu_context *smu, uint32_t clk) { int ret; + if (amdgpu_sriov_vf(smu->adev)) + return 0; + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetMinDeepSleepDcefclk, clk, NULL); if (ret) @@ -812,6 +820,9 @@ int smu_v11_0_set_tool_table_location(struct smu_context *smu) int ret = 0; struct smu_table *tool_table = &smu->smu_table.tables[SMU_TABLE_PMSTATUSLOG]; + if (amdgpu_sriov_vf(smu->adev)) + return 0; + if (tool_table->mc_address) { ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetToolsDramAddrHigh, @@ -831,6 +842,12 @@ int smu_v11_0_init_display_count(struct smu_context *smu, uint32_t count) { int ret = 0; + if (amdgpu_sriov_vf(smu->adev)) + return 0; + + if (!smu->pm_enabled) + return ret; + ret = smu_send_smc_msg_with_param(smu, SMU_MSG_NumOfDisplays, count, NULL); return ret; } @@ -842,6 +859,9 @@ int smu_v11_0_set_allowed_mask(struct smu_context *smu) int ret = 0; uint32_t feature_mask[2]; + if (amdgpu_sriov_vf(smu->adev)) + return 0; + mutex_lock(&feature->mutex); if (bitmap_empty(feature->allowed, SMU_FEATURE_MAX) || feature->feature_num < 64) goto failed; @@ -870,6 +890,9 @@ int smu_v11_0_get_enabled_mask(struct smu_context *smu, struct smu_feature *feature = &smu->smu_feature; int ret = 0; + if (amdgpu_sriov_vf(smu->adev) && !amdgpu_sriov_is_pp_one_vf(smu->adev)) + return 0; + if (!feature_mask || num < 2) return -EINVAL; @@ -925,6 +948,12 @@ int smu_v11_0_notify_display_change(struct smu_context *smu) { int ret = 0; + if (amdgpu_sriov_vf(smu->adev)) + return 0; + + if (!smu->pm_enabled) + return ret; + if (smu_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT) && smu->adev->gmc.vram_type == AMDGPU_VRAM_TYPE_HBM) ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetUclkFastSwitch, 1, NULL); @@ -1084,6 +1113,9 @@ int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n) int ret = 0; uint32_t max_power_limit; + if (amdgpu_sriov_vf(smu->adev)) + return 0; + max_power_limit = smu_v11_0_get_max_power_limit(smu); if (n > max_power_limit) { @@ -1809,6 +1841,9 @@ int smu_v11_0_override_pcie_parameters(struct smu_context *smu) uint32_t pcie_gen = 0, pcie_width = 0; int ret; + if (amdgpu_sriov_vf(smu->adev)) + return 0; + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) pcie_gen = 3; else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) From 8efd72759e9ee9c6a5e841d1fdb27cd60d8b2ed2 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 22 Apr 2020 12:19:13 +0800 Subject: [PATCH 07/88] drm/amdgpu: skip sysfs node not belong to one vf mode Signed-off-by: Monk Liu Acked-by: Yintian Tao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 48 +++++++++++++++----------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 49e2e43f2e4a..c762deb5abc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -3271,26 +3271,27 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } - - ret = device_create_file(adev->dev, &dev_attr_pp_num_states); - if (ret) { - DRM_ERROR("failed to create device file pp_num_states\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_pp_cur_state); - if (ret) { - DRM_ERROR("failed to create device file pp_cur_state\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_pp_force_state); - if (ret) { - DRM_ERROR("failed to create device file pp_force_state\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_pp_table); - if (ret) { - DRM_ERROR("failed to create device file pp_table\n"); - return ret; + if (!amdgpu_sriov_vf(adev)) { + ret = device_create_file(adev->dev, &dev_attr_pp_num_states); + if (ret) { + DRM_ERROR("failed to create device file pp_num_states\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_pp_cur_state); + if (ret) { + DRM_ERROR("failed to create device file pp_cur_state\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_pp_force_state); + if (ret) { + DRM_ERROR("failed to create device file pp_force_state\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_pp_table); + if (ret) { + DRM_ERROR("failed to create device file pp_table\n"); + return ret; + } } ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk); @@ -3337,6 +3338,13 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) return ret; } } + + /* the reset are not needed for SRIOV one vf mode */ + if (amdgpu_sriov_vf(adev)) { + adev->pm.sysfs_initialized = true; + return ret; + } + if (adev->asic_type != CHIP_ARCTURUS) { ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie); if (ret) { From 79bebabb88cb8ef32fb55023f351e1a26ccb4e20 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Wed, 22 Apr 2020 12:09:16 +0800 Subject: [PATCH 08/88] drm/amdgpu: for nv12 always need smu ip because nv12 SRIOV support one vf mode Signed-off-by: Monk Liu Acked-by: Yintian Tao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 995bdec9fa7d..9c42316c47c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -498,8 +498,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - !amdgpu_sriov_vf(adev)) + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); From 312a79b6eafe5c45e3e232506a4a6e97d7cdbba4 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Tue, 21 Apr 2020 18:04:50 +0800 Subject: [PATCH 09/88] drm/amdgpu: extent threshold of waiting FLR_COMPLETE to 5s to satisfy WHOLE GPU reset which need 3+ seconds to finish Signed-off-by: Monk Liu Acked-by: Yintian Tao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h | 2 +- drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index 52a697545801..83b453f5d717 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -26,7 +26,7 @@ #define AI_MAILBOX_POLL_ACK_TIMEDOUT 500 #define AI_MAILBOX_POLL_MSG_TIMEDOUT 12000 -#define AI_MAILBOX_POLL_FLR_TIMEDOUT 500 +#define AI_MAILBOX_POLL_FLR_TIMEDOUT 5000 enum idh_request { IDH_REQ_GPU_INIT_ACCESS = 1, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index 45bcf438e607..52605e14a1a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -26,7 +26,7 @@ #define NV_MAILBOX_POLL_ACK_TIMEDOUT 500 #define NV_MAILBOX_POLL_MSG_TIMEDOUT 6000 -#define NV_MAILBOX_POLL_FLR_TIMEDOUT 500 +#define NV_MAILBOX_POLL_FLR_TIMEDOUT 5000 enum idh_request { IDH_REQ_GPU_INIT_ACCESS = 1, From 04e4e2e9558326316cf3e32d46cca83cc3861ef0 Mon Sep 17 00:00:00 2001 From: Yintian Tao Date: Thu, 23 Apr 2020 12:05:54 +0800 Subject: [PATCH 10/88] drm/amdgpu: protect ring overrun MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wait for the oldest sequence on the ring to be signaled in order to make sure there will be no command overrun. v2: fix coding stype and remove abs operation v3: remove the initialization of variable r Signed-off-by: Yintian Tao Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 10 +++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 22 ++++++++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 8 +++++++- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 14 +++++++++++--- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 8 +++++++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++++++- 7 files changed, 61 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 7531527067df..d878fe7fee51 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -192,14 +192,22 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, * Used For polling fence. * Returns 0 on success, -ENOMEM on failure. */ -int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, + uint32_t timeout) { uint32_t seq; + signed long r; if (!s) return -EINVAL; seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + return -ETIMEDOUT; + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index a721b0e0ff69..0103acc57474 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -675,13 +675,15 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) spin_lock_irqsave(&kiq->ring_lock, flags); if (amdgpu_device_wb_get(adev, ®_val_offs)) { - spin_unlock_irqrestore(&kiq->ring_lock, flags); pr_err("critical bug! too many kiq readers\n"); - goto failed_kiq_read; + goto failed_unlock; } amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) + goto failed_undo; + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -712,7 +714,13 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) amdgpu_device_wb_free(adev, reg_val_offs); return value; +failed_undo: + amdgpu_ring_undo(ring); +failed_unlock: + spin_unlock_irqrestore(&kiq->ring_lock, flags); failed_kiq_read: + if (reg_val_offs) + amdgpu_device_wb_free(adev, reg_val_offs); pr_err("failed to read reg:%x\n", reg); return ~0; } @@ -730,7 +738,10 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) spin_lock_irqsave(&kiq->ring_lock, flags); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) + goto failed_undo; + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -759,6 +770,9 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) return; +failed_undo: + amdgpu_ring_undo(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); failed_kiq_write: pr_err("failed to write reg:%x\n", reg); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index e1d894f0d654..7d39064f9361 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -105,7 +105,8 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); void amdgpu_fence_driver_resume(struct amdgpu_device *adev); int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, unsigned flags); -int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s, + uint32_t timeout); bool amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 8c10084f44ef..cbbb8d02535a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -60,7 +60,10 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, ref, mask); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) + goto failed_undo; + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -82,6 +85,9 @@ void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, return; +failed_undo: + amdgpu_ring_undo(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); failed_kiq: pr_err("failed to write reg %x wait reg %x\n", reg0, reg1); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 496205a8ee0c..6c5ba34b98d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4054,9 +4054,8 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) spin_lock_irqsave(&kiq->ring_lock, flags); if (amdgpu_device_wb_get(adev, ®_val_offs)) { - spin_unlock_irqrestore(&kiq->ring_lock, flags); pr_err("critical bug! too many kiq readers\n"); - goto failed_kiq_read; + goto failed_unlock; } amdgpu_ring_alloc(ring, 32); amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); @@ -4070,7 +4069,10 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) reg_val_offs * 4)); amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + reg_val_offs * 4)); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) + goto failed_undo; + amdgpu_ring_commit(ring); spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -4102,7 +4104,13 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) amdgpu_device_wb_free(adev, reg_val_offs); return value; +failed_undo: + amdgpu_ring_undo(ring); +failed_unlock: + spin_unlock_irqrestore(&kiq->ring_lock, flags); failed_kiq_read: + if (reg_val_offs) + amdgpu_device_wb_free(adev, reg_val_offs); pr_err("failed to read gpu clock\n"); return ~0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 94a6096a81f4..cd67aad3ae75 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -426,7 +426,13 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) { + amdgpu_ring_undo(ring); + spin_unlock(&kiq->ring_lock); + return -ETIME; + } + amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index fecdbc471983..0a6026308343 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -621,7 +621,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, pasid, 2, all_hub); kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); - amdgpu_fence_emit_polling(ring, &seq); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) { + amdgpu_ring_undo(ring); + spin_unlock(&kiq->ring_lock); + return -ETIME; + } + amdgpu_ring_commit(ring); spin_unlock(&adev->gfx.kiq.ring_lock); r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); From a96f661a471596f3f2945b335e8eac849e9b8a15 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 23 Apr 2020 15:17:28 +0100 Subject: [PATCH 11/88] drm/amd/display: remove redundant assignment to variable ret The variable ret is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index a87302f729c7..f9fa0f7712b3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -4231,7 +4231,7 @@ void dpcd_set_source_specific_data(struct dc_link *link) { const uint32_t post_oui_delay = 30; // 30ms uint8_t dspc = 0; - enum dc_status ret = DC_ERROR_UNEXPECTED; + enum dc_status ret; ret = core_link_read_dpcd(link, DP_DOWN_STREAM_PORT_COUNT, &dspc, sizeof(dspc)); From abb17b1edf7bd1eacb78b6862039e965cb442947 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 24 Apr 2020 13:56:40 +0100 Subject: [PATCH 12/88] drm/amdgpu/gmc: Use consistent variable on unlocks Currently the error returns paths are unlocking lock kiq->ring_lock however it seems this should be dev->gfx.kiq.ring_lock as this is the lock that is being locked and unlocked around the ring operations. This looks like a bug, but it's not. The kiq is just a local variable pointing to the same structure. Make it consistent. Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index cd67aad3ae75..eff25c72c6c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -429,7 +429,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); if (r) { amdgpu_ring_undo(ring); - spin_unlock(&kiq->ring_lock); + spin_unlock(&adev->gfx.kiq.ring_lock); return -ETIME; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 0a6026308343..055ecba754ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -624,7 +624,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); if (r) { amdgpu_ring_undo(ring); - spin_unlock(&kiq->ring_lock); + spin_unlock(&adev->gfx.kiq.ring_lock); return -ETIME; } From d971d42f0655fe448e34bcdbbe27ae2346301380 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 24 Apr 2020 12:12:26 +0100 Subject: [PATCH 13/88] amdgpu/dc: remove redundant assignment to variable 'option' The variable option is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_csc_v.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_csc_v.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_csc_v.c index 4245e1f818a3..e096d2b95ef9 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_csc_v.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_opp_csc_v.c @@ -679,8 +679,7 @@ void dce110_opp_v_set_csc_default( if (default_adjust->force_hw_default == false) { const struct out_csc_color_matrix *elm; /* currently parameter not in use */ - enum grph_color_adjust_option option = - GRPH_COLOR_MATRIX_HW_DEFAULT; + enum grph_color_adjust_option option; uint32_t i; /* * HW default false we program locally defined matrix From d18ba57c725e71b612f195e527f202a182649595 Mon Sep 17 00:00:00 2001 From: Zheng Bin Date: Fri, 24 Apr 2020 15:56:20 +0800 Subject: [PATCH 14/88] drm/amdgpu: Remove unneeded semicolon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes coccicheck warning: drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:2534:2-3: Unneeded semicolon Reported-by: Hulk Robot Signed-off-by: Zheng Bin Signed-off-by: Alex Deucher Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6c5ba34b98d8..b7b51037b1cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2531,7 +2531,7 @@ static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) break; default: break; - }; + } } static void gfx_v9_0_constants_init(struct amdgpu_device *adev) From dfe31f255fd62655de63ecc45b0e5d1b0504eabb Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Fri, 24 Apr 2020 18:16:06 -0400 Subject: [PATCH 15/88] drm/amdgpu: sw pstate switch should only be for vega20 Driver steered p-state switching is designed for Vega20 only. Also simplify early return for temporary disable due to SMU FW bug. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 54d8a3e7e75c..48c0ce13f68e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -395,7 +395,9 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN; /* fw bug so temporarily disable pstate switching */ - if (!hive || adev->asic_type == CHIP_VEGA20) + return 0; + + if (!hive || adev->asic_type != CHIP_VEGA20) return 0; mutex_lock(&hive->hive_lock); From 94fa56605866282512a7589e22f47195cdcc5cf8 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 24 Apr 2020 15:32:41 +0800 Subject: [PATCH 16/88] drm/amdgpu: move kfd suspend after ip_suspend_phase1 This sequence change should be safe as what did in ip_suspend_phase1 is to suspend DCE only. And this is a prerequisite for coming redundant cg/pg ungate dropping. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f9b315e7e004..e51e73587f51 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3435,12 +3435,12 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); - amdgpu_amdkfd_suspend(adev, !fbcon); - amdgpu_ras_suspend(adev); r = amdgpu_device_ip_suspend_phase1(adev); + amdgpu_amdkfd_suspend(adev, !fbcon); + /* evict vram memory */ amdgpu_bo_evict_vram(adev); From fde812b32c1e3f24da305aa5e761840c2460974b Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 24 Apr 2020 15:36:22 +0800 Subject: [PATCH 17/88] drm/amdgpu: drop redundant cg/pg ungate on runpm enter CG/PG ungate is already performed in ip_suspend_phase1. Otherwise, the CG/PG ungate will be performed twice. That will cause gfxoff disablement is performed twice also on runpm enter while gfxoff enablemnt once on rump exit. That will put gfxoff into disabled state. Signed-off-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index e51e73587f51..11f7c4e0a360 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3432,9 +3432,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) } } - amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); - amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); - amdgpu_ras_suspend(adev); r = amdgpu_device_ip_suspend_phase1(adev); From 40e733147fe06bc2d87abd3207468b2ca549f4ac Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Sun, 26 Apr 2020 16:44:16 +0800 Subject: [PATCH 18/88] drm/amdgpu: switch to SMN interface to operate RSMU index mode This makes consistent with other regsiters' access in this module. Signed-off-by: Guchun Chen Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 29 ++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 14d346321a5f..b735ee44f948 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -56,24 +56,43 @@ const uint32_t static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev) { - WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + uint32_t rsmu_umc_addr, rsmu_umc_val; + + rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0, + mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); + rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4); + + rsmu_umc_val = REG_SET_FIELD(rsmu_umc_val, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, RSMU_UMC_INDEX_MODE_EN, 1); + + WREG32_PCIE(rsmu_umc_addr * 4, rsmu_umc_val); } static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) { - WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + uint32_t rsmu_umc_addr, rsmu_umc_val; + + rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0, + mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); + rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4); + + rsmu_umc_val = REG_SET_FIELD(rsmu_umc_val, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, RSMU_UMC_INDEX_MODE_EN, 0); + + WREG32_PCIE(rsmu_umc_addr * 4, rsmu_umc_val); } static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev) { - uint32_t rsmu_umc_index; + uint32_t rsmu_umc_addr, rsmu_umc_val; - rsmu_umc_index = RREG32_SOC15(RSMU, 0, + rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0, mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); + rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4); - return REG_GET_FIELD(rsmu_umc_index, + return REG_GET_FIELD(rsmu_umc_val, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, RSMU_UMC_INDEX_MODE_EN); } From fd90456c757da8bcb75dd0d9221581008c01e380 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Sun, 26 Apr 2020 17:04:31 +0800 Subject: [PATCH 19/88] drm/amdgpu: decouple EccErrCnt query and clear operation Due to hardware bug that when RSMU UMC index is disabled, clear EccErrCnt at the first UMC instance will clean up all other EccErrCnt registes from other instances at the same time. This will break the correctable error count log in EccErrCnt register once querying it. So decouple both to make error count query workable. Signed-off-by: Guchun Chen Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 83 +++++++++++++++++++++++++-- 1 file changed, 79 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index b735ee44f948..418cf097c918 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -104,6 +104,81 @@ static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev, return adev->umc.channel_offs*ch_inst + UMC_6_INST_DIST*umc_inst; } +static void umc_v6_1_clear_error_count_per_channel(struct amdgpu_device *adev, + uint32_t umc_reg_offset) +{ + uint32_t ecc_err_cnt_addr; + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + + if (adev->asic_type == CHIP_ARCTURUS) { + /* UMC 6_1_2 registers */ + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, + mmUMCCH0_0_EccErrCntSel_ARCT); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, + mmUMCCH0_0_EccErrCnt_ARCT); + } else { + /* UMC 6_1_1 registers */ + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, + mmUMCCH0_0_EccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, + mmUMCCH0_0_EccErrCnt); + } + + /* select the lower chip */ + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + + umc_reg_offset) * 4); + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, + UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 0); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, + ecc_err_cnt_sel); + + /* clear lower chip error count */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, + UMC_V6_1_CE_CNT_INIT); + + /* select the higher chip */ + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + + umc_reg_offset) * 4); + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, + UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 1); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, + ecc_err_cnt_sel); + + /* clear higher chip error count */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, + UMC_V6_1_CE_CNT_INIT); +} + +static void umc_v6_1_clear_error_count(struct amdgpu_device *adev) +{ + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + uint32_t umc_reg_offset = 0; + uint32_t rsmu_umc_index_state = + umc_v6_1_get_umc_index_mode_state(adev); + + if (rsmu_umc_index_state) + umc_v6_1_disable_umc_index_mode(adev); + + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_reg_offset = get_umc_6_reg_offset(adev, + umc_inst, + ch_inst); + + umc_v6_1_clear_error_count_per_channel(adev, + umc_reg_offset); + } + + if (rsmu_umc_index_state) + umc_v6_1_enable_umc_index_mode(adev); +} + static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, uint32_t umc_reg_offset, unsigned long *error_count) @@ -136,23 +211,21 @@ static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, EccErrCntCsSel, 0); WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); *error_count += (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - UMC_V6_1_CE_CNT_INIT); - /* clear the lower chip err count */ - WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); /* select the higher chip and check the err counter */ ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, EccErrCntCsSel, 1); WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); *error_count += (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - UMC_V6_1_CE_CNT_INIT); - /* clear the higher chip err count */ - WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT); /* check for SRAM correctable error MCUMC_STATUS is a 64 bit register */ @@ -228,6 +301,8 @@ static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev, if (rsmu_umc_index_state) umc_v6_1_enable_umc_index_mode(adev); + + umc_v6_1_clear_error_count(adev); } static void umc_v6_1_query_error_address(struct amdgpu_device *adev, From b6e79d9a314ebe5fc44f882c607869d8f7ffb4a1 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Mon, 27 Apr 2020 14:36:55 +0800 Subject: [PATCH 20/88] drm/amdgpu: remove conversion to bool in amdgpu_device.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The '>' expression itself is bool, no need to convert it to bool again. This fixes the following coccicheck warning: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:3004:68-73: WARNING: conversion to bool not needed here Reviewed-by: Christian König Signed-off-by: Jason Yan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 11f7c4e0a360..09ede1733806 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2998,7 +2998,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); adev->gfx.gfx_off_req_count = 1; - adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false; + adev->pm.ac_power = power_supply_is_system_supplied() > 0; /* Registers mapping */ /* TODO: block userspace mapping of io register */ From 2367cad7599f40d87cbec0aaf06f3709ea75082e Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Mon, 27 Apr 2020 14:37:14 +0800 Subject: [PATCH 21/88] drm/amd/display: remove conversion to bool in dcn20_mpc.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The '==' expression itself is bool, no need to convert it to bool again. This fixes the following coccicheck warning: drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c:455:70-75: WARNING: conversion to bool not needed here Reviewed-by: Christian König Signed-off-by: Jason Yan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c index de9c857ab3e9..9d7432f3fb16 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c @@ -452,7 +452,7 @@ void mpc2_set_output_gamma( next_mode = LUT_RAM_A; mpc20_power_on_ogam_lut(mpc, mpcc_id, true); - mpc20_configure_ogam_lut(mpc, mpcc_id, next_mode == LUT_RAM_A ? true:false); + mpc20_configure_ogam_lut(mpc, mpcc_id, next_mode == LUT_RAM_A); if (next_mode == LUT_RAM_A) mpc2_program_luta(mpc, mpcc_id, params); From 46501bc35b7186a939edce15d3bc9ca2a38115b6 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Mon, 27 Apr 2020 14:37:26 +0800 Subject: [PATCH 22/88] drm/amd/display: remove conversion to bool in dc_link_ddc.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The '>' expression itself is bool, no need to convert it to bool again. This fixes the following coccicheck warning: drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c:602:28-33: WARNING: conversion to bool not needed here Reviewed-by: Christian König Signed-off-by: Jason Yan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c index 256889eed93e..aefd29a440b5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c @@ -599,7 +599,7 @@ bool dal_ddc_submit_aux_command(struct ddc_service *ddc, do { struct aux_payload current_payload; bool is_end_of_payload = (retrieved + DEFAULT_AUX_MAX_DATA_SIZE) > - payload->length ? true : false; + payload->length; current_payload.address = payload->address; current_payload.data = &payload->data[retrieved]; From c6d1ec4134edc1611da39777cdc5dba78c757d35 Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Thu, 16 Apr 2020 14:08:59 -0500 Subject: [PATCH 23/88] drm/amdkfd: Put ASIC revision into HSA capability In order to surface the ASIC revision to user level, we want to put it into the HSA topology. This can be because different ASIC revisions may require user-level software to do different things (e.g. patch code for things that are changed in later hardware revisions). The ASIC revision from the hardware is maximum of 4 bits at this time, so put it into 4 of the open bits in the HSA capability. Then user-level software can use this capability information to know -- for each ASIC -- what revision-based things must be done. Signed-off-by: Joseph Greathouse Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 4 ++++ drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 5 ++++- 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index abfbe89e805e..ad59ac4423b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -564,6 +564,13 @@ uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd) return adev->gds.gws_size; } +uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return adev->rev_id; +} + int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 13feb313e9b3..d065c50582eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -175,6 +175,7 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_unique_id(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd); uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd); +uint32_t amdgpu_amdkfd_get_asic_rev_id(struct kgd_dev *kgd); uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src); /* Read user wptr from a specified user address space with page fault diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 8e6409bc7c91..1c090824cad7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1301,6 +1301,10 @@ int kfd_topology_add_device(struct kfd_dev *gpu) dev->node_props.vendor_id = gpu->pdev->vendor; dev->node_props.device_id = gpu->pdev->device; + dev->node_props.capability |= + ((amdgpu_amdkfd_get_asic_rev_id(dev->gpu->kgd) << + HSA_CAP_ASIC_REVISION_SHIFT) & + HSA_CAP_ASIC_REVISION_MASK); dev->node_props.location_id = pci_dev_id(gpu->pdev); dev->node_props.max_engine_clk_fcompute = amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 46eeecaf1b68..0c51bd3dcd59 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -41,7 +41,6 @@ #define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8 #define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK 0x00003000 #define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT 12 -#define HSA_CAP_RESERVED 0xffffc000 #define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0 #define HSA_CAP_DOORBELL_TYPE_1_0 0x1 @@ -51,6 +50,10 @@ #define HSA_CAP_SRAM_EDCSUPPORTED 0x00080000 #define HSA_CAP_MEM_EDCSUPPORTED 0x00100000 #define HSA_CAP_RASEVENTNOTIFY 0x00200000 +#define HSA_CAP_ASIC_REVISION_MASK 0x03c00000 +#define HSA_CAP_ASIC_REVISION_SHIFT 22 + +#define HSA_CAP_RESERVED 0xfc078000 struct kfd_node_properties { uint64_t hive_id; From a1cd1289a68c4f9fb7551edd06c6a6b4661a4cbb Mon Sep 17 00:00:00 2001 From: Tiecheng Zhou Date: Mon, 27 Apr 2020 09:44:38 +0800 Subject: [PATCH 24/88] Revert "drm/amd/powerplay: avoid using pm_en before it is initialized" This reverts commit c5207876232649ca5e5ddd6f966d2da75ffded8f. The commit being reverted changed the wrong place, it should have changed in func get_asic_baco_capability. Reviewed-by: Evan Quan Signed-off-by: Tiecheng Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index fdff3e1c5e95..71b843f542d8 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -1455,8 +1455,7 @@ static int pp_get_asic_baco_state(void *handle, int *state) if (!hwmgr) return -EINVAL; - if (!(hwmgr->not_vf && amdgpu_dpm) || - !hwmgr->hwmgr_func->get_asic_baco_state) + if (!hwmgr->pm_en || !hwmgr->hwmgr_func->get_asic_baco_state) return 0; mutex_lock(&hwmgr->smu_lock); From c7833d332e05cfa7b190b37dd3094c7257f1ea6d Mon Sep 17 00:00:00 2001 From: Tiecheng Zhou Date: Sun, 26 Apr 2020 19:03:17 +0800 Subject: [PATCH 25/88] drm/amd/powerplay: avoid using pm_en before it is initialized revised hwmgr->pm_en is initialized at hwmgr_hw_init. during amdgpu_device_init, there is amdgpu_asic_reset that calls to soc15_asic_reset (for V320 usecase, Vega10 asic), in which: 1) soc15_asic_reset_method calls to pp_get_asic_baco_capability (pm_en) 2) soc15_asic_baco_reset calls to pp_set_asic_baco_state (pm_en) pm_en is used in the above two cases while it has not yet been initialized So avoid using pm_en in the above two functions for V320 passthrough. Reviewed-by: Evan Quan Signed-off-by: Tiecheng Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amd_powerplay.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index 71b843f542d8..fc31499c2e5c 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -1438,7 +1438,8 @@ static int pp_get_asic_baco_capability(void *handle, bool *cap) if (!hwmgr) return -EINVAL; - if (!hwmgr->pm_en || !hwmgr->hwmgr_func->get_asic_baco_capability) + if (!(hwmgr->not_vf && amdgpu_dpm) || + !hwmgr->hwmgr_func->get_asic_baco_capability) return 0; mutex_lock(&hwmgr->smu_lock); @@ -1472,7 +1473,8 @@ static int pp_set_asic_baco_state(void *handle, int state) if (!hwmgr) return -EINVAL; - if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_asic_baco_state) + if (!(hwmgr->not_vf && amdgpu_dpm) || + !hwmgr->hwmgr_func->set_asic_baco_state) return 0; mutex_lock(&hwmgr->smu_lock); From cd4df4e6ed177ab0d1253cfeae14a2307e263754 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Mon, 27 Apr 2020 14:49:37 -0400 Subject: [PATCH 26/88] drm/amdgpu/vcn2.5: wait for tiles off after unpause Wait for tiles off after unpause to fix transcode timeout issue. It is a work around. Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 0fa1c5cec439..38ca4a712f12 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -1404,7 +1404,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, { struct amdgpu_ring *ring; uint32_t reg_data = 0; - int ret_code; + int ret_code = 0; /* pause/unpause if state is changed */ if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) { @@ -1414,7 +1414,6 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { - ret_code = 0; SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); @@ -1469,9 +1468,10 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); } } else { - /* unpause dpg, no need to wait */ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data); + SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); } adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; } From c57053725d9bd7ab3e14fd0f8001714041df1280 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Fri, 17 Apr 2020 20:50:30 -0400 Subject: [PATCH 27/88] drm/amdgpu: add tiling flags from Mesa MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DCC_INDEPENDENT_128B is needed for displayble DCC on gfx10. SCANOUT is not needed by the kernel, but Mesa uses it. Signed-off-by: Marek Olšák Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 65f69723cbdc..d28b4ce744d5 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -346,6 +346,10 @@ struct drm_amdgpu_gem_userptr { #define AMDGPU_TILING_DCC_PITCH_MAX_MASK 0x3FFF #define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT 43 #define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x1 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44 +#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1 +#define AMDGPU_TILING_SCANOUT_SHIFT 63 +#define AMDGPU_TILING_SCANOUT_MASK 0x1 /* Set/Get helpers for tiling flags. */ #define AMDGPU_TILING_SET(field, value) \ From 652a6a858fcfe2ee053856b52c4736f61b3982be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 27 Apr 2020 15:59:22 -0400 Subject: [PATCH 28/88] drm/amdgpu: invalidate L2 before SDMA IBs (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes GPU hangs due to cache coherency issues. v2: Split the version bump to a separate patch Signed-off-by: Marek Olšák Reviewed-by: Christian König Tested-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h | 16 ++++++++++++++++ drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 14 +++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h index 074a9a09c0a7..a5b60c9a2418 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h +++ b/drivers/gpu/drm/amd/amdgpu/navi10_sdma_pkt_open.h @@ -73,6 +73,22 @@ #define SDMA_OP_AQL_COPY 0 #define SDMA_OP_AQL_BARRIER_OR 0 +#define SDMA_GCR_RANGE_IS_PA (1 << 18) +#define SDMA_GCR_SEQ(x) (((x) & 0x3) << 16) +#define SDMA_GCR_GL2_WB (1 << 15) +#define SDMA_GCR_GL2_INV (1 << 14) +#define SDMA_GCR_GL2_DISCARD (1 << 13) +#define SDMA_GCR_GL2_RANGE(x) (((x) & 0x3) << 11) +#define SDMA_GCR_GL2_US (1 << 10) +#define SDMA_GCR_GL1_INV (1 << 9) +#define SDMA_GCR_GLV_INV (1 << 8) +#define SDMA_GCR_GLK_INV (1 << 7) +#define SDMA_GCR_GLK_WB (1 << 6) +#define SDMA_GCR_GLM_INV (1 << 5) +#define SDMA_GCR_GLM_WB (1 << 4) +#define SDMA_GCR_GL1_RANGE(x) (((x) & 0x3) << 2) +#define SDMA_GCR_GLI_INV(x) (((x) & 0x3) << 0) + /*define for op field*/ #define SDMA_PKT_HEADER_op_offset 0 #define SDMA_PKT_HEADER_op_mask 0x000000FF diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index e206064ffb82..f9e92e85813a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -410,6 +410,18 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid); + /* Invalidate L2, because if we don't do it, we might get stale cache + * lines from previous IBs. + */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, (SDMA_GCR_GL2_INV | + SDMA_GCR_GL2_WB | + SDMA_GCR_GLM_INV | + SDMA_GCR_GLM_WB) << 16); + amdgpu_ring_write(ring, 0xffffff80); + amdgpu_ring_write(ring, 0xffff); + /* An IB packet must end on a 8 DW boundary--the next dword * must be on a 8-dword boundary. Our IB packet below is 6 * dwords long, thus add x number of NOPs, such that, in @@ -1634,7 +1646,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 + 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */ - .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */ + .emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */ .emit_ib = sdma_v5_0_ring_emit_ib, .emit_fence = sdma_v5_0_ring_emit_fence, .emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync, From ff532461a42a00a30abde612a7d830506ca404ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 27 Apr 2020 16:25:39 -0400 Subject: [PATCH 29/88] drm/amdgpu: bump version for invalidate L2 before SDMA IBs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes GPU hangs due to cache coherency issues. Bump the driver version. Split out from the original patch. Signed-off-by: Marek Olšák Reviewed-by: Christian König Tested-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8ea86ffdea0d..466bfe541e45 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -85,9 +85,10 @@ * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches * - 3.35.0 - Add drm_amdgpu_info_device::tcc_disabled_mask * - 3.36.0 - Allow reading more status registers on si/cik + * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 36 +#define KMS_DRIVER_MINOR 37 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; From 1349f6fc334b3a3d7a338c70da07a3e0450da5c5 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Wed, 22 Apr 2020 18:07:47 -0400 Subject: [PATCH 30/88] drm/amd/display: 3.2.82 Signed-off-by: Aric Cyr Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 5432ca1657b1..84e5056521a3 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -42,7 +42,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.81" +#define DC_VER "3.2.82" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 1dfedb39d38f813357885e19badd1971c17f79a7 Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Wed, 22 Apr 2020 18:07:48 -0400 Subject: [PATCH 31/88] drm/amd/display: Do not disable pipe split if mode is not supported [WHY] If mode is not supported, pipe split should not be disabled. This may cause more modes to fail. [HOW] Check for mode support before disabling pipe split. This commit was previously reverted as it was thought to have problems, but those issues have been resolved. Signed-off-by: Sung Lee Reviewed-by: Yongqiang Sun Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 63044ae06327..f41fc322d50a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2623,19 +2623,24 @@ int dcn20_validate_apply_pipe_split_flags( /* Avoid split loop looks for lowest voltage level that allows most unsplit pipes possible */ if (avoid_split) { + int max_mpc_comb = context->bw_ctx.dml.vba.maxMpcComb; + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { if (!context->res_ctx.pipe_ctx[i].stream) continue; for (vlevel_split = vlevel; vlevel <= context->bw_ctx.dml.soc.num_states; vlevel++) - if (context->bw_ctx.dml.vba.NoOfDPP[vlevel][0][pipe_idx] == 1) + if (context->bw_ctx.dml.vba.NoOfDPP[vlevel][0][pipe_idx] == 1 && + context->bw_ctx.dml.vba.ModeSupport[vlevel][0]) break; /* Impossible to not split this pipe */ if (vlevel > context->bw_ctx.dml.soc.num_states) vlevel = vlevel_split; + else + max_mpc_comb = 0; pipe_idx++; } - context->bw_ctx.dml.vba.maxMpcComb = 0; + context->bw_ctx.dml.vba.maxMpcComb = max_mpc_comb; } /* Split loop sets which pipe should be split based on dml outputs and dc flags */ From d561754132e1ea4bc275977d5164719bb1b876f9 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 22 Apr 2020 18:07:49 -0400 Subject: [PATCH 32/88] drm/amd/display: Fix DMUB meta offset for new load method [Why] The new metadata offset is located at the end of the firmware binary without any additional padding. Firmware state is currently larger than 1024 bytes so new firmware state will hang when trying to access any data above 1024 bytes. [How] Specify the correct offset based on legacy vs new loading method. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Yongqiang Sun Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 0a1a851741c5..a6e403227872 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -96,25 +96,27 @@ dmub_get_fw_meta_info(const struct dmub_srv_region_params *params) const union dmub_fw_meta *meta; const uint8_t *blob = NULL; uint32_t blob_size = 0; + uint32_t meta_offset = 0; if (params->fw_bss_data) { /* Legacy metadata region. */ blob = params->fw_bss_data; blob_size = params->bss_data_size; + meta_offset = DMUB_FW_META_OFFSET; } else if (params->fw_inst_const) { /* Combined metadata region. */ blob = params->fw_inst_const; blob_size = params->inst_const_size; + meta_offset = 0; } if (!blob || !blob_size) return NULL; - if (blob_size < sizeof(union dmub_fw_meta) + DMUB_FW_META_OFFSET) + if (blob_size < sizeof(union dmub_fw_meta) + meta_offset) return NULL; - meta = (const union dmub_fw_meta *)(blob + blob_size - - DMUB_FW_META_OFFSET - + meta = (const union dmub_fw_meta *)(blob + blob_size - meta_offset - sizeof(union dmub_fw_meta)); if (meta->info.magic_value != DMUB_FW_META_MAGIC) From d3b18f8c6fe32fcf42e72b6c55726774c2e999de Mon Sep 17 00:00:00 2001 From: Anthony Koo Date: Wed, 22 Apr 2020 18:07:50 -0400 Subject: [PATCH 33/88] drm/amd/display: clean up some header paths [Why] Some include paths don't need to have relative paths And some types missing [How] make some changes to headers and modify include path Signed-off-by: Anthony Koo Reviewed-by: Tony Cheng Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 2 +- drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c | 2 +- drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c | 4 ++-- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 5 ----- drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h | 11 +++++++++++ 5 files changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 754b6077539c..855431483699 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -27,7 +27,7 @@ #define _DMUB_DC_SRV_H_ #include "os_types.h" -#include "../dmub/inc/dmub_cmd.h" +#include "dmub/inc/dmub_cmd.h" struct dmub_srv; struct dmub_cmd_header; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c index a19f359e45d7..992d869188c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c @@ -27,7 +27,7 @@ #include "dce_abm.h" #include "dc.h" #include "dc_dmub_srv.h" -#include "../../dmub/inc/dmub_srv.h" +#include "dmub/inc/dmub_srv.h" #include "core_types.h" #include "dm_services.h" #include "reg_helper.h" diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 3b8a49e8e665..7b32e5d60ed6 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -26,8 +26,8 @@ #include "dmub_psr.h" #include "dc.h" #include "dc_dmub_srv.h" -#include "../../dmub/inc/dmub_srv.h" -#include "../../dmub/inc/dmub_gpint_cmd.h" +#include "dmub/inc/dmub_srv.h" +#include "dmub/inc/dmub_gpint_cmd.h" #include "core_types.h" #define MAX_PIPES 6 diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 7c7a3561b6aa..6b48285446c3 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -215,11 +215,6 @@ struct dmub_rb_cmd_dpphy_init { uint8_t reserved[60]; }; -struct dmub_psr_debug_flags { - uint8_t visual_confirm : 1; - uint8_t reserved : 7; -}; - struct dmub_cmd_psr_copy_settings_data { uint16_t psr_level; uint8_t dpp_inst; diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h index 41d524b0db2f..bed5b023a396 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_types.h @@ -49,6 +49,12 @@ extern "C" { #define dmub_udelay(microseconds) udelay(microseconds) #endif +/* Maximum number of streams on any ASIC. */ +#define DMUB_MAX_STREAMS 6 + +/* Maximum number of planes on any ASIC. */ +#define DMUB_MAX_PLANES 6 + union dmub_addr { struct { uint32_t low_part; @@ -57,6 +63,11 @@ union dmub_addr { uint64_t quad_part; }; +struct dmub_psr_debug_flags { + uint8_t visual_confirm : 1; + uint8_t reserved : 7; +}; + #if defined(__cplusplus) } #endif From 38a509d5d2603fef9d8867151381059bcbb3a6ca Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Wed, 22 Apr 2020 18:07:51 -0400 Subject: [PATCH 34/88] drm/amd/display: Add DML variable for future asics Signed-off-by: Joshua Aberback Reviewed-by: Dmytro Laktyushkin Reviewed-by: Jun Lei Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h | 1 + drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c | 1 + drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index 687010c17324..b2ecb174a93f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -121,6 +121,7 @@ struct _vcs_dpi_soc_bounding_box_st { }; struct _vcs_dpi_ip_params_st { + bool use_min_dcfclk; bool gpuvm_enable; bool hostvm_enable; unsigned int gpuvm_max_page_table_levels; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c index 6b525c52124c..6e4e8a452e66 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c @@ -280,6 +280,7 @@ static void fetch_ip_params(struct display_mode_lib *mode_lib) ip_params_st *ip = &mode_lib->vba.ip; // IP Parameters + mode_lib->vba.UseMinimumRequiredDCFCLK = ip->use_min_dcfclk; mode_lib->vba.MaxNumDPP = ip->max_num_dpp; mode_lib->vba.MaxNumOTG = ip->max_num_otg; mode_lib->vba.MaxNumHDMIFRLOutputs = ip->max_num_hdmi_frl_outputs; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index 5d82fc5a7ed7..a1884ffe63ae 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -898,6 +898,7 @@ struct vba_vars_st { bool dummystring[DC__NUM_DPP__MAX]; double BPP; enum odm_combine_policy ODMCombinePolicy; + bool UseMinimumRequiredDCFCLK; }; bool CalculateMinAndMaxPrefetchMode( From 3ebd17f53556fff2809793b5d2c2259a1f2714dd Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Wed, 22 Apr 2020 18:07:52 -0400 Subject: [PATCH 35/88] drm/amd/display: check if REFCLK_CNTL register is present Check before programming the register since it isn't present on all IPs using this code. Signed-off-by: Dmytro Laktyushkin Reviewed-by: Eric Bernstein Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 6ad4ed7da629..bd2ccf8eb9cf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2304,7 +2304,8 @@ void dcn20_fpga_init_hw(struct dc *dc) REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, 2); REG_UPDATE(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, 1); - REG_WRITE(REFCLK_CNTL, 0); + if (REG(REFCLK_CNTL)) + REG_WRITE(REFCLK_CNTL, 0); // From 238387774232c9d294381d6f674c98682e9dbae8 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Wed, 22 Apr 2020 18:07:53 -0400 Subject: [PATCH 36/88] drm/amd/display: fix rn soc bb update Currently RN SOC bounding box update assumes we will get at least 2 clock states from SMU. This isn't always true and because of special casing on first clock state we end up with low disp, dpp, dsc and phy clocks. This change removes the special casing allowing the first state to acquire correct clocks. Signed-off-by: Dmytro Laktyushkin Reviewed-by: Eric Yang Acked-by: Aurabindo Pillai Acked-by: Tony Cheng Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn21/dcn21_resource.c | 69 ++++++++----------- 1 file changed, 27 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 78e6259b4ac9..8fcb03e65fdb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -1379,64 +1379,49 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param { struct dcn21_resource_pool *pool = TO_DCN21_RES_POOL(dc->res_pool); struct clk_limit_table *clk_table = &bw_params->clk_table; - unsigned int i, j, k; - int closest_clk_lvl; + struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; + unsigned int i, j, closest_clk_lvl; // Default clock levels are used for diags, which may lead to overclocking. - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment) && !IS_DIAG_DC(dc->ctx->dce_environment)) { + if (!IS_DIAG_DC(dc->ctx->dce_environment)) { dcn2_1_ip.max_num_otg = pool->base.res_cap->num_timing_generator; dcn2_1_ip.max_num_dpp = pool->base.pipe_count; dcn2_1_soc.num_chans = bw_params->num_channels; - /* Vmin: leave lowest DCN clocks, override with dcfclk, fclk, memclk from fuse */ - dcn2_1_soc.clock_limits[0].state = 0; - dcn2_1_soc.clock_limits[0].dcfclk_mhz = clk_table->entries[0].dcfclk_mhz; - dcn2_1_soc.clock_limits[0].fabricclk_mhz = clk_table->entries[0].fclk_mhz; - dcn2_1_soc.clock_limits[0].socclk_mhz = clk_table->entries[0].socclk_mhz; - dcn2_1_soc.clock_limits[0].dram_speed_mts = clk_table->entries[0].memclk_mhz * 2; - - /* - * Other levels: find closest DCN clocks that fit the given clock limit using dcfclk - * as indicator - */ - - closest_clk_lvl = -1; - /* index currently being filled */ - k = 1; - for (i = 1; i < clk_table->num_entries; i++) { - /* loop backwards, skip duplicate state*/ - for (j = dcn2_1_soc.num_states - 1; j >= k; j--) { + ASSERT(clk_table->num_entries); + for (i = 0; i < clk_table->num_entries; i++) { + /* loop backwards*/ + for (closest_clk_lvl = 0, j = dcn2_1_soc.num_states - 1; j >= 0; j--) { if ((unsigned int) dcn2_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { closest_clk_lvl = j; break; } } - /* if found a lvl that fits, use the DCN clks from it, if not, go to next clk limit*/ - if (closest_clk_lvl != -1) { - dcn2_1_soc.clock_limits[k].state = i; - dcn2_1_soc.clock_limits[k].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - dcn2_1_soc.clock_limits[k].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - dcn2_1_soc.clock_limits[k].socclk_mhz = clk_table->entries[i].socclk_mhz; - dcn2_1_soc.clock_limits[k].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; + clock_limits[i].state = i; + clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2; - dcn2_1_soc.clock_limits[k].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - dcn2_1_soc.clock_limits[k].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - dcn2_1_soc.clock_limits[k].dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - dcn2_1_soc.clock_limits[k].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - dcn2_1_soc.clock_limits[k].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - dcn2_1_soc.clock_limits[k].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - dcn2_1_soc.clock_limits[k].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; - k++; - } + clock_limits[i].dispclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + clock_limits[i].dppclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + clock_limits[i].dram_bw_per_chan_gbps = dcn2_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + clock_limits[i].dscclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + clock_limits[i].dtbclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + clock_limits[i].phyclk_d18_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + clock_limits[i].phyclk_mhz = dcn2_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + } + for (i = 0; i < clk_table->num_entries; i++) + dcn2_1_soc.clock_limits[i] = clock_limits[i]; + if (clk_table->num_entries) { + dcn2_1_soc.num_states = clk_table->num_entries; + /* duplicate last level */ + dcn2_1_soc.clock_limits[dcn2_1_soc.num_states] = dcn2_1_soc.clock_limits[dcn2_1_soc.num_states - 1]; + dcn2_1_soc.clock_limits[dcn2_1_soc.num_states].state = dcn2_1_soc.num_states; } - dcn2_1_soc.num_states = k; } - /* duplicate last level */ - dcn2_1_soc.clock_limits[dcn2_1_soc.num_states] = dcn2_1_soc.clock_limits[dcn2_1_soc.num_states - 1]; - dcn2_1_soc.clock_limits[dcn2_1_soc.num_states].state = dcn2_1_soc.num_states; - dml_init_instance(&dc->dml, &dcn2_1_soc, &dcn2_1_ip, DML_PROJECT_DCN21); } From b8a8d34b107fd3e4130a987f4168c68d3026fcb1 Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Wed, 22 Apr 2020 18:07:54 -0400 Subject: [PATCH 37/88] drm/amd/display: Fail validation if building scaling params fails [WHY & HOW] If building scaling parameters fails, validation should also fail. Signed-off-by: Sung Lee Reviewed-by: Dmytro Laktyushkin Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn20/dcn20_resource.c | 16 ++++++++++------ .../drm/amd/display/dc/dcn20/dcn20_resource.h | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index f41fc322d50a..3a8a4c54738a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1936,7 +1936,7 @@ bool dcn20_split_stream_for_odm( return true; } -void dcn20_split_stream_for_mpc( +bool dcn20_split_stream_for_mpc( struct resource_context *res_ctx, const struct resource_pool *pool, struct pipe_ctx *primary_pipe, @@ -1965,8 +1965,11 @@ void dcn20_split_stream_for_mpc( secondary_pipe->top_pipe = primary_pipe; ASSERT(primary_pipe->plane_state); - resource_build_scaling_params(primary_pipe); - resource_build_scaling_params(secondary_pipe); + if (!resource_build_scaling_params(primary_pipe) || + !resource_build_scaling_params(secondary_pipe)) + return false; + + return true; } void dcn20_populate_dml_writeback_from_context( @@ -2796,9 +2799,10 @@ bool dcn20_fast_validate_bw( goto validate_fail; dcn20_build_mapped_resource(dc, context, pipe->stream); } else - dcn20_split_stream_for_mpc( - &context->res_ctx, dc->res_pool, - pipe, hsplit_pipe); + if (!dcn20_split_stream_for_mpc( + &context->res_ctx, dc->res_pool, + pipe, hsplit_pipe)) + goto validate_fail; pipe_split_from[hsplit_pipe->pipe_idx] = pipe_idx; } } else if (hsplit_pipe && hsplit_pipe->plane_state == pipe->plane_state) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h index 9d5bff9455fd..578265ccbf5b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.h @@ -129,7 +129,7 @@ void dcn20_release_dsc(struct resource_context *res_ctx, const struct resource_pool *pool, struct display_stream_compressor **dsc); bool dcn20_validate_dsc(struct dc *dc, struct dc_state *new_ctx); -void dcn20_split_stream_for_mpc( +bool dcn20_split_stream_for_mpc( struct resource_context *res_ctx, const struct resource_pool *pool, struct pipe_ctx *primary_pipe, From 3a4837fb3c96aa6a0f017781a062687be4d6250b Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Wed, 22 Apr 2020 18:07:55 -0400 Subject: [PATCH 38/88] drm/amd/display: Change viewport limit to 12 for DCN2 [WHY & HOW] Viewport limit was set to 16 pixels due to an issue with MPO on small viewports. This restriction does not apply and the viewport limit can now be lowered. Signed-off-by: Sung Lee Reviewed-by: Dmytro Laktyushkin Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 12f5c6881cd0..1a01c038632b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1064,8 +1064,8 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) calculate_viewport(pipe_ctx); - if (pipe_ctx->plane_res.scl_data.viewport.height < 16 || - pipe_ctx->plane_res.scl_data.viewport.width < 16) { + if (pipe_ctx->plane_res.scl_data.viewport.height < 12 || + pipe_ctx->plane_res.scl_data.viewport.width < 12) { if (store_h_border_left) { restore_border_left_from_dst(pipe_ctx, store_h_border_left); From 1ab864a006690d7a387498589464f9ec8509ff9e Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 22 Apr 2020 18:07:56 -0400 Subject: [PATCH 39/88] drm/amd/display: Defer cursor update around VUPDATE for all ASIC [Why] Fixes the following scenario: - Flip has been prepared sometime during the frame, update pending - Cursor update happens right when VUPDATE would happen - OPTC lock acquired, VUPDATE is blocked until next frame - Flip is delayed potentially infinitely With the igt@kms_cursor_legacy cursor-vs-flip-legacy test we can observe nearly *13* frames of delay for some flips on Navi. [How] Apply the Raven workaround generically. When close enough to VUPDATE block cursor updates from occurring from the dc_stream_set_cursor_* helpers. This could perhaps be a little smarter by checking if there were pending updates or flips earlier in the frame on the HUBP side before applying the delay, but this should be fine for now. This fixes the kms_cursor_legacy test. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Aric Cyr Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_stream.c | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 6ddbb00ed37a..8c20e9e907b2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -239,24 +239,24 @@ static void delay_cursor_until_vupdate(struct pipe_ctx *pipe_ctx, struct dc *dc) struct dc_stream_state *stream = pipe_ctx->stream; unsigned int us_per_line; - if (stream->ctx->asic_id.chip_family == FAMILY_RV && - ASICREV_IS_RAVEN(stream->ctx->asic_id.hw_internal_rev)) { + if (!dc->hwss.get_vupdate_offset_from_vsync) + return; - vupdate_line = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx); - if (!dc_stream_get_crtc_position(dc, &stream, 1, &vpos, &nvpos)) - return; + vupdate_line = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx); + if (!dc_stream_get_crtc_position(dc, &stream, 1, &vpos, &nvpos)) + return; - if (vpos >= vupdate_line) - return; + if (vpos >= vupdate_line) + return; - us_per_line = stream->timing.h_total * 10000 / stream->timing.pix_clk_100hz; - lines_to_vupdate = vupdate_line - vpos; - us_to_vupdate = lines_to_vupdate * us_per_line; + us_per_line = + stream->timing.h_total * 10000 / stream->timing.pix_clk_100hz; + lines_to_vupdate = vupdate_line - vpos; + us_to_vupdate = lines_to_vupdate * us_per_line; - /* 70 us is a conservative estimate of cursor update time*/ - if (us_to_vupdate < 70) - udelay(us_to_vupdate); - } + /* 70 us is a conservative estimate of cursor update time*/ + if (us_to_vupdate < 70) + udelay(us_to_vupdate); #endif } From 77ef333e48ba21181b4b7b812ef623305b0b0c10 Mon Sep 17 00:00:00 2001 From: Sung Lee Date: Wed, 22 Apr 2020 18:07:57 -0400 Subject: [PATCH 40/88] drm/amd/display: Update downspread percent to match spreadsheet for DCN2.1 [WHY] The downspread percentage was copied over from a previous version of the display_mode_lib spreadsheet. This value has been updated, and the previous value is too high to allow for such modes as 4K120hz. The new value is sufficient for such modes. [HOW] Update the value in dcn21_resource to match the spreadsheet. Signed-off-by: Sung Lee Reviewed-by: Yongqiang Sun Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 8fcb03e65fdb..802372f09dc7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -286,7 +286,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .dram_channel_width_bytes = 4, .fabric_datapath_to_dcn_data_return_bytes = 32, .dcn_downspread_percent = 0.5, - .downspread_percent = 0.5, + .downspread_percent = 0.38, .dram_page_open_time_ns = 50.0, .dram_rw_turnaround_time_ns = 17.5, .dram_return_buffer_per_channel_bytes = 8192, From 0ed3bcc45987f0c127531199c288448c71e3c767 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Wed, 22 Apr 2020 18:07:58 -0400 Subject: [PATCH 41/88] drm/amd/display: Pass command instead of header into DMUB service [Why] We read memory that we shouldn't be touching if the struct isn't a full union dmub_rb_cmd. [How] Fix up all the callers and functions that take in the dmub_cmd_header to use the dmub_rb_cmd instead. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Tony Cheng Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/bios/command_table2.c | 62 +++++++++++++------ drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c | 2 +- drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h | 3 +- drivers/gpu/drm/amd/display/dc/dc_helper.c | 6 +- drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c | 10 +-- drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c | 8 +-- .../gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c | 31 ++++++---- .../gpu/drm/amd/display/dmub/inc/dmub_rb.h | 6 +- .../gpu/drm/amd/display/dmub/inc/dmub_srv.h | 3 +- .../gpu/drm/amd/display/dmub/src/dmub_srv.c | 2 +- 10 files changed, 79 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c index 8edc2506d49e..bed91572f82a 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table2.c @@ -113,13 +113,19 @@ static void encoder_control_dmcub( struct dc_dmub_srv *dmcub, struct dig_encoder_stream_setup_parameters_v1_5 *dig) { - struct dmub_rb_cmd_digx_encoder_control encoder_control = { 0 }; + union dmub_rb_cmd cmd; - encoder_control.header.type = DMUB_CMD__VBIOS; - encoder_control.header.sub_type = DMUB_CMD__VBIOS_DIGX_ENCODER_CONTROL; - encoder_control.encoder_control.dig.stream_param = *dig; + memset(&cmd, 0, sizeof(cmd)); - dc_dmub_srv_cmd_queue(dmcub, &encoder_control.header); + cmd.digx_encoder_control.header.type = DMUB_CMD__VBIOS; + cmd.digx_encoder_control.header.sub_type = + DMUB_CMD__VBIOS_DIGX_ENCODER_CONTROL; + cmd.digx_encoder_control.header.payload_bytes = + sizeof(cmd.digx_encoder_control) - + sizeof(cmd.digx_encoder_control.header); + cmd.digx_encoder_control.encoder_control.dig.stream_param = *dig; + + dc_dmub_srv_cmd_queue(dmcub, &cmd); dc_dmub_srv_cmd_execute(dmcub); dc_dmub_srv_wait_idle(dmcub); } @@ -238,14 +244,19 @@ static void transmitter_control_dmcub( struct dc_dmub_srv *dmcub, struct dig_transmitter_control_parameters_v1_6 *dig) { - struct dmub_rb_cmd_dig1_transmitter_control transmitter_control; + union dmub_rb_cmd cmd; - transmitter_control.header.type = DMUB_CMD__VBIOS; - transmitter_control.header.sub_type = + memset(&cmd, 0, sizeof(cmd)); + + cmd.dig1_transmitter_control.header.type = DMUB_CMD__VBIOS; + cmd.dig1_transmitter_control.header.sub_type = DMUB_CMD__VBIOS_DIG1_TRANSMITTER_CONTROL; - transmitter_control.transmitter_control.dig = *dig; + cmd.dig1_transmitter_control.header.payload_bytes = + sizeof(cmd.dig1_transmitter_control) - + sizeof(cmd.dig1_transmitter_control.header); + cmd.dig1_transmitter_control.transmitter_control.dig = *dig; - dc_dmub_srv_cmd_queue(dmcub, &transmitter_control.header); + dc_dmub_srv_cmd_queue(dmcub, &cmd); dc_dmub_srv_cmd_execute(dmcub); dc_dmub_srv_wait_idle(dmcub); } @@ -339,13 +350,18 @@ static void set_pixel_clock_dmcub( struct dc_dmub_srv *dmcub, struct set_pixel_clock_parameter_v1_7 *clk) { - struct dmub_rb_cmd_set_pixel_clock pixel_clock = { 0 }; + union dmub_rb_cmd cmd; - pixel_clock.header.type = DMUB_CMD__VBIOS; - pixel_clock.header.sub_type = DMUB_CMD__VBIOS_SET_PIXEL_CLOCK; - pixel_clock.pixel_clock.clk = *clk; + memset(&cmd, 0, sizeof(cmd)); - dc_dmub_srv_cmd_queue(dmcub, &pixel_clock.header); + cmd.set_pixel_clock.header.type = DMUB_CMD__VBIOS; + cmd.set_pixel_clock.header.sub_type = DMUB_CMD__VBIOS_SET_PIXEL_CLOCK; + cmd.set_pixel_clock.header.payload_bytes = + sizeof(cmd.set_pixel_clock) - + sizeof(cmd.set_pixel_clock.header); + cmd.set_pixel_clock.pixel_clock.clk = *clk; + + dc_dmub_srv_cmd_queue(dmcub, &cmd); dc_dmub_srv_cmd_execute(dmcub); dc_dmub_srv_wait_idle(dmcub); } @@ -705,13 +721,19 @@ static void enable_disp_power_gating_dmcub( struct dc_dmub_srv *dmcub, struct enable_disp_power_gating_parameters_v2_1 *pwr) { - struct dmub_rb_cmd_enable_disp_power_gating power_gating; + union dmub_rb_cmd cmd; - power_gating.header.type = DMUB_CMD__VBIOS; - power_gating.header.sub_type = DMUB_CMD__VBIOS_ENABLE_DISP_POWER_GATING; - power_gating.power_gating.pwr = *pwr; + memset(&cmd, 0, sizeof(cmd)); - dc_dmub_srv_cmd_queue(dmcub, &power_gating.header); + cmd.enable_disp_power_gating.header.type = DMUB_CMD__VBIOS; + cmd.enable_disp_power_gating.header.sub_type = + DMUB_CMD__VBIOS_ENABLE_DISP_POWER_GATING; + cmd.enable_disp_power_gating.header.payload_bytes = + sizeof(cmd.enable_disp_power_gating) - + sizeof(cmd.enable_disp_power_gating.header); + cmd.enable_disp_power_gating.power_gating.pwr = *pwr; + + dc_dmub_srv_cmd_queue(dmcub, &cmd); dc_dmub_srv_cmd_execute(dmcub); dc_dmub_srv_wait_idle(dmcub); } diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index 59c298a6484f..907e0c5374bb 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -58,7 +58,7 @@ void dc_dmub_srv_destroy(struct dc_dmub_srv **dmub_srv) } void dc_dmub_srv_cmd_queue(struct dc_dmub_srv *dc_dmub_srv, - struct dmub_cmd_header *cmd) + union dmub_rb_cmd *cmd) { struct dmub_srv *dmub = dc_dmub_srv->dmub; struct dc_context *dc_ctx = dc_dmub_srv->ctx; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h index 855431483699..6689ae33dee8 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.h @@ -30,7 +30,6 @@ #include "dmub/inc/dmub_cmd.h" struct dmub_srv; -struct dmub_cmd_header; struct dc_reg_helper_state { bool gather_in_progress; @@ -49,7 +48,7 @@ struct dc_dmub_srv { }; void dc_dmub_srv_cmd_queue(struct dc_dmub_srv *dc_dmub_srv, - struct dmub_cmd_header *cmd); + union dmub_rb_cmd *cmd); void dc_dmub_srv_cmd_execute(struct dc_dmub_srv *dc_dmub_srv); diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c index 737048d8a96c..85a0170be544 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c @@ -50,7 +50,7 @@ static inline void submit_dmub_read_modify_write( gather = ctx->dmub_srv->reg_helper_offload.gather_in_progress; ctx->dmub_srv->reg_helper_offload.gather_in_progress = false; - dc_dmub_srv_cmd_queue(ctx->dmub_srv, &cmd_buf->header); + dc_dmub_srv_cmd_queue(ctx->dmub_srv, &offload->cmd_data); ctx->dmub_srv->reg_helper_offload.gather_in_progress = gather; @@ -73,7 +73,7 @@ static inline void submit_dmub_burst_write( gather = ctx->dmub_srv->reg_helper_offload.gather_in_progress; ctx->dmub_srv->reg_helper_offload.gather_in_progress = false; - dc_dmub_srv_cmd_queue(ctx->dmub_srv, &cmd_buf->header); + dc_dmub_srv_cmd_queue(ctx->dmub_srv, &offload->cmd_data); ctx->dmub_srv->reg_helper_offload.gather_in_progress = gather; @@ -92,7 +92,7 @@ static inline void submit_dmub_reg_wait( gather = ctx->dmub_srv->reg_helper_offload.gather_in_progress; ctx->dmub_srv->reg_helper_offload.gather_in_progress = false; - dc_dmub_srv_cmd_queue(ctx->dmub_srv, &cmd_buf->header); + dc_dmub_srv_cmd_queue(ctx->dmub_srv, &offload->cmd_data); memset(cmd_buf, 0, sizeof(*cmd_buf)); offload->reg_seq_count = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c index 992d869188c5..09e19e3332e2 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c @@ -62,7 +62,7 @@ static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst) cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary; cmd.abm_set_pipe.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pipe_data); - dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.abm_set_pipe.header); + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); dc_dmub_srv_wait_idle(dc->dmub_srv); @@ -146,7 +146,7 @@ static void dmcub_set_backlight_level( cmd.abm_set_backlight.abm_set_backlight_data.frame_ramp = frame_ramp; cmd.abm_set_backlight.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_backlight_data); - dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.abm_set_backlight.header); + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); dc_dmub_srv_wait_idle(dc->dmub_srv); @@ -171,7 +171,7 @@ static void dmub_abm_enable_fractional_pwm(struct dc_context *dc) cmd.abm_set_pwm_frac.abm_set_pwm_frac_data.fractional_pwm = fractional_pwm; cmd.abm_set_pwm_frac.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pwm_frac_data); - dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.abm_set_pwm_frac.header); + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); dc_dmub_srv_wait_idle(dc->dmub_srv); } @@ -250,7 +250,7 @@ static bool dmub_abm_set_level(struct abm *abm, uint32_t level) cmd.abm_set_level.abm_set_level_data.level = level; cmd.abm_set_level.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_level_data); - dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.abm_set_level.header); + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); dc_dmub_srv_wait_idle(dc->dmub_srv); @@ -370,7 +370,7 @@ static bool dmub_abm_init_config(struct abm *abm, cmd.abm_init_config.abm_init_config_data.bytes = bytes; cmd.abm_init_config.header.payload_bytes = sizeof(struct dmub_cmd_abm_init_config_data); - dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.abm_init_config.header); + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); dc_dmub_srv_wait_idle(dc->dmub_srv); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index 7b32e5d60ed6..9f12c76f21ab 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -102,7 +102,7 @@ static bool dmub_psr_set_version(struct dmub_psr *dmub, struct dc_stream_state * cmd.psr_set_version.psr_set_version_data.version = stream->link->psr_settings.psr_version; cmd.psr_set_version.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_version_data); - dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_set_version.header); + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); dc_dmub_srv_wait_idle(dc->dmub_srv); @@ -126,7 +126,7 @@ static void dmub_psr_enable(struct dmub_psr *dmub, bool enable) cmd.psr_enable.header.payload_bytes = 0; // Send header only - dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_enable.header); + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); dc_dmub_srv_wait_idle(dc->dmub_srv); } @@ -150,7 +150,7 @@ static void dmub_psr_set_level(struct dmub_psr *dmub, uint16_t psr_level) cmd.psr_set_level.header.payload_bytes = sizeof(struct dmub_cmd_psr_set_level_data); cmd.psr_set_level.psr_set_level_data.psr_level = psr_level; - dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_set_level.header); + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); dc_dmub_srv_wait_idle(dc->dmub_srv); } @@ -227,7 +227,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, copy_settings_data->debug.visual_confirm = dc->dc->debug.visual_confirm == VISUAL_CONFIRM_PSR ? true : false; - dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd.psr_copy_settings.header); + dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd); dc_dmub_srv_cmd_execute(dc->dmub_srv); dc_dmub_srv_wait_idle(dc->dmub_srv); diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c index d285ba622d61..960a0716dde5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c @@ -778,21 +778,28 @@ void dmcub_PLAT_54186_wa(struct hubp *hubp, struct surface_flip_registers *flip_ { struct dc_dmub_srv *dmcub = hubp->ctx->dmub_srv; struct dcn21_hubp *hubp21 = TO_DCN21_HUBP(hubp); - struct dmub_rb_cmd_PLAT_54186_wa PLAT_54186_wa = { 0 }; + union dmub_rb_cmd cmd; - PLAT_54186_wa.header.type = DMUB_CMD__PLAT_54186_WA; - PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS; - PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_C; - PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH; - PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C = flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; - PLAT_54186_wa.flip.flip_params.grph_stereo = flip_regs->grph_stereo; - PLAT_54186_wa.flip.flip_params.hubp_inst = hubp->inst; - PLAT_54186_wa.flip.flip_params.immediate = flip_regs->immediate; - PLAT_54186_wa.flip.flip_params.tmz_surface = flip_regs->tmz_surface; - PLAT_54186_wa.flip.flip_params.vmid = flip_regs->vmid; + memset(&cmd, 0, sizeof(cmd)); + + cmd.PLAT_54186_wa.header.type = DMUB_CMD__PLAT_54186_WA; + cmd.PLAT_54186_wa.header.payload_bytes = sizeof(cmd.PLAT_54186_wa.flip); + cmd.PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS = + flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS; + cmd.PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_C = + flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_C; + cmd.PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH = + flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH; + cmd.PLAT_54186_wa.flip.DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C = + flip_regs->DCSURF_PRIMARY_SURFACE_ADDRESS_HIGH_C; + cmd.PLAT_54186_wa.flip.flip_params.grph_stereo = flip_regs->grph_stereo; + cmd.PLAT_54186_wa.flip.flip_params.hubp_inst = hubp->inst; + cmd.PLAT_54186_wa.flip.flip_params.immediate = flip_regs->immediate; + cmd.PLAT_54186_wa.flip.flip_params.tmz_surface = flip_regs->tmz_surface; + cmd.PLAT_54186_wa.flip.flip_params.vmid = flip_regs->vmid; PERF_TRACE(); // TODO: remove after performance is stable. - dc_dmub_srv_cmd_queue(dmcub, &PLAT_54186_wa.header); + dc_dmub_srv_cmd_queue(dmcub, &cmd); PERF_TRACE(); // TODO: remove after performance is stable. dc_dmub_srv_cmd_execute(dmcub); PERF_TRACE(); // TODO: remove after performance is stable. diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h index df875fdd2ab0..2ae48c18bb5b 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_rb.h @@ -33,8 +33,6 @@ extern "C" { #endif -struct dmub_cmd_header; - struct dmub_rb_init_params { void *ctx; void *base_address; @@ -71,7 +69,7 @@ static inline bool dmub_rb_full(struct dmub_rb *rb) } static inline bool dmub_rb_push_front(struct dmub_rb *rb, - const struct dmub_cmd_header *cmd) + const union dmub_rb_cmd *cmd) { uint64_t volatile *dst = (uint64_t volatile *)(rb->base_address) + rb->wrpt / sizeof(uint64_t); const uint64_t *src = (const uint64_t *)cmd; @@ -93,7 +91,7 @@ static inline bool dmub_rb_push_front(struct dmub_rb *rb, } static inline bool dmub_rb_front(struct dmub_rb *rb, - struct dmub_cmd_header *cmd) + union dmub_rb_cmd *cmd) { uint8_t *rd_ptr = (uint8_t *)rb->base_address + rb->rptr; diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h index e40d1cdbcfaa..9f10bfe3d2df 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_srv.h @@ -75,7 +75,6 @@ extern "C" { /* Forward declarations */ struct dmub_srv; -struct dmub_cmd_header; struct dmub_srv_common_regs; /* enum dmub_status - return code for dmcub functions */ @@ -458,7 +457,7 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub); * DMUB_STATUS_INVALID - unspecified error */ enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, - const struct dmub_cmd_header *cmd); + const union dmub_rb_cmd *cmd); /** * dmub_srv_cmd_execute() - Executes a queued sequence to the dmub diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index a6e403227872..66ba0b2d80d2 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -462,7 +462,7 @@ enum dmub_status dmub_srv_hw_reset(struct dmub_srv *dmub) } enum dmub_status dmub_srv_cmd_queue(struct dmub_srv *dmub, - const struct dmub_cmd_header *cmd) + const union dmub_rb_cmd *cmd) { if (!dmub->hw_init) return DMUB_STATUS_INVALID; From 18400f91c1c17777d98013ea15d620e7b4ac1cf4 Mon Sep 17 00:00:00 2001 From: Yongqiang Sun Date: Wed, 22 Apr 2020 18:07:59 -0400 Subject: [PATCH 42/88] drm/amd/display: Add panel cntl id for set backlight level. [Why & How] Add panel cntl instance when calling set backlight. Signed-off-by: Yongqiang Sun Reviewed-by: Anthony Koo Acked-by: Aurabindo Pillai Acked-by: Tony Cheng Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 4 +++- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 3 ++- drivers/gpu/drm/amd/display/dc/dce/dce_abm.c | 15 +++++++++------ drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c | 16 ++++++++++------ .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 9 ++++++--- .../gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 9 ++++++--- drivers/gpu/drm/amd/display/dc/inc/hw/abm.h | 5 +++-- drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 1 + 8 files changed, 40 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 0f7810571be3..ad817bd74586 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2210,7 +2210,9 @@ static void commit_planes_do_stream_update(struct dc *dc, if (should_program_abm) { if (*stream_update->abm_level == ABM_LEVEL_IMMEDIATE_DISABLE) { - pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable(pipe_ctx->stream_res.abm); + pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable( + pipe_ctx->stream_res.abm, + pipe_ctx->stream->link->panel_cntl->inst); } else { pipe_ctx->stream_res.abm->funcs->set_abm_level( pipe_ctx->stream_res.abm, stream->abm_level); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 9c4686edcf3e..a54b3e05f66b 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2552,6 +2552,7 @@ bool dc_link_set_backlight_level(const struct dc_link *link, backlight_pwm_u16_16, frame_ramp, controller_id, + link->panel_cntl->inst, fw_set_brightness); } @@ -2564,7 +2565,7 @@ bool dc_link_set_abm_disable(const struct dc_link *link) bool success = false; if (abm) - success = abm->funcs->set_abm_immediate_disable(abm); + success = abm->funcs->set_abm_immediate_disable(abm, link->panel_cntl->inst); return success; } diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c index 4dae9efebb6f..c15e60fb5ebc 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_abm.c @@ -55,7 +55,7 @@ #define MCP_DISABLE_ABM_IMMEDIATELY 255 -static bool dce_abm_set_pipe(struct abm *abm, uint32_t controller_id) +static bool dce_abm_set_pipe(struct abm *abm, uint32_t controller_id, uint32_t panel_inst) { struct dce_abm *abm_dce = TO_DCE_ABM(abm); uint32_t rampingBoundary = 0xFFFF; @@ -201,7 +201,8 @@ static void dmcu_set_backlight_level( struct dce_abm *abm_dce, uint32_t backlight_pwm_u16_16, uint32_t frame_ramp, - uint32_t controller_id) + uint32_t controller_id, + uint32_t panel_id) { unsigned int backlight_8_bit = 0; uint32_t s2; @@ -213,7 +214,7 @@ static void dmcu_set_backlight_level( // Take MSB of fractional part since backlight is not max backlight_8_bit = (backlight_pwm_u16_16 >> 8) & 0xFF; - dce_abm_set_pipe(&abm_dce->base, controller_id); + dce_abm_set_pipe(&abm_dce->base, controller_id, panel_id); /* waitDMCUReadyForCmd */ REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, @@ -331,14 +332,14 @@ static bool dce_abm_set_level(struct abm *abm, uint32_t level) return true; } -static bool dce_abm_immediate_disable(struct abm *abm) +static bool dce_abm_immediate_disable(struct abm *abm, uint32_t panel_inst) { struct dce_abm *abm_dce = TO_DCE_ABM(abm); if (abm->dmcu_is_running == false) return true; - dce_abm_set_pipe(abm, MCP_DISABLE_ABM_IMMEDIATELY); + dce_abm_set_pipe(abm, MCP_DISABLE_ABM_IMMEDIATELY, panel_inst); abm->stored_backlight_registers.BL_PWM_CNTL = REG_READ(BL_PWM_CNTL); @@ -420,6 +421,7 @@ static bool dce_abm_set_backlight_level_pwm( unsigned int backlight_pwm_u16_16, unsigned int frame_ramp, unsigned int controller_id, + unsigned int panel_inst, bool fw_set_brightness) { struct dce_abm *abm_dce = TO_DCE_ABM(abm); @@ -432,7 +434,8 @@ static bool dce_abm_set_backlight_level_pwm( dmcu_set_backlight_level(abm_dce, backlight_pwm_u16_16, frame_ramp, - controller_id); + controller_id, + panel_inst); else driver_set_backlight_level(abm_dce, backlight_pwm_u16_16); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c index 09e19e3332e2..06d39d529c09 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_abm.c @@ -50,7 +50,7 @@ #define DISABLE_ABM_IMMEDIATELY 255 -static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst) +static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst, uint32_t panel_inst) { union dmub_rb_cmd cmd; struct dc_context *dc = abm->ctx; @@ -59,6 +59,7 @@ static bool dmub_abm_set_pipe(struct abm *abm, uint32_t otg_inst) cmd.abm_set_pipe.header.type = DMUB_CMD__ABM; cmd.abm_set_pipe.header.sub_type = DMUB_CMD__ABM_SET_PIPE; cmd.abm_set_pipe.abm_set_pipe_data.otg_inst = otg_inst; + cmd.abm_set_pipe.abm_set_pipe_data.panel_inst = panel_inst; cmd.abm_set_pipe.abm_set_pipe_data.ramping_boundary = ramping_boundary; cmd.abm_set_pipe.header.payload_bytes = sizeof(struct dmub_cmd_abm_set_pipe_data); @@ -120,7 +121,8 @@ static void dmcub_set_backlight_level( struct dce_abm *dce_abm, uint32_t backlight_pwm_u16_16, uint32_t frame_ramp, - uint32_t otg_inst) + uint32_t otg_inst, + uint32_t panel_inst) { union dmub_rb_cmd cmd; struct dc_context *dc = dce_abm->base.ctx; @@ -134,7 +136,7 @@ static void dmcub_set_backlight_level( // Take MSB of fractional part since backlight is not max backlight_8_bit = (backlight_pwm_u16_16 >> 8) & 0xFF; - dmub_abm_set_pipe(&dce_abm->base, otg_inst); + dmub_abm_set_pipe(&dce_abm->base, otg_inst, panel_inst); REG_UPDATE(BL1_PWM_USER_LEVEL, BL1_PWM_USER_LEVEL, backlight_pwm_u16_16); @@ -257,11 +259,11 @@ static bool dmub_abm_set_level(struct abm *abm, uint32_t level) return true; } -static bool dmub_abm_immediate_disable(struct abm *abm) +static bool dmub_abm_immediate_disable(struct abm *abm, uint32_t panel_inst) { struct dce_abm *dce_abm = TO_DMUB_ABM(abm); - dmub_abm_set_pipe(abm, DISABLE_ABM_IMMEDIATELY); + dmub_abm_set_pipe(abm, DISABLE_ABM_IMMEDIATELY, panel_inst); abm->stored_backlight_registers.BL_PWM_CNTL = REG_READ(BL_PWM_CNTL); @@ -338,6 +340,7 @@ static bool dmub_abm_set_backlight_level_pwm( unsigned int backlight_pwm_u16_16, unsigned int frame_ramp, unsigned int otg_inst, + uint32_t panel_inst, bool fw_set_brightness) { struct dce_abm *dce_abm = TO_DMUB_ABM(abm); @@ -345,7 +348,8 @@ static bool dmub_abm_set_backlight_level_pwm( dmcub_set_backlight_level(dce_abm, backlight_pwm_u16_16, frame_ramp, - otg_inst); + otg_inst, + panel_inst); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 0d53e1d6d6b7..bfc8578eda15 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -903,7 +903,8 @@ static void dcn10_reset_back_end_for_pipe( if (pipe_ctx->top_pipe == NULL) { if (pipe_ctx->stream_res.abm) - pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable(pipe_ctx->stream_res.abm); + pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable(pipe_ctx->stream_res.abm, + pipe_ctx->stream->link->panel_cntl->inst); pipe_ctx->stream_res.tg->funcs->disable_crtc(pipe_ctx->stream_res.tg); @@ -2474,12 +2475,14 @@ void dcn10_blank_pixel_data( if (stream_res->tg->funcs->set_blank) stream_res->tg->funcs->set_blank(stream_res->tg, blank); if (stream_res->abm) { - stream_res->abm->funcs->set_pipe(stream_res->abm, stream_res->tg->inst + 1); + stream_res->abm->funcs->set_pipe(stream_res->abm, stream_res->tg->inst + 1, + stream->link->panel_cntl->inst); stream_res->abm->funcs->set_abm_level(stream_res->abm, stream->abm_level); } } else if (blank) { if (stream_res->abm) - stream_res->abm->funcs->set_abm_immediate_disable(stream_res->abm); + stream_res->abm->funcs->set_abm_immediate_disable(stream_res->abm, + stream->link->panel_cntl->inst); if (stream_res->tg->funcs->set_blank) stream_res->tg->funcs->set_blank(stream_res->tg, blank); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index bd2ccf8eb9cf..3a290ccaeb82 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -962,7 +962,8 @@ void dcn20_blank_pixel_data( if (blank) { if (stream_res->abm) - stream_res->abm->funcs->set_abm_immediate_disable(stream_res->abm); + stream_res->abm->funcs->set_abm_immediate_disable(stream_res->abm, + stream->link->panel_cntl->inst); if (dc->debug.visual_confirm != VISUAL_CONFIRM_DISABLE) { test_pattern = CONTROLLER_DP_TEST_PATTERN_COLORSQUARES; @@ -997,7 +998,8 @@ void dcn20_blank_pixel_data( if (!blank) if (stream_res->abm) { - stream_res->abm->funcs->set_pipe(stream_res->abm, stream_res->tg->inst + 1); + stream_res->abm->funcs->set_pipe(stream_res->abm, stream_res->tg->inst + 1, + stream->link->panel_cntl->inst); stream_res->abm->funcs->set_abm_level(stream_res->abm, stream->abm_level); } } @@ -2041,7 +2043,8 @@ static void dcn20_reset_back_end_for_pipe( if (pipe_ctx->top_pipe == NULL) { if (pipe_ctx->stream_res.abm) - pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable(pipe_ctx->stream_res.abm); + pipe_ctx->stream_res.abm->funcs->set_abm_immediate_disable(pipe_ctx->stream_res.abm, + pipe_ctx->stream->link->panel_cntl->inst); pipe_ctx->stream_res.tg->funcs->disable_crtc(pipe_ctx->stream_res.tg); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h index 0dd12c4fc23c..91a42f0f1fc4 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/abm.h @@ -45,8 +45,8 @@ struct abm { struct abm_funcs { void (*abm_init)(struct abm *abm); bool (*set_abm_level)(struct abm *abm, unsigned int abm_level); - bool (*set_abm_immediate_disable)(struct abm *abm); - bool (*set_pipe)(struct abm *abm, unsigned int controller_id); + bool (*set_abm_immediate_disable)(struct abm *abm, unsigned int panel_inst); + bool (*set_pipe)(struct abm *abm, unsigned int controller_id, unsigned int panel_inst); bool (*init_backlight)(struct abm *abm); /* backlight_pwm_u16_16 is unsigned 32 bit, @@ -56,6 +56,7 @@ struct abm_funcs { unsigned int backlight_pwm_u16_16, unsigned int frame_ramp, unsigned int controller_id, + unsigned int panel_inst, bool fw_set_brightness); unsigned int (*get_current_backlight)(struct abm *abm); diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 6b48285446c3..eb10518dc058 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -261,6 +261,7 @@ struct dmub_rb_cmd_psr_set_version { struct dmub_cmd_abm_set_pipe_data { uint32_t ramping_boundary; uint32_t otg_inst; + uint32_t panel_inst; }; struct dmub_rb_cmd_abm_set_pipe { From 8d829836cf753228b014af3bf21d3e2232a5722b Mon Sep 17 00:00:00 2001 From: Joshua Aberback Date: Wed, 22 Apr 2020 18:08:00 -0400 Subject: [PATCH 43/88] drm/amd/display: Add dummy p-state latency bounding box override [Why] For debugging, it can be useful to be able to modify the dummy p-state latency, this will make it easier to do so. Signed-off-by: Joshua Aberback Reviewed-by: Wesley Chalmers Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 84e5056521a3..99c8e40049e6 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -478,6 +478,7 @@ struct dc_bounding_box_overrides { int urgent_latency_ns; int percent_of_ideal_drambw; int dram_clock_change_latency_ns; + int dummy_clock_change_latency_ns; /* This forces a hard min on the DCFCLK we use * for DML. Unlike the debug option for forcing * DCFCLK, this override affects watermark calculations diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 3a8a4c54738a..b7e4d0c2432c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -3467,6 +3467,13 @@ void dcn20_patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st bb->dram_clock_change_latency_us = dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; } + + if ((int)(bb->dummy_pstate_latency_us * 1000) + != dc->bb_overrides.dummy_clock_change_latency_ns + && dc->bb_overrides.dummy_clock_change_latency_ns) { + bb->dummy_pstate_latency_us = + dc->bb_overrides.dummy_clock_change_latency_ns / 1000.0; + } } static struct _vcs_dpi_soc_bounding_box_st *get_asic_rev_soc_bb( From 4b0e95d1838f0e1d8bd4ca79f1fa17d7af1ddad6 Mon Sep 17 00:00:00 2001 From: Yongqiang Sun Date: Wed, 22 Apr 2020 18:08:01 -0400 Subject: [PATCH 44/88] drm/amd/display: Add set backlight to hw sequencer. [Why & How] Add set backlight to hw sequencer, dmu communication will be handled in hw sequencer for new asics. Signed-off-by: Yongqiang Sun Reviewed-by: Anthony Koo Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link.c | 35 ++++++------------- .../display/dc/dce110/dce110_hw_sequencer.c | 34 +++++++++++++++++- .../display/dc/dce110/dce110_hw_sequencer.h | 4 +++ .../gpu/drm/amd/display/dc/dcn10/dcn10_init.c | 1 + .../gpu/drm/amd/display/dc/dcn20/dcn20_init.c | 1 + .../gpu/drm/amd/display/dc/dcn21/dcn21_init.c | 1 + .../gpu/drm/amd/display/dc/inc/hw_sequencer.h | 4 +++ 7 files changed, 55 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index a54b3e05f66b..67c5342cf89a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2509,35 +2509,21 @@ bool dc_link_set_backlight_level(const struct dc_link *link, uint32_t frame_ramp) { struct dc *dc = link->ctx->dc; - struct abm *abm = get_abm_from_stream_res(link); - struct dmcu *dmcu = dc->res_pool->dmcu; - unsigned int controller_id = 0; - bool fw_set_brightness = true; int i; + DC_LOGGER_INIT(link->ctx->logger); - - if (abm == NULL || (abm->funcs->set_backlight_level_pwm == NULL)) - return false; - - if (dmcu) - fw_set_brightness = dmcu->funcs->is_dmcu_initialized(dmcu); - DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n", backlight_pwm_u16_16, backlight_pwm_u16_16); if (dc_is_embedded_signal(link->connector_signal)) { + struct pipe_ctx *pipe_ctx = NULL; + for (i = 0; i < MAX_PIPES; i++) { if (dc->current_state->res_ctx.pipe_ctx[i].stream) { if (dc->current_state->res_ctx. pipe_ctx[i].stream->link == link) { - /* DMCU -1 for all controller id values, - * therefore +1 here - */ - controller_id = - dc->current_state-> - res_ctx.pipe_ctx[i].stream_res.tg->inst + - 1; + pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; /* Disable brightness ramping when the display is blanked * as it can hang the DMCU @@ -2547,13 +2533,14 @@ bool dc_link_set_backlight_level(const struct dc_link *link, } } } - abm->funcs->set_backlight_level_pwm( - abm, + + if (pipe_ctx == NULL) + ASSERT(false); + + dc->hwss.set_backlight_level( + pipe_ctx, backlight_pwm_u16_16, - frame_ramp, - controller_id, - link->panel_cntl->inst, - fw_set_brightness); + frame_ramp); } return true; diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 9cd130c8894a..30469026c642 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -2714,6 +2714,37 @@ void dce110_set_cursor_attribute(struct pipe_ctx *pipe_ctx) pipe_ctx->plane_res.xfm, attributes); } +bool dce110_set_backlight_level(struct pipe_ctx *pipe_ctx, + uint32_t backlight_pwm_u16_16, + uint32_t frame_ramp) +{ + struct dc_link *link = pipe_ctx->stream->link; + struct dc *dc = link->ctx->dc; + struct abm *abm = pipe_ctx->stream_res.abm; + struct dmcu *dmcu = dc->res_pool->dmcu; + bool fw_set_brightness = true; + /* DMCU -1 for all controller id values, + * therefore +1 here + */ + uint32_t controller_id = pipe_ctx->stream_res.tg->inst + 1; + + if (abm == NULL || (abm->funcs->set_backlight_level_pwm == NULL)) + return false; + + if (dmcu) + fw_set_brightness = dmcu->funcs->is_dmcu_initialized(dmcu); + + abm->funcs->set_backlight_level_pwm( + abm, + backlight_pwm_u16_16, + frame_ramp, + controller_id, + link->panel_cntl->inst, + fw_set_brightness); + + return true; +} + static const struct hw_sequencer_funcs dce110_funcs = { .program_gamut_remap = program_gamut_remap, .program_output_csc = program_output_csc, @@ -2747,7 +2778,8 @@ static const struct hw_sequencer_funcs dce110_funcs = { .edp_power_control = dce110_edp_power_control, .edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready, .set_cursor_position = dce110_set_cursor_position, - .set_cursor_attribute = dce110_set_cursor_attribute + .set_cursor_attribute = dce110_set_cursor_attribute, + .set_backlight_level = dce110_set_backlight_level, }; static const struct hwseq_private_funcs dce110_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h index 26a9c14a58b1..e609a72b4634 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.h @@ -85,5 +85,9 @@ void dce110_edp_wait_for_hpd_ready( struct dc_link *link, bool power_up); +bool dce110_set_backlight_level(struct pipe_ctx *pipe_ctx, + uint32_t backlight_pwm_u16_16, + uint32_t frame_ramp); + #endif /* __DC_HWSS_DCE110_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c index b88ef9703b2b..6f3dbcdb06f5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c @@ -71,6 +71,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .set_clock = dcn10_set_clock, .get_clock = dcn10_get_clock, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, + .set_backlight_level = dce110_set_backlight_level, }; static const struct hwseq_private_funcs dcn10_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c index 1642bf546ceb..1e33e29b37f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c @@ -82,6 +82,7 @@ static const struct hw_sequencer_funcs dcn20_funcs = { .init_vm_ctx = dcn20_init_vm_ctx, .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, + .set_backlight_level = dce110_set_backlight_level, }; static const struct hwseq_private_funcs dcn20_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c index 8410a6305a9a..128d5e3729c6 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c @@ -91,6 +91,7 @@ static const struct hw_sequencer_funcs dcn21_funcs = { .optimize_pwr_state = dcn21_optimize_pwr_state, .exit_optimized_pwr_state = dcn21_exit_optimized_pwr_state, .power_down = dce110_power_down, + .set_backlight_level = dce110_set_backlight_level, }; static const struct hwseq_private_funcs dcn21_private_funcs = { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 9380721f28b8..ed3505cbba6e 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -191,6 +191,10 @@ struct hw_sequencer_funcs { unsigned int bufSize, unsigned int mask); void (*clear_status_bits)(struct dc *dc, unsigned int mask); + bool (*set_backlight_level)(struct pipe_ctx *pipe_ctx, + uint32_t backlight_pwm_u16_16, + uint32_t frame_ramp); + }; From a8bf7164908827178fe31fb21d2646c404c1990d Mon Sep 17 00:00:00 2001 From: Krunoslav Kovac Date: Wed, 22 Apr 2020 18:08:02 -0400 Subject: [PATCH 45/88] drm/amd/display: Internal refactoring to abstract color caps [Why&How] modules/color calculates various colour operations which are translated to abstracted HW. DCE 5-12 had almost no important changes, but starting with DCN1, every new generation comes with fairly major differences in color pipeline. We would hack it with some DCN checks, but a better approach is to abstract color pipe capabilities so modules/DM can decide mapping to HW block based on logical capabilities, Signed-off-by: Krunoslav Kovac Reviewed-by: Aric Cyr Acked-by: Anthony Koo Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../amd/display/amdgpu_dm/amdgpu_dm_color.c | 7 +-- drivers/gpu/drm/amd/display/dc/dc.h | 45 ++++++++++++++++++- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 34 ++++++++++++++ .../drm/amd/display/dc/dcn20/dcn20_resource.c | 35 ++++++++++++++- .../drm/amd/display/dc/dcn21/dcn21_resource.c | 35 ++++++++++++++- .../amd/display/modules/color/color_gamma.c | 31 ++++++++++--- .../amd/display/modules/color/color_gamma.h | 4 +- 7 files changed, 178 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c index 838f35668f12..4dfb6b55bb2e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_color.c @@ -239,7 +239,8 @@ static int __set_output_tf(struct dc_transfer_func *func, * instead to simulate this. */ gamma->type = GAMMA_CUSTOM; - res = mod_color_calculate_degamma_params(func, gamma, true); + res = mod_color_calculate_degamma_params(NULL, func, + gamma, true); } else { /* * Assume sRGB. The actual mapping will depend on whether the @@ -271,7 +272,7 @@ static int __set_input_tf(struct dc_transfer_func *func, __drm_lut_to_dc_gamma(lut, gamma, false); - res = mod_color_calculate_degamma_params(func, gamma, true); + res = mod_color_calculate_degamma_params(NULL, func, gamma, true); dc_gamma_release(&gamma); return res ? 0 : -ENOMEM; @@ -485,7 +486,7 @@ int amdgpu_dm_update_plane_color_mgmt(struct dm_crtc_state *crtc, dc_plane_state->in_transfer_func->tf = tf; if (tf != TRANSFER_FUNCTION_SRGB && - !mod_color_calculate_degamma_params( + !mod_color_calculate_degamma_params(NULL, dc_plane_state->in_transfer_func, NULL, false)) return -ENOMEM; } else { diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 99c8e40049e6..b4aeb5d8a818 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -98,6 +98,49 @@ struct dc_plane_cap { } max_downscale_factor; }; +// Color management caps (DPP and MPC) +struct rom_curve_caps { + uint16_t srgb : 1; + uint16_t bt2020 : 1; + uint16_t gamma2_2 : 1; + uint16_t pq : 1; + uint16_t hlg : 1; +}; + +struct dpp_color_caps { + uint16_t dcn_arch : 1; // all DCE generations treated the same + // input lut is different than most LUTs, just plain 256-entry lookup + uint16_t input_lut_shared : 1; // shared with DGAM + uint16_t icsc : 1; + uint16_t dgam_ram : 1; + uint16_t post_csc : 1; // before gamut remap + uint16_t gamma_corr : 1; + + // hdr_mult and gamut remap always available in DPP (in that order) + // 3d lut implies shaper LUT, + // it may be shared with MPC - check MPC:shared_3d_lut flag + uint16_t hw_3d_lut : 1; + uint16_t ogam_ram : 1; // blnd gam + uint16_t ocsc : 1; + struct rom_curve_caps dgam_rom_caps; + struct rom_curve_caps ogam_rom_caps; +}; + +struct mpc_color_caps { + uint16_t gamut_remap : 1; + uint16_t ogam_ram : 1; + uint16_t ocsc : 1; + uint16_t num_3dluts : 3; //3d lut always assumes a preceding shaper LUT + uint16_t shared_3d_lut:1; //can be in either DPP or MPC, but single instance + + struct rom_curve_caps ogam_rom_caps; +}; + +struct dc_color_caps { + struct dpp_color_caps dpp; + struct mpc_color_caps mpc; +}; + struct dc_caps { uint32_t max_streams; uint32_t max_links; @@ -120,9 +163,9 @@ struct dc_caps { bool psp_setup_panel_mode; bool extended_aux_timeout_support; bool dmcub_support; - bool hw_3d_lut; enum dp_protocol_version max_dp_protocol_version; struct dc_plane_cap planes[MAX_PLANES]; + struct dc_color_caps color; }; struct dc_bug_wa { diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 43116749af9f..6d506c37fc71 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -1384,6 +1384,40 @@ static bool dcn10_resource_construct( /* Raven DP PHY HBR2 eye diagram pattern is not stable. Use TP4 */ dc->caps.force_dp_tps4_for_cp2520 = true; + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 1; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 1; + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.dgam_rom_caps.pq = 0; + dc->caps.color.dpp.dgam_rom_caps.hlg = 0; + dc->caps.color.dpp.post_csc = 0; + dc->caps.color.dpp.gamma_corr = 0; + + dc->caps.color.dpp.hw_3d_lut = 0; + dc->caps.color.dpp.ogam_ram = 1; // RGAM on DCN1 + dc->caps.color.dpp.ogam_rom_caps.srgb = 1; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 1; + + /* no post-blend color operations */ + dc->caps.color.mpc.gamut_remap = 0; + dc->caps.color.mpc.num_3dluts = 0; + dc->caps.color.mpc.shared_3d_lut = 0; + dc->caps.color.mpc.ogam_ram = 0; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 0; + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; else diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index b7e4d0c2432c..f04d0872a8ed 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -3709,9 +3709,42 @@ static bool dcn20_resource_construct( dc->caps.max_slave_planes = 1; dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; - dc->caps.hw_3d_lut = true; dc->caps.extended_aux_timeout_support = true; + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 0; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 1; + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.dgam_rom_caps.pq = 0; + dc->caps.color.dpp.dgam_rom_caps.hlg = 0; + dc->caps.color.dpp.post_csc = 0; + dc->caps.color.dpp.gamma_corr = 0; + + dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.ogam_ram = 1; + // no OGAM ROM on DCN2, only MPC ROM + dc->caps.color.dpp.ogam_rom_caps.srgb = 0; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 0; + + dc->caps.color.mpc.gamut_remap = 0; + dc->caps.color.mpc.num_3dluts = 0; + dc->caps.color.mpc.shared_3d_lut = 0; + dc->caps.color.mpc.ogam_ram = 1; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 1; + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) { dc->debug = debug_defaults_drv; } else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 802372f09dc7..d771fe1e2c4e 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -1798,7 +1798,6 @@ static bool dcn21_resource_construct( dc->caps.i2c_speed_in_khz = 100; dc->caps.max_cursor_size = 256; dc->caps.dmdata_alloc_size = 2048; - dc->caps.hw_3d_lut = true; dc->caps.max_slave_planes = 1; dc->caps.post_blend_color_processing = true; @@ -1807,6 +1806,40 @@ static bool dcn21_resource_construct( dc->caps.dmcub_support = true; dc->caps.is_apu = true; + /* Color pipeline capabilities */ + dc->caps.color.dpp.dcn_arch = 1; + dc->caps.color.dpp.input_lut_shared = 0; + dc->caps.color.dpp.icsc = 1; + dc->caps.color.dpp.dgam_ram = 1; + dc->caps.color.dpp.dgam_rom_caps.srgb = 1; + dc->caps.color.dpp.dgam_rom_caps.bt2020 = 1; + dc->caps.color.dpp.dgam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.dgam_rom_caps.pq = 0; + dc->caps.color.dpp.dgam_rom_caps.hlg = 0; + dc->caps.color.dpp.post_csc = 0; + dc->caps.color.dpp.gamma_corr = 0; + + dc->caps.color.dpp.hw_3d_lut = 1; + dc->caps.color.dpp.ogam_ram = 1; + // no OGAM ROM on DCN2 + dc->caps.color.dpp.ogam_rom_caps.srgb = 0; + dc->caps.color.dpp.ogam_rom_caps.bt2020 = 0; + dc->caps.color.dpp.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.dpp.ogam_rom_caps.pq = 0; + dc->caps.color.dpp.ogam_rom_caps.hlg = 0; + dc->caps.color.dpp.ocsc = 0; + + dc->caps.color.mpc.gamut_remap = 0; + dc->caps.color.mpc.num_3dluts = 0; + dc->caps.color.mpc.shared_3d_lut = 0; + dc->caps.color.mpc.ogam_ram = 1; + dc->caps.color.mpc.ogam_rom_caps.srgb = 0; + dc->caps.color.mpc.ogam_rom_caps.bt2020 = 0; + dc->caps.color.mpc.ogam_rom_caps.gamma2_2 = 0; + dc->caps.color.mpc.ogam_rom_caps.pq = 0; + dc->caps.color.mpc.ogam_rom_caps.hlg = 0; + dc->caps.color.mpc.ocsc = 1; + if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) { diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index d47253cdcc4e..9431b48aecb4 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1782,7 +1782,8 @@ rgb_user_alloc_fail: return ret; } -bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, +bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps, + struct dc_transfer_func *input_tf, const struct dc_gamma *ramp, bool mapUserRamp) { struct dc_transfer_func_distributed_points *tf_pts = &input_tf->tf_pts; @@ -1801,11 +1802,29 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, /* we can use hardcoded curve for plain SRGB TF * If linear, it's bypass if on user ramp */ - if (input_tf->type == TF_TYPE_PREDEFINED && - (input_tf->tf == TRANSFER_FUNCTION_SRGB || - input_tf->tf == TRANSFER_FUNCTION_LINEAR) && - !mapUserRamp) - return true; + if (input_tf->type == TF_TYPE_PREDEFINED) { + if ((input_tf->tf == TRANSFER_FUNCTION_SRGB || + input_tf->tf == TRANSFER_FUNCTION_LINEAR) && + !mapUserRamp) + return true; + + if (dc_caps != NULL && + dc_caps->dpp.dcn_arch == 1) { + + if (input_tf->tf == TRANSFER_FUNCTION_PQ && + dc_caps->dpp.dgam_rom_caps.pq == 1) + return true; + + if (input_tf->tf == TRANSFER_FUNCTION_GAMMA22 && + dc_caps->dpp.dgam_rom_caps.gamma2_2 == 1) + return true; + + // HLG OOTF not accounted for + if (input_tf->tf == TRANSFER_FUNCTION_HLG && + dc_caps->dpp.dgam_rom_caps.hlg == 1) + return true; + } + } input_tf->type = TF_TYPE_DISTRIBUTED_POINTS; diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h index 9994817a9a03..7f56226ba77a 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.h +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.h @@ -30,6 +30,7 @@ struct dc_transfer_func; struct dc_gamma; struct dc_transfer_func_distributed_points; struct dc_rgb_fixed; +struct dc_color_caps; enum dc_transfer_func_predefined; /* For SetRegamma ADL interface support @@ -100,7 +101,8 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, const struct dc_gamma *ramp, bool mapUserRamp, bool canRomBeUsed, const struct freesync_hdr_tf_params *fs_params); -bool mod_color_calculate_degamma_params(struct dc_transfer_func *output_tf, +bool mod_color_calculate_degamma_params(struct dc_color_caps *dc_caps, + struct dc_transfer_func *output_tf, const struct dc_gamma *ramp, bool mapUserRamp); bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, From 1e461c37d1fb6712561ad682b2d67ed4f5cbd3ff Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Wed, 22 Apr 2020 18:08:03 -0400 Subject: [PATCH 46/88] drm/amd/display: Use cursor locking to prevent flip delays [Why] Current locking scheme for cursor can result in a flip missing its vsync, deferring it for one or more vsyncs. Result is a potential for stuttering when cursor is moved. [How] Use cursor update lock so that flips are not blocked while cursor is being programmed. Signed-off-by: Aric Cyr Reviewed-by: Nicholas Kazlauskas Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_stream.c | 40 ++----------------- .../display/dc/dce110/dce110_hw_sequencer.c | 1 + .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 10 +++++ .../amd/display/dc/dcn10/dcn10_hw_sequencer.h | 1 + .../gpu/drm/amd/display/dc/dcn10/dcn10_init.c | 1 + .../gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c | 15 +++++++ .../gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h | 20 +++++++--- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 14 ++++++- .../gpu/drm/amd/display/dc/dcn20/dcn20_init.c | 1 + .../gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c | 1 + .../gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h | 3 +- .../drm/amd/display/dc/dcn20/dcn20_resource.c | 4 ++ .../gpu/drm/amd/display/dc/dcn21/dcn21_init.c | 1 + .../drm/amd/display/dc/dcn21/dcn21_resource.c | 4 ++ drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h | 16 ++++++++ .../gpu/drm/amd/display/dc/inc/hw_sequencer.h | 1 + 16 files changed, 88 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 8c20e9e907b2..4f0e7203dba4 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -231,34 +231,6 @@ struct dc_stream_status *dc_stream_get_status( return dc_stream_get_status_from_state(dc->current_state, stream); } -static void delay_cursor_until_vupdate(struct pipe_ctx *pipe_ctx, struct dc *dc) -{ -#if defined(CONFIG_DRM_AMD_DC_DCN) - unsigned int vupdate_line; - unsigned int lines_to_vupdate, us_to_vupdate, vpos, nvpos; - struct dc_stream_state *stream = pipe_ctx->stream; - unsigned int us_per_line; - - if (!dc->hwss.get_vupdate_offset_from_vsync) - return; - - vupdate_line = dc->hwss.get_vupdate_offset_from_vsync(pipe_ctx); - if (!dc_stream_get_crtc_position(dc, &stream, 1, &vpos, &nvpos)) - return; - - if (vpos >= vupdate_line) - return; - - us_per_line = - stream->timing.h_total * 10000 / stream->timing.pix_clk_100hz; - lines_to_vupdate = vupdate_line - vpos; - us_to_vupdate = lines_to_vupdate * us_per_line; - - /* 70 us is a conservative estimate of cursor update time*/ - if (us_to_vupdate < 70) - udelay(us_to_vupdate); -#endif -} /** * dc_stream_set_cursor_attributes() - Update cursor attributes and set cursor surface address @@ -298,9 +270,7 @@ bool dc_stream_set_cursor_attributes( if (!pipe_to_program) { pipe_to_program = pipe_ctx; - - delay_cursor_until_vupdate(pipe_ctx, dc); - dc->hwss.pipe_control_lock(dc, pipe_to_program, true); + dc->hwss.cursor_lock(dc, pipe_to_program, true); } dc->hwss.set_cursor_attribute(pipe_ctx); @@ -309,7 +279,7 @@ bool dc_stream_set_cursor_attributes( } if (pipe_to_program) - dc->hwss.pipe_control_lock(dc, pipe_to_program, false); + dc->hwss.cursor_lock(dc, pipe_to_program, false); return true; } @@ -349,16 +319,14 @@ bool dc_stream_set_cursor_position( if (!pipe_to_program) { pipe_to_program = pipe_ctx; - - delay_cursor_until_vupdate(pipe_ctx, dc); - dc->hwss.pipe_control_lock(dc, pipe_to_program, true); + dc->hwss.cursor_lock(dc, pipe_to_program, true); } dc->hwss.set_cursor_position(pipe_ctx); } if (pipe_to_program) - dc->hwss.pipe_control_lock(dc, pipe_to_program, false); + dc->hwss.cursor_lock(dc, pipe_to_program, false); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 30469026c642..6bd8d4e1c294 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -2767,6 +2767,7 @@ static const struct hw_sequencer_funcs dce110_funcs = { .disable_plane = dce110_power_down_fe, .pipe_control_lock = dce_pipe_control_lock, .interdependent_update_lock = NULL, + .cursor_lock = dce_pipe_control_lock, .prepare_bandwidth = dce110_prepare_bandwidth, .optimize_bandwidth = dce110_optimize_bandwidth, .set_drr = set_drr, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index bfc8578eda15..2eb5d0e3e425 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1669,6 +1669,16 @@ void dcn10_pipe_control_lock( hws->funcs.verify_allow_pstate_change_high(dc); } +void dcn10_cursor_lock(struct dc *dc, struct pipe_ctx *pipe, bool lock) +{ + /* cursor lock is per MPCC tree, so only need to lock one pipe per stream */ + if (!pipe || pipe->top_pipe) + return; + + dc->res_pool->mpc->funcs->cursor_lock(dc->res_pool->mpc, + pipe->stream_res.opp->inst, lock); +} + static bool wait_for_reset_trigger_to_occur( struct dc_context *dc_ctx, struct timing_generator *tg) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h index 16a50e05ffbf..af51424315d5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.h @@ -49,6 +49,7 @@ void dcn10_pipe_control_lock( struct dc *dc, struct pipe_ctx *pipe, bool lock); +void dcn10_cursor_lock(struct dc *dc, struct pipe_ctx *pipe, bool lock); void dcn10_blank_pixel_data( struct dc *dc, struct pipe_ctx *pipe_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c index 6f3dbcdb06f5..6ff7e2bd1d4a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c @@ -50,6 +50,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .disable_audio_stream = dce110_disable_audio_stream, .disable_plane = dcn10_disable_plane, .pipe_control_lock = dcn10_pipe_control_lock, + .cursor_lock = dcn10_cursor_lock, .interdependent_update_lock = dcn10_lock_all_pipes, .prepare_bandwidth = dcn10_prepare_bandwidth, .optimize_bandwidth = dcn10_optimize_bandwidth, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c index 04f863499cfb..3fcd408e9103 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.c @@ -223,6 +223,9 @@ struct mpcc *mpc1_insert_plane( REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, dpp_id); REG_SET(MPCC_OPP_ID[mpcc_id], 0, MPCC_OPP_ID, tree->opp_id); + /* Configure VUPDATE lock set for this MPCC to map to the OPP */ + REG_SET(MPCC_UPDATE_LOCK_SEL[mpcc_id], 0, MPCC_UPDATE_LOCK_SEL, tree->opp_id); + /* update mpc tree mux setting */ if (tree->opp_list == insert_above_mpcc) { /* insert the toppest mpcc */ @@ -318,6 +321,7 @@ void mpc1_remove_mpcc( REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, 0xf); REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, 0xf); REG_SET(MPCC_OPP_ID[mpcc_id], 0, MPCC_OPP_ID, 0xf); + REG_SET(MPCC_UPDATE_LOCK_SEL[mpcc_id], 0, MPCC_UPDATE_LOCK_SEL, 0xf); /* mark this mpcc as not in use */ mpc10->mpcc_in_use_mask &= ~(1 << mpcc_id); @@ -328,6 +332,7 @@ void mpc1_remove_mpcc( REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, 0xf); REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, 0xf); REG_SET(MPCC_OPP_ID[mpcc_id], 0, MPCC_OPP_ID, 0xf); + REG_SET(MPCC_UPDATE_LOCK_SEL[mpcc_id], 0, MPCC_UPDATE_LOCK_SEL, 0xf); } } @@ -361,6 +366,7 @@ void mpc1_mpc_init(struct mpc *mpc) REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, 0xf); REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, 0xf); REG_SET(MPCC_OPP_ID[mpcc_id], 0, MPCC_OPP_ID, 0xf); + REG_SET(MPCC_UPDATE_LOCK_SEL[mpcc_id], 0, MPCC_UPDATE_LOCK_SEL, 0xf); mpc1_init_mpcc(&(mpc->mpcc_array[mpcc_id]), mpcc_id); } @@ -381,6 +387,7 @@ void mpc1_mpc_init_single_inst(struct mpc *mpc, unsigned int mpcc_id) REG_SET(MPCC_TOP_SEL[mpcc_id], 0, MPCC_TOP_SEL, 0xf); REG_SET(MPCC_BOT_SEL[mpcc_id], 0, MPCC_BOT_SEL, 0xf); REG_SET(MPCC_OPP_ID[mpcc_id], 0, MPCC_OPP_ID, 0xf); + REG_SET(MPCC_UPDATE_LOCK_SEL[mpcc_id], 0, MPCC_UPDATE_LOCK_SEL, 0xf); mpc1_init_mpcc(&(mpc->mpcc_array[mpcc_id]), mpcc_id); @@ -453,6 +460,13 @@ void mpc1_read_mpcc_state( MPCC_BUSY, &s->busy); } +void mpc1_cursor_lock(struct mpc *mpc, int opp_id, bool lock) +{ + struct dcn10_mpc *mpc10 = TO_DCN10_MPC(mpc); + + REG_SET(CUR[opp_id], 0, CUR_VUPDATE_LOCK_SET, lock ? 1 : 0); +} + static const struct mpc_funcs dcn10_mpc_funcs = { .read_mpcc_state = mpc1_read_mpcc_state, .insert_plane = mpc1_insert_plane, @@ -464,6 +478,7 @@ static const struct mpc_funcs dcn10_mpc_funcs = { .assert_mpcc_idle_before_connect = mpc1_assert_mpcc_idle_before_connect, .init_mpcc_list_from_hw = mpc1_init_mpcc_list_from_hw, .update_blending = mpc1_update_blending, + .cursor_lock = mpc1_cursor_lock, .set_denorm = NULL, .set_denorm_clamp = NULL, .set_output_csc = NULL, diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h index 962a68e322ee..66a4719c22a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_mpc.h @@ -39,11 +39,12 @@ SRII(MPCC_BG_G_Y, MPCC, inst),\ SRII(MPCC_BG_R_CR, MPCC, inst),\ SRII(MPCC_BG_B_CB, MPCC, inst),\ - SRII(MPCC_BG_B_CB, MPCC, inst),\ - SRII(MPCC_SM_CONTROL, MPCC, inst) + SRII(MPCC_SM_CONTROL, MPCC, inst),\ + SRII(MPCC_UPDATE_LOCK_SEL, MPCC, inst) #define MPC_OUT_MUX_COMMON_REG_LIST_DCN1_0(inst) \ - SRII(MUX, MPC_OUT, inst) + SRII(MUX, MPC_OUT, inst),\ + VUPDATE_SRII(CUR, VUPDATE_LOCK_SET, inst) #define MPC_COMMON_REG_VARIABLE_LIST \ uint32_t MPCC_TOP_SEL[MAX_MPCC]; \ @@ -55,7 +56,9 @@ uint32_t MPCC_BG_R_CR[MAX_MPCC]; \ uint32_t MPCC_BG_B_CB[MAX_MPCC]; \ uint32_t MPCC_SM_CONTROL[MAX_MPCC]; \ - uint32_t MUX[MAX_OPP]; + uint32_t MUX[MAX_OPP]; \ + uint32_t MPCC_UPDATE_LOCK_SEL[MAX_MPCC]; \ + uint32_t CUR[MAX_OPP]; #define MPC_COMMON_MASK_SH_LIST_DCN1_0(mask_sh)\ SF(MPCC0_MPCC_TOP_SEL, MPCC_TOP_SEL, mask_sh),\ @@ -78,7 +81,8 @@ SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_FIELD_ALT, mask_sh),\ SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_FORCE_NEXT_FRAME_POL, mask_sh),\ SF(MPCC0_MPCC_SM_CONTROL, MPCC_SM_FORCE_NEXT_TOP_POL, mask_sh),\ - SF(MPC_OUT0_MUX, MPC_OUT_MUX, mask_sh) + SF(MPC_OUT0_MUX, MPC_OUT_MUX, mask_sh),\ + SF(MPCC0_MPCC_UPDATE_LOCK_SEL, MPCC_UPDATE_LOCK_SEL, mask_sh) #define MPC_REG_FIELD_LIST(type) \ type MPCC_TOP_SEL;\ @@ -101,7 +105,9 @@ type MPCC_SM_FIELD_ALT;\ type MPCC_SM_FORCE_NEXT_FRAME_POL;\ type MPCC_SM_FORCE_NEXT_TOP_POL;\ - type MPC_OUT_MUX; + type MPC_OUT_MUX;\ + type MPCC_UPDATE_LOCK_SEL;\ + type CUR_VUPDATE_LOCK_SET; struct dcn_mpc_registers { MPC_COMMON_REG_VARIABLE_LIST @@ -192,4 +198,6 @@ void mpc1_read_mpcc_state( int mpcc_inst, struct mpcc_state *s); +void mpc1_cursor_lock(struct mpc *mpc, int opp_id, bool lock); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 6d506c37fc71..bacf865f55c0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -182,6 +182,14 @@ enum dcn10_clk_src_array_id { .reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ mm ## block ## id ## _ ## reg_name +#define VUPDATE_SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## reg_name ## 0 ## _ ## block ## id ## _BASE_IDX) + \ + mm ## reg_name ## 0 ## _ ## block ## id + +/* set field/register/bitfield name */ +#define SFRB(field_name, reg_name, bitfield, post_fix)\ + .field_name = reg_name ## __ ## bitfield ## post_fix + /* NBIO */ #define NBIO_BASE_INNER(seg) \ NBIF_BASE__INST0_SEG ## seg @@ -432,11 +440,13 @@ static const struct dcn_mpc_registers mpc_regs = { }; static const struct dcn_mpc_shift mpc_shift = { - MPC_COMMON_MASK_SH_LIST_DCN1_0(__SHIFT) + MPC_COMMON_MASK_SH_LIST_DCN1_0(__SHIFT),\ + SFRB(CUR_VUPDATE_LOCK_SET, CUR0_VUPDATE_LOCK_SET0, CUR0_VUPDATE_LOCK_SET, __SHIFT) }; static const struct dcn_mpc_mask mpc_mask = { - MPC_COMMON_MASK_SH_LIST_DCN1_0(_MASK), + MPC_COMMON_MASK_SH_LIST_DCN1_0(_MASK),\ + SFRB(CUR_VUPDATE_LOCK_SET, CUR0_VUPDATE_LOCK_SET0, CUR0_VUPDATE_LOCK_SET, _MASK) }; #define tg_regs(id)\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c index 1e33e29b37f8..c83d98e0d211 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c @@ -52,6 +52,7 @@ static const struct hw_sequencer_funcs dcn20_funcs = { .disable_plane = dcn20_disable_plane, .pipe_control_lock = dcn20_pipe_control_lock, .interdependent_update_lock = dcn10_lock_all_pipes, + .cursor_lock = dcn10_cursor_lock, .prepare_bandwidth = dcn20_prepare_bandwidth, .optimize_bandwidth = dcn20_optimize_bandwidth, .update_bandwidth = dcn20_update_bandwidth, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c index 9d7432f3fb16..99cc095dc33c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.c @@ -545,6 +545,7 @@ const struct mpc_funcs dcn20_mpc_funcs = { .mpc_init = mpc1_mpc_init, .mpc_init_single_inst = mpc1_mpc_init_single_inst, .update_blending = mpc2_update_blending, + .cursor_lock = mpc1_cursor_lock, .get_mpcc_for_dpp = mpc2_get_mpcc_for_dpp, .wait_for_idle = mpc2_assert_idle_mpcc, .assert_mpcc_idle_before_connect = mpc2_assert_mpcc_idle_before_connect, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h index c78fd5123497..496658f420db 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mpc.h @@ -179,7 +179,8 @@ SF(MPC_OUT0_DENORM_CLAMP_G_Y, MPC_OUT_DENORM_CLAMP_MAX_G_Y, mask_sh),\ SF(MPC_OUT0_DENORM_CLAMP_G_Y, MPC_OUT_DENORM_CLAMP_MIN_G_Y, mask_sh),\ SF(MPC_OUT0_DENORM_CLAMP_B_CB, MPC_OUT_DENORM_CLAMP_MAX_B_CB, mask_sh),\ - SF(MPC_OUT0_DENORM_CLAMP_B_CB, MPC_OUT_DENORM_CLAMP_MIN_B_CB, mask_sh) + SF(MPC_OUT0_DENORM_CLAMP_B_CB, MPC_OUT_DENORM_CLAMP_MIN_B_CB, mask_sh),\ + SF(CUR_VUPDATE_LOCK_SET0, CUR_VUPDATE_LOCK_SET, mask_sh) /* * DCN2 MPC_OCSC debug status register: diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index f04d0872a8ed..60ea499c1ca8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -509,6 +509,10 @@ enum dcn20_clk_src_array_id { .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ mm ## block ## id ## _ ## reg_name +#define VUPDATE_SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ + mm ## reg_name ## _ ## block ## id + /* NBIO */ #define NBIO_BASE_INNER(seg) \ NBIO_BASE__INST0_SEG ## seg diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c index 128d5e3729c6..ae05a8bfdae1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c @@ -53,6 +53,7 @@ static const struct hw_sequencer_funcs dcn21_funcs = { .disable_plane = dcn20_disable_plane, .pipe_control_lock = dcn20_pipe_control_lock, .interdependent_update_lock = dcn10_lock_all_pipes, + .cursor_lock = dcn10_cursor_lock, .prepare_bandwidth = dcn20_prepare_bandwidth, .optimize_bandwidth = dcn20_optimize_bandwidth, .update_bandwidth = dcn20_update_bandwidth, diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index d771fe1e2c4e..ceaf70a934c3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -342,6 +342,10 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \ mm ## block ## id ## _ ## reg_name +#define VUPDATE_SRII(reg_name, block, id)\ + .reg_name[id] = BASE(mm ## reg_name ## _ ## block ## id ## _BASE_IDX) + \ + mm ## reg_name ## _ ## block ## id + /* NBIO */ #define NBIO_BASE_INNER(seg) \ NBIF0_BASE__INST0_SEG ## seg diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index 094afc4c8173..50ee8aa7ec3b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -210,6 +210,22 @@ struct mpc_funcs { struct mpcc_blnd_cfg *blnd_cfg, int mpcc_id); + /* + * Lock cursor updates for the specified OPP. + * OPP defines the set of MPCC that are locked together for cursor. + * + * Parameters: + * [in] mpc - MPC context. + * [in] opp_id - The OPP to lock cursor updates on + * [in] lock - lock/unlock the OPP + * + * Return: void + */ + void (*cursor_lock)( + struct mpc *mpc, + int opp_id, + bool lock); + struct mpcc* (*get_mpcc_for_dpp)( struct mpc_tree *tree, int dpp_id); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index ed3505cbba6e..1570fed20de0 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -90,6 +90,7 @@ struct hw_sequencer_funcs { struct dc_state *context, bool lock); void (*set_flip_control_gsl)(struct pipe_ctx *pipe_ctx, bool flip_immediate); + void (*cursor_lock)(struct dc *dc, struct pipe_ctx *pipe, bool lock); /* Timing Related */ void (*get_position)(struct pipe_ctx **pipe_ctx, int num_pipes, From b454e25f3e72fc29967afdee5ffee46c250a0dd1 Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Wed, 22 Apr 2020 18:08:04 -0400 Subject: [PATCH 47/88] drm/amd/display: 3.2.83 Signed-off-by: Aric Cyr Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index b4aeb5d8a818..8957429c6a24 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -42,7 +42,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.82" +#define DC_VER "3.2.83" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 82a7cf006ee1f91d7818eb298ae1ec052c9a14ee Mon Sep 17 00:00:00 2001 From: Aric Cyr Date: Wed, 22 Apr 2020 18:08:05 -0400 Subject: [PATCH 48/88] drm/amd/display: 3.2.83.1 Update firmware blobs Signed-off-by: Aric Cyr Acked-by: Aurabindo Pillai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 8957429c6a24..17075f99bc54 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -42,7 +42,7 @@ #include "inc/hw/dmcu.h" #include "dml/display_mode_lib.h" -#define DC_VER "3.2.83" +#define DC_VER "3.2.83.1" #define MAX_SURFACES 3 #define MAX_PLANES 6 From 35ce006004821c5e9ae8fe03d048567cec99c41e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 7 Aug 2019 21:43:24 -0500 Subject: [PATCH 49/88] drm/amdgpu: add UAPI for creating encrypted buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a flag to the GEM_CREATE ioctl to create encrypted buffers. Buffers with this flag set will be created with the TMZ bit set in the PTEs or engines accessing them. This is required in order to properly access the data from the engines. Signed-off-by: Alex Deucher Reviewed-by: Huang Rui Reviewed-by: Christian König --- include/uapi/drm/amdgpu_drm.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index d28b4ce744d5..4a0829d35e72 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -133,6 +133,11 @@ extern "C" { * releasing the memory */ #define AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE (1 << 9) +/* Flag that BO will be encrypted and that the TMZ bit should be + * set in the PTEs when mapping this buffer via GPUVM or + * accessing it with various hw blocks + */ +#define AMDGPU_GEM_CREATE_ENCRYPTED (1 << 10) struct drm_amdgpu_gem_create_in { /** the requested memory size */ From e90c2b210bad457aab73d3357465afe4d4475f7e Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Mon, 23 Sep 2019 19:02:41 -0400 Subject: [PATCH 50/88] drm/amdgpu: add UAPI to create secure commands (v3) Add a flag to the command submission IOCTL structure which when present indicates that this command submission should be treated as secure. The kernel driver uses this flag to determine whether the engine should be transitioned to secure or unsecure, or the work can be submitted to a secure queue depending on the IP. v3: the flag is now at command submission IOCTL Signed-off-by: Luben Tuikov Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 4a0829d35e72..34cd0bae06bc 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -205,6 +205,9 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_OP_QUERY_STATE 3 #define AMDGPU_CTX_OP_QUERY_STATE2 4 +/* Flag the command submission as secure */ +#define AMDGPU_CS_FLAGS_SECURE (1 << 0) + /* GPU reset status */ #define AMDGPU_CTX_NO_RESET 0 /* this the context caused it */ @@ -564,7 +567,7 @@ struct drm_amdgpu_cs_in { /** Handle of resource list associated with CS */ __u32 bo_list_handle; __u32 num_chunks; - __u32 _pad; + __u32 flags; /** this points to __u64 * which point to cs chunks */ __u64 chunks; }; From c5efd80f48e481946ba201412c7f39b19b39a40b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 7 Aug 2019 22:31:50 -0500 Subject: [PATCH 51/88] drm/amdgpu: define the TMZ bit for the PTE Define the TMZ (encryption) bit in the page table entry (PTE) for Raven and newer asics. Signed-off-by: Alex Deucher Reviewed-by: Huang Rui --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index b13c14d6b820..fd61466dc226 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -54,6 +54,9 @@ struct amdgpu_bo_list_entry; #define AMDGPU_PTE_SYSTEM (1ULL << 1) #define AMDGPU_PTE_SNOOPED (1ULL << 2) +/* RV+ */ +#define AMDGPU_PTE_TMZ (1ULL << 3) + /* VI only */ #define AMDGPU_PTE_EXECUTABLE (1ULL << 4) From d7ccb38df5f7ee24e667a4c61b2ce3ff77a7fd6e Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Sun, 4 Aug 2019 16:33:16 +0800 Subject: [PATCH 52/88] drm/amdgpu: add tmz feature parameter (v2) This patch adds tmz parameter to enable/disable the feature in the amdgpu kernel module. Nomally, by default, it should be auto (rely on the hardware capability). But right now, it need to set "off" to avoid breaking other developers' work because it's not totally completed. Will set "auto" till the feature is stable and completely verified. v2: add "auto" option for future use. Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 56da8920195d..fa57501b4ee9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -189,6 +189,8 @@ extern int sched_policy; static const int sched_policy = KFD_SCHED_POLICY_HWS; #endif +extern int amdgpu_tmz; + #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 466bfe541e45..7aa5ec357391 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -145,6 +145,7 @@ int amdgpu_discovery = -1; int amdgpu_mes = 0; int amdgpu_noretry; int amdgpu_force_asic_type = -1; +int amdgpu_tmz = 0; struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), @@ -729,6 +730,16 @@ uint amdgpu_dm_abm_level = 0; MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) "); module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444); +/** + * DOC: tmz (int) + * Trusted Memory Zone (TMZ) is a method to protect data being written + * to or read from memory. + * + * The default value: 0 (off). TODO: change to auto till it is completed. + */ +MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)"); +module_param_named(tmz, amdgpu_tmz, int, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, From ae60305ac04ff7bb804fb17962140839daa628c2 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 14 Jan 2020 10:10:51 -0500 Subject: [PATCH 53/88] drm/amdgpu: add amdgpu_tmz data structure This patch to add amdgpu_tmz structure which stores all tmz related fields. Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 ++++- drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.h | 36 +++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.h diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index fa57501b4ee9..f54532abfe78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -103,6 +103,7 @@ #include "amdgpu_umc.h" #include "amdgpu_mmhub.h" #include "amdgpu_df.h" +#include "amdgpu_tmz.h" #define MAX_GPU_INSTANCE 16 @@ -936,6 +937,9 @@ struct amdgpu_device { /* df */ struct amdgpu_df df; + /* tmz */ + struct amdgpu_tmz tmz; + struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM]; int num_ip_blocks; struct mutex mn_lock; @@ -947,7 +951,7 @@ struct amdgpu_device { atomic64_t gart_pin_size; /* soc15 register offset based on ip, instance and segment */ - uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; + uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; /* delayed work_func for deferring clockgating during resume */ struct delayed_work delayed_init_work; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.h new file mode 100644 index 000000000000..24bbbc21702a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.h @@ -0,0 +1,36 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_TMZ_H__ +#define __AMDGPU_TMZ_H__ + +#include "amdgpu.h" + +/* + * Trust memory zone stuff + */ +struct amdgpu_tmz { + bool enabled; +}; + +#endif From 01a8dcec1a08f7c13b7546742fc84dcd7114bf4e Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Thu, 19 Mar 2020 16:46:10 -0400 Subject: [PATCH 54/88] drm/amdgpu: add function to check tmz capability (v4) Add a function to check tmz capability with kernel parameter and ASIC type. v2: use a per device tmz variable instead of global amdgpu_tmz. v3: refine the comments for the function. (Luben) v4: add amdgpu_tmz.c/h for future use. Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.c | 49 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.h | 3 ++ 4 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.c diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 210d57a4afc8..e3ba6c4c08e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -55,7 +55,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ - amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o + amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_tmz.o amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 09ede1733806..28e596a11298 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -65,6 +65,7 @@ #include "amdgpu_ras.h" #include "amdgpu_pmu.h" #include "amdgpu_fru_eeprom.h" +#include "amdgpu_tmz.h" #include #include @@ -1140,6 +1141,8 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); + adev->tmz.enabled = amdgpu_is_tmz(adev); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.c new file mode 100644 index 000000000000..14a55003dd81 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.c @@ -0,0 +1,49 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include "amdgpu.h" +#include "amdgpu_tmz.h" + + +/** + * amdgpu_is_tmz - validate trust memory zone + * + * @adev: amdgpu_device pointer + * + * Return true if @dev supports trusted memory zones (TMZ), and return false if + * @dev does not support TMZ. + */ +bool amdgpu_is_tmz(struct amdgpu_device *adev) +{ + if (!amdgpu_tmz) + return false; + + if (adev->asic_type < CHIP_RAVEN || adev->asic_type == CHIP_ARCTURUS) { + dev_warn(adev->dev, "doesn't support trusted memory zones (TMZ)\n"); + return false; + } + + dev_info(adev->dev, "TMZ feature is enabled\n"); + + return true; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.h index 24bbbc21702a..28e05177fb89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.h @@ -33,4 +33,7 @@ struct amdgpu_tmz { bool enabled; }; + +extern bool amdgpu_is_tmz(struct amdgpu_device *adev); + #endif From eda982a67225a8fe46807dc45304469f612e99bb Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Thu, 8 Aug 2019 17:00:16 +0800 Subject: [PATCH 55/88] drm/amdgpu: add tmz bit in frame control packet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds tmz bit in frame control pm4 packet, and it will used in future. Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/nvd.h | 1 + drivers/gpu/drm/amd/amdgpu/soc15d.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h index 1de984647dbb..f3d8771ebed4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nvd.h +++ b/drivers/gpu/drm/amd/amdgpu/nvd.h @@ -306,6 +306,7 @@ #define PACKET3_GET_LOD_STATS 0x8E #define PACKET3_DRAW_MULTI_PREAMBLE 0x8F #define PACKET3_FRAME_CONTROL 0x90 +# define FRAME_TMZ (1 << 0) # define FRAME_CMD(x) ((x) << 28) /* * x=0: tmz_begin diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index edfe50821cd9..295d68c5811d 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -286,6 +286,7 @@ #define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 #define PACKET3_SWITCH_BUFFER 0x8B #define PACKET3_FRAME_CONTROL 0x90 +# define FRAME_TMZ (1 << 0) # define FRAME_CMD(x) ((x) << 28) /* * x=0: tmz_begin From 155748c912e7063dfcf0cd071e289f4aff152672 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Mon, 9 Mar 2020 13:53:37 -0400 Subject: [PATCH 56/88] drm/amdgpu: expand the emit tmz interface with trusted flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch expands the emit_tmz function to support trusted flag while we want to set command buffer in trusted mode. Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 16 ++++++++++++---- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 13 ++++++++++--- 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 1adaac972190..4b325a4f0b19 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -233,7 +233,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, } if (ring->funcs->emit_tmz) - amdgpu_ring_emit_tmz(ring, false); + amdgpu_ring_emit_tmz(ring, false, false); #ifdef CONFIG_X86_64 if (!(adev->flags & AMD_IS_APU)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 7d39064f9361..b6cae39b2e4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -177,7 +177,7 @@ struct amdgpu_ring_funcs { void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring, uint32_t reg0, uint32_t reg1, uint32_t ref, uint32_t mask); - void (*emit_tmz)(struct amdgpu_ring *ring, bool start); + void (*emit_tmz)(struct amdgpu_ring *ring, bool start, bool trusted); /* Try to soft recover the ring to make the fence signal */ void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid); int (*preempt_ib)(struct amdgpu_ring *ring); @@ -256,7 +256,7 @@ struct amdgpu_ring { #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) +#define amdgpu_ring_emit_tmz(r, b, s) (r)->funcs->emit_tmz((r), (b), (s)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 97659be2cf36..5ee885c41b83 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3037,7 +3037,8 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev); static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume); static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start); +static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start, + bool trusted); static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { @@ -7442,7 +7443,7 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flag gfx_v10_0_ring_emit_ce_meta(ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); - gfx_v10_0_ring_emit_tmz(ring, true); + gfx_v10_0_ring_emit_tmz(ring, true, false); dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ if (flags & AMDGPU_HAVE_CTX_SWITCH) { @@ -7600,10 +7601,17 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) sizeof(de_payload) >> 2); } -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) +static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start, + bool trusted) { amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); - amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ + /* + * cmd = 0: frame begin + * cmd = 1: frame end + */ + amdgpu_ring_write(ring, + ((ring->adev->tmz.enabled && trusted) ? FRAME_TMZ : 0) + | FRAME_CMD(start ? 0 : 1)); } static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index b7b51037b1cf..89eaa8a89069 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5442,10 +5442,17 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); } -static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) +static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start, + bool trusted) { amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); - amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ + /* + * cmd = 0: frame begin + * cmd = 1: frame end + */ + amdgpu_ring_write(ring, + ((ring->adev->tmz.enabled && trusted) ? FRAME_TMZ : 0) + | FRAME_CMD(start ? 0 : 1)); } static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) @@ -5455,7 +5462,7 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) if (amdgpu_sriov_vf(ring->adev)) gfx_v9_0_ring_emit_ce_meta(ring); - gfx_v9_0_ring_emit_tmz(ring, true); + gfx_v9_0_ring_emit_tmz(ring, true, false); dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ if (flags & AMDGPU_HAVE_CTX_SWITCH) { From 8350361d2d75882573c7d1af228d7827666e6929 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Wed, 22 Apr 2020 17:49:17 -0400 Subject: [PATCH 57/88] drm/amdgpu: expand the context control interface with trust flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch expands the context control function to support trusted flag while we want to set command buffer in trusted mode. Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +++-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 +++- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 5 +++-- 7 files changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 4b325a4f0b19..ac9090a282d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -214,7 +214,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (job && ring->funcs->emit_cntxcntl) { status |= job->preamble_status; status |= job->preemption_status; - amdgpu_ring_emit_cntxcntl(ring, status); + amdgpu_ring_emit_cntxcntl(ring, status, false); } for (i = 0; i < num_ibs; ++i) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index b6cae39b2e4b..5956eff2d784 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -168,7 +168,8 @@ struct amdgpu_ring_funcs { void (*begin_use)(struct amdgpu_ring *ring); void (*end_use)(struct amdgpu_ring *ring); void (*emit_switch_buffer) (struct amdgpu_ring *ring); - void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); + void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags, + bool trusted); void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t reg_val_offs); void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); @@ -251,7 +252,7 @@ struct amdgpu_ring { #define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as)) #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) -#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) +#define amdgpu_ring_emit_cntxcntl(r, d, s) (r)->funcs->emit_cntxcntl((r), (d), (s)) #define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o)) #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 5ee885c41b83..201d0a2c460c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7435,7 +7435,9 @@ static void gfx_v10_0_ring_emit_sb(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0); } -static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) +static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, + uint32_t flags, + bool trusted) { uint32_t dw2 = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index aa1e1be852dd..283b7fc10f98 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -2969,7 +2969,8 @@ static uint64_t gfx_v6_0_get_gpu_clock_counter(struct amdgpu_device *adev) return clock; } -static void gfx_v6_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) +static void gfx_v6_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags, + bool trusted) { if (flags & AMDGPU_HAVE_CTX_SWITCH) gfx_v6_0_ring_emit_vgt_flush(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index e5a88cad44cb..f26e91354ba8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2320,7 +2320,8 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, amdgpu_ring_write(ring, control); } -static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) +static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags, + bool trusted) { uint32_t dw2 = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 2fcf6865abba..d1312d829252 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6329,7 +6329,8 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0); } -static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) +static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags, + bool trusted) { uint32_t dw2 = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 89eaa8a89069..9f78c00f4319 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5455,14 +5455,15 @@ static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start, | FRAME_CMD(start ? 0 : 1)); } -static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) +static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags, + bool trusted) { uint32_t dw2 = 0; if (amdgpu_sriov_vf(ring->adev)) gfx_v9_0_ring_emit_ce_meta(ring); - gfx_v9_0_ring_emit_tmz(ring, true, false); + gfx_v9_0_ring_emit_tmz(ring, true, trusted); dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ if (flags & AMDGPU_HAVE_CTX_SWITCH) { From cb5fae143d79d255251921066dbf8eae16383639 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Thu, 8 Aug 2019 20:05:15 +0800 Subject: [PATCH 58/88] drm/amdgpu: job is secure iff CS is secure (v5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mark a job as secure, if and only if the command submission flag has the secure flag set. v2: fix the null job pointer while in vmid 0 submission. v3: Context --> Command submission. v4: filling cs parser with cs->in.flags v5: move the job secure flag setting out of amdgpu_cs_submit() Signed-off-by: Huang Rui Signed-off-by: Luben Tuikov Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 3eee5c7d83e0..99de770a8e9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -232,6 +232,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs if (ret) goto free_all_kdata; + p->job->secure = cs->in.flags & AMDGPU_CS_FLAGS_SECURE; + if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) { ret = -ECANCELED; goto free_all_kdata; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index ac9090a282d8..045951d2b46c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -214,7 +214,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (job && ring->funcs->emit_cntxcntl) { status |= job->preamble_status; status |= job->preemption_status; - amdgpu_ring_emit_cntxcntl(ring, status, false); + amdgpu_ring_emit_cntxcntl(ring, status, job->secure); } for (i = 0; i < num_ibs; ++i) { @@ -233,7 +233,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, } if (ring->funcs->emit_tmz) - amdgpu_ring_emit_tmz(ring, false, false); + amdgpu_ring_emit_tmz(ring, false, job ? job->secure : false); #ifdef CONFIG_X86_64 if (!(adev->flags & AMD_IS_APU)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index d4262069d501..7f5ccee476a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -63,6 +63,8 @@ struct amdgpu_job { uint64_t uf_addr; uint64_t uf_sequence; + /* the job is due to a secure command submission */ + bool secure; }; int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, From 4cd24494cc87468145ccacd885446b2fec6cb856 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 7 Aug 2019 22:32:46 -0500 Subject: [PATCH 59/88] drm/amdgpu: set TMZ bits in PTEs for secure BO (v4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If a buffer object is secure, i.e. created with AMDGPU_GEM_CREATE_ENCRYPTED, then the TMZ bit of the PTEs that belong the buffer object should be set. v1: design and draft the skeletion of TMZ bits setting on PTEs (Alex) v2: return failure once create secure BO on non-TMZ platform (Ray) v3: amdgpu_bo_encrypted() only checks the BO (Luben) v4: move TMZ flag setting into amdgpu_vm_bo_update (Christian) Signed-off-by: Alex Deucher Reviewed-by: Huang Rui Signed-off-by: Huang Rui Signed-off-by: Luben Tuikov Reviewed-by: Alex Deucher Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 12 +++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 11 +++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 32f36c940abb..26220ee87291 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -233,7 +233,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_VRAM_CLEARED | AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | - AMDGPU_GEM_CREATE_EXPLICIT_SYNC)) + AMDGPU_GEM_CREATE_EXPLICIT_SYNC | + AMDGPU_GEM_CREATE_ENCRYPTED)) return -EINVAL; @@ -241,6 +242,11 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK) return -EINVAL; + if (!adev->tmz.enabled && (flags & AMDGPU_GEM_CREATE_ENCRYPTED)) { + DRM_ERROR("Cannot allocate secure buffer while tmz is disabled\n"); + return -EINVAL; + } + /* create a gem object to contain this object in */ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) { @@ -262,6 +268,10 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, resv = vm->root.base.bo->tbo.base.resv; } + if (flags & AMDGPU_GEM_CREATE_ENCRYPTED) { + /* XXX: pad out alignment to meet TMZ requirements */ + } + r = amdgpu_gem_object_create(adev, size, args->in.alignment, (u32)(0xffffffff & args->in.domains), flags, ttm_bo_type_device, resv, &gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 5e39ecd8cc28..7d41f7b9a340 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -229,6 +229,17 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; } +/** + * amdgpu_bo_encrypted - test if the BO is encrypted + * @bo: pointer to a buffer object + * + * Return true if the buffer object is encrypted, false otherwise. + */ +static inline bool amdgpu_bo_encrypted(struct amdgpu_bo *bo) +{ + return bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED; +} + bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index bcdaf5204d05..0d8210553059 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1784,6 +1784,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, if (bo) { flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); + + if (amdgpu_bo_encrypted(bo)) + flags |= AMDGPU_PTE_TMZ; + bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); } else { flags = 0x0; From 04379e9b04890e4c9ccfc8465447fac544b89e28 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 16 Oct 2019 11:22:07 +1100 Subject: [PATCH 60/88] drm/amdgpu: fix up for amdgpu_tmz.c and removal of drm/drmP.h Signed-off-by: Stephen Rothwell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.c index 14a55003dd81..823527a0fa47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.c @@ -20,7 +20,10 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#include +#include + +#include + #include "amdgpu.h" #include "amdgpu_tmz.h" From be7538ff7488445297d903f5419de8fb99adf85d Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Tue, 15 Oct 2019 15:37:48 +0800 Subject: [PATCH 61/88] drm/amdgpu: expand sdma copy_buffer interface with tmz parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch expands sdma copy_buffer interface with tmz parameter. Signed-off-by: Aaron Liu Reviewed-by: Christian König Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 3 ++- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 3 ++- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 3 ++- drivers/gpu/drm/amd/amdgpu/si_dma.c | 3 ++- 8 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 2a152516504a..e5b8fb8e75c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -89,7 +89,8 @@ struct amdgpu_buffer_funcs { /* dst addr in bytes */ uint64_t dst_offset, /* number of byte to transfer */ - uint32_t byte_count); + uint32_t byte_count, + bool tmz); /* maximum bytes in a single operation */ uint32_t fill_max_bytes; @@ -107,7 +108,7 @@ struct amdgpu_buffer_funcs { uint32_t byte_count); }; -#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b)) +#define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t)) #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) struct amdgpu_sdma_instance * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 1331b4c5bdca..298caa50eed6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -2054,7 +2054,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, - dst_addr, num_bytes); + dst_addr, num_bytes, false); amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); @@ -2126,7 +2126,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint32_t cur_size_in_bytes = min(byte_count, max_bytes); amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, - dst_offset, cur_size_in_bytes); + dst_offset, cur_size_in_bytes, false); src_offset += cur_size_in_bytes; dst_offset += cur_size_in_bytes; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 7c2ee84fa4a6..20f108818b2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1313,7 +1313,8 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev) static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, - uint32_t byte_count) + uint32_t byte_count, + bool tmz) { ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); ib->ptr[ib->length_dw++] = byte_count; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 9a7f194c730a..5f304d61999e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -1200,7 +1200,8 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev) static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, - uint32_t byte_count) + uint32_t byte_count, + bool tmz) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index e6b5b4f672a0..c59f6f6f4c09 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1638,7 +1638,8 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev) static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, - uint32_t byte_count) + uint32_t byte_count, + bool tmz) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index f092f12fecfe..4cabab097681 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2458,7 +2458,8 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, - uint32_t byte_count) + uint32_t byte_count, + bool tmz) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index f9e92e85813a..e08933e82420 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1706,7 +1706,8 @@ static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev) static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, - uint32_t byte_count) + uint32_t byte_count, + bool tmz) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index e4b8283ad11d..7d2bbcbe547b 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -776,7 +776,8 @@ static void si_dma_set_irq_funcs(struct amdgpu_device *adev) static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, - uint32_t byte_count) + uint32_t byte_count, + bool tmz) { ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, byte_count); From c9dc9cfe185f53ffb0b5d621b00eecd80f942a7c Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Tue, 15 Oct 2019 15:45:23 +0800 Subject: [PATCH 62/88] drm/amdgpu: expand amdgpu_copy_buffer interface with tmz parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch expands amdgpu_copy_buffer interface with tmz parameter. Signed-off-by: Aaron Liu Reviewed-by: Christian König Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index d1495e1c9289..d9b35df33806 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -40,7 +40,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, for (i = 0; i < n; i++) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence, - false, false); + false, false, false); if (r) goto exit_do_move; r = dma_fence_wait(fence, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index c687f5415b3f..3d822eba9a5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -753,7 +753,7 @@ int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence) return amdgpu_copy_buffer(ring, shadow_addr, parent_addr, amdgpu_bo_size(shadow), NULL, fence, - true, false); + true, false, false); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index b158230af8db..476f1f89aaad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -124,7 +124,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) amdgpu_bo_kunmap(gtt_obj[i]); r = amdgpu_copy_buffer(ring, gart_addr, vram_addr, - size, NULL, &fence, false, false); + size, NULL, &fence, false, false, false); if (r) { DRM_ERROR("Failed GTT->VRAM copy %d\n", i); @@ -170,7 +170,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) amdgpu_bo_kunmap(vram_obj); r = amdgpu_copy_buffer(ring, vram_addr, gart_addr, - size, NULL, &fence, false, false); + size, NULL, &fence, false, false, false); if (r) { DRM_ERROR("Failed VRAM->GTT copy %d\n", i); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 298caa50eed6..0116c4afe1da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -373,7 +373,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, } r = amdgpu_copy_buffer(ring, from, to, cur_size, - resv, &next, false, true); + resv, &next, false, true, false); if (r) goto error; @@ -2084,7 +2084,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, - bool vm_needs_flush) + bool vm_needs_flush, bool tmz) { struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; @@ -2126,7 +2126,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint32_t cur_size_in_bytes = min(byte_count, max_bytes); amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, - dst_offset, cur_size_in_bytes, false); + dst_offset, cur_size_in_bytes, tmz); src_offset += cur_size_in_bytes; dst_offset += cur_size_in_bytes; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index bd05bbb4878d..dc6502d1060b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -87,7 +87,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, - bool vm_needs_flush); + bool vm_needs_flush, bool tmz); int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct amdgpu_copy_mem *src, struct amdgpu_copy_mem *dst, From b7c163fe910fd38000f85b871bf25b120282e702 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Tue, 15 Oct 2019 16:47:44 +0800 Subject: [PATCH 63/88] drm/amdgpu: enable TMZ bit in sdma copy pkt for sdma v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable sdma TMZ mode via setting TMZ bit in sdma copy pkt for sdma v4 Signed-off-by: Aaron Liu Reviewed-by: Christian König Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 4cabab097681..c0ca9a8229e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2462,7 +2462,8 @@ static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib, bool tmz) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | - SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | + SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); From b231531c50126c7bfc838e75dfc4de01f8ed90d3 Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Fri, 15 Nov 2019 16:18:03 +0800 Subject: [PATCH 64/88] drm/amdgpu: enable TMZ bit in sdma copy pkt for sdma v5 Enable sdma TMZ mode via setting TMZ bit in sdma copy pkt for sdma v5. Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index e08933e82420..b544baf306f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1710,7 +1710,8 @@ static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib, bool tmz) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | - SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | + SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); From 8fb2e01a1ed8616b7a2bc8b00b50356d26efba2e Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Fri, 15 Nov 2019 15:08:36 +0800 Subject: [PATCH 65/88] drm/amdgpu: enable TMZ bit in FRAME_CONTROL for gfx10 This patch enables TMZ bit in FRAME_CONTROL for gfx10. Signed-off-by: Aaron Liu Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 201d0a2c460c..979786065aa4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7445,7 +7445,7 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, gfx_v10_0_ring_emit_ce_meta(ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); - gfx_v10_0_ring_emit_tmz(ring, true, false); + gfx_v10_0_ring_emit_tmz(ring, true, trusted); dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ if (flags & AMDGPU_HAVE_CTX_SWITCH) { From 4baa8ff0690e464443594353d63c989f0ed9f831 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 27 Nov 2019 15:55:35 -0500 Subject: [PATCH 66/88] drm/amdgpu: move CS secure flag next the structs where it's used MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So it's not mixed up with the CTX stuff. Reviewed-by: Zhan Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- include/uapi/drm/amdgpu_drm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 34cd0bae06bc..bea72eb8c147 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -205,9 +205,6 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_OP_QUERY_STATE 3 #define AMDGPU_CTX_OP_QUERY_STATE2 4 -/* Flag the command submission as secure */ -#define AMDGPU_CS_FLAGS_SECURE (1 << 0) - /* GPU reset status */ #define AMDGPU_CTX_NO_RESET 0 /* this the context caused it */ @@ -561,6 +558,9 @@ struct drm_amdgpu_cs_chunk { __u64 chunk_data; }; +/* Flag the command submission as secure */ +#define AMDGPU_CS_FLAGS_SECURE (1 << 0) + struct drm_amdgpu_cs_in { /** Rendering context id */ __u32 ctx_id; From 562366c9452c534242a55a47c6431ab1e9b6b07c Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 14 Jan 2020 18:55:22 +0800 Subject: [PATCH 67/88] drm/amdgpu: remove the alignment placeholder for secure buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The alignment should match the page size for secure buffer, so we didn't configure it anymore. Signed-off-by: Huang Rui Reviewed-by: Aaron Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 26220ee87291..dffc32cde02b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -268,10 +268,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, resv = vm->root.base.bo->tbo.base.resv; } - if (flags & AMDGPU_GEM_CREATE_ENCRYPTED) { - /* XXX: pad out alignment to meet TMZ requirements */ - } - r = amdgpu_gem_object_create(adev, size, args->in.alignment, (u32)(0xffffffff & args->in.domains), flags, ttm_bo_type_device, resv, &gobj); From c6252390fccdd768d1250a45cbd2a7e3610a1283 Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Thu, 19 Mar 2020 16:47:51 -0400 Subject: [PATCH 68/88] drm/amdgpu: implement TMZ accessor (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement an accessor of adev->tmz.enabled. Let not code around access it as "if (adev->tmz.enabled)" as the organization may change. Instead... Recruit "bool amdgpu_is_tmz(adev)" to return exactly this Boolean value. That is, this function is now an accessor of an already initialized and set adev and adev->tmz. Add "void amdgpu_gmc_tmz_set(adev)" to check and set adev->gmc.tmz_enabled at initialization time. After which one uses "bool amdgpu_is_tmz(adev)" to query whether adev supports TMZ. Also, remove circular header file include. v2: Remove amdgpu_tmz.[ch] as requested. v3: Move TMZ into GMC. Signed-off-by: Luben Tuikov Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 10 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 25 +++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 4 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.c | 52 ---------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.h | 39 ---------------- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- 10 files changed, 40 insertions(+), 103 deletions(-) delete mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.c delete mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index e3ba6c4c08e3..210d57a4afc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -55,7 +55,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ - amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_tmz.o + amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f54532abfe78..589d8783fa21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -103,7 +103,6 @@ #include "amdgpu_umc.h" #include "amdgpu_mmhub.h" #include "amdgpu_df.h" -#include "amdgpu_tmz.h" #define MAX_GPU_INSTANCE 16 @@ -937,9 +936,6 @@ struct amdgpu_device { /* df */ struct amdgpu_df df; - /* tmz */ - struct amdgpu_tmz tmz; - struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM]; int num_ip_blocks; struct mutex mn_lock; @@ -1269,5 +1265,9 @@ _name##_show(struct device *dev, \ \ static struct device_attribute pmu_attr_##_name = __ATTR_RO(_name) -#endif +static inline bool amdgpu_is_tmz(struct amdgpu_device *adev) +{ + return adev->gmc.tmz_enabled; +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 28e596a11298..b038ddbb2ece 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -65,7 +65,6 @@ #include "amdgpu_ras.h" #include "amdgpu_pmu.h" #include "amdgpu_fru_eeprom.h" -#include "amdgpu_tmz.h" #include #include @@ -1141,7 +1140,7 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); - adev->tmz.enabled = amdgpu_is_tmz(adev); + amdgpu_gmc_tmz_set(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index dffc32cde02b..46cea436945f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -242,8 +242,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK) return -EINVAL; - if (!adev->tmz.enabled && (flags & AMDGPU_GEM_CREATE_ENCRYPTED)) { - DRM_ERROR("Cannot allocate secure buffer while tmz is disabled\n"); + if (amdgpu_is_tmz(adev) && (flags & AMDGPU_GEM_CREATE_ENCRYPTED)) { + DRM_ERROR("Cannot allocate secure buffer since TMZ is disabled\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index a8ca808f453b..ce13c3758460 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -373,3 +373,28 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) return 0; } + +/** + * amdgpu_tmz_set -- check and set if a device supports TMZ + * @adev: amdgpu_device pointer + * + * Check and set if an the device @adev supports Trusted Memory + * Zones (TMZ). + */ +void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) +{ + if (!amdgpu_tmz) + return; + + if (adev->asic_type < CHIP_RAVEN || + adev->asic_type == CHIP_ARCTURUS) { + adev->gmc.tmz_enabled = false; + dev_warn(adev->dev, + "Trusted Memory Zone (TMZ) feature not supported\n"); + } else { + + adev->gmc.tmz_enabled = true; + dev_info(adev->dev, + "Trusted Memory Zone (TMZ) feature supported and enabled\n"); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 7546da0cc70c..2bd9423c1dab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -213,6 +213,8 @@ struct amdgpu_gmc { } fault_hash[AMDGPU_GMC_FAULT_HASH_SIZE]; uint64_t last_fault:AMDGPU_GMC_FAULT_RING_ORDER; + bool tmz_enabled; + const struct amdgpu_gmc_funcs *gmc_funcs; struct amdgpu_xgmi xgmi; @@ -276,4 +278,6 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev); void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev); +extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.c deleted file mode 100644 index 823527a0fa47..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2019 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include - -#include - -#include "amdgpu.h" -#include "amdgpu_tmz.h" - - -/** - * amdgpu_is_tmz - validate trust memory zone - * - * @adev: amdgpu_device pointer - * - * Return true if @dev supports trusted memory zones (TMZ), and return false if - * @dev does not support TMZ. - */ -bool amdgpu_is_tmz(struct amdgpu_device *adev) -{ - if (!amdgpu_tmz) - return false; - - if (adev->asic_type < CHIP_RAVEN || adev->asic_type == CHIP_ARCTURUS) { - dev_warn(adev->dev, "doesn't support trusted memory zones (TMZ)\n"); - return false; - } - - dev_info(adev->dev, "TMZ feature is enabled\n"); - - return true; -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.h deleted file mode 100644 index 28e05177fb89..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_tmz.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2019 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef __AMDGPU_TMZ_H__ -#define __AMDGPU_TMZ_H__ - -#include "amdgpu.h" - -/* - * Trust memory zone stuff - */ -struct amdgpu_tmz { - bool enabled; -}; - - -extern bool amdgpu_is_tmz(struct amdgpu_device *adev); - -#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 979786065aa4..473c1c145332 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7612,7 +7612,7 @@ static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start, * cmd = 1: frame end */ amdgpu_ring_write(ring, - ((ring->adev->tmz.enabled && trusted) ? FRAME_TMZ : 0) + ((amdgpu_is_tmz(ring->adev) && trusted) ? FRAME_TMZ : 0) | FRAME_CMD(start ? 0 : 1)); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 9f78c00f4319..bae5dd6ea348 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5451,7 +5451,7 @@ static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start, * cmd = 1: frame end */ amdgpu_ring_write(ring, - ((ring->adev->tmz.enabled && trusted) ? FRAME_TMZ : 0) + ((amdgpu_is_tmz(ring->adev) && trusted) ? FRAME_TMZ : 0) | FRAME_CMD(start ? 0 : 1)); } From 5888f07a657d1b653f0e59c492b07830f3487406 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Wed, 22 Apr 2020 17:54:55 -0400 Subject: [PATCH 69/88] drm/amd/display: Indicate use of TMZ buffers to DC [Why] Hubp needs to know whether a buffer is being scanned out from the trusted memory zone or not. [How] Check for the TMZ flag on the amdgpu_bo and set the tmz_surface flag in dc_plane_address accordingly. Signed-off-by: Harry Wentland Signed-off-by: Bhawanpreet Lakha Acked-by: Bhawanpreet Lakha Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6cd661545f35..98f39db81c7b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3309,7 +3309,7 @@ static int fill_dc_scaling_info(const struct drm_plane_state *state, } static int get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb, - uint64_t *tiling_flags) + uint64_t *tiling_flags, bool *tmz_surface) { struct amdgpu_bo *rbo = gem_to_amdgpu_bo(amdgpu_fb->base.obj[0]); int r = amdgpu_bo_reserve(rbo, false); @@ -3324,6 +3324,9 @@ static int get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb, if (tiling_flags) amdgpu_bo_get_tiling_flags(rbo, tiling_flags); + if (tmz_surface) + *tmz_surface = amdgpu_bo_encrypted(rbo); + amdgpu_bo_unreserve(rbo); return r; @@ -3411,6 +3414,7 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev, struct plane_size *plane_size, struct dc_plane_dcc_param *dcc, struct dc_plane_address *address, + bool tmz_surface, bool force_disable_dcc) { const struct drm_framebuffer *fb = &afb->base; @@ -3421,6 +3425,8 @@ fill_plane_buffer_attributes(struct amdgpu_device *adev, memset(dcc, 0, sizeof(*dcc)); memset(address, 0, sizeof(*address)); + address->tmz_surface = tmz_surface; + if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { plane_size->surface_size.x = 0; plane_size->surface_size.y = 0; @@ -3611,6 +3617,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, const uint64_t tiling_flags, struct dc_plane_info *plane_info, struct dc_plane_address *address, + bool tmz_surface, bool force_disable_dcc) { const struct drm_framebuffer *fb = plane_state->fb; @@ -3693,7 +3700,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, plane_info->rotation, tiling_flags, &plane_info->tiling_info, &plane_info->plane_size, - &plane_info->dcc, address, + &plane_info->dcc, address, tmz_surface, force_disable_dcc); if (ret) return ret; @@ -3717,6 +3724,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, struct dc_plane_info plane_info; uint64_t tiling_flags; int ret; + bool tmz_surface = false; bool force_disable_dcc = false; ret = fill_dc_scaling_info(plane_state, &scaling_info); @@ -3728,7 +3736,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, dc_plane_state->clip_rect = scaling_info.clip_rect; dc_plane_state->scaling_quality = scaling_info.scaling_quality; - ret = get_fb_info(amdgpu_fb, &tiling_flags); + ret = get_fb_info(amdgpu_fb, &tiling_flags, &tmz_surface); if (ret) return ret; @@ -3736,6 +3744,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, ret = fill_dc_plane_info_and_addr(adev, plane_state, tiling_flags, &plane_info, &dc_plane_state->address, + tmz_surface, force_disable_dcc); if (ret) return ret; @@ -5354,6 +5363,7 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, uint64_t tiling_flags; uint32_t domain; int r; + bool tmz_surface = false; bool force_disable_dcc = false; dm_plane_state_old = to_dm_plane_state(plane->state); @@ -5403,6 +5413,8 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, amdgpu_bo_get_tiling_flags(rbo, &tiling_flags); + tmz_surface = amdgpu_bo_encrypted(rbo); + ttm_eu_backoff_reservation(&ticket, &list); afb->address = amdgpu_bo_gpu_offset(rbo); @@ -5418,7 +5430,7 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, adev, afb, plane_state->format, plane_state->rotation, tiling_flags, &plane_state->tiling_info, &plane_state->plane_size, &plane_state->dcc, - &plane_state->address, + &plane_state->address, tmz_surface, force_disable_dcc); } @@ -6592,6 +6604,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, unsigned long flags; struct amdgpu_bo *abo; uint64_t tiling_flags; + bool tmz_surface = false; uint32_t target_vblank, last_flip_vblank; bool vrr_active = amdgpu_dm_vrr_active(acrtc_state); bool pflip_present = false; @@ -6687,12 +6700,15 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, amdgpu_bo_get_tiling_flags(abo, &tiling_flags); + tmz_surface = amdgpu_bo_encrypted(abo); + amdgpu_bo_unreserve(abo); fill_dc_plane_info_and_addr( dm->adev, new_plane_state, tiling_flags, &bundle->plane_infos[planes_count], &bundle->flip_addrs[planes_count].address, + tmz_surface, false); DRM_DEBUG_DRIVER("plane: id=%d dcc_en=%d\n", @@ -8065,6 +8081,7 @@ dm_determine_update_type_for_commit(struct amdgpu_display_manager *dm, struct dc_flip_addrs *flip_addr = &bundle->flip_addrs[num_plane]; struct dc_scaling_info *scaling_info = &bundle->scaling_infos[num_plane]; uint64_t tiling_flags; + bool tmz_surface = false; new_plane_crtc = new_plane_state->crtc; new_dm_plane_state = to_dm_plane_state(new_plane_state); @@ -8112,14 +8129,14 @@ dm_determine_update_type_for_commit(struct amdgpu_display_manager *dm, bundle->surface_updates[num_plane].scaling_info = scaling_info; if (amdgpu_fb) { - ret = get_fb_info(amdgpu_fb, &tiling_flags); + ret = get_fb_info(amdgpu_fb, &tiling_flags, &tmz_surface); if (ret) goto cleanup; ret = fill_dc_plane_info_and_addr( dm->adev, new_plane_state, tiling_flags, plane_info, - &flip_addr->address, + &flip_addr->address, tmz_surface, false); if (ret) goto cleanup; From 0bb5d5b03f78aeb5f87d47877eb15532875c64da Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Wed, 22 Apr 2020 17:56:56 -0400 Subject: [PATCH 70/88] drm/amdgpu: Move to a per-IB secure flag (TMZ) Move from a per-CS secure flag (TMZ) to a per-IB secure flag. Signed-off-by: Luben Tuikov Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 23 ++++++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 3 --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 9 ++++----- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 23 +++++++---------------- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 3 +-- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 3 +-- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 3 +-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 20 ++++++-------------- include/uapi/drm/amdgpu_drm.h | 7 ++++--- 10 files changed, 44 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 99de770a8e9f..3eee5c7d83e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -232,8 +232,6 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs if (ret) goto free_all_kdata; - p->job->secure = cs->in.flags & AMDGPU_CS_FLAGS_SECURE; - if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) { ret = -ECANCELED; goto free_all_kdata; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 045951d2b46c..cba22039df6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -133,6 +133,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, uint64_t fence_ctx; uint32_t status = 0, alloc_size; unsigned fence_flags = 0; + bool secure; unsigned i; int r = 0; @@ -214,9 +215,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (job && ring->funcs->emit_cntxcntl) { status |= job->preamble_status; status |= job->preemption_status; - amdgpu_ring_emit_cntxcntl(ring, status, job->secure); + amdgpu_ring_emit_cntxcntl(ring, status); } + secure = false; for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; @@ -228,12 +230,27 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ continue; + /* If this IB is TMZ, add frame TMZ start packet, + * else, turn off TMZ. + */ + if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) { + if (!secure) { + secure = true; + amdgpu_ring_emit_tmz(ring, true); + } + } else if (secure) { + secure = false; + amdgpu_ring_emit_tmz(ring, false); + } + amdgpu_ring_emit_ib(ring, job, ib, status); status &= ~AMDGPU_HAVE_CTX_SWITCH; } - if (ring->funcs->emit_tmz) - amdgpu_ring_emit_tmz(ring, false, job ? job->secure : false); + if (secure) { + secure = false; + amdgpu_ring_emit_tmz(ring, false); + } #ifdef CONFIG_X86_64 if (!(adev->flags & AMD_IS_APU)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 7f5ccee476a4..81caac9b958a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -62,9 +62,6 @@ struct amdgpu_job { /* user fence handling */ uint64_t uf_addr; uint64_t uf_sequence; - - /* the job is due to a secure command submission */ - bool secure; }; int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 5956eff2d784..7d39064f9361 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -168,8 +168,7 @@ struct amdgpu_ring_funcs { void (*begin_use)(struct amdgpu_ring *ring); void (*end_use)(struct amdgpu_ring *ring); void (*emit_switch_buffer) (struct amdgpu_ring *ring); - void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags, - bool trusted); + void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t reg_val_offs); void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); @@ -178,7 +177,7 @@ struct amdgpu_ring_funcs { void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring, uint32_t reg0, uint32_t reg1, uint32_t ref, uint32_t mask); - void (*emit_tmz)(struct amdgpu_ring *ring, bool start, bool trusted); + void (*emit_tmz)(struct amdgpu_ring *ring, bool start); /* Try to soft recover the ring to make the fence signal */ void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid); int (*preempt_ib)(struct amdgpu_ring *ring); @@ -252,12 +251,12 @@ struct amdgpu_ring { #define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as)) #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) -#define amdgpu_ring_emit_cntxcntl(r, d, s) (r)->funcs->emit_cntxcntl((r), (d), (s)) +#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) #define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o)) #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) -#define amdgpu_ring_emit_tmz(r, b, s) (r)->funcs->emit_tmz((r), (b), (s)) +#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 473c1c145332..404c6d470515 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3037,8 +3037,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev); static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume); static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start, - bool trusted); +static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start); static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { @@ -7436,8 +7435,7 @@ static void gfx_v10_0_ring_emit_sb(struct amdgpu_ring *ring) } static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, - uint32_t flags, - bool trusted) + uint32_t flags) { uint32_t dw2 = 0; @@ -7445,8 +7443,6 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, gfx_v10_0_ring_emit_ce_meta(ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false); - gfx_v10_0_ring_emit_tmz(ring, true, trusted); - dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ if (flags & AMDGPU_HAVE_CTX_SWITCH) { /* set load_global_config & load_global_uconfig */ @@ -7603,17 +7599,12 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) sizeof(de_payload) >> 2); } -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start, - bool trusted) +static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) { - amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); - /* - * cmd = 0: frame begin - * cmd = 1: frame end - */ - amdgpu_ring_write(ring, - ((amdgpu_is_tmz(ring->adev) && trusted) ? FRAME_TMZ : 0) - | FRAME_CMD(start ? 0 : 1)); + if (amdgpu_is_tmz(ring->adev)) { + amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); + amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1)); + } } static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 283b7fc10f98..aa1e1be852dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -2969,8 +2969,7 @@ static uint64_t gfx_v6_0_get_gpu_clock_counter(struct amdgpu_device *adev) return clock; } -static void gfx_v6_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags, - bool trusted) +static void gfx_v6_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) { if (flags & AMDGPU_HAVE_CTX_SWITCH) gfx_v6_0_ring_emit_vgt_flush(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index f26e91354ba8..e5a88cad44cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2320,8 +2320,7 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, amdgpu_ring_write(ring, control); } -static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags, - bool trusted) +static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) { uint32_t dw2 = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index d1312d829252..2fcf6865abba 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6329,8 +6329,7 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0); } -static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags, - bool trusted) +static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) { uint32_t dw2 = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index bae5dd6ea348..4e042e974983 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5442,29 +5442,21 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); } -static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start, - bool trusted) +static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) { - amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); - /* - * cmd = 0: frame begin - * cmd = 1: frame end - */ - amdgpu_ring_write(ring, - ((amdgpu_is_tmz(ring->adev) && trusted) ? FRAME_TMZ : 0) - | FRAME_CMD(start ? 0 : 1)); + if (amdgpu_is_tmz(ring->adev)) { + amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); + amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1)); + } } -static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags, - bool trusted) +static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) { uint32_t dw2 = 0; if (amdgpu_sriov_vf(ring->adev)) gfx_v9_0_ring_emit_ce_meta(ring); - gfx_v9_0_ring_emit_tmz(ring, true, trusted); - dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ if (flags & AMDGPU_HAVE_CTX_SWITCH) { /* set load_global_config & load_global_uconfig */ diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index bea72eb8c147..e01b673f0449 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -558,9 +558,6 @@ struct drm_amdgpu_cs_chunk { __u64 chunk_data; }; -/* Flag the command submission as secure */ -#define AMDGPU_CS_FLAGS_SECURE (1 << 0) - struct drm_amdgpu_cs_in { /** Rendering context id */ __u32 ctx_id; @@ -601,6 +598,10 @@ union drm_amdgpu_cs { */ #define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4) +/* Flag the IB as secure (TMZ) + */ +#define AMDGPU_IB_FLAGS_SECURE (1 << 5) + struct drm_amdgpu_cs_chunk_ib { __u32 _pad; /** AMDGPU_IB_FLAG_* */ From 11b407a781f42d8513da64499f24fafdfd32426f Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Tue, 18 Feb 2020 13:07:42 +0800 Subject: [PATCH 71/88] drm/amdgpu: fix the wrong logic checking when secure buffer is created (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While the current amdgpu doesn't support TMZ, it will return the error if user mode would like to allocate secure buffer. v2: we didn't need this checking anymore. v3: only print message once time. Signed-off-by: Huang Rui Reviewed-by: Christian König Acked-by: Nirmoy Das Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 46cea436945f..77d988a0033f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -242,8 +242,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, if (args->in.domains & ~AMDGPU_GEM_DOMAIN_MASK) return -EINVAL; - if (amdgpu_is_tmz(adev) && (flags & AMDGPU_GEM_CREATE_ENCRYPTED)) { - DRM_ERROR("Cannot allocate secure buffer since TMZ is disabled\n"); + if (!amdgpu_is_tmz(adev) && (flags & AMDGPU_GEM_CREATE_ENCRYPTED)) { + DRM_NOTE_ONCE("Cannot allocate secure buffer since TMZ is disabled\n"); return -EINVAL; } From bffc8c5caaa97d6791aef965b618d71faac07098 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 6 Mar 2020 14:36:43 -0500 Subject: [PATCH 72/88] drm/amdgpu: also add the TMZ flag to GART MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is necessary for TMZ handling during buffer moves and scanout. Signed-off-by: Christian König Reviewed-by: Alex Deucher Tested-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0116c4afe1da..a3c103b4b0a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1028,6 +1028,9 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, struct amdgpu_ttm_tt *gtt = (void *)ttm; int r; + if (amdgpu_bo_encrypted(abo)) + flags |= AMDGPU_PTE_TMZ; + if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) { uint64_t page_idx = 1; From effb97cc4ba638eb0a24d612bcea6ff2ef26cfee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 28 Feb 2020 14:48:06 +0100 Subject: [PATCH 73/88] drm/amdgpu: add TMZ handling to amdgpu_move_blit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This way we should be at least able to move buffers from VRAM to GTT. Signed-off-by: Christian König Reviewed-by: Alex Deucher Tested-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 28 +++++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 +- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index a3c103b4b0a2..edb5badb7476 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -65,7 +65,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem, unsigned num_pages, uint64_t offset, unsigned window, - struct amdgpu_ring *ring, + struct amdgpu_ring *ring, bool tmz, uint64_t *addr); /** @@ -290,17 +290,23 @@ static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, /** * amdgpu_copy_ttm_mem_to_mem - Helper function for copy + * @adev: amdgpu device + * @src: buffer/address where to read from + * @dst: buffer/address where to write to + * @size: number of bytes to copy + * @tmz: if a secure copy should be used + * @resv: resv object to sync to + * @f: Returns the last fence if multiple jobs are submitted. * * The function copies @size bytes from {src->mem + src->offset} to * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a * move and different for a BO to BO copy. * - * @f: Returns the last fence if multiple jobs are submitted. */ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct amdgpu_copy_mem *src, struct amdgpu_copy_mem *dst, - uint64_t size, + uint64_t size, bool tmz, struct dma_resv *resv, struct dma_fence **f) { @@ -352,7 +358,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, if (src->mem->start == AMDGPU_BO_INVALID_OFFSET) { r = amdgpu_map_buffer(src->bo, src->mem, PFN_UP(cur_size + src_page_offset), - src_node_start, 0, ring, + src_node_start, 0, ring, tmz, &from); if (r) goto error; @@ -365,7 +371,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, if (dst->mem->start == AMDGPU_BO_INVALID_OFFSET) { r = amdgpu_map_buffer(dst->bo, dst->mem, PFN_UP(cur_size + dst_page_offset), - dst_node_start, 1, ring, + dst_node_start, 1, ring, tmz, &to); if (r) goto error; @@ -373,7 +379,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, } r = amdgpu_copy_buffer(ring, from, to, cur_size, - resv, &next, false, true, false); + resv, &next, false, true, tmz); if (r) goto error; @@ -425,6 +431,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, struct ttm_mem_reg *old_mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_copy_mem src, dst; struct dma_fence *fence = NULL; int r; @@ -438,14 +445,14 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, new_mem->num_pages << PAGE_SHIFT, + amdgpu_bo_encrypted(abo), bo->base.resv, &fence); if (r) goto error; /* clear the space being freed */ if (old_mem->mem_type == TTM_PL_VRAM && - (ttm_to_amdgpu_bo(bo)->flags & - AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { + (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { struct dma_fence *wipe_fence = NULL; r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON, @@ -2022,7 +2029,7 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) static int amdgpu_map_buffer(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem, unsigned num_pages, uint64_t offset, unsigned window, - struct amdgpu_ring *ring, + struct amdgpu_ring *ring, bool tmz, uint64_t *addr) { struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; @@ -2064,6 +2071,9 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, dma_address = >t->ttm.dma_address[offset >> PAGE_SHIFT]; flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem); + if (tmz) + flags |= AMDGPU_PTE_TMZ; + r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags, &job->ibs[0].ptr[num_dw]); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index dc6502d1060b..21182caade21 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -91,7 +91,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct amdgpu_copy_mem *src, struct amdgpu_copy_mem *dst, - uint64_t size, + uint64_t size, bool tmz, struct dma_resv *resv, struct dma_fence **f); int amdgpu_fill_buffer(struct amdgpu_bo *bo, From 218c0b7f183a88b3b2678fbca885a750dda2bff3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 2 Mar 2020 13:00:07 +0100 Subject: [PATCH 74/88] drm/amdgpu: stop evicting encrypted BOs to swap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Swapping out encrypted BOs doesn't work because they can't change their physical location without going through a bounce copy. As a workaround disable evicting encrypted BOs to the system domain for now. Signed-off-by: Christian König Reviewed-by: Alex Deucher Tested-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index edb5badb7476..aa4ea437155e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1550,6 +1550,9 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, switch (bo->mem.mem_type) { case TTM_PL_TT: + if (amdgpu_bo_is_amdgpu_bo(bo) && + amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo))) + return false; return true; case TTM_PL_VRAM: From b71a564e2509e1000044a9873cbee6d6a6a5ab90 Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Wed, 26 Feb 2020 15:30:36 -0500 Subject: [PATCH 75/88] drm/amdgpu: Fine-grained TMZ support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add fine-grained per-ASIC TMZ support. At the moment TMZ support is experimental for all ASICs which support it. Signed-off-by: Luben Tuikov Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 30 ++++++++++++++++--------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index ce13c3758460..acabb57aa8af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -383,18 +383,28 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) */ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) { - if (!amdgpu_tmz) - return; - - if (adev->asic_type < CHIP_RAVEN || - adev->asic_type == CHIP_ARCTURUS) { + switch (adev->asic_type) { + case CHIP_RAVEN: + case CHIP_RENOIR: + case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + /* Don't enable it by default yet. + */ + if (amdgpu_tmz < 1) { + adev->gmc.tmz_enabled = false; + dev_info(adev->dev, + "Trusted Memory Zone (TMZ) feature disabled as experimental (default)\n"); + } else { + adev->gmc.tmz_enabled = true; + dev_info(adev->dev, + "Trusted Memory Zone (TMZ) feature enabled as experimental (cmd line)\n"); + } + break; + default: adev->gmc.tmz_enabled = false; dev_warn(adev->dev, "Trusted Memory Zone (TMZ) feature not supported\n"); - } else { - - adev->gmc.tmz_enabled = true; - dev_info(adev->dev, - "Trusted Memory Zone (TMZ) feature supported and enabled\n"); + break; } } From f77c9aff85caa9e9b0c2b6fc5be1fc2c76fe02b2 Mon Sep 17 00:00:00 2001 From: Huang Rui Date: Mon, 9 Mar 2020 14:52:06 -0400 Subject: [PATCH 76/88] drm/amdgpu: Fix per-IB secure flag GFX hang Since commit "Move to a per-IB secure flag (TMZ)", we've been seeing hangs in GFX. We need to send FRAME CONTROL stop/start back-to-back, every time we flip the TMZ flag. That is, when we transition from TMZ to non-TMZ we have to send a stop with TMZ followed by a start with non-TMZ, and similarly for transitioning from non-TMZ into TMZ. This patch implements this, thus fixing the GFX hang. v1 -> v2: As suggested by Luben, and accept part of implemetation from this patch: - Put "secure" closed to the loop and use optimization - Change "secure" to bool again, and move "secure == -1" out of loop. v3: Small fixes/optimizations. Reported-and-Tested-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Huang Rui Reviewed-by: Alex Deucher Signed-off-by: Luben Tuikov Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 28 ++++++++++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +++-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 15 +++++++------ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 13 ++++++----- 4 files changed, 32 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index cba22039df6c..24ae9f6c4255 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -218,7 +218,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, amdgpu_ring_emit_cntxcntl(ring, status); } + /* Setup initial TMZiness and send it off. + */ secure = false; + if (job && ring->funcs->emit_frame_cntl) { + secure = ib->flags & AMDGPU_IB_FLAGS_SECURE; + amdgpu_ring_emit_frame_cntl(ring, true, secure); + } + for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; @@ -230,27 +237,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ continue; - /* If this IB is TMZ, add frame TMZ start packet, - * else, turn off TMZ. - */ - if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) { - if (!secure) { - secure = true; - amdgpu_ring_emit_tmz(ring, true); + if (job && ring->funcs->emit_frame_cntl) { + if (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) { + amdgpu_ring_emit_frame_cntl(ring, false, secure); + secure = !secure; + amdgpu_ring_emit_frame_cntl(ring, true, secure); } - } else if (secure) { - secure = false; - amdgpu_ring_emit_tmz(ring, false); } amdgpu_ring_emit_ib(ring, job, ib, status); status &= ~AMDGPU_HAVE_CTX_SWITCH; } - if (secure) { - secure = false; - amdgpu_ring_emit_tmz(ring, false); - } + if (job && ring->funcs->emit_frame_cntl) + amdgpu_ring_emit_frame_cntl(ring, false, secure); #ifdef CONFIG_X86_64 if (!(adev->flags & AMD_IS_APU)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 7d39064f9361..7390261095b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -177,7 +177,8 @@ struct amdgpu_ring_funcs { void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring, uint32_t reg0, uint32_t reg1, uint32_t ref, uint32_t mask); - void (*emit_tmz)(struct amdgpu_ring *ring, bool start); + void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start, + bool secure); /* Try to soft recover the ring to make the fence signal */ void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid); int (*preempt_ib)(struct amdgpu_ring *ring); @@ -256,7 +257,7 @@ struct amdgpu_ring { #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) +#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 404c6d470515..9fe20b782e88 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3037,7 +3037,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev); static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev); static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume); static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start); +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure); static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { @@ -7599,12 +7599,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) sizeof(de_payload) >> 2); } -static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) +static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, + bool secure) { - if (amdgpu_is_tmz(ring->adev)) { - amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); - amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1)); - } + uint32_t v = secure ? FRAME_TMZ : 0; + + amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); + amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); } static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, @@ -8058,7 +8059,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec, .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec, .preempt_ib = gfx_v10_0_ring_preempt_ib, - .emit_tmz = gfx_v10_0_ring_emit_tmz, + .emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 4e042e974983..eedb92218ba5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5442,12 +5442,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); } -static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) +static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, + bool secure) { - if (amdgpu_is_tmz(ring->adev)) { - amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); - amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1)); - } + uint32_t v = secure ? FRAME_TMZ : 0; + + amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); + amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); } static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) @@ -6699,7 +6700,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, - .emit_tmz = gfx_v9_0_ring_emit_tmz, + .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, From f0ee63cbc5264dbbdb994e87504b0b033149da55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 18 Mar 2020 10:28:20 +0100 Subject: [PATCH 77/88] drm/amdgpu: cleanup amdgpu_ttm_copy_mem_to_mem and amdgpu_map_buffer v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup amdgpu_ttm_copy_mem_to_mem by using fewer variables for the same value. Rename amdgpu_map_buffer to amdgpu_ttm_map_buffer, move it to avoid the forward decleration, cleanup by moving the map decission into the function and add some documentation. No functional change. v2: add some more cleanup suggested by Felix Signed-off-by: Christian König Reviewed-by: Felix Kuehling Reviewed-by: Huang Rui Tested-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 268 ++++++++++++------------ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 4 +- 2 files changed, 135 insertions(+), 137 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index aa4ea437155e..7a73282d78e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -62,11 +62,6 @@ #define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128 -static int amdgpu_map_buffer(struct ttm_buffer_object *bo, - struct ttm_mem_reg *mem, unsigned num_pages, - uint64_t offset, unsigned window, - struct amdgpu_ring *ring, bool tmz, - uint64_t *addr); /** * amdgpu_init_mem_type - Initialize a memory manager for a specific type of @@ -277,7 +272,7 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, * */ static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, - unsigned long *offset) + uint64_t *offset) { struct drm_mm_node *mm_node = mem->mm_node; @@ -288,6 +283,95 @@ static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, return mm_node; } +/** + * amdgpu_ttm_map_buffer - Map memory into the GART windows + * @bo: buffer object to map + * @mem: memory object to map + * @mm_node: drm_mm node object to map + * @num_pages: number of pages to map + * @offset: offset into @mm_node where to start + * @window: which GART window to use + * @ring: DMA ring to use for the copy + * @tmz: if we should setup a TMZ enabled mapping + * @addr: resulting address inside the MC address space + * + * Setup one of the GART windows to access a specific piece of memory or return + * the physical address for local memory. + */ +static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, + struct ttm_mem_reg *mem, + struct drm_mm_node *mm_node, + unsigned num_pages, uint64_t offset, + unsigned window, struct amdgpu_ring *ring, + bool tmz, uint64_t *addr) +{ + struct ttm_dma_tt *dma = container_of(bo->ttm, struct ttm_dma_tt, ttm); + struct amdgpu_device *adev = ring->adev; + struct amdgpu_job *job; + unsigned num_dw, num_bytes; + dma_addr_t *dma_address; + struct dma_fence *fence; + uint64_t src_addr, dst_addr; + uint64_t flags; + int r; + + BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < + AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); + + /* Map only what can't be accessed directly */ + if (mem->start != AMDGPU_BO_INVALID_OFFSET) { + *addr = amdgpu_mm_node_addr(bo, mm_node, mem) + offset; + return 0; + } + + *addr = adev->gmc.gart_start; + *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * + AMDGPU_GPU_PAGE_SIZE; + *addr += offset & ~PAGE_MASK; + + num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); + num_bytes = num_pages * 8; + + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, + AMDGPU_IB_POOL_NORMAL, &job); + if (r) + return r; + + src_addr = num_dw * 4; + src_addr += job->ibs[0].gpu_addr; + + dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); + dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; + amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, + dst_addr, num_bytes, false); + + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + WARN_ON(job->ibs[0].length_dw > num_dw); + + dma_address = &dma->dma_address[offset >> PAGE_SHIFT]; + flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem); + if (tmz) + flags |= AMDGPU_PTE_TMZ; + + r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags, + &job->ibs[0].ptr[num_dw]); + if (r) + goto error_free; + + r = amdgpu_job_submit(job, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &fence); + if (r) + goto error_free; + + dma_fence_put(fence); + + return r; + +error_free: + amdgpu_job_free(job); + return r; +} + /** * amdgpu_copy_ttm_mem_to_mem - Helper function for copy * @adev: amdgpu device @@ -304,79 +388,62 @@ static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, * */ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, - struct amdgpu_copy_mem *src, - struct amdgpu_copy_mem *dst, + const struct amdgpu_copy_mem *src, + const struct amdgpu_copy_mem *dst, uint64_t size, bool tmz, struct dma_resv *resv, struct dma_fence **f) { + const uint32_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * + AMDGPU_GPU_PAGE_SIZE); + + uint64_t src_node_size, dst_node_size, src_offset, dst_offset; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct drm_mm_node *src_mm, *dst_mm; - uint64_t src_node_start, dst_node_start, src_node_size, - dst_node_size, src_page_offset, dst_page_offset; struct dma_fence *fence = NULL; int r = 0; - const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * - AMDGPU_GPU_PAGE_SIZE); if (!adev->mman.buffer_funcs_enabled) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; } - src_mm = amdgpu_find_mm_node(src->mem, &src->offset); - src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) + - src->offset; - src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset; - src_page_offset = src_node_start & (PAGE_SIZE - 1); + src_offset = src->offset; + src_mm = amdgpu_find_mm_node(src->mem, &src_offset); + src_node_size = (src_mm->size << PAGE_SHIFT) - src_offset; - dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset); - dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) + - dst->offset; - dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset; - dst_page_offset = dst_node_start & (PAGE_SIZE - 1); + dst_offset = dst->offset; + dst_mm = amdgpu_find_mm_node(dst->mem, &dst_offset); + dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst_offset; mutex_lock(&adev->mman.gtt_window_lock); while (size) { - unsigned long cur_size; - uint64_t from = src_node_start, to = dst_node_start; + uint32_t src_page_offset = src_offset & ~PAGE_MASK; + uint32_t dst_page_offset = dst_offset & ~PAGE_MASK; struct dma_fence *next; + uint32_t cur_size; + uint64_t from, to; /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst * begins at an offset, then adjust the size accordingly */ - cur_size = min3(min(src_node_size, dst_node_size), size, - GTT_MAX_BYTES); - if (cur_size + src_page_offset > GTT_MAX_BYTES || - cur_size + dst_page_offset > GTT_MAX_BYTES) - cur_size -= max(src_page_offset, dst_page_offset); + cur_size = min3(src_node_size, dst_node_size, size); + cur_size = min(GTT_MAX_BYTES - src_page_offset, cur_size); + cur_size = min(GTT_MAX_BYTES - dst_page_offset, cur_size); - /* Map only what needs to be accessed. Map src to window 0 and - * dst to window 1 - */ - if (src->mem->start == AMDGPU_BO_INVALID_OFFSET) { - r = amdgpu_map_buffer(src->bo, src->mem, - PFN_UP(cur_size + src_page_offset), - src_node_start, 0, ring, tmz, - &from); - if (r) - goto error; - /* Adjust the offset because amdgpu_map_buffer returns - * start of mapped page - */ - from += src_page_offset; - } + /* Map src to window 0 and dst to window 1. */ + r = amdgpu_ttm_map_buffer(src->bo, src->mem, src_mm, + PFN_UP(cur_size + src_page_offset), + src_offset, 0, ring, tmz, &from); + if (r) + goto error; - if (dst->mem->start == AMDGPU_BO_INVALID_OFFSET) { - r = amdgpu_map_buffer(dst->bo, dst->mem, - PFN_UP(cur_size + dst_page_offset), - dst_node_start, 1, ring, tmz, - &to); - if (r) - goto error; - to += dst_page_offset; - } + r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, dst_mm, + PFN_UP(cur_size + dst_page_offset), + dst_offset, 1, ring, tmz, &to); + if (r) + goto error; r = amdgpu_copy_buffer(ring, from, to, cur_size, resv, &next, false, true, tmz); @@ -392,23 +459,20 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, src_node_size -= cur_size; if (!src_node_size) { - src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm, - src->mem); - src_node_size = (src_mm->size << PAGE_SHIFT); - src_page_offset = 0; + ++src_mm; + src_node_size = src_mm->size << PAGE_SHIFT; + src_offset = 0; } else { - src_node_start += cur_size; - src_page_offset = src_node_start & (PAGE_SIZE - 1); + src_offset += cur_size; } + dst_node_size -= cur_size; if (!dst_node_size) { - dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm, - dst->mem); - dst_node_size = (dst_mm->size << PAGE_SHIFT); - dst_page_offset = 0; + ++dst_mm; + dst_node_size = dst_mm->size << PAGE_SHIFT; + dst_offset = 0; } else { - dst_node_start += cur_size; - dst_page_offset = dst_node_start & (PAGE_SIZE - 1); + dst_offset += cur_size; } } error: @@ -749,8 +813,8 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, unsigned long page_offset) { + uint64_t offset = (page_offset << PAGE_SHIFT); struct drm_mm_node *mm; - unsigned long offset = (page_offset << PAGE_SHIFT); mm = amdgpu_find_mm_node(&bo->mem, &offset); return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + @@ -1601,8 +1665,9 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, if (bo->mem.mem_type != TTM_PL_VRAM) return -EIO; - nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset); - pos = (nodes->start << PAGE_SHIFT) + offset; + pos = offset; + nodes = amdgpu_find_mm_node(&abo->tbo.mem, &pos); + pos += (nodes->start << PAGE_SHIFT); while (len && pos < adev->gmc.mc_vram_size) { uint64_t aligned_pos = pos & ~(uint64_t)3; @@ -2029,73 +2094,6 @@ int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma) return ttm_bo_mmap(filp, vma, &adev->mman.bdev); } -static int amdgpu_map_buffer(struct ttm_buffer_object *bo, - struct ttm_mem_reg *mem, unsigned num_pages, - uint64_t offset, unsigned window, - struct amdgpu_ring *ring, bool tmz, - uint64_t *addr) -{ - struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; - struct amdgpu_device *adev = ring->adev; - struct ttm_tt *ttm = bo->ttm; - struct amdgpu_job *job; - unsigned num_dw, num_bytes; - dma_addr_t *dma_address; - struct dma_fence *fence; - uint64_t src_addr, dst_addr; - uint64_t flags; - int r; - - BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < - AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); - - *addr = adev->gmc.gart_start; - *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * - AMDGPU_GPU_PAGE_SIZE; - - num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); - num_bytes = num_pages * 8; - - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, - AMDGPU_IB_POOL_NORMAL, &job); - if (r) - return r; - - src_addr = num_dw * 4; - src_addr += job->ibs[0].gpu_addr; - - dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); - dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; - amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, - dst_addr, num_bytes, false); - - amdgpu_ring_pad_ib(ring, &job->ibs[0]); - WARN_ON(job->ibs[0].length_dw > num_dw); - - dma_address = >t->ttm.dma_address[offset >> PAGE_SHIFT]; - flags = amdgpu_ttm_tt_pte_flags(adev, ttm, mem); - if (tmz) - flags |= AMDGPU_PTE_TMZ; - - r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags, - &job->ibs[0].ptr[num_dw]); - if (r) - goto error_free; - - r = amdgpu_job_submit(job, &adev->mman.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, &fence); - if (r) - goto error_free; - - dma_fence_put(fence); - - return r; - -error_free: - amdgpu_job_free(job); - return r; -} - int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 21182caade21..11c0e79e7106 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -89,8 +89,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, struct dma_fence **fence, bool direct_submit, bool vm_needs_flush, bool tmz); int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, - struct amdgpu_copy_mem *src, - struct amdgpu_copy_mem *dst, + const struct amdgpu_copy_mem *src, + const struct amdgpu_copy_mem *dst, uint64_t size, bool tmz, struct dma_resv *resv, struct dma_fence **f); From 9504578314a70e6d96ee812dc93b5b5c9514b988 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 19 Mar 2020 13:57:40 +0100 Subject: [PATCH 78/88] drm/amdgpu: add full TMZ support into amdgpu_ttm_map_buffer v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This should allow us to also support VRAM->GTT moves. v2: fix missing vram_base_adjustment Signed-off-by: Christian König Reviewed-by: Huang Rui Tested-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 +++++++++++++++++++------ 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 7a73282d78e5..0d7ad6468ef5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -305,21 +305,21 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, unsigned window, struct amdgpu_ring *ring, bool tmz, uint64_t *addr) { - struct ttm_dma_tt *dma = container_of(bo->ttm, struct ttm_dma_tt, ttm); struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; unsigned num_dw, num_bytes; - dma_addr_t *dma_address; struct dma_fence *fence; uint64_t src_addr, dst_addr; + void *cpu_addr; uint64_t flags; + unsigned int i; int r; BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); /* Map only what can't be accessed directly */ - if (mem->start != AMDGPU_BO_INVALID_OFFSET) { + if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) { *addr = amdgpu_mm_node_addr(bo, mm_node, mem) + offset; return 0; } @@ -348,15 +348,37 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); - dma_address = &dma->dma_address[offset >> PAGE_SHIFT]; flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem); if (tmz) flags |= AMDGPU_PTE_TMZ; - r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags, - &job->ibs[0].ptr[num_dw]); - if (r) - goto error_free; + cpu_addr = &job->ibs[0].ptr[num_dw]; + + if (mem->mem_type == TTM_PL_TT) { + struct ttm_dma_tt *dma; + dma_addr_t *dma_address; + + dma = container_of(bo->ttm, struct ttm_dma_tt, ttm); + dma_address = &dma->dma_address[offset >> PAGE_SHIFT]; + r = amdgpu_gart_map(adev, 0, num_pages, dma_address, flags, + cpu_addr); + if (r) + goto error_free; + } else { + dma_addr_t dma_address; + + dma_address = (mm_node->start << PAGE_SHIFT) + offset; + dma_address += adev->vm_manager.vram_base_offset; + + for (i = 0; i < num_pages; ++i) { + r = amdgpu_gart_map(adev, i << PAGE_SHIFT, 1, + &dma_address, flags, cpu_addr); + if (r) + goto error_free; + + dma_address += PAGE_SIZE; + } + } r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence); From b717fa5cb1a1aae9ea744a3b61e9d9aa1d258c9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 3 Apr 2020 15:56:12 +0200 Subject: [PATCH 79/88] drm/amdgpu: fix size calculation in amdgpu_ttm_copy_mem_to_mem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the node is larger than 4GB we overrun the size calculation. Fix this by correctly limiting the size to the window as well. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0d7ad6468ef5..1296499f0f54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -450,9 +450,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst * begins at an offset, then adjust the size accordingly */ - cur_size = min3(src_node_size, dst_node_size, size); - cur_size = min(GTT_MAX_BYTES - src_page_offset, cur_size); - cur_size = min(GTT_MAX_BYTES - dst_page_offset, cur_size); + cur_size = max(src_page_offset, dst_page_offset); + cur_size = min(min3(src_node_size, dst_node_size, size), + (uint64_t)(GTT_MAX_BYTES - cur_size)); /* Map src to window 0 and dst to window 1. */ r = amdgpu_ttm_map_buffer(src->bo, src->mem, src_mm, From b33f9d70b36889abafec3b638925640ac7c7fe12 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 23 Apr 2020 16:45:10 -0400 Subject: [PATCH 80/88] drm/amdgpu: check ring type for secure IBs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't support secure operation on compute rings at the moment so reject them. Reviewed-by: Andrey Grodzovsky Reviewed-by: Aaron Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 24ae9f6c4255..aebbbb573884 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -162,6 +162,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return -EINVAL; } + if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) && + (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)) { + dev_err(adev->dev, "secure submissions not supported on compute rings\n"); + return -EINVAL; + } + alloc_size = ring->funcs->emit_frame_size + num_ibs * ring->funcs->emit_ib_size; From e208586471181230b88851c14cac80036c128f37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 16 Mar 2020 14:33:33 +0100 Subject: [PATCH 81/88] drm/amdgpu: partial revert VM sync changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We still need to add the VM update fences to the root PD. So make sure to never sync to those implicitely. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index b86392253696..b87ca171986a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -249,6 +249,11 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, owner != AMDGPU_FENCE_OWNER_UNDEFINED) continue; + /* Never sync to VM updates either. */ + if (fence_owner == AMDGPU_FENCE_OWNER_VM && + owner != AMDGPU_FENCE_OWNER_UNDEFINED) + continue; + /* Ignore fences depending on the sync mode */ switch (mode) { case AMDGPU_SYNC_ALWAYS: From 9ecefb19c3a6626c27ea7ee72d431f22462e1d54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 1 Apr 2020 11:18:21 +0200 Subject: [PATCH 82/88] drm/amdgpu: cleanup IB pool handling a bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the coding style, move and rename the definitions to better match what they are supposed to be doing. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 11 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 65 +++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 13 +++++ drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 11 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 +- 10 files changed, 71 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 589d8783fa21..99e5f474505d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -204,8 +204,6 @@ extern int amdgpu_cik_support; #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 #define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */ #define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2) -/* AMDGPU_IB_POOL_SIZE must be a power of 2 */ -#define AMDGPU_IB_POOL_SIZE 16 #define AMDGPU_DEBUGFS_MAX_COMPONENTS 32 #define AMDGPUFB_CONN_LIMIT 4 #define AMDGPU_BIOS_NUM_SCRATCH 16 @@ -402,13 +400,6 @@ struct amdgpu_sa_bo { int amdgpu_fence_slab_init(void); void amdgpu_fence_slab_fini(void); -enum amdgpu_ib_pool_type { - AMDGPU_IB_POOL_NORMAL = 0, - AMDGPU_IB_POOL_VM, - AMDGPU_IB_POOL_DIRECT, - - AMDGPU_IB_POOL_MAX -}; /* * IRQS. */ @@ -866,7 +857,7 @@ struct amdgpu_device { unsigned num_rings; struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; bool ib_pool_ready; - struct amdgpu_sa_manager ring_tmp_bo[AMDGPU_IB_POOL_MAX]; + struct amdgpu_sa_manager ib_pools[AMDGPU_IB_POOL_MAX]; struct amdgpu_sched gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX]; /* interrupts */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 3eee5c7d83e0..7653f62b1b2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -924,7 +924,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, ring = to_amdgpu_ring(entity->rq->sched); r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ? - chunk_ib->ib_bytes : 0, AMDGPU_IB_POOL_NORMAL, ib); + chunk_ib->ib_bytes : 0, + AMDGPU_IB_POOL_DELAYED, ib); if (r) { DRM_ERROR("Failed to get ib !\n"); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index aebbbb573884..c24366aacf3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -61,14 +61,13 @@ * Returns 0 on success, error on failure. */ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, - unsigned size, - enum amdgpu_ib_pool_type pool_type, - struct amdgpu_ib *ib) + unsigned size, enum amdgpu_ib_pool_type pool_type, + struct amdgpu_ib *ib) { int r; if (size) { - r = amdgpu_sa_bo_new(&adev->ring_tmp_bo[pool_type], + r = amdgpu_sa_bo_new(&adev->ib_pools[pool_type], &ib->sa_bo, size, 256); if (r) { dev_err(adev->dev, "failed to get a new IB (%d)\n", r); @@ -305,30 +304,32 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, */ int amdgpu_ib_pool_init(struct amdgpu_device *adev) { - int r, i; unsigned size; + int r, i; - if (adev->ib_pool_ready) { + if (adev->ib_pool_ready) return 0; - } + for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) { if (i == AMDGPU_IB_POOL_DIRECT) size = PAGE_SIZE * 2; else - size = AMDGPU_IB_POOL_SIZE*64*1024; - r = amdgpu_sa_bo_manager_init(adev, &adev->ring_tmp_bo[i], - size, - AMDGPU_GPU_PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT); - if (r) { - for (i--; i >= 0; i--) - amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo[i]); - return r; - } + size = AMDGPU_IB_POOL_SIZE; + + r = amdgpu_sa_bo_manager_init(adev, &adev->ib_pools[i], + size, AMDGPU_GPU_PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT); + if (r) + goto error; } adev->ib_pool_ready = true; return 0; + +error: + while (i--) + amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]); + return r; } /** @@ -343,11 +344,12 @@ void amdgpu_ib_pool_fini(struct amdgpu_device *adev) { int i; - if (adev->ib_pool_ready) { - for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) - amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo[i]); - adev->ib_pool_ready = false; - } + if (!adev->ib_pool_ready) + return; + + for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) + amdgpu_sa_bo_manager_fini(adev, &adev->ib_pools[i]); + adev->ib_pool_ready = false; } /** @@ -362,9 +364,9 @@ void amdgpu_ib_pool_fini(struct amdgpu_device *adev) */ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) { - unsigned i; - int r, ret = 0; long tmo_gfx, tmo_mm; + int r, ret = 0; + unsigned i; tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT; if (amdgpu_sriov_vf(adev)) { @@ -442,15 +444,16 @@ static int amdgpu_debugfs_sa_info(struct seq_file *m, void *data) struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; - seq_printf(m, "-------------------- NORMAL -------------------- \n"); - amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo[AMDGPU_IB_POOL_NORMAL], m); - seq_printf(m, "---------------------- VM ---------------------- \n"); - amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo[AMDGPU_IB_POOL_VM], m); - seq_printf(m, "-------------------- DIRECT--------------------- \n"); - amdgpu_sa_bo_dump_debug_info(&adev->ring_tmp_bo[AMDGPU_IB_POOL_DIRECT], m); + seq_printf(m, "--------------------- DELAYED --------------------- \n"); + amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED], + m); + seq_printf(m, "-------------------- IMMEDIATE -------------------- \n"); + amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_IMMEDIATE], + m); + seq_printf(m, "--------------------- DIRECT ---------------------- \n"); + amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DIRECT], m); return 0; - } static const struct drm_info_list amdgpu_debugfs_sa_list[] = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 7390261095b7..107e80063553 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -50,6 +50,8 @@ #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) +#define AMDGPU_IB_POOL_SIZE (1024 * 1024) + enum amdgpu_ring_type { AMDGPU_RING_TYPE_GFX = AMDGPU_HW_IP_GFX, AMDGPU_RING_TYPE_COMPUTE = AMDGPU_HW_IP_COMPUTE, @@ -63,6 +65,17 @@ enum amdgpu_ring_type { AMDGPU_RING_TYPE_KIQ }; +enum amdgpu_ib_pool_type { + /* Normal submissions to the top of the pipeline. */ + AMDGPU_IB_POOL_DELAYED, + /* Immediate submissions to the bottom of the pipeline. */ + AMDGPU_IB_POOL_IMMEDIATE, + /* Direct submission to the ring buffer during init and reset. */ + AMDGPU_IB_POOL_DIRECT, + + AMDGPU_IB_POOL_MAX +}; + struct amdgpu_device; struct amdgpu_ring; struct amdgpu_ib; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 476f1f89aaad..2f4d5ca9894f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -44,7 +44,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) /* Number of tests = * (Total GTT - IB pool - writeback page - ring buffers) / test size */ - n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024; + n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) if (adev->rings[i]) n -= adev->rings[i]->ring_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 1296499f0f54..ea0199a8f9c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -333,7 +333,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, num_bytes = num_pages * 8; r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, - AMDGPU_IB_POOL_NORMAL, &job); + AMDGPU_IB_POOL_DELAYED, &job); if (r) return r; @@ -2122,6 +2122,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, struct dma_fence **fence, bool direct_submit, bool vm_needs_flush, bool tmz) { + enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT : + AMDGPU_IB_POOL_DELAYED; struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; @@ -2139,8 +2141,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, - direct_submit ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_NORMAL, &job); + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, &job); if (r) return r; @@ -2229,7 +2230,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, /* for IB padding */ num_dw += 64; - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_NORMAL, &job); + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED, + &job); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 550282d9c1fc..5100ebe8858d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1056,8 +1056,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, goto err; } - r = amdgpu_job_alloc_with_ib(adev, 64, - direct ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_NORMAL, &job); + r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT : + AMDGPU_IB_POOL_DELAYED, &job); if (r) goto err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index d090455282e5..ecaa2d7483b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -447,7 +447,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - AMDGPU_IB_POOL_DIRECT, &job); + AMDGPU_IB_POOL_DIRECT, &job); if (r) return r; @@ -526,7 +526,8 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, int i, r; r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, - direct ? AMDGPU_IB_POOL_DIRECT : AMDGPU_IB_POOL_NORMAL, &job); + direct ? AMDGPU_IB_POOL_DIRECT : + AMDGPU_IB_POOL_DELAYED, &job); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index fbd451f3559a..b96c8d9a1946 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -61,11 +61,12 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, struct dma_resv *resv, enum amdgpu_sync_mode sync_mode) { + enum amdgpu_ib_pool_type pool = p->direct ? AMDGPU_IB_POOL_IMMEDIATE : + AMDGPU_IB_POOL_DELAYED; unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW; int r; - r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, - p->direct ? AMDGPU_IB_POOL_VM : AMDGPU_IB_POOL_NORMAL, &p->job); + r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, &p->job); if (r) return r; @@ -199,6 +200,8 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) { + enum amdgpu_ib_pool_type pool = p->direct ? AMDGPU_IB_POOL_IMMEDIATE : + AMDGPU_IB_POOL_DELAYED; unsigned int i, ndw, nptes; uint64_t *pte; int r; @@ -224,8 +227,8 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW); ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW); - r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, - p->direct ? AMDGPU_IB_POOL_VM : AMDGPU_IB_POOL_NORMAL, &p->job); + r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, + &p->job); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index eff25c72c6c6..edaa50d850a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -372,7 +372,8 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * translation. Avoid this by doing the invalidation from the SDMA * itself. */ - r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_VM, &job); + r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, + &job); if (r) goto error_alloc; From eaad0c3aa978e7ed654e574691b56cc24d2d409a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 1 Apr 2020 12:50:56 +0200 Subject: [PATCH 83/88] drm/amdgpu: rename direct to immediate for VM updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To avoid confusion with direct ring submissions rename bottom of pipe VM table changes to immediate updates. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 6 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 60 ++++++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 10 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 23 ++++---- 5 files changed, 51 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 3a67f6c046d4..fe92dcd94d4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -282,7 +282,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, !dma_fence_is_later(updates, (*id)->flushed_updates)) updates = NULL; - if ((*id)->owner != vm->direct.fence_context || + if ((*id)->owner != vm->immediate.fence_context || job->vm_pd_addr != (*id)->pd_gpu_addr || updates || !(*id)->last_flush || ((*id)->last_flush->context != fence_context && @@ -349,7 +349,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, struct dma_fence *flushed; /* Check all the prerequisites to using this VMID */ - if ((*id)->owner != vm->direct.fence_context) + if ((*id)->owner != vm->immediate.fence_context) continue; if ((*id)->pd_gpu_addr != job->vm_pd_addr) @@ -448,7 +448,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, } id->pd_gpu_addr = job->vm_pd_addr; - id->owner = vm->direct.fence_context; + id->owner = vm->immediate.fence_context; if (job->vm_needs_flush) { dma_fence_put(id->last_flush); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0d8210553059..7324a91da123 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -726,7 +726,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) * @adev: amdgpu_device pointer * @vm: VM to clear BO from * @bo: BO to clear - * @direct: use a direct update + * @immediate: use an immediate update * * Root PD needs to be reserved when calling this. * @@ -736,7 +736,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo *bo, - bool direct) + bool immediate) { struct ttm_operation_ctx ctx = { true, false }; unsigned level = adev->vm_manager.root_level; @@ -795,7 +795,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; - params.direct = direct; + params.immediate = immediate; r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); if (r) @@ -850,11 +850,11 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, * @adev: amdgpu_device pointer * @vm: requesting vm * @level: the page table level - * @direct: use a direct update + * @immediate: use a immediate update * @bp: resulting BO allocation parameters */ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int level, bool direct, + int level, bool immediate, struct amdgpu_bo_param *bp) { memset(bp, 0, sizeof(*bp)); @@ -870,7 +870,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, else if (!vm->root.base.bo || vm->root.base.bo->shadow) bp->flags |= AMDGPU_GEM_CREATE_SHADOW; bp->type = ttm_bo_type_kernel; - bp->no_wait_gpu = direct; + bp->no_wait_gpu = immediate; if (vm->root.base.bo) bp->resv = vm->root.base.bo->tbo.base.resv; } @@ -881,7 +881,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, * @adev: amdgpu_device pointer * @vm: VM to allocate page tables for * @cursor: Which page table to allocate - * @direct: use a direct update + * @immediate: use an immediate update * * Make sure a specific page table or directory is allocated. * @@ -892,7 +892,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_vm_pt_cursor *cursor, - bool direct) + bool immediate) { struct amdgpu_vm_pt *entry = cursor->entry; struct amdgpu_bo_param bp; @@ -913,7 +913,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, if (entry->base.bo) return 0; - amdgpu_vm_bo_param(adev, vm, cursor->level, direct, &bp); + amdgpu_vm_bo_param(adev, vm, cursor->level, immediate, &bp); r = amdgpu_bo_create(adev, &bp, &pt); if (r) @@ -925,7 +925,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, pt->parent = amdgpu_bo_ref(cursor->parent->base.bo); amdgpu_vm_bo_base_init(&entry->base, vm, pt); - r = amdgpu_vm_clear_bo(adev, vm, pt, direct); + r = amdgpu_vm_clear_bo(adev, vm, pt, immediate); if (r) goto error_free_pt; @@ -1276,7 +1276,7 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requested vm - * @direct: submit directly to the paging queue + * @immediate: submit immediately to the paging queue * * Makes sure all directories are up to date. * @@ -1284,7 +1284,7 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, * 0 for success, error for failure. */ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, - struct amdgpu_vm *vm, bool direct) + struct amdgpu_vm *vm, bool immediate) { struct amdgpu_vm_update_params params; int r; @@ -1295,7 +1295,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; - params.direct = direct; + params.immediate = immediate; r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); if (r) @@ -1451,7 +1451,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * address range are actually allocated */ r = amdgpu_vm_alloc_pts(params->adev, params->vm, - &cursor, params->direct); + &cursor, params->immediate); if (r) return r; } @@ -1557,7 +1557,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * * @adev: amdgpu_device pointer * @vm: requested vm - * @direct: direct submission in a page fault + * @immediate: immediate submission in a page fault * @resv: fences we need to sync to * @start: start of mapped range * @last: last mapped entry @@ -1572,7 +1572,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * 0 for success, -EINVAL for failure. */ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct amdgpu_vm *vm, bool direct, + struct amdgpu_vm *vm, bool immediate, struct dma_resv *resv, uint64_t start, uint64_t last, uint64_t flags, uint64_t addr, @@ -1586,7 +1586,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; - params.direct = direct; + params.immediate = immediate; params.pages_addr = pages_addr; /* Implicitly sync to command submissions in the same VM before @@ -1606,8 +1606,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { struct amdgpu_bo *root = vm->root.base.bo; - if (!dma_fence_is_signaled(vm->last_direct)) - amdgpu_bo_fence(root, vm->last_direct, true); + if (!dma_fence_is_signaled(vm->last_immediate)) + amdgpu_bo_fence(root, vm->last_immediate, true); } r = vm->update_funcs->prepare(¶ms, resv, sync_mode); @@ -2582,7 +2582,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return false; /* Don't evict VM page tables while they are updated */ - if (!dma_fence_is_signaled(bo_base->vm->last_direct)) { + if (!dma_fence_is_signaled(bo_base->vm->last_immediate)) { amdgpu_vm_eviction_unlock(bo_base->vm); return false; } @@ -2759,7 +2759,7 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) if (timeout <= 0) return timeout; - return dma_fence_wait_timeout(vm->last_direct, true, timeout); + return dma_fence_wait_timeout(vm->last_immediate, true, timeout); } /** @@ -2795,7 +2795,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, /* create scheduler entities for page table updates */ - r = drm_sched_entity_init(&vm->direct, DRM_SCHED_PRIORITY_NORMAL, + r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL, adev->vm_manager.vm_pte_scheds, adev->vm_manager.vm_pte_num_scheds, NULL); if (r) @@ -2805,7 +2805,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, adev->vm_manager.vm_pte_scheds, adev->vm_manager.vm_pte_num_scheds, NULL); if (r) - goto error_free_direct; + goto error_free_immediate; vm->pte_support_ats = false; vm->is_compute_context = false; @@ -2831,7 +2831,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, else vm->update_funcs = &amdgpu_vm_sdma_funcs; vm->last_update = NULL; - vm->last_direct = dma_fence_get_stub(); + vm->last_immediate = dma_fence_get_stub(); mutex_init(&vm->eviction_lock); vm->evicting = false; @@ -2885,11 +2885,11 @@ error_free_root: vm->root.base.bo = NULL; error_free_delayed: - dma_fence_put(vm->last_direct); + dma_fence_put(vm->last_immediate); drm_sched_entity_destroy(&vm->delayed); -error_free_direct: - drm_sched_entity_destroy(&vm->direct); +error_free_immediate: + drm_sched_entity_destroy(&vm->immediate); return r; } @@ -3086,8 +3086,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->pasid = 0; } - dma_fence_wait(vm->last_direct, false); - dma_fence_put(vm->last_direct); + dma_fence_wait(vm->last_immediate, false); + dma_fence_put(vm->last_immediate); list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { @@ -3104,7 +3104,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_bo_unref(&root); WARN_ON(vm->root.base.bo); - drm_sched_entity_destroy(&vm->direct); + drm_sched_entity_destroy(&vm->immediate); drm_sched_entity_destroy(&vm->delayed); if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index fd61466dc226..2cb04b54ec58 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -206,9 +206,9 @@ struct amdgpu_vm_update_params { struct amdgpu_vm *vm; /** - * @direct: if changes should be made directly + * @immediate: if changes should be made immediately */ - bool direct; + bool immediate; /** * @pages_addr: @@ -274,11 +274,11 @@ struct amdgpu_vm { struct dma_fence *last_update; /* Scheduler entities for page table updates */ - struct drm_sched_entity direct; + struct drm_sched_entity immediate; struct drm_sched_entity delayed; /* Last submission to the scheduler entities */ - struct dma_fence *last_direct; + struct dma_fence *last_immediate; unsigned int pasid; /* dedicated to vm */ @@ -379,7 +379,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, void *param); int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); int amdgpu_vm_update_pdes(struct amdgpu_device *adev, - struct amdgpu_vm *vm, bool direct); + struct amdgpu_vm *vm, bool immediate); int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index e38516304070..39c704a1fb0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -84,7 +84,7 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, pe += (unsigned long)amdgpu_bo_kptr(bo); - trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct); + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate); for (i = 0; i < count; i++) { value = p->pages_addr ? diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index b96c8d9a1946..c78bcebd9378 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -61,8 +61,8 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, struct dma_resv *resv, enum amdgpu_sync_mode sync_mode) { - enum amdgpu_ib_pool_type pool = p->direct ? AMDGPU_IB_POOL_IMMEDIATE : - AMDGPU_IB_POOL_DELAYED; + enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE + : AMDGPU_IB_POOL_DELAYED; unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW; int r; @@ -96,7 +96,7 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, struct amdgpu_ring *ring; int r; - entity = p->direct ? &p->vm->direct : &p->vm->delayed; + entity = p->immediate ? &p->vm->immediate : &p->vm->delayed; ring = container_of(entity->rq->sched, struct amdgpu_ring, sched); WARN_ON(ib->length_dw == 0); @@ -106,15 +106,16 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, if (r) goto error; - if (p->direct) { + if (p->immediate) { tmp = dma_fence_get(f); - swap(p->vm->last_direct, tmp); + swap(p->vm->last_immediate, f); dma_fence_put(tmp); } else { - dma_resv_add_shared_fence(p->vm->root.base.bo->tbo.base.resv, f); + dma_resv_add_shared_fence(p->vm->root.base.bo->tbo.base.resv, + f); } - if (fence && !p->direct) + if (fence && !p->immediate) swap(*fence, f); dma_fence_put(f); return 0; @@ -144,7 +145,7 @@ static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p, src += p->num_dw_left * 4; pe += amdgpu_gmc_sign_extend(bo->tbo.offset); - trace_amdgpu_vm_copy_ptes(pe, src, count, p->direct); + trace_amdgpu_vm_copy_ptes(pe, src, count, p->immediate); amdgpu_vm_copy_pte(p->adev, ib, pe, src, count); } @@ -171,7 +172,7 @@ static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p, struct amdgpu_ib *ib = p->job->ibs; pe += amdgpu_gmc_sign_extend(bo->tbo.offset); - trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->direct); + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags, p->immediate); if (count < 3) { amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags, count, incr); @@ -200,8 +201,8 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) { - enum amdgpu_ib_pool_type pool = p->direct ? AMDGPU_IB_POOL_IMMEDIATE : - AMDGPU_IB_POOL_DELAYED; + enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE + : AMDGPU_IB_POOL_DELAYED; unsigned int i, ndw, nptes; uint64_t *pte; int r; From 9c466bcbda68d73c9ba26e8307b0dff11dd285d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 7 Apr 2020 14:54:23 +0200 Subject: [PATCH 84/88] drm/amdgpu: add new unlocked flag for PTE updates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For HMM support we need the ability to invalidate PTEs from a MM callback where we can't lock the root PD. Add a new flag to better support this instead of assuming that all invalidation updates are unlocked. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 42 ++++++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 9 ++++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 12 +++--- 3 files changed, 37 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 7324a91da123..ed26688f49da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1446,7 +1446,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, uint64_t incr, entry_end, pe_start; struct amdgpu_bo *pt; - if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { + if (!params->unlocked) { /* make sure that the page tables covering the * address range are actually allocated */ @@ -1458,8 +1458,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, shift = amdgpu_vm_level_shift(adev, cursor.level); parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1); - if (adev->asic_type < CHIP_VEGA10 && - (flags & AMDGPU_PTE_VALID)) { + if (params->unlocked) { + /* Unlocked updates are only allowed on the leaves */ + if (amdgpu_vm_pt_descendant(adev, &cursor)) + continue; + } else if (adev->asic_type < CHIP_VEGA10 && + (flags & AMDGPU_PTE_VALID)) { /* No huge page support before GMC v9 */ if (cursor.level != AMDGPU_VM_PTB) { if (!amdgpu_vm_pt_descendant(adev, &cursor)) @@ -1558,6 +1562,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * @adev: amdgpu_device pointer * @vm: requested vm * @immediate: immediate submission in a page fault + * @unlocked: unlocked invalidation during MM callback * @resv: fences we need to sync to * @start: start of mapped range * @last: last mapped entry @@ -1573,7 +1578,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, */ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool immediate, - struct dma_resv *resv, + bool unlocked, struct dma_resv *resv, uint64_t start, uint64_t last, uint64_t flags, uint64_t addr, dma_addr_t *pages_addr, @@ -1603,11 +1608,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, goto error_unlock; } - if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { - struct amdgpu_bo *root = vm->root.base.bo; + if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) { + struct dma_fence *tmp = dma_fence_get_stub(); - if (!dma_fence_is_signaled(vm->last_immediate)) - amdgpu_bo_fence(root, vm->last_immediate, true); + amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true); + swap(vm->last_unlocked, tmp); + dma_fence_put(tmp); } r = vm->update_funcs->prepare(¶ms, resv, sync_mode); @@ -1721,7 +1727,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, } last = min((uint64_t)mapping->last, start + max_entries - 1); - r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv, + r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv, start, last, flags, addr, dma_addr, fence); if (r) @@ -2018,7 +2024,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, mapping->start < AMDGPU_GMC_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv, + r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv, mapping->start, mapping->last, init_pte_value, 0, NULL, &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); @@ -2582,7 +2588,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return false; /* Don't evict VM page tables while they are updated */ - if (!dma_fence_is_signaled(bo_base->vm->last_immediate)) { + if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) { amdgpu_vm_eviction_unlock(bo_base->vm); return false; } @@ -2759,7 +2765,7 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) if (timeout <= 0) return timeout; - return dma_fence_wait_timeout(vm->last_immediate, true, timeout); + return dma_fence_wait_timeout(vm->last_unlocked, true, timeout); } /** @@ -2831,7 +2837,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, else vm->update_funcs = &amdgpu_vm_sdma_funcs; vm->last_update = NULL; - vm->last_immediate = dma_fence_get_stub(); + vm->last_unlocked = dma_fence_get_stub(); mutex_init(&vm->eviction_lock); vm->evicting = false; @@ -2885,7 +2891,7 @@ error_free_root: vm->root.base.bo = NULL; error_free_delayed: - dma_fence_put(vm->last_immediate); + dma_fence_put(vm->last_unlocked); drm_sched_entity_destroy(&vm->delayed); error_free_immediate: @@ -3086,8 +3092,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->pasid = 0; } - dma_fence_wait(vm->last_immediate, false); - dma_fence_put(vm->last_immediate); + dma_fence_wait(vm->last_unlocked, false); + dma_fence_put(vm->last_unlocked); list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { @@ -3337,8 +3343,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid, value = 0; } - r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1, - flags, value, NULL, NULL); + r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr, + addr + 1, flags, value, NULL, NULL); if (r) goto error_unlock; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 2cb04b54ec58..c8e68d7890bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -210,6 +210,11 @@ struct amdgpu_vm_update_params { */ bool immediate; + /** + * @unlocked: true if the root BO is not locked + */ + bool unlocked; + /** * @pages_addr: * @@ -277,8 +282,8 @@ struct amdgpu_vm { struct drm_sched_entity immediate; struct drm_sched_entity delayed; - /* Last submission to the scheduler entities */ - struct dma_fence *last_immediate; + /* Last unlocked submission to the scheduler entities */ + struct dma_fence *last_unlocked; unsigned int pasid; /* dedicated to vm */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index c78bcebd9378..8d9c6feba660 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -92,8 +92,8 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, { struct amdgpu_ib *ib = p->job->ibs; struct drm_sched_entity *entity; - struct dma_fence *f, *tmp; struct amdgpu_ring *ring; + struct dma_fence *f; int r; entity = p->immediate ? &p->vm->immediate : &p->vm->delayed; @@ -106,13 +106,13 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, if (r) goto error; - if (p->immediate) { - tmp = dma_fence_get(f); - swap(p->vm->last_immediate, f); + if (p->unlocked) { + struct dma_fence *tmp = dma_fence_get(f); + + swap(p->vm->last_unlocked, f); dma_fence_put(tmp); } else { - dma_resv_add_shared_fence(p->vm->root.base.bo->tbo.base.resv, - f); + amdgpu_bo_fence(p->vm->root.base.bo, f, true); } if (fence && !p->immediate) From 5654b897b5bf117b8c90b573c5455debf36dde01 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Fri, 10 Apr 2020 11:59:47 -0500 Subject: [PATCH 85/88] drm/amdgpu: pass unlocked flag to params at amdgpu_vm_bo_update_mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass unlocked flag value to amdgpu_vm_update_params.unlocked struct member at amdgpu_vm_bo_update_mapping. Reviewed-by: Christian König Signed-off-by: Alex Sierra Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ed26688f49da..414a0b1c2e5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1593,6 +1593,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, params.vm = vm; params.immediate = immediate; params.pages_addr = pages_addr; + params.unlocked = unlocked; /* Implicitly sync to command submissions in the same VM before * unmapping. Sync to moving fences before mapping. From 5bb4b78be9c67b02a7f138850e9e89825181f555 Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Mon, 6 May 2019 22:11:14 -0500 Subject: [PATCH 86/88] drm/amdkfd: New IOCTL to allocate queue GWS (v2) Add a new kfd ioctl to allocate queue GWS. Queue GWS is released on queue destroy. v2: re-introduce this API with the following fixes squashed in: - drm/amdkfd: fix null pointer dereference on dev - drm/amdkfd: Return proper error code for gws alloc API - drm/amdkfd: Remove GPU ID in GWS queue creation Signed-off-by: Oak Zeng Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 39 +++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 + .../amd/amdkfd/kfd_process_queue_manager.c | 9 +++++ include/uapi/linux/kfd_ioctl.h | 19 ++++++++- 4 files changed, 68 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 0ec5f25adf56..5eb1314f500b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1584,6 +1584,43 @@ copy_from_user_failed: return err; } +static int kfd_ioctl_alloc_queue_gws(struct file *filep, + struct kfd_process *p, void *data) +{ + int retval; + struct kfd_ioctl_alloc_queue_gws_args *args = data; + struct queue *q; + struct kfd_dev *dev; + + if (!hws_gws_support) + return -ENODEV; + + mutex_lock(&p->mutex); + q = pqm_get_user_queue(&p->pqm, args->queue_id); + + if (q) { + dev = q->device; + } else { + retval = -EINVAL; + goto out_unlock; + } + + if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { + retval = -ENODEV; + goto out_unlock; + } + + retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL); + mutex_unlock(&p->mutex); + + args->first_gws = 0; + return retval; + +out_unlock: + mutex_unlock(&p->mutex); + return retval; +} + static int kfd_ioctl_get_dmabuf_info(struct file *filep, struct kfd_process *p, void *data) { @@ -1786,6 +1823,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF, kfd_ioctl_import_dmabuf, 0), + AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS, + kfd_ioctl_alloc_queue_gws, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 4a3049841086..5e7f1fb6761b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -923,6 +923,8 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, void *gws); struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, unsigned int qid); +struct queue *pqm_get_user_queue(struct process_queue_manager *pqm, + unsigned int qid); int pqm_get_wave_state(struct process_queue_manager *pqm, unsigned int qid, void __user *ctl_stack, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 084c35f55d59..eb1635ac8988 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -476,6 +476,15 @@ struct kernel_queue *pqm_get_kernel_queue( return NULL; } +struct queue *pqm_get_user_queue(struct process_queue_manager *pqm, + unsigned int qid) +{ + struct process_queue_node *pqn; + + pqn = get_queue_by_qid(pqm, qid); + return pqn ? pqn->q : NULL; +} + int pqm_get_wave_state(struct process_queue_manager *pqm, unsigned int qid, void __user *ctl_stack, diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 20917c59f39c..4f6676428c5c 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -410,6 +410,20 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { __u32 n_success; /* to/from KFD */ }; +/* Allocate GWS for specific queue + * + * @queue_id: queue's id that GWS is allocated for + * @num_gws: how many GWS to allocate + * @first_gws: index of the first GWS allocated. + * only support contiguous GWS allocation + */ +struct kfd_ioctl_alloc_queue_gws_args { + __u32 queue_id; /* to KFD */ + __u32 num_gws; /* to KFD */ + __u32 first_gws; /* from KFD */ + __u32 pad; +}; + struct kfd_ioctl_get_dmabuf_info_args { __u64 size; /* from KFD */ __u64 metadata_ptr; /* to KFD */ @@ -529,7 +543,10 @@ enum kfd_mmio_remap { #define AMDKFD_IOC_IMPORT_DMABUF \ AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args) +#define AMDKFD_IOC_ALLOC_QUEUE_GWS \ + AMDKFD_IOWR(0x1E, struct kfd_ioctl_alloc_queue_gws_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x1E +#define AMDKFD_COMMAND_END 0x1F #endif From 29633d0e204df1e051d9036e4f493f228ac19fb4 Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Wed, 15 Jan 2020 15:23:44 -0600 Subject: [PATCH 87/88] drm/amdkfd: Enable GWS based on FW Support Rather than only enabling GWS support based on the hws_gws_support modparm, also check whether the GPU's HWS firmware supports GWS. Leave the old modparm in place in case users want to test GWS on GPUs not yet in the support list. v2: fix broken syntax from the first patch. Signed-off-by: Joseph Greathouse Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7 ++-- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 8 +++-- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 40 ++++++++++++++++++----- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +- 5 files changed, 41 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 7aa5ec357391..719a963c31a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -689,13 +689,12 @@ MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (defau /** * DOC: hws_gws_support(bool) - * Whether HWS support gws barriers. Default value: false (not supported) - * This will be replaced with a MEC firmware version check once firmware - * is ready + * Assume that HWS supports GWS barriers regardless of what firmware version + * check says. Default value: false (rely on MEC2 firmware version check). */ bool hws_gws_support; module_param(hws_gws_support, bool, 0444); -MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)"); +MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = rely on FW version check (Default), true = force supported)"); /** * DOC: queue_preemption_timeout_ms (int) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 5eb1314f500b..f8fa03a12add 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1592,9 +1592,6 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep, struct queue *q; struct kfd_dev *dev; - if (!hws_gws_support) - return -ENODEV; - mutex_lock(&p->mutex); q = pqm_get_user_queue(&p->pqm, args->queue_id); @@ -1605,6 +1602,11 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep, goto out_unlock; } + if (!dev->gws) { + retval = -ENODEV; + goto out_unlock; + } + if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { retval = -ENODEV; goto out_unlock; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 05bc6d96ec52..0491ab2b4a9b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -569,6 +569,23 @@ static void kfd_cwsr_init(struct kfd_dev *kfd) } } +static int kfd_gws_init(struct kfd_dev *kfd) +{ + int ret = 0; + + if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) + return 0; + + if (hws_gws_support + || (kfd->device_info->asic_family >= CHIP_VEGA10 + && kfd->device_info->asic_family <= CHIP_RAVEN + && kfd->mec2_fw_version >= 0x1b3)) + ret = amdgpu_amdkfd_alloc_gws(kfd->kgd, + amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws); + + return ret; +} + bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev, const struct kgd2kfd_shared_resources *gpu_resources) @@ -578,6 +595,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->ddev = ddev; kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, KGD_ENGINE_MEC1); + kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, + KGD_ENGINE_MEC2); kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, KGD_ENGINE_SDMA1); kfd->shared_resources = *gpu_resources; @@ -598,13 +617,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, } else kfd->max_proc_per_quantum = hws_max_conc_proc; - /* Allocate global GWS that is shared by all KFD processes */ - if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd, - amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) { - dev_err(kfd_device, "Could not allocate %d gws\n", - amdgpu_amdkfd_get_num_gws(kfd->kgd)); - goto out; - } /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * kfd->device_info->mqd_size_aligned; @@ -662,6 +674,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto device_queue_manager_error; } + /* If supported on this device, allocate global GWS that is shared + * by all KFD processes + */ + if (kfd_gws_init(kfd)) { + dev_err(kfd_device, "Could not allocate %d gws\n", + amdgpu_amdkfd_get_num_gws(kfd->kgd)); + goto gws_error; + } + if (kfd_iommu_device_init(kfd)) { dev_err(kfd_device, "Error initializing iommuv2\n"); goto device_iommu_error; @@ -691,6 +712,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd_topology_add_device_error: kfd_resume_error: device_iommu_error: +gws_error: device_queue_manager_uninit(kfd->dqm); device_queue_manager_error: kfd_interrupt_exit(kfd); @@ -701,7 +723,7 @@ kfd_doorbell_error: kfd_gtt_sa_init_error: amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); alloc_gtt_mem_failure: - if (hws_gws_support) + if (kfd->gws) amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); dev_err(kfd_device, "device %x:%x NOT added due to errors\n", @@ -720,7 +742,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) kfd_doorbell_fini(kfd); kfd_gtt_sa_fini(kfd); amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); - if (hws_gws_support) + if (kfd->gws) amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 5e7f1fb6761b..43b888b311c7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -282,6 +282,7 @@ struct kfd_dev { /* Firmware versions */ uint16_t mec_fw_version; + uint16_t mec2_fw_version; uint16_t sdma_fw_version; /* Maximum process number mapped to HW scheduler */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 1c090824cad7..bc4a22df12d7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1319,7 +1319,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) gpu->device_info->num_xgmi_sdma_engines; dev->node_props.num_sdma_queues_per_engine = gpu->device_info->num_sdma_queues_per_engine; - dev->node_props.num_gws = (hws_gws_support && + dev->node_props.num_gws = (dev->gpu->gws && dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0; dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm); From b8020b0304c8f44e5e29f0b1a04d31e0bf68d26a Mon Sep 17 00:00:00 2001 From: Joseph Greathouse Date: Wed, 18 Sep 2019 14:49:57 -0500 Subject: [PATCH 88/88] drm/amdkfd: Enable over-subscription with >1 GWS queue The current GWS usage model will only allows a single GWS-enabled process to be active on the GPU at once. This ensures that a barrier-using kernel gets a known amount of GPU hardware, to prevent deadlock due to inability to go beyond the GWS barrier. The HWS watches how many GWS entries are assigned to each process, and goes into over-subscription mode when two processes need more than the 64 that are available. The current KFD method for working with this is to allocate all 64 GWS entries to each GWS-capable process. When more than one GWS-enabled process is in the runlist, we must make sure the runlist is in over-subscription mode, so that the HWS gets a chained RUN_LIST packet and continues scheduling kernels. Signed-off-by: Joseph Greathouse Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 1 + .../drm/amd/amdkfd/kfd_device_queue_manager.c | 43 +++++++++++++++++-- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 1 + .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 6 ++- .../drm/amd/amdkfd/kfd_packet_manager_v9.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 13 ++++++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 + 8 files changed, 62 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f8fa03a12add..ff47b1f69b68 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -215,6 +215,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, } q_properties->is_interop = false; + q_properties->is_gws = false; q_properties->queue_percent = args->queue_percentage; q_properties->priority = args->queue_priority; q_properties->queue_address = args->ring_base_address; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 77ea0f0cb163..ae9547791813 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -505,8 +505,13 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, deallocate_vmid(dqm, qpd, q); } qpd->queue_count--; - if (q->properties.is_active) + if (q->properties.is_active) { decrement_queue_count(dqm, q->properties.type); + if (q->properties.is_gws) { + dqm->gws_queue_count--; + qpd->mapped_gws_queue = false; + } + } return retval; } @@ -583,6 +588,20 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) else if (!q->properties.is_active && prev_active) decrement_queue_count(dqm, q->properties.type); + if (q->gws && !q->properties.is_gws) { + if (q->properties.is_active) { + dqm->gws_queue_count++; + pdd->qpd.mapped_gws_queue = true; + } + q->properties.is_gws = true; + } else if (!q->gws && q->properties.is_gws) { + if (q->properties.is_active) { + dqm->gws_queue_count--; + pdd->qpd.mapped_gws_queue = false; + } + q->properties.is_gws = false; + } + if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) retval = map_queues_cpsch(dqm); else if (q->properties.is_active && @@ -631,6 +650,10 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, q->properties.type)]; q->properties.is_active = false; decrement_queue_count(dqm, q->properties.type); + if (q->properties.is_gws) { + dqm->gws_queue_count--; + qpd->mapped_gws_queue = false; + } if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) continue; @@ -744,6 +767,10 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, q->properties.type)]; q->properties.is_active = true; increment_queue_count(dqm, q->properties.type); + if (q->properties.is_gws) { + dqm->gws_queue_count++; + qpd->mapped_gws_queue = true; + } if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) continue; @@ -913,6 +940,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) INIT_LIST_HEAD(&dqm->queues); dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; dqm->active_cp_queue_count = 0; + dqm->gws_queue_count = 0; for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { int pipe_offset = pipe * get_queues_per_pipe(dqm); @@ -1082,7 +1110,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) INIT_LIST_HEAD(&dqm->queues); dqm->active_queue_count = dqm->processes_count = 0; dqm->active_cp_queue_count = 0; - + dqm->gws_queue_count = 0; dqm->active_runlist = false; dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm)); dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm)); @@ -1432,6 +1460,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); if (retval == -ETIME) qpd->reset_wavefronts = true; + if (q->properties.is_gws) { + dqm->gws_queue_count--; + qpd->mapped_gws_queue = false; + } } /* @@ -1650,8 +1682,13 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) deallocate_sdma_queue(dqm, q); - if (q->properties.is_active) + if (q->properties.is_active) { decrement_queue_count(dqm, q->properties.type); + if (q->properties.is_gws) { + dqm->gws_queue_count--; + qpd->mapped_gws_queue = false; + } + } dqm->total_queue_count--; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 50d919f814e9..4afa015c69b1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -182,6 +182,7 @@ struct device_queue_manager { unsigned int processes_count; unsigned int active_queue_count; unsigned int active_cp_queue_count; + unsigned int gws_queue_count; unsigned int total_queue_count; unsigned int next_pipe_to_allocate; unsigned int *allocated_queues; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index bae706462f96..a2b77d1df854 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -126,6 +126,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev, prop.queue_size = queue_size; prop.is_interop = false; + prop.is_gws = false; prop.priority = 1; prop.queue_percent = 100; prop.type = type; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index efdb75e7677b..685ca82d42fe 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -41,7 +41,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int *rlib_size, bool *over_subscription) { - unsigned int process_count, queue_count, compute_queue_count; + unsigned int process_count, queue_count, compute_queue_count, gws_queue_count; unsigned int map_queue_size; unsigned int max_proc_per_quantum = 1; struct kfd_dev *dev = pm->dqm->dev; @@ -49,6 +49,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm, process_count = pm->dqm->processes_count; queue_count = pm->dqm->active_queue_count; compute_queue_count = pm->dqm->active_cp_queue_count; + gws_queue_count = pm->dqm->gws_queue_count; /* check if there is over subscription * Note: the arbitration between the number of VMIDs and @@ -61,7 +62,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm, max_proc_per_quantum = dev->max_proc_per_quantum; if ((process_count > max_proc_per_quantum) || - compute_queue_count > get_cp_queues_num(pm->dqm)) { + compute_queue_count > get_cp_queues_num(pm->dqm) || + gws_queue_count > 1) { *over_subscription = true; pr_debug("Over subscribed runlist\n"); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c index 2de01009f1b6..bdca9dc5f118 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c @@ -43,7 +43,7 @@ static int pm_map_process_v9(struct packet_manager *pm, packet->bitfields2.pasid = qpd->pqm->process->pasid; packet->bitfields14.gds_size = qpd->gds_size & 0x3F; packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF; - packet->bitfields14.num_gws = qpd->num_gws; + packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0; packet->bitfields14.num_oac = qpd->num_oac; packet->bitfields14.sdma_enable = 1; packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 43b888b311c7..d48b33449267 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -411,6 +411,10 @@ enum KFD_QUEUE_PRIORITY { * @is_active: Defines if the queue is active or not. @is_active and * @is_evicted are protected by the DQM lock. * + * @is_gws: Defines if the queue has been updated to be GWS-capable or not. + * @is_gws should be protected by the DQM lock, since changing it can yield the + * possibility of updating DQM state on number of GWS queues. + * * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid * of the queue. * @@ -433,6 +437,7 @@ struct queue_properties { bool is_interop; bool is_evicted; bool is_active; + bool is_gws; /* Not relevant for user mode queues in cp scheduling */ unsigned int vmid; /* Relevant only for sdma queues*/ @@ -564,6 +569,14 @@ struct qcm_process_device { */ bool reset_wavefronts; + /* This flag tells us if this process has a GWS-capable + * queue that will be mapped into the runlist. It's + * possible to request a GWS BO, but not have the queue + * currently mapped, and this changes how the MAP_PROCESS + * PM4 packet is configured. + */ + bool mapped_gws_queue; + /* * All the memory management data should be here too */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index fe0cd49d4ea7..82b4c5a9382a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -858,6 +858,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, pdd->qpd.dqm = dev->dqm; pdd->qpd.pqm = &p->pqm; pdd->qpd.evicted = 0; + pdd->qpd.mapped_gws_queue = false; pdd->process = p; pdd->bound = PDD_UNBOUND; pdd->already_dequeued = false;