drm/amdgpu: change reset lock from mutex to rw_semaphore

Clients don't need the reset lock for synchronization when no GPU recovery is in progress. v2: change to return the return value of down_read_killable. v3: if GPU recovery has already begun, the VF ignores the FLR notification. Reviewed-by: Monk Liu <monk.liu@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Dennis Li <Dennis.Li@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
2025-07-23 07:12:09 +00:00 · 2020-08-20 10:06:32 +08:00 · 2020-08-20 10:06:32 +08:00 · 6049db43d6
commit 6049db43d6
parent 66b8a9c0a7
5 changed files with 32 additions and 35 deletions
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -238,19 +238,15 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
 	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
 	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
 	int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
-	int locked;

 	/* block amdgpu_gpu_recover till msg FLR COMPLETE received,
 	 * otherwise the mailbox msg will be ruined/reseted by
 	 * the VF FLR.
-	 *
-	 * we can unlock the lock_reset to allow "amdgpu_job_timedout"
-	 * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
-	 * which means host side had finished this VF's FLR.
 	 */
-	locked = mutex_trylock(&adev->lock_reset);
-	if (locked)
-		atomic_set(&adev->in_gpu_reset, 1);
+	if (!down_read_trylock(&adev->reset_sem))
+		return;
+
+	atomic_set(&adev->in_gpu_reset, 1);

 	do {
 		if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
@@ -261,10 +257,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
 	} while (timeout > 1);

 flr_done:
-	if (locked) {
-		atomic_set(&adev->in_gpu_reset, 0);
-		mutex_unlock(&adev->lock_reset);
-	}
+	atomic_set(&adev->in_gpu_reset, 0);
+	up_read(&adev->reset_sem);

 	/* Trigger recovery for world switch failure if no TDR */
 	if (amdgpu_device_should_recover_gpu(adev)