drm/amdgpu: skip bad page reservation once issuing from eeprom write

Once the ras recovery is issued from eeprom write itself,
bad page reservation should be ignored, otherwise, recursive
calling of writting to eeprom would happen.

Signed-off-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Guchun Chen 2020-07-23 15:50:42 +08:00 committed by Alex Deucher
parent b82e65a935
commit 35cd2cdadb
2 changed files with 11 additions and 5 deletions

View file

@ -62,8 +62,6 @@ const char *ras_block_string[] = {
#define ras_err_str(i) (ras_error_string[ffs(i)])
#define ras_block_str(i) (ras_block_string[i])
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS 1
#define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2
#define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
/* inject address is 52 bits */

View file

@ -31,6 +31,10 @@
#include "ta_ras_if.h"
#include "amdgpu_ras_eeprom.h"
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0)
#define AMDGPU_RAS_FLAG_INIT_NEED_RESET (0x1 << 1)
#define AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV (0x1 << 2)
enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__UMC = 0,
AMDGPU_RAS_BLOCK__SDMA,
@ -503,10 +507,14 @@ static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
{
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
/* save bad page to eeprom before gpu reset,
* i2c may be unstable in gpu reset
/*
* Save bad page to eeprom before gpu reset, i2c may be unstable
* in gpu reset.
*
* Also, exclude the case when ras recovery issuer is
* eeprom page write itself.
*/
if (in_task())
if (!(ras->flags & AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV) && in_task())
amdgpu_ras_reserve_bad_pages(adev);
if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)