DLM: retry rcom when dlm_wait_function times out.

If a node sends a DLM_RCOM_STATUS command and an error occurs on the
receiving side, the DLM_RCOM_STATUS_REPLY response may not be returned.
Retransmit the DLM_RCOM_STATUS command when dlm_wait_function returns
-ETIMEDOUT so that we do not wait indefinitely for a response. The same
retry is applied to the DLM_RCOM_NAMES command.

Signed-off-by: Tadashi Miyauchi <miyauchi@toshiba-tops.co.jp>
Signed-off-by: Tsutomu Owa <tsutomu.owa@toshiba.co.jp>
Signed-off-by: David Teigland <teigland@redhat.com>
This commit is contained in:
tsutomu.owa@toshiba.co.jp 2017-09-12 08:56:08 +00:00 committed by David Teigland
parent c7355827b2
commit 5966121241
2 changed files with 10 additions and 0 deletions

View file

@ -155,6 +155,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
goto out;
}
retry:
error = create_rcom(ls, nodeid, DLM_RCOM_STATUS,
sizeof(struct rcom_status), &rc, &mh);
if (error)
@ -169,6 +170,8 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
error = dlm_wait_function(ls, &rcom_response);
disallow_sync_reply(ls);
if (error == -ETIMEDOUT)
goto retry;
if (error)
goto out;
@ -276,6 +279,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
ls->ls_recover_nodeid = nodeid;
retry:
error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh);
if (error)
goto out;
@ -288,6 +292,8 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
error = dlm_wait_function(ls, &rcom_response);
disallow_sync_reply(ls);
if (error == -ETIMEDOUT)
goto retry;
out:
return error;
}