mirror of
https://github.com/Fishwaldo/linux-bl808.git
synced 2025-06-17 20:25:19 +00:00
ocfs2: handle network errors during recovery
Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com> Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
This commit is contained in:
parent
c3187ce5e3
commit
29c0fa0f56
1 changed files with 36 additions and 17 deletions
|
@ -757,6 +757,7 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
|
||||||
struct list_head *iter;
|
struct list_head *iter;
|
||||||
int ret;
|
int ret;
|
||||||
u8 dead_node, reco_master;
|
u8 dead_node, reco_master;
|
||||||
|
int skip_all_done = 0;
|
||||||
|
|
||||||
dlm = item->dlm;
|
dlm = item->dlm;
|
||||||
dead_node = item->u.ral.dead_node;
|
dead_node = item->u.ral.dead_node;
|
||||||
|
@ -793,12 +794,18 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
|
||||||
dlm_move_reco_locks_to_list(dlm, &resources, dead_node);
|
dlm_move_reco_locks_to_list(dlm, &resources, dead_node);
|
||||||
|
|
||||||
/* now we can begin blasting lockreses without the dlm lock */
|
/* now we can begin blasting lockreses without the dlm lock */
|
||||||
|
|
||||||
|
/* any errors returned will be due to the new_master dying,
|
||||||
|
* the dlm_reco_thread should detect this */
|
||||||
list_for_each(iter, &resources) {
|
list_for_each(iter, &resources) {
|
||||||
res = list_entry (iter, struct dlm_lock_resource, recovering);
|
res = list_entry (iter, struct dlm_lock_resource, recovering);
|
||||||
ret = dlm_send_one_lockres(dlm, res, mres, reco_master,
|
ret = dlm_send_one_lockres(dlm, res, mres, reco_master,
|
||||||
DLM_MRES_RECOVERY);
|
DLM_MRES_RECOVERY);
|
||||||
if (ret < 0)
|
if (ret < 0) {
|
||||||
mlog_errno(ret);
|
mlog_errno(ret);
|
||||||
|
skip_all_done = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* move the resources back to the list */
|
/* move the resources back to the list */
|
||||||
|
@ -806,9 +813,12 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
|
||||||
list_splice_init(&resources, &dlm->reco.resources);
|
list_splice_init(&resources, &dlm->reco.resources);
|
||||||
spin_unlock(&dlm->spinlock);
|
spin_unlock(&dlm->spinlock);
|
||||||
|
|
||||||
ret = dlm_send_all_done_msg(dlm, dead_node, reco_master);
|
if (!skip_all_done) {
|
||||||
if (ret < 0)
|
ret = dlm_send_all_done_msg(dlm, dead_node, reco_master);
|
||||||
mlog_errno(ret);
|
if (ret < 0) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
free_page((unsigned long)data);
|
free_page((unsigned long)data);
|
||||||
}
|
}
|
||||||
|
@ -828,8 +838,14 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to)
|
||||||
|
|
||||||
ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
|
ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
|
||||||
sizeof(done_msg), send_to, &tmpret);
|
sizeof(done_msg), send_to, &tmpret);
|
||||||
/* negative status is ignored by the caller */
|
if (ret < 0) {
|
||||||
if (ret >= 0)
|
if (!dlm_is_host_down(ret)) {
|
||||||
|
mlog_errno(ret);
|
||||||
|
mlog(ML_ERROR, "%s: unknown error sending data-done "
|
||||||
|
"to %u\n", dlm->name, send_to);
|
||||||
|
BUG();
|
||||||
|
}
|
||||||
|
} else
|
||||||
ret = tmpret;
|
ret = tmpret;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -1109,22 +1125,25 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
|
||||||
* we must send it immediately. */
|
* we must send it immediately. */
|
||||||
ret = dlm_send_mig_lockres_msg(dlm, mres, send_to,
|
ret = dlm_send_mig_lockres_msg(dlm, mres, send_to,
|
||||||
res, total_locks);
|
res, total_locks);
|
||||||
if (ret < 0) {
|
if (ret < 0)
|
||||||
// TODO
|
goto error;
|
||||||
mlog(ML_ERROR, "dlm_send_mig_lockres_msg "
|
|
||||||
"returned %d, TODO\n", ret);
|
|
||||||
BUG();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* flush any remaining locks */
|
/* flush any remaining locks */
|
||||||
ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks);
|
ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks);
|
||||||
if (ret < 0) {
|
if (ret < 0)
|
||||||
// TODO
|
goto error;
|
||||||
mlog(ML_ERROR, "dlm_send_mig_lockres_msg returned %d, "
|
return ret;
|
||||||
"TODO\n", ret);
|
|
||||||
|
error:
|
||||||
|
mlog(ML_ERROR, "%s: dlm_send_mig_lockres_msg returned %d\n",
|
||||||
|
dlm->name, ret);
|
||||||
|
if (!dlm_is_host_down(ret))
|
||||||
BUG();
|
BUG();
|
||||||
}
|
mlog(0, "%s: node %u went down while sending %s "
|
||||||
|
"lockres %.*s\n", dlm->name, send_to,
|
||||||
|
flags & DLM_MRES_RECOVERY ? "recovery" : "migration",
|
||||||
|
res->lockname.len, res->lockname.name);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue