mirror of
https://github.com/Fishwaldo/linux-bl808.git
synced 2025-06-17 20:25:19 +00:00
[PATCH] md: Improve locking around error handling
The error handling routines don't use proper locking, and so two concurrent errors could trigger a problem. So: - use test-and-set and test-and-clear to synchonise the In_sync bits with the ->degraded count - use the spinlock to protect updates to the degraded count (could use an atomic_t but that would be a bigger change in code, and isn't really justified) - remove un-necessary locking in raid5 Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
11ce99e625
commit
c04be0aa82
3 changed files with 31 additions and 17 deletions
|
@ -959,14 +959,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
* normal single drive
|
* normal single drive
|
||||||
*/
|
*/
|
||||||
return;
|
return;
|
||||||
if (test_bit(In_sync, &rdev->flags)) {
|
if (test_and_clear_bit(In_sync, &rdev->flags)) {
|
||||||
|
unsigned long flags;
|
||||||
|
spin_lock_irqsave(&conf->device_lock, flags);
|
||||||
mddev->degraded++;
|
mddev->degraded++;
|
||||||
|
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||||
/*
|
/*
|
||||||
* if recovery is running, make sure it aborts.
|
* if recovery is running, make sure it aborts.
|
||||||
*/
|
*/
|
||||||
set_bit(MD_RECOVERY_ERR, &mddev->recovery);
|
set_bit(MD_RECOVERY_ERR, &mddev->recovery);
|
||||||
}
|
}
|
||||||
clear_bit(In_sync, &rdev->flags);
|
|
||||||
set_bit(Faulty, &rdev->flags);
|
set_bit(Faulty, &rdev->flags);
|
||||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||||
printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n"
|
printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n"
|
||||||
|
@ -1022,9 +1024,11 @@ static int raid1_spare_active(mddev_t *mddev)
|
||||||
mdk_rdev_t *rdev = conf->mirrors[i].rdev;
|
mdk_rdev_t *rdev = conf->mirrors[i].rdev;
|
||||||
if (rdev
|
if (rdev
|
||||||
&& !test_bit(Faulty, &rdev->flags)
|
&& !test_bit(Faulty, &rdev->flags)
|
||||||
&& !test_bit(In_sync, &rdev->flags)) {
|
&& !test_and_set_bit(In_sync, &rdev->flags)) {
|
||||||
|
unsigned long flags;
|
||||||
|
spin_lock_irqsave(&conf->device_lock, flags);
|
||||||
mddev->degraded--;
|
mddev->degraded--;
|
||||||
set_bit(In_sync, &rdev->flags);
|
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2048,7 +2052,7 @@ static int raid1_reshape(mddev_t *mddev)
|
||||||
mirror_info_t *newmirrors;
|
mirror_info_t *newmirrors;
|
||||||
conf_t *conf = mddev_to_conf(mddev);
|
conf_t *conf = mddev_to_conf(mddev);
|
||||||
int cnt, raid_disks;
|
int cnt, raid_disks;
|
||||||
|
unsigned long flags;
|
||||||
int d, d2;
|
int d, d2;
|
||||||
|
|
||||||
/* Cannot change chunk_size, layout, or level */
|
/* Cannot change chunk_size, layout, or level */
|
||||||
|
@ -2107,7 +2111,9 @@ static int raid1_reshape(mddev_t *mddev)
|
||||||
kfree(conf->poolinfo);
|
kfree(conf->poolinfo);
|
||||||
conf->poolinfo = newpoolinfo;
|
conf->poolinfo = newpoolinfo;
|
||||||
|
|
||||||
|
spin_lock_irqsave(&conf->device_lock, flags);
|
||||||
mddev->degraded += (raid_disks - conf->raid_disks);
|
mddev->degraded += (raid_disks - conf->raid_disks);
|
||||||
|
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||||
conf->raid_disks = mddev->raid_disks = raid_disks;
|
conf->raid_disks = mddev->raid_disks = raid_disks;
|
||||||
mddev->delta_disks = 0;
|
mddev->delta_disks = 0;
|
||||||
|
|
||||||
|
|
|
@ -950,14 +950,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
* really dead" tests...
|
* really dead" tests...
|
||||||
*/
|
*/
|
||||||
return;
|
return;
|
||||||
if (test_bit(In_sync, &rdev->flags)) {
|
if (test_and_clear_bit(In_sync, &rdev->flags)) {
|
||||||
|
unsigned long flags;
|
||||||
|
spin_lock_irqsave(&conf->device_lock, flags);
|
||||||
mddev->degraded++;
|
mddev->degraded++;
|
||||||
|
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||||
/*
|
/*
|
||||||
* if recovery is running, make sure it aborts.
|
* if recovery is running, make sure it aborts.
|
||||||
*/
|
*/
|
||||||
set_bit(MD_RECOVERY_ERR, &mddev->recovery);
|
set_bit(MD_RECOVERY_ERR, &mddev->recovery);
|
||||||
}
|
}
|
||||||
clear_bit(In_sync, &rdev->flags);
|
|
||||||
set_bit(Faulty, &rdev->flags);
|
set_bit(Faulty, &rdev->flags);
|
||||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||||
printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n"
|
printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n"
|
||||||
|
@ -1033,9 +1035,11 @@ static int raid10_spare_active(mddev_t *mddev)
|
||||||
tmp = conf->mirrors + i;
|
tmp = conf->mirrors + i;
|
||||||
if (tmp->rdev
|
if (tmp->rdev
|
||||||
&& !test_bit(Faulty, &tmp->rdev->flags)
|
&& !test_bit(Faulty, &tmp->rdev->flags)
|
||||||
&& !test_bit(In_sync, &tmp->rdev->flags)) {
|
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
|
||||||
|
unsigned long flags;
|
||||||
|
spin_lock_irqsave(&conf->device_lock, flags);
|
||||||
mddev->degraded--;
|
mddev->degraded--;
|
||||||
set_bit(In_sync, &tmp->rdev->flags);
|
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -636,7 +636,6 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,
|
||||||
struct stripe_head *sh = bi->bi_private;
|
struct stripe_head *sh = bi->bi_private;
|
||||||
raid5_conf_t *conf = sh->raid_conf;
|
raid5_conf_t *conf = sh->raid_conf;
|
||||||
int disks = sh->disks, i;
|
int disks = sh->disks, i;
|
||||||
unsigned long flags;
|
|
||||||
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
|
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
|
||||||
|
|
||||||
if (bi->bi_size)
|
if (bi->bi_size)
|
||||||
|
@ -654,7 +653,6 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
spin_lock_irqsave(&conf->device_lock, flags);
|
|
||||||
if (!uptodate)
|
if (!uptodate)
|
||||||
md_error(conf->mddev, conf->disks[i].rdev);
|
md_error(conf->mddev, conf->disks[i].rdev);
|
||||||
|
|
||||||
|
@ -662,8 +660,7 @@ static int raid5_end_write_request (struct bio *bi, unsigned int bytes_done,
|
||||||
|
|
||||||
clear_bit(R5_LOCKED, &sh->dev[i].flags);
|
clear_bit(R5_LOCKED, &sh->dev[i].flags);
|
||||||
set_bit(STRIPE_HANDLE, &sh->state);
|
set_bit(STRIPE_HANDLE, &sh->state);
|
||||||
__release_stripe(conf, sh);
|
release_stripe(sh);
|
||||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -697,9 +694,11 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||||
|
|
||||||
if (!test_bit(Faulty, &rdev->flags)) {
|
if (!test_bit(Faulty, &rdev->flags)) {
|
||||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||||
if (test_bit(In_sync, &rdev->flags)) {
|
if (test_and_clear_bit(In_sync, &rdev->flags)) {
|
||||||
|
unsigned long flags;
|
||||||
|
spin_lock_irqsave(&conf->device_lock, flags);
|
||||||
mddev->degraded++;
|
mddev->degraded++;
|
||||||
clear_bit(In_sync, &rdev->flags);
|
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||||
/*
|
/*
|
||||||
* if recovery was running, make sure it aborts.
|
* if recovery was running, make sure it aborts.
|
||||||
*/
|
*/
|
||||||
|
@ -3418,9 +3417,11 @@ static int raid5_spare_active(mddev_t *mddev)
|
||||||
tmp = conf->disks + i;
|
tmp = conf->disks + i;
|
||||||
if (tmp->rdev
|
if (tmp->rdev
|
||||||
&& !test_bit(Faulty, &tmp->rdev->flags)
|
&& !test_bit(Faulty, &tmp->rdev->flags)
|
||||||
&& !test_bit(In_sync, &tmp->rdev->flags)) {
|
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
|
||||||
|
unsigned long flags;
|
||||||
|
spin_lock_irqsave(&conf->device_lock, flags);
|
||||||
mddev->degraded--;
|
mddev->degraded--;
|
||||||
set_bit(In_sync, &tmp->rdev->flags);
|
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
print_raid5_conf(conf);
|
print_raid5_conf(conf);
|
||||||
|
@ -3556,6 +3557,7 @@ static int raid5_start_reshape(mddev_t *mddev)
|
||||||
struct list_head *rtmp;
|
struct list_head *rtmp;
|
||||||
int spares = 0;
|
int spares = 0;
|
||||||
int added_devices = 0;
|
int added_devices = 0;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
if (mddev->degraded ||
|
if (mddev->degraded ||
|
||||||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||||
|
@ -3597,7 +3599,9 @@ static int raid5_start_reshape(mddev_t *mddev)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spin_lock_irqsave(&conf->device_lock, flags);
|
||||||
mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) - added_devices;
|
mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) - added_devices;
|
||||||
|
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||||
mddev->raid_disks = conf->raid_disks;
|
mddev->raid_disks = conf->raid_disks;
|
||||||
mddev->reshape_position = 0;
|
mddev->reshape_position = 0;
|
||||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue