Btrfs: add device counters for detected IO and checksum errors

The goal is to detect when drives start to get an increased error rate,
when drives should be replaced soon. Therefore statistic counters are
added that count IO errors (read, write and flush). Additionally, the
software detected errors like checksum errors and corrupted blocks are
counted.

Signed-off-by: Stefan Behrens <sbehrens@giantdisaster.de>
This commit is contained in:
Stefan Behrens 2012-05-25 16:06:08 +02:00 committed by Josef Bacik
parent d07eb91170
commit 442a4f6308
6 changed files with 230 additions and 24 deletions

View file

@ -2557,18 +2557,19 @@ recovery_tree_root:
static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
{
char b[BDEVNAME_SIZE];
if (uptodate) {
set_buffer_uptodate(bh);
} else {
struct btrfs_device *device = (struct btrfs_device *)
bh->b_private;
printk_ratelimited(KERN_WARNING "lost page write due to "
"I/O error on %s\n",
bdevname(bh->b_bdev, b));
"I/O error on %s\n", device->name);
/* note, we dont' set_buffer_write_io_error because we have
* our own ways of dealing with the IO errors
*/
clear_buffer_uptodate(bh);
btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS);
}
unlock_buffer(bh);
put_bh(bh);
@ -2683,6 +2684,7 @@ static int write_dev_supers(struct btrfs_device *device,
set_buffer_uptodate(bh);
lock_buffer(bh);
bh->b_end_io = btrfs_end_buffer_write_sync;
bh->b_private = device;
}
/*
@ -2741,6 +2743,9 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
}
if (!bio_flagged(bio, BIO_UPTODATE)) {
ret = -EIO;
if (!bio_flagged(bio, BIO_EOPNOTSUPP))
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_FLUSH_ERRS);
}
/* drop the reference from the wait == 0 run */