mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-07-23 23:32:14 +00:00
- DM core fixes to ensure that bio submission follows a depth-first tree
walk; this is critical to allow forward progress without the need to use the bioset's BIOSET_NEED_RESCUER. - Remove DM core's BIOSET_NEED_RESCUER based dm_offload infrastructure. - DM core cleanups and improvements to make bio-based DM more efficient (e.g. reduced memory footprint as well as leveraging per-bio-data more). - Introduce new bio-based mode (DM_TYPE_NVME_BIO_BASED) that leverages the more direct IO submission path in the block layer; this mode is used by DM multipath and also optimizes targets like DM thin-pool that stack directly on NVMe data device. - DM multipath improvements to factor out legacy SCSI-only (e.g. scsi_dh) code paths to allow for more optimized support for NVMe multipath. - A fix for DM multipath path selectors (service-time and queue-length) to select paths in a more balanced way; largely academic but doesn't hurt. - Numerous DM raid target fixes and improvements. - Add a new DM "unstriped" target that enables Intel to work around firmware limitations in some NVMe drives that are striped internally (this target also works when stacked above the DM "striped" target). - Various Documentation fixes and improvements. - Misc. cleanups and fixes across various DM infrastructure and targets (e.g. bufio, flakey, log-writes, snapshot).
-----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQEcBAABAgAGBQJacgwPAAoJEMUj8QotnQNaEw0H/0XRTcg8/lRuGl46kdeI3PgR ZxUy4XgUrCLiACWO5yCU/nKipB32+3xTlTDTBcjmaBfX8HolH147Pasb1KdHqLVC dOWLMpjlFztb5fnuOMitJA05qQAbgRlZ52QdVk/FDo9yWicgWjQZduh8aYX53pHw 6XOYWzSFAXQcaduPdz6TLiPw479xBwIpXxQbrO09f4qt3Ub4bqknEhzFXc+6M7zl ejmW/bG2Qg6WmsfAuaAhFTV0LpTPSEzvaq9TfR7yqFU3DvDIAi7Yh8eQinIUDo4u txpOGoESRAMPAMKH0/UJdr/u7jTsfgJox4QEavWfnViPvkouah5KdjVOL1veZ5U= =R3dN -----END PGP SIGNATURE----- Merge tag 'for-4.16/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm Pull device mapper updates from Mike Snitzer: - DM core fixes to ensure that bio submission follows a depth-first tree walk; this is critical to allow forward progress without the need to use the bioset's BIOSET_NEED_RESCUER. - Remove DM core's BIOSET_NEED_RESCUER based dm_offload infrastructure. - DM core cleanups and improvements to make bio-based DM more efficient (e.g. reduced memory footprint as well leveraging per-bio-data more). - Introduce new bio-based mode (DM_TYPE_NVME_BIO_BASED) that leverages the more direct IO submission path in the block layer; this mode is used by DM multipath and also optimizes targets like DM thin-pool that stack directly on NVMe data device. - DM multipath improvements to factor out legacy SCSI-only (e.g. scsi_dh) code paths to allow for more optimized support for NVMe multipath. - A fix for DM multipath path selectors (service-time and queue-length) to select paths in a more balanced way; largely academic but doesn't hurt. - Numerous DM raid target fixes and improvements. - Add a new DM "unstriped" target that enables Intel to workaround firmware limitations in some NVMe drives that are striped internally (this target also works when stacked above the DM "striped" target). - Various Documentation fixes and improvements. - Misc cleanups and fixes across various DM infrastructure and targets (e.g. bufio, flakey, log-writes, snapshot). 
* tag 'for-4.16/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (69 commits) dm cache: Documentation: update default migration_throttling value dm mpath selector: more evenly distribute ties dm unstripe: fix target length versus number of stripes size check dm thin: fix trailing semicolon in __remap_and_issue_shared_cell dm table: fix NVMe bio-based dm_table_determine_type() validation dm: various cleanups to md->queue initialization code dm mpath: delay the retry of a request if the target responded as busy dm mpath: return DM_MAPIO_DELAY_REQUEUE if QUEUE_IO or PG_INIT_REQUIRED dm mpath: return DM_MAPIO_REQUEUE on blk-mq rq allocation failure dm log writes: fix max length used for kstrndup dm: backfill missing calls to mutex_destroy() dm snapshot: use mutex instead of rw_semaphore dm flakey: check for null arg_name in parse_features() dm thin: extend thinpool status format string with omitted fields dm thin: fixes in thin-provisioning.txt dm thin: document representation of <highest mapped sector> when there is none dm thin: fix documentation relative to low water mark threshold dm cache: be consistent in specifying sectors and SI units in cache.txt dm cache: delete obsoleted paragraph in cache.txt dm cache: fix grammar in cache-policies.txt ...
This commit is contained in:
commit
0be600a5ad
31 changed files with 1410 additions and 672 deletions
|
@ -29,6 +29,9 @@
|
|||
*/
|
||||
#define MIN_RAID456_JOURNAL_SPACE (4*2048)
|
||||
|
||||
/* Global list of all raid sets */
|
||||
static LIST_HEAD(raid_sets);
|
||||
|
||||
static bool devices_handle_discard_safely = false;
|
||||
|
||||
/*
|
||||
|
@ -105,8 +108,6 @@ struct raid_dev {
|
|||
#define CTR_FLAG_JOURNAL_DEV (1 << __CTR_FLAG_JOURNAL_DEV)
|
||||
#define CTR_FLAG_JOURNAL_MODE (1 << __CTR_FLAG_JOURNAL_MODE)
|
||||
|
||||
#define RESUME_STAY_FROZEN_FLAGS (CTR_FLAG_DELTA_DISKS | CTR_FLAG_DATA_OFFSET)
|
||||
|
||||
/*
|
||||
* Definitions of various constructor flags to
|
||||
* be used in checks of valid / invalid flags
|
||||
|
@ -209,6 +210,8 @@ struct raid_dev {
|
|||
#define RT_FLAG_UPDATE_SBS 3
|
||||
#define RT_FLAG_RESHAPE_RS 4
|
||||
#define RT_FLAG_RS_SUSPENDED 5
|
||||
#define RT_FLAG_RS_IN_SYNC 6
|
||||
#define RT_FLAG_RS_RESYNCING 7
|
||||
|
||||
/* Array elements of 64 bit needed for rebuild/failed disk bits */
|
||||
#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
|
||||
|
@ -224,8 +227,8 @@ struct rs_layout {
|
|||
|
||||
struct raid_set {
|
||||
struct dm_target *ti;
|
||||
struct list_head list;
|
||||
|
||||
uint32_t bitmap_loaded;
|
||||
uint32_t stripe_cache_entries;
|
||||
unsigned long ctr_flags;
|
||||
unsigned long runtime_flags;
|
||||
|
@ -270,6 +273,19 @@ static void rs_config_restore(struct raid_set *rs, struct rs_layout *l)
|
|||
mddev->new_chunk_sectors = l->new_chunk_sectors;
|
||||
}
|
||||
|
||||
/*
 * Find any raid_set in active slot for @rs on global list.
 *
 * Walks the global raid_sets list and returns the first entry other than
 * @rs itself whose table resolves, via dm_table_get_md(), to the same
 * mapped device as @rs's table.
 *
 * Returns NULL if no such entry is on the list.
 *
 * NOTE(review): no locking around the list walk is visible here - confirm
 * that callers serialize against list additions/removals.
 */
|
||||
static struct raid_set *rs_find_active(struct raid_set *rs)
|
||||
{
|
||||
struct raid_set *r;
|
||||
struct mapped_device *md = dm_table_get_md(rs->ti->table);
|
||||
|
||||
list_for_each_entry(r, &raid_sets, list)
|
||||
if (r != rs && dm_table_get_md(r->ti->table) == md)
|
||||
return r;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* raid10 algorithms (i.e. formats) */
|
||||
#define ALGORITHM_RAID10_DEFAULT 0
|
||||
#define ALGORITHM_RAID10_NEAR 1
|
||||
|
@ -572,7 +588,7 @@ static const char *raid10_md_layout_to_format(int layout)
|
|||
}
|
||||
|
||||
/* Return md raid10 algorithm for @name */
|
||||
static int raid10_name_to_format(const char *name)
|
||||
static const int raid10_name_to_format(const char *name)
|
||||
{
|
||||
if (!strcasecmp(name, "near"))
|
||||
return ALGORITHM_RAID10_NEAR;
|
||||
|
@ -675,15 +691,11 @@ static struct raid_type *get_raid_type_by_ll(const int level, const int layout)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Conditionally change bdev capacity of @rs
|
||||
* in case of a disk add/remove reshape
|
||||
*/
|
||||
static void rs_set_capacity(struct raid_set *rs)
|
||||
/* Adjust rdev sectors */
|
||||
static void rs_set_rdev_sectors(struct raid_set *rs)
|
||||
{
|
||||
struct mddev *mddev = &rs->md;
|
||||
struct md_rdev *rdev;
|
||||
struct gendisk *gendisk = dm_disk(dm_table_get_md(rs->ti->table));
|
||||
|
||||
/*
|
||||
* raid10 sets rdev->sector to the device size, which
|
||||
|
@ -692,8 +704,16 @@ static void rs_set_capacity(struct raid_set *rs)
|
|||
rdev_for_each(rdev, mddev)
|
||||
if (!test_bit(Journal, &rdev->flags))
|
||||
rdev->sectors = mddev->dev_sectors;
|
||||
}
|
||||
|
||||
set_capacity(gendisk, mddev->array_sectors);
|
||||
/*
|
||||
* Change bdev capacity of @rs in case of a disk add/remove reshape.
*
* Looks up the gendisk of the mapped device that owns @rs's table, sets
* its capacity to rs->md.array_sectors and then calls revalidate_disk()
* on that gendisk.
|
||||
*/
|
||||
static void rs_set_capacity(struct raid_set *rs)
|
||||
{
|
||||
struct gendisk *gendisk = dm_disk(dm_table_get_md(rs->ti->table));
|
||||
|
||||
set_capacity(gendisk, rs->md.array_sectors);
|
||||
revalidate_disk(gendisk);
|
||||
}
|
||||
|
||||
|
@ -744,6 +764,7 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
|
|||
|
||||
mddev_init(&rs->md);
|
||||
|
||||
INIT_LIST_HEAD(&rs->list);
|
||||
rs->raid_disks = raid_devs;
|
||||
rs->delta_disks = 0;
|
||||
|
||||
|
@ -761,6 +782,9 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
|
|||
for (i = 0; i < raid_devs; i++)
|
||||
md_rdev_init(&rs->dev[i].rdev);
|
||||
|
||||
/* Add @rs to global list. */
|
||||
list_add(&rs->list, &raid_sets);
|
||||
|
||||
/*
|
||||
* Remaining items to be initialized by further RAID params:
|
||||
* rs->md.persistent
|
||||
|
@ -773,6 +797,7 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
|
|||
return rs;
|
||||
}
|
||||
|
||||
/* Free all @rs allocations and remove it from global list. */
|
||||
static void raid_set_free(struct raid_set *rs)
|
||||
{
|
||||
int i;
|
||||
|
@ -790,6 +815,8 @@ static void raid_set_free(struct raid_set *rs)
|
|||
dm_put_device(rs->ti, rs->dev[i].data_dev);
|
||||
}
|
||||
|
||||
list_del(&rs->list);
|
||||
|
||||
kfree(rs);
|
||||
}
|
||||
|
||||
|
@ -1002,7 +1029,7 @@ static int validate_raid_redundancy(struct raid_set *rs)
|
|||
!rs->dev[i].rdev.sb_page)
|
||||
rebuild_cnt++;
|
||||
|
||||
switch (rs->raid_type->level) {
|
||||
switch (rs->md.level) {
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
|
@ -1017,6 +1044,11 @@ static int validate_raid_redundancy(struct raid_set *rs)
|
|||
break;
|
||||
case 10:
|
||||
copies = raid10_md_layout_to_copies(rs->md.new_layout);
|
||||
if (copies < 2) {
|
||||
DMERR("Bogus raid10 data copies < 2!");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (rebuild_cnt < copies)
|
||||
break;
|
||||
|
||||
|
@ -1576,6 +1608,24 @@ static sector_t __rdev_sectors(struct raid_set *rs)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Check that calculated dev_sectors fits all component devices.
 *
 * For every rdev of @rs that is not flagged Journal and has a bdev, the
 * running minimum @ds is updated with that device's size in sectors
 * (taken from the bdev inode size).  As soon as the minimum drops below
 * rs->md.dev_sectors, rs->ti->error is set and -EINVAL is returned.
 *
 * @ds starts out as ~0 so that the first min() picks up the first
 * device's size (assumes sector_t is unsigned - TODO confirm).
 *
 * Returns 0 if no examined device is smaller than rs->md.dev_sectors.
 */
|
||||
static int _check_data_dev_sectors(struct raid_set *rs)
|
||||
{
|
||||
sector_t ds = ~0;
|
||||
struct md_rdev *rdev;
|
||||
|
||||
rdev_for_each(rdev, &rs->md)
|
||||
if (!test_bit(Journal, &rdev->flags) && rdev->bdev) {
|
||||
ds = min(ds, to_sector(i_size_read(rdev->bdev->bd_inode)));
|
||||
if (ds < rs->md.dev_sectors) {
|
||||
rs->ti->error = "Component device(s) too small";
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Calculate the sectors per device and per array used for @rs */
|
||||
static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev)
|
||||
{
|
||||
|
@ -1625,7 +1675,7 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev)
|
|||
mddev->array_sectors = array_sectors;
|
||||
mddev->dev_sectors = dev_sectors;
|
||||
|
||||
return 0;
|
||||
return _check_data_dev_sectors(rs);
|
||||
bad:
|
||||
rs->ti->error = "Target length not divisible by number of data devices";
|
||||
return -EINVAL;
|
||||
|
@ -1674,8 +1724,11 @@ static void do_table_event(struct work_struct *ws)
|
|||
struct raid_set *rs = container_of(ws, struct raid_set, md.event_work);
|
||||
|
||||
smp_rmb(); /* Make sure we access most actual mddev properties */
|
||||
if (!rs_is_reshaping(rs))
|
||||
if (!rs_is_reshaping(rs)) {
|
||||
if (rs_is_raid10(rs))
|
||||
rs_set_rdev_sectors(rs);
|
||||
rs_set_capacity(rs);
|
||||
}
|
||||
dm_table_event(rs->ti->table);
|
||||
}
|
||||
|
||||
|
@ -1860,7 +1913,7 @@ static bool rs_reshape_requested(struct raid_set *rs)
|
|||
if (rs_takeover_requested(rs))
|
||||
return false;
|
||||
|
||||
if (!mddev->level)
|
||||
if (rs_is_raid0(rs))
|
||||
return false;
|
||||
|
||||
change = mddev->new_layout != mddev->layout ||
|
||||
|
@ -1868,7 +1921,7 @@ static bool rs_reshape_requested(struct raid_set *rs)
|
|||
rs->delta_disks;
|
||||
|
||||
/* Historical case to support raid1 reshape without delta disks */
|
||||
if (mddev->level == 1) {
|
||||
if (rs_is_raid1(rs)) {
|
||||
if (rs->delta_disks)
|
||||
return !!rs->delta_disks;
|
||||
|
||||
|
@ -1876,7 +1929,7 @@ static bool rs_reshape_requested(struct raid_set *rs)
|
|||
mddev->raid_disks != rs->raid_disks;
|
||||
}
|
||||
|
||||
if (mddev->level == 10)
|
||||
if (rs_is_raid10(rs))
|
||||
return change &&
|
||||
!__is_raid10_far(mddev->new_layout) &&
|
||||
rs->delta_disks >= 0;
|
||||
|
@ -2340,7 +2393,7 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
|
|||
DMERR("new device%s provided without 'rebuild'",
|
||||
new_devs > 1 ? "s" : "");
|
||||
return -EINVAL;
|
||||
} else if (rs_is_recovering(rs)) {
|
||||
} else if (!test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) && rs_is_recovering(rs)) {
|
||||
DMERR("'rebuild' specified while raid set is not in-sync (recovery_cp=%llu)",
|
||||
(unsigned long long) mddev->recovery_cp);
|
||||
return -EINVAL;
|
||||
|
@ -2640,12 +2693,19 @@ static int rs_adjust_data_offsets(struct raid_set *rs)
|
|||
* Make sure we got a minimum amount of free sectors per device
|
||||
*/
|
||||
if (rs->data_offset &&
|
||||
to_sector(i_size_read(rdev->bdev->bd_inode)) - rdev->sectors < MIN_FREE_RESHAPE_SPACE) {
|
||||
to_sector(i_size_read(rdev->bdev->bd_inode)) - rs->md.dev_sectors < MIN_FREE_RESHAPE_SPACE) {
|
||||
rs->ti->error = data_offset ? "No space for forward reshape" :
|
||||
"No space for backward reshape";
|
||||
return -ENOSPC;
|
||||
}
|
||||
out:
|
||||
/*
|
||||
* Raise recovery_cp in case data_offset != 0 to
|
||||
* avoid false recovery positives in the constructor.
|
||||
*/
|
||||
if (rs->md.recovery_cp < rs->md.dev_sectors)
|
||||
rs->md.recovery_cp += rs->dev[0].rdev.data_offset;
|
||||
|
||||
/* Adjust data offsets on all rdevs but on any raid4/5/6 journal device */
|
||||
rdev_for_each(rdev, &rs->md) {
|
||||
if (!test_bit(Journal, &rdev->flags)) {
|
||||
|
@ -2682,14 +2742,14 @@ static int rs_setup_takeover(struct raid_set *rs)
|
|||
sector_t new_data_offset = rs->dev[0].rdev.data_offset ? 0 : rs->data_offset;
|
||||
|
||||
if (rt_is_raid10(rs->raid_type)) {
|
||||
if (mddev->level == 0) {
|
||||
if (rs_is_raid0(rs)) {
|
||||
/* Userspace reordered disks -> adjust raid_disk indexes */
|
||||
__reorder_raid_disk_indexes(rs);
|
||||
|
||||
/* raid0 -> raid10_far layout */
|
||||
mddev->layout = raid10_format_to_md_layout(rs, ALGORITHM_RAID10_FAR,
|
||||
rs->raid10_copies);
|
||||
} else if (mddev->level == 1)
|
||||
} else if (rs_is_raid1(rs))
|
||||
/* raid1 -> raid10_near layout */
|
||||
mddev->layout = raid10_format_to_md_layout(rs, ALGORITHM_RAID10_NEAR,
|
||||
rs->raid_disks);
|
||||
|
@ -2777,6 +2837,23 @@ static int rs_prepare_reshape(struct raid_set *rs)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Get reshape sectors from data_offsets or raid set.
 *
 * Only the first rdev of @rs that is not flagged Journal is examined (the
 * loop breaks after it): the absolute difference between its data_offset
 * and new_data_offset is taken as the candidate value.  If there is no
 * such rdev the candidate stays 0.
 *
 * Returns the larger of that candidate and rs->data_offset.
 */
|
||||
static sector_t _get_reshape_sectors(struct raid_set *rs)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
sector_t reshape_sectors = 0;
|
||||
|
||||
rdev_for_each(rdev, &rs->md)
|
||||
if (!test_bit(Journal, &rdev->flags)) {
|
||||
reshape_sectors = (rdev->data_offset > rdev->new_data_offset) ?
|
||||
rdev->data_offset - rdev->new_data_offset :
|
||||
rdev->new_data_offset - rdev->data_offset;
|
||||
break;
|
||||
}
|
||||
|
||||
return max(reshape_sectors, (sector_t) rs->data_offset);
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
* - change raid layout
|
||||
|
@ -2788,6 +2865,7 @@ static int rs_setup_reshape(struct raid_set *rs)
|
|||
{
|
||||
int r = 0;
|
||||
unsigned int cur_raid_devs, d;
|
||||
sector_t reshape_sectors = _get_reshape_sectors(rs);
|
||||
struct mddev *mddev = &rs->md;
|
||||
struct md_rdev *rdev;
|
||||
|
||||
|
@ -2804,13 +2882,13 @@ static int rs_setup_reshape(struct raid_set *rs)
|
|||
/*
|
||||
* Adjust array size:
|
||||
*
|
||||
* - in case of adding disks, array size has
|
||||
* - in case of adding disk(s), array size has
|
||||
* to grow after the disk adding reshape,
|
||||
* which'll happen in the event handler;
|
||||
* reshape will happen forward, so space has to
|
||||
* be available at the beginning of each disk
|
||||
*
|
||||
* - in case of removing disks, array size
|
||||
* - in case of removing disk(s), array size
|
||||
* has to shrink before starting the reshape,
|
||||
* which'll happen here;
|
||||
* reshape will happen backward, so space has to
|
||||
|
@ -2841,7 +2919,7 @@ static int rs_setup_reshape(struct raid_set *rs)
|
|||
rdev->recovery_offset = rs_is_raid1(rs) ? 0 : MaxSector;
|
||||
}
|
||||
|
||||
mddev->reshape_backwards = 0; /* adding disks -> forward reshape */
|
||||
mddev->reshape_backwards = 0; /* adding disk(s) -> forward reshape */
|
||||
|
||||
/* Remove disk(s) */
|
||||
} else if (rs->delta_disks < 0) {
|
||||
|
@ -2874,6 +2952,15 @@ static int rs_setup_reshape(struct raid_set *rs)
|
|||
mddev->reshape_backwards = rs->dev[0].rdev.data_offset ? 0 : 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Adjust device size for forward reshape
|
||||
* because md_finish_reshape() reduces it.
|
||||
*/
|
||||
if (!mddev->reshape_backwards)
|
||||
rdev_for_each(rdev, &rs->md)
|
||||
if (!test_bit(Journal, &rdev->flags))
|
||||
rdev->sectors += reshape_sectors;
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -2890,7 +2977,7 @@ static void configure_discard_support(struct raid_set *rs)
|
|||
/*
|
||||
* XXX: RAID level 4,5,6 require zeroing for safety.
|
||||
*/
|
||||
raid456 = (rs->md.level == 4 || rs->md.level == 5 || rs->md.level == 6);
|
||||
raid456 = rs_is_raid456(rs);
|
||||
|
||||
for (i = 0; i < rs->raid_disks; i++) {
|
||||
struct request_queue *q;
|
||||
|
@ -2915,7 +3002,7 @@ static void configure_discard_support(struct raid_set *rs)
|
|||
* RAID1 and RAID10 personalities require bio splitting,
|
||||
* RAID0/4/5/6 don't and process large discard bios properly.
|
||||
*/
|
||||
ti->split_discard_bios = !!(rs->md.level == 1 || rs->md.level == 10);
|
||||
ti->split_discard_bios = !!(rs_is_raid1(rs) || rs_is_raid10(rs));
|
||||
ti->num_discard_bios = 1;
|
||||
}
|
||||
|
||||
|
@ -2935,10 +3022,10 @@ static void configure_discard_support(struct raid_set *rs)
|
|||
static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
||||
{
|
||||
int r;
|
||||
bool resize;
|
||||
bool resize = false;
|
||||
struct raid_type *rt;
|
||||
unsigned int num_raid_params, num_raid_devs;
|
||||
sector_t calculated_dev_sectors, rdev_sectors;
|
||||
sector_t calculated_dev_sectors, rdev_sectors, reshape_sectors;
|
||||
struct raid_set *rs = NULL;
|
||||
const char *arg;
|
||||
struct rs_layout rs_layout;
|
||||
|
@ -3021,7 +3108,10 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|||
goto bad;
|
||||
}
|
||||
|
||||
resize = calculated_dev_sectors != rdev_sectors;
|
||||
|
||||
reshape_sectors = _get_reshape_sectors(rs);
|
||||
if (calculated_dev_sectors != rdev_sectors)
|
||||
resize = calculated_dev_sectors != (reshape_sectors ? rdev_sectors - reshape_sectors : rdev_sectors);
|
||||
|
||||
INIT_WORK(&rs->md.event_work, do_table_event);
|
||||
ti->private = rs;
|
||||
|
@ -3105,19 +3195,22 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|||
goto bad;
|
||||
}
|
||||
|
||||
/*
|
||||
* We can only prepare for a reshape here, because the
|
||||
* raid set needs to run to provide the respective reshape
|
||||
* check functions via its MD personality instance.
|
||||
*
|
||||
* So do the reshape check after md_run() succeeded.
|
||||
*/
|
||||
r = rs_prepare_reshape(rs);
|
||||
if (r)
|
||||
return r;
|
||||
/* Out-of-place space has to be available to allow for a reshape unless raid1! */
|
||||
if (reshape_sectors || rs_is_raid1(rs)) {
|
||||
/*
|
||||
* We can only prepare for a reshape here, because the
|
||||
* raid set needs to run to provide the respective reshape
|
||||
* check functions via its MD personality instance.
|
||||
*
|
||||
* So do the reshape check after md_run() succeeded.
|
||||
*/
|
||||
r = rs_prepare_reshape(rs);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* Reshaping ain't recovery, so disable recovery */
|
||||
rs_setup_recovery(rs, MaxSector);
|
||||
/* Reshaping ain't recovery, so disable recovery */
|
||||
rs_setup_recovery(rs, MaxSector);
|
||||
}
|
||||
rs_set_cur(rs);
|
||||
} else {
|
||||
/* May not set recovery when a device rebuild is requested */
|
||||
|
@ -3144,7 +3237,6 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|||
mddev_lock_nointr(&rs->md);
|
||||
r = md_run(&rs->md);
|
||||
rs->md.in_sync = 0; /* Assume already marked dirty */
|
||||
|
||||
if (r) {
|
||||
ti->error = "Failed to run raid array";
|
||||
mddev_unlock(&rs->md);
|
||||
|
@ -3248,25 +3340,27 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
|
|||
}
|
||||
|
||||
/* Return string describing the current sync action of @mddev */
|
||||
static const char *decipher_sync_action(struct mddev *mddev)
|
||||
static const char *decipher_sync_action(struct mddev *mddev, unsigned long recovery)
|
||||
{
|
||||
if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
|
||||
if (test_bit(MD_RECOVERY_FROZEN, &recovery))
|
||||
return "frozen";
|
||||
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
|
||||
(!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
|
||||
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
|
||||
/* The MD sync thread can be done with io but still be running */
|
||||
if (!test_bit(MD_RECOVERY_DONE, &recovery) &&
|
||||
(test_bit(MD_RECOVERY_RUNNING, &recovery) ||
|
||||
(!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery)))) {
|
||||
if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
|
||||
return "reshape";
|
||||
|
||||
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
|
||||
if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
|
||||
if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
|
||||
if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
|
||||
return "resync";
|
||||
else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
|
||||
else if (test_bit(MD_RECOVERY_CHECK, &recovery))
|
||||
return "check";
|
||||
return "repair";
|
||||
}
|
||||
|
||||
if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
|
||||
if (test_bit(MD_RECOVERY_RECOVER, &recovery))
|
||||
return "recover";
|
||||
}
|
||||
|
||||
|
@ -3283,7 +3377,7 @@ static const char *decipher_sync_action(struct mddev *mddev)
|
|||
* 'A' = Alive and in-sync raid set component _or_ alive raid4/5/6 'write_through' journal device
|
||||
* '-' = Non-existing device (i.e. uspace passed '- -' into the ctr)
|
||||
*/
|
||||
static const char *__raid_dev_status(struct raid_set *rs, struct md_rdev *rdev, bool array_in_sync)
|
||||
static const char *__raid_dev_status(struct raid_set *rs, struct md_rdev *rdev)
|
||||
{
|
||||
if (!rdev->bdev)
|
||||
return "-";
|
||||
|
@ -3291,85 +3385,108 @@ static const char *__raid_dev_status(struct raid_set *rs, struct md_rdev *rdev,
|
|||
return "D";
|
||||
else if (test_bit(Journal, &rdev->flags))
|
||||
return (rs->journal_dev.mode == R5C_JOURNAL_MODE_WRITE_THROUGH) ? "A" : "a";
|
||||
else if (!array_in_sync || !test_bit(In_sync, &rdev->flags))
|
||||
else if (test_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags) ||
|
||||
(!test_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags) &&
|
||||
!test_bit(In_sync, &rdev->flags)))
|
||||
return "a";
|
||||
else
|
||||
return "A";
|
||||
}
|
||||
|
||||
/* Helper to return resync/reshape progress for @rs and @array_in_sync */
|
||||
static sector_t rs_get_progress(struct raid_set *rs,
|
||||
sector_t resync_max_sectors, bool *array_in_sync)
|
||||
/* Helper to return resync/reshape progress for @rs and runtime flags for raid set in sync / resynching */
|
||||
static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
|
||||
sector_t resync_max_sectors)
|
||||
{
|
||||
sector_t r, curr_resync_completed;
|
||||
sector_t r;
|
||||
struct mddev *mddev = &rs->md;
|
||||
|
||||
curr_resync_completed = mddev->curr_resync_completed ?: mddev->recovery_cp;
|
||||
*array_in_sync = false;
|
||||
clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
|
||||
clear_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
|
||||
|
||||
if (rs_is_raid0(rs)) {
|
||||
r = resync_max_sectors;
|
||||
*array_in_sync = true;
|
||||
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
|
||||
|
||||
} else {
|
||||
r = mddev->reshape_position;
|
||||
|
||||
/* Reshape is relative to the array size */
|
||||
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
|
||||
r != MaxSector) {
|
||||
if (r == MaxSector) {
|
||||
*array_in_sync = true;
|
||||
r = resync_max_sectors;
|
||||
} else {
|
||||
/* Got to reverse on backward reshape */
|
||||
if (mddev->reshape_backwards)
|
||||
r = mddev->array_sectors - r;
|
||||
|
||||
/* Divide by # of data stripes */
|
||||
sector_div(r, mddev_data_stripes(rs));
|
||||
}
|
||||
|
||||
/* Sync is relative to the component device size */
|
||||
} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||
r = curr_resync_completed;
|
||||
if (test_bit(MD_RECOVERY_NEEDED, &recovery) ||
|
||||
test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
|
||||
test_bit(MD_RECOVERY_RUNNING, &recovery))
|
||||
r = mddev->curr_resync_completed;
|
||||
else
|
||||
r = mddev->recovery_cp;
|
||||
|
||||
if ((r == MaxSector) ||
|
||||
(test_bit(MD_RECOVERY_DONE, &mddev->recovery) &&
|
||||
(mddev->curr_resync_completed == resync_max_sectors))) {
|
||||
if (r >= resync_max_sectors &&
|
||||
(!test_bit(MD_RECOVERY_REQUESTED, &recovery) ||
|
||||
(!test_bit(MD_RECOVERY_FROZEN, &recovery) &&
|
||||
!test_bit(MD_RECOVERY_NEEDED, &recovery) &&
|
||||
!test_bit(MD_RECOVERY_RUNNING, &recovery)))) {
|
||||
/*
|
||||
* Sync complete.
|
||||
*/
|
||||
*array_in_sync = true;
|
||||
r = resync_max_sectors;
|
||||
} else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
|
||||
/* In case we have finished recovering, the array is in sync. */
|
||||
if (test_bit(MD_RECOVERY_RECOVER, &recovery))
|
||||
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
|
||||
|
||||
} else if (test_bit(MD_RECOVERY_RECOVER, &recovery)) {
|
||||
/*
|
||||
* In case we are recovering, the array is not in sync
|
||||
* and health chars should show the recovering legs.
|
||||
*/
|
||||
;
|
||||
|
||||
} else if (test_bit(MD_RECOVERY_SYNC, &recovery) &&
|
||||
!test_bit(MD_RECOVERY_REQUESTED, &recovery)) {
|
||||
/*
|
||||
* If "resync" is occurring, the raid set
|
||||
* is or may be out of sync hence the health
|
||||
* characters shall be 'a'.
|
||||
*/
|
||||
set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
|
||||
|
||||
} else if (test_bit(MD_RECOVERY_RESHAPE, &recovery) &&
|
||||
!test_bit(MD_RECOVERY_REQUESTED, &recovery)) {
|
||||
/*
|
||||
* If "reshape" is occurring, the raid set
|
||||
* is or may be out of sync hence the health
|
||||
* characters shall be 'a'.
|
||||
*/
|
||||
set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
|
||||
|
||||
} else if (test_bit(MD_RECOVERY_REQUESTED, &recovery)) {
|
||||
/*
|
||||
* If "check" or "repair" is occurring, the raid set has
|
||||
* undergone an initial sync and the health characters
|
||||
* should not be 'a' anymore.
|
||||
*/
|
||||
*array_in_sync = true;
|
||||
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
|
||||
|
||||
} else {
|
||||
struct md_rdev *rdev;
|
||||
|
||||
/*
|
||||
* We are idle and recovery is needed, prevent 'A' chars race
|
||||
* caused by components still set to in-sync by constructor.
|
||||
*/
|
||||
if (test_bit(MD_RECOVERY_NEEDED, &recovery))
|
||||
set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
|
||||
|
||||
/*
|
||||
* The raid set may be doing an initial sync, or it may
|
||||
* be rebuilding individual components. If all the
|
||||
* devices are In_sync, then it is the raid set that is
|
||||
* being initialized.
|
||||
*/
|
||||
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
|
||||
rdev_for_each(rdev, mddev)
|
||||
if (!test_bit(Journal, &rdev->flags) &&
|
||||
!test_bit(In_sync, &rdev->flags))
|
||||
*array_in_sync = true;
|
||||
#if 0
|
||||
r = 0; /* HM FIXME: TESTME: https://bugzilla.redhat.com/show_bug.cgi?id=1210637 ? */
|
||||
#endif
|
||||
!test_bit(In_sync, &rdev->flags)) {
|
||||
clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return r;
|
||||
return min(r, resync_max_sectors);
|
||||
}
|
||||
|
||||
/* Helper to return @dev name or "-" if !@dev */
|
||||
|
@ -3385,7 +3502,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
struct mddev *mddev = &rs->md;
|
||||
struct r5conf *conf = mddev->private;
|
||||
int i, max_nr_stripes = conf ? conf->max_nr_stripes : 0;
|
||||
bool array_in_sync;
|
||||
unsigned long recovery;
|
||||
unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */
|
||||
unsigned int sz = 0;
|
||||
unsigned int rebuild_disks;
|
||||
|
@ -3405,17 +3522,18 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
|
||||
/* Access most recent mddev properties for status output */
|
||||
smp_rmb();
|
||||
recovery = rs->md.recovery;
|
||||
/* Get sensible max sectors even if raid set not yet started */
|
||||
resync_max_sectors = test_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags) ?
|
||||
mddev->resync_max_sectors : mddev->dev_sectors;
|
||||
progress = rs_get_progress(rs, resync_max_sectors, &array_in_sync);
|
||||
progress = rs_get_progress(rs, recovery, resync_max_sectors);
|
||||
resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
|
||||
atomic64_read(&mddev->resync_mismatches) : 0;
|
||||
sync_action = decipher_sync_action(&rs->md);
|
||||
sync_action = decipher_sync_action(&rs->md, recovery);
|
||||
|
||||
/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
|
||||
for (i = 0; i < rs->raid_disks; i++)
|
||||
DMEMIT(__raid_dev_status(rs, &rs->dev[i].rdev, array_in_sync));
|
||||
DMEMIT(__raid_dev_status(rs, &rs->dev[i].rdev));
|
||||
|
||||
/*
|
||||
* In-sync/Reshape ratio:
|
||||
|
@ -3466,7 +3584,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
* v1.10.0+:
|
||||
*/
|
||||
DMEMIT(" %s", test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ?
|
||||
__raid_dev_status(rs, &rs->journal_dev.rdev, 0) : "-");
|
||||
__raid_dev_status(rs, &rs->journal_dev.rdev) : "-");
|
||||
break;
|
||||
|
||||
case STATUSTYPE_TABLE:
|
||||
|
@ -3622,24 +3740,19 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
|
|||
blk_limits_io_opt(limits, chunk_size * mddev_data_stripes(rs));
|
||||
}
|
||||
|
||||
static void raid_presuspend(struct dm_target *ti)
|
||||
{
|
||||
struct raid_set *rs = ti->private;
|
||||
|
||||
md_stop_writes(&rs->md);
|
||||
}
|
||||
|
||||
static void raid_postsuspend(struct dm_target *ti)
|
||||
{
|
||||
struct raid_set *rs = ti->private;
|
||||
|
||||
if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
|
||||
/* Writes have to be stopped before suspending to avoid deadlocks. */
|
||||
if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
|
||||
md_stop_writes(&rs->md);
|
||||
|
||||
mddev_lock_nointr(&rs->md);
|
||||
mddev_suspend(&rs->md);
|
||||
mddev_unlock(&rs->md);
|
||||
}
|
||||
|
||||
rs->md.ro = 1;
|
||||
}
|
||||
|
||||
static void attempt_restore_of_faulty_devices(struct raid_set *rs)
|
||||
|
@ -3816,10 +3929,33 @@ static int raid_preresume(struct dm_target *ti)
|
|||
struct raid_set *rs = ti->private;
|
||||
struct mddev *mddev = &rs->md;
|
||||
|
||||
/* This is a resume after a suspend of the set -> it's already started */
|
||||
/* This is a resume after a suspend of the set -> it's already started. */
|
||||
if (test_and_set_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags))
|
||||
return 0;
|
||||
|
||||
if (!test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
|
||||
struct raid_set *rs_active = rs_find_active(rs);
|
||||
|
||||
if (rs_active) {
|
||||
/*
|
||||
* In case no rebuilds have been requested
|
||||
* and an active table slot exists, copy
|
||||
* current resynchronization completed and
|
||||
* reshape position pointers across from
|
||||
* suspended raid set in the active slot.
|
||||
*
|
||||
* This resumes the new mapping at current
|
||||
* offsets to continue recover/reshape without
|
||||
* necessarily redoing a raid set partially or
|
||||
* causing data corruption in case of a reshape.
|
||||
*/
|
||||
if (rs_active->md.curr_resync_completed != MaxSector)
|
||||
mddev->curr_resync_completed = rs_active->md.curr_resync_completed;
|
||||
if (rs_active->md.reshape_position != MaxSector)
|
||||
mddev->reshape_position = rs_active->md.reshape_position;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The superblocks need to be updated on disk if the
|
||||
* array is new or new devices got added (thus zeroed
|
||||
|
@ -3851,11 +3987,10 @@ static int raid_preresume(struct dm_target *ti)
|
|||
mddev->resync_min = mddev->recovery_cp;
|
||||
}
|
||||
|
||||
rs_set_capacity(rs);
|
||||
|
||||
/* Check for any reshape request unless new raid set */
|
||||
if (test_and_clear_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) {
|
||||
if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) {
|
||||
/* Initiate a reshape. */
|
||||
rs_set_rdev_sectors(rs);
|
||||
mddev_lock_nointr(mddev);
|
||||
r = rs_start_reshape(rs);
|
||||
mddev_unlock(mddev);
|
||||
|
@ -3881,21 +4016,15 @@ static void raid_resume(struct dm_target *ti)
|
|||
attempt_restore_of_faulty_devices(rs);
|
||||
}
|
||||
|
||||
mddev->ro = 0;
|
||||
mddev->in_sync = 0;
|
||||
|
||||
/*
|
||||
* Keep the RAID set frozen if reshape/rebuild flags are set.
|
||||
* The RAID set is unfrozen once the next table load/resume,
|
||||
* which clears the reshape/rebuild flags, occurs.
|
||||
* This ensures that the constructor for the inactive table
|
||||
* retrieves an up-to-date reshape_position.
|
||||
*/
|
||||
if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS))
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
|
||||
if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
|
||||
/* Only reduce raid set size before running a disk removing reshape. */
|
||||
if (mddev->delta_disks < 0)
|
||||
rs_set_capacity(rs);
|
||||
|
||||
mddev_lock_nointr(mddev);
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
mddev->ro = 0;
|
||||
mddev->in_sync = 0;
|
||||
mddev_resume(mddev);
|
||||
mddev_unlock(mddev);
|
||||
}
|
||||
|
@ -3903,7 +4032,7 @@ static void raid_resume(struct dm_target *ti)
|
|||
|
||||
static struct target_type raid_target = {
|
||||
.name = "raid",
|
||||
.version = {1, 13, 0},
|
||||
.version = {1, 13, 2},
|
||||
.module = THIS_MODULE,
|
||||
.ctr = raid_ctr,
|
||||
.dtr = raid_dtr,
|
||||
|
@ -3912,7 +4041,6 @@ static struct target_type raid_target = {
|
|||
.message = raid_message,
|
||||
.iterate_devices = raid_iterate_devices,
|
||||
.io_hints = raid_io_hints,
|
||||
.presuspend = raid_presuspend,
|
||||
.postsuspend = raid_postsuspend,
|
||||
.preresume = raid_preresume,
|
||||
.resume = raid_resume,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue