mirror of
https://github.com/Fishwaldo/linux-bl808.git
synced 2025-06-17 20:25:19 +00:00
Btrfs: Fix streaming read performance with checksumming on
Large streaming reads make for large bios, which means each entry on the async work queue lists represents a large amount of data. IO congestion throttling on the device was kicking in before the async worker threads decided a single thread was busy and needed some help. The end result was that a streaming read would result in a single CPU running at 100% instead of balancing the work off to other CPUs. This patch also changes the pre-IO checksum lookup done by reads to work on a per-bio basis instead of a per-page basis. The old per-page lookup resulted in many extra btree lookups on large streaming reads. Doing the checksum lookup right before bio submit allows us to reuse searches while processing adjacent offsets. Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
parent
37d1aeee39
commit
61b4944018
5 changed files with 99 additions and 54 deletions
|
@ -160,7 +160,7 @@ void btrfs_init_workers(struct btrfs_workers *workers, int max)
|
||||||
INIT_LIST_HEAD(&workers->idle_list);
|
INIT_LIST_HEAD(&workers->idle_list);
|
||||||
spin_lock_init(&workers->lock);
|
spin_lock_init(&workers->lock);
|
||||||
workers->max_workers = max;
|
workers->max_workers = max;
|
||||||
workers->idle_thresh = 64;
|
workers->idle_thresh = 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -1613,6 +1613,8 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||||
struct btrfs_key *location, int mod);
|
struct btrfs_key *location, int mod);
|
||||||
|
|
||||||
/* file-item.c */
|
/* file-item.c */
|
||||||
|
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
|
||||||
|
struct bio *bio);
|
||||||
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
|
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
|
||||||
struct btrfs_root *root,
|
struct btrfs_root *root,
|
||||||
u64 objectid, u64 pos, u64 disk_offset,
|
u64 objectid, u64 pos, u64 disk_offset,
|
||||||
|
|
|
@ -1357,10 +1357,25 @@ struct btrfs_root *open_ctree(struct super_block *sb,
|
||||||
*/
|
*/
|
||||||
btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size);
|
btrfs_init_workers(&fs_info->workers, fs_info->thread_pool_size);
|
||||||
btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size);
|
btrfs_init_workers(&fs_info->submit_workers, fs_info->thread_pool_size);
|
||||||
|
|
||||||
|
/* a higher idle thresh on the submit workers makes it much more
|
||||||
|
* likely that bios will be sent down in a sane order to the
|
||||||
|
* devices
|
||||||
|
*/
|
||||||
|
fs_info->submit_workers.idle_thresh = 64;
|
||||||
|
|
||||||
btrfs_init_workers(&fs_info->fixup_workers, 1);
|
btrfs_init_workers(&fs_info->fixup_workers, 1);
|
||||||
btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
|
btrfs_init_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
|
||||||
btrfs_init_workers(&fs_info->endio_write_workers,
|
btrfs_init_workers(&fs_info->endio_write_workers,
|
||||||
fs_info->thread_pool_size);
|
fs_info->thread_pool_size);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* endios are largely parallel and should have a very
|
||||||
|
* low idle thresh
|
||||||
|
*/
|
||||||
|
fs_info->endio_workers.idle_thresh = 4;
|
||||||
|
fs_info->endio_write_workers.idle_thresh = 4;
|
||||||
|
|
||||||
btrfs_start_workers(&fs_info->workers, 1);
|
btrfs_start_workers(&fs_info->workers, 1);
|
||||||
btrfs_start_workers(&fs_info->submit_workers, 1);
|
btrfs_start_workers(&fs_info->submit_workers, 1);
|
||||||
btrfs_start_workers(&fs_info->fixup_workers, 1);
|
btrfs_start_workers(&fs_info->fixup_workers, 1);
|
||||||
|
|
|
@ -134,6 +134,83 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
|
||||||
|
struct bio *bio)
|
||||||
|
{
|
||||||
|
u32 sum;
|
||||||
|
struct bio_vec *bvec = bio->bi_io_vec;
|
||||||
|
int bio_index = 0;
|
||||||
|
u64 offset;
|
||||||
|
u64 item_start_offset = 0;
|
||||||
|
u64 item_last_offset = 0;
|
||||||
|
u32 diff;
|
||||||
|
int ret;
|
||||||
|
struct btrfs_path *path;
|
||||||
|
struct btrfs_csum_item *item = NULL;
|
||||||
|
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
|
||||||
|
|
||||||
|
path = btrfs_alloc_path();
|
||||||
|
path->reada = 2;
|
||||||
|
|
||||||
|
WARN_ON(bio->bi_vcnt <= 0);
|
||||||
|
|
||||||
|
while(bio_index < bio->bi_vcnt) {
|
||||||
|
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
|
||||||
|
ret = btrfs_find_ordered_sum(inode, offset, &sum);
|
||||||
|
if (ret == 0)
|
||||||
|
goto found;
|
||||||
|
|
||||||
|
if (!item || offset < item_start_offset ||
|
||||||
|
offset >= item_last_offset) {
|
||||||
|
struct btrfs_key found_key;
|
||||||
|
u32 item_size;
|
||||||
|
|
||||||
|
if (item)
|
||||||
|
btrfs_release_path(root, path);
|
||||||
|
item = btrfs_lookup_csum(NULL, root, path,
|
||||||
|
inode->i_ino, offset, 0);
|
||||||
|
if (IS_ERR(item)) {
|
||||||
|
ret = PTR_ERR(item);
|
||||||
|
if (ret == -ENOENT || ret == -EFBIG)
|
||||||
|
ret = 0;
|
||||||
|
sum = 0;
|
||||||
|
printk("no csum found for inode %lu start "
|
||||||
|
"%llu\n", inode->i_ino,
|
||||||
|
(unsigned long long)offset);
|
||||||
|
goto found;
|
||||||
|
}
|
||||||
|
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
|
||||||
|
path->slots[0]);
|
||||||
|
|
||||||
|
item_start_offset = found_key.offset;
|
||||||
|
item_size = btrfs_item_size_nr(path->nodes[0],
|
||||||
|
path->slots[0]);
|
||||||
|
item_last_offset = item_start_offset +
|
||||||
|
(item_size / BTRFS_CRC32_SIZE) *
|
||||||
|
root->sectorsize;
|
||||||
|
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
|
||||||
|
struct btrfs_csum_item);
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* this byte range must be able to fit inside
|
||||||
|
* a single leaf so it will also fit inside a u32
|
||||||
|
*/
|
||||||
|
diff = offset - item_start_offset;
|
||||||
|
diff = diff / root->sectorsize;
|
||||||
|
diff = diff * BTRFS_CRC32_SIZE;
|
||||||
|
|
||||||
|
read_extent_buffer(path->nodes[0], &sum,
|
||||||
|
(unsigned long)item + diff,
|
||||||
|
BTRFS_CRC32_SIZE);
|
||||||
|
found:
|
||||||
|
set_state_private(io_tree, offset, sum);
|
||||||
|
bio_index++;
|
||||||
|
bvec++;
|
||||||
|
}
|
||||||
|
btrfs_free_path(path);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
|
int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
|
||||||
struct bio *bio)
|
struct bio *bio)
|
||||||
{
|
{
|
||||||
|
|
|
@ -374,6 +374,10 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
|
||||||
BUG_ON(ret);
|
BUG_ON(ret);
|
||||||
|
|
||||||
if (!(rw & (1 << BIO_RW))) {
|
if (!(rw & (1 << BIO_RW))) {
|
||||||
|
if (!btrfs_test_opt(root, NODATASUM) &&
|
||||||
|
!btrfs_test_flag(inode, NODATASUM)) {
|
||||||
|
btrfs_lookup_bio_sums(root, inode, bio);
|
||||||
|
}
|
||||||
goto mapit;
|
goto mapit;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -598,58 +602,6 @@ int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
|
||||||
return btrfs_finish_ordered_io(page->mapping->host, start, end);
|
return btrfs_finish_ordered_io(page->mapping->host, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
|
|
||||||
{
|
|
||||||
int ret = 0;
|
|
||||||
struct inode *inode = page->mapping->host;
|
|
||||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
|
||||||
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
|
|
||||||
struct btrfs_csum_item *item;
|
|
||||||
struct btrfs_path *path = NULL;
|
|
||||||
u32 csum;
|
|
||||||
|
|
||||||
if (btrfs_test_opt(root, NODATASUM) ||
|
|
||||||
btrfs_test_flag(inode, NODATASUM))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* It is possible there is an ordered extent that has
|
|
||||||
* not yet finished for this range in the file. If so,
|
|
||||||
* that extent will have a csum cached, and it will insert
|
|
||||||
* the sum after all the blocks in the extent are fully
|
|
||||||
* on disk. So, look for an ordered extent and use the
|
|
||||||
* sum if found. We have to do this before looking in the
|
|
||||||
* btree because csum items are pre-inserted based on
|
|
||||||
* the file size. btrfs_lookup_csum might find an item
|
|
||||||
* that still hasn't been fully filled.
|
|
||||||
*/
|
|
||||||
ret = btrfs_find_ordered_sum(inode, start, &csum);
|
|
||||||
if (ret == 0)
|
|
||||||
goto found;
|
|
||||||
|
|
||||||
ret = 0;
|
|
||||||
path = btrfs_alloc_path();
|
|
||||||
item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
|
|
||||||
if (IS_ERR(item)) {
|
|
||||||
ret = PTR_ERR(item);
|
|
||||||
/* a csum that isn't present is a preallocated region. */
|
|
||||||
if (ret == -ENOENT || ret == -EFBIG)
|
|
||||||
ret = 0;
|
|
||||||
csum = 0;
|
|
||||||
printk("no csum found for inode %lu start %Lu\n", inode->i_ino,
|
|
||||||
start);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
|
|
||||||
BTRFS_CRC32_SIZE);
|
|
||||||
found:
|
|
||||||
set_state_private(io_tree, start, csum);
|
|
||||||
out:
|
|
||||||
if (path)
|
|
||||||
btrfs_free_path(path);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct io_failure_record {
|
struct io_failure_record {
|
||||||
struct page *page;
|
struct page *page;
|
||||||
u64 start;
|
u64 start;
|
||||||
|
@ -3613,7 +3565,6 @@ static struct extent_io_ops btrfs_extent_io_ops = {
|
||||||
.fill_delalloc = run_delalloc_range,
|
.fill_delalloc = run_delalloc_range,
|
||||||
.submit_bio_hook = btrfs_submit_bio_hook,
|
.submit_bio_hook = btrfs_submit_bio_hook,
|
||||||
.merge_bio_hook = btrfs_merge_bio_hook,
|
.merge_bio_hook = btrfs_merge_bio_hook,
|
||||||
.readpage_io_hook = btrfs_readpage_io_hook,
|
|
||||||
.readpage_end_io_hook = btrfs_readpage_end_io_hook,
|
.readpage_end_io_hook = btrfs_readpage_end_io_hook,
|
||||||
.writepage_end_io_hook = btrfs_writepage_end_io_hook,
|
.writepage_end_io_hook = btrfs_writepage_end_io_hook,
|
||||||
.writepage_start_hook = btrfs_writepage_start_hook,
|
.writepage_start_hook = btrfs_writepage_start_hook,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue