mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-06-29 10:01:25 +00:00
fs: introduce write_begin, write_end, and perform_write aops
These are intended to replace prepare_write and commit_write with more flexible alternatives that are also able to avoid the buffered write deadlock problems efficiently (which prepare_write is unable to do). [mark.fasheh@oracle.com: API design contributions, code review and fixes] [akpm@linux-foundation.org: various fixes] [dmonakhov@sw.ru: new aop block_write_begin fix] Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com> Signed-off-by: Dmitriy Monakhov <dmonakhov@openvz.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
637aff46f9
commit
afddba49d1
11 changed files with 575 additions and 206 deletions
201
fs/buffer.c
201
fs/buffer.c
|
@ -1770,6 +1770,48 @@ recover:
|
|||
goto done;
|
||||
}
|
||||
|
||||
/*
|
||||
* If a page has any new buffers, zero them out here, and mark them uptodate
|
||||
* and dirty so they'll be written out (in order to prevent uninitialised
|
||||
* block data from leaking). And clear the new bit.
|
||||
*/
|
||||
/*
 * page_zero_new_buffers - clean up "new" buffers overlapping a byte range.
 * @page: page whose buffers are examined; must be locked (BUG otherwise).
 * @from: start of the range, compared against buffer offsets that begin at 0
 *        for the first buffer and advance by each buffer's b_size.
 * @to:   end of the range (exclusive), in the same units as @from.
 *
 * Returns immediately if the page has no buffers.  Otherwise every buffer
 * flagged new that overlaps [from, to) has its new flag cleared and is marked
 * dirty; if the page is not uptodate, the overlapping bytes are zeroed first
 * and the buffer is marked uptodate.  New buffers entirely outside the range
 * are left untouched (their new flag stays set).
 */
void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
|
||||
{
|
||||
unsigned int block_start, block_end;
|
||||
struct buffer_head *head, *bh;
|
||||
|
||||
BUG_ON(!PageLocked(page));
|
||||
/* Nothing to do when no buffer heads are attached to the page. */
if (!page_has_buffers(page))
|
||||
return;
|
||||
|
||||
/* Walk the b_this_page ring, starting and ending at the first buffer. */
bh = head = page_buffers(page);
|
||||
block_start = 0;
|
||||
do {
|
||||
/* [block_start, block_end) is the span covered by this buffer. */
block_end = block_start + bh->b_size;
|
||||
|
||||
if (buffer_new(bh)) {
|
||||
/* Only act on buffers that intersect the requested range. */
if (block_end > from && block_start < to) {
|
||||
/* Zeroing is skipped when the whole page is already uptodate. */
if (!PageUptodate(page)) {
|
||||
unsigned start, size;
|
||||
|
||||
/* Clamp the zeroed span to the intersection of range and buffer. */
start = max(from, block_start);
|
||||
size = min(to, block_end) - start;
|
||||
|
||||
zero_user_page(page, start, size, KM_USER0);
|
||||
set_buffer_uptodate(bh);
|
||||
}
|
||||
|
||||
/* Done regardless of page state: drop the new flag and dirty the buffer. */
clear_buffer_new(bh);
|
||||
mark_buffer_dirty(bh);
|
||||
}
|
||||
}
|
||||
|
||||
block_start = block_end;
|
||||
bh = bh->b_this_page;
|
||||
} while (bh != head);
|
||||
}
|
||||
EXPORT_SYMBOL(page_zero_new_buffers);
|
||||
|
||||
static int __block_prepare_write(struct inode *inode, struct page *page,
|
||||
unsigned from, unsigned to, get_block_t *get_block)
|
||||
{
|
||||
|
@ -1854,38 +1896,8 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
|
|||
if (!buffer_uptodate(*wait_bh))
|
||||
err = -EIO;
|
||||
}
|
||||
if (!err) {
|
||||
bh = head;
|
||||
do {
|
||||
if (buffer_new(bh))
|
||||
clear_buffer_new(bh);
|
||||
} while ((bh = bh->b_this_page) != head);
|
||||
return 0;
|
||||
}
|
||||
/* Error case: */
|
||||
/*
|
||||
* Zero out any newly allocated blocks to avoid exposing stale
|
||||
* data. If BH_New is set, we know that the block was newly
|
||||
* allocated in the above loop.
|
||||
*/
|
||||
bh = head;
|
||||
block_start = 0;
|
||||
do {
|
||||
block_end = block_start+blocksize;
|
||||
if (block_end <= from)
|
||||
goto next_bh;
|
||||
if (block_start >= to)
|
||||
break;
|
||||
if (buffer_new(bh)) {
|
||||
clear_buffer_new(bh);
|
||||
zero_user_page(page, block_start, bh->b_size, KM_USER0);
|
||||
set_buffer_uptodate(bh);
|
||||
mark_buffer_dirty(bh);
|
||||
}
|
||||
next_bh:
|
||||
block_start = block_end;
|
||||
bh = bh->b_this_page;
|
||||
} while (bh != head);
|
||||
if (unlikely(err))
|
||||
page_zero_new_buffers(page, from, to);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -1910,6 +1922,7 @@ static int __block_commit_write(struct inode *inode, struct page *page,
|
|||
set_buffer_uptodate(bh);
|
||||
mark_buffer_dirty(bh);
|
||||
}
|
||||
clear_buffer_new(bh);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1923,6 +1936,130 @@ static int __block_commit_write(struct inode *inode, struct page *page,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* block_write_begin takes care of the basic task of block allocation and
|
||||
* bringing partial write blocks uptodate first.
|
||||
*
|
||||
* If *pagep is not NULL, then block_write_begin uses the locked page
|
||||
* at *pagep rather than allocating its own. In this case, the page will
|
||||
* not be unlocked or deallocated on failure.
|
||||
*/
|
||||
/*
 * block_write_begin - prepare one page for a buffered write.
 * @file:      not referenced in this function.
 * @mapping:   address space being written; mapping->host supplies the inode.
 * @pos:       file position of the write; split below into a page index and
 *             an offset within the page.
 * @len:       number of bytes to be written starting at @pos.
 * @flags:     not referenced in this function.
 * @pagep:     in/out.  If *pagep is NULL a page is obtained with
 *             __grab_cache_page() and stored here; otherwise the caller's
 *             page is used and must already be locked (BUG otherwise).
 * @fsdata:    not referenced in this function.
 * @get_block: block-mapping callback handed to __block_prepare_write().
 *
 * Returns 0 on success, -ENOMEM if no page could be obtained, or the error
 * returned by __block_prepare_write().  On a prepare failure the page's
 * uptodate flag is cleared; a page obtained here is additionally unlocked,
 * released and *pagep reset to NULL, while a caller-supplied page is left
 * locked and referenced.
 */
int block_write_begin(struct file *file, struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned flags,
|
||||
struct page **pagep, void **fsdata,
|
||||
get_block_t *get_block)
|
||||
{
|
||||
struct inode *inode = mapping->host;
|
||||
int status = 0;
|
||||
struct page *page;
|
||||
pgoff_t index;
|
||||
unsigned start, end;
|
||||
/* Set when the page was obtained here rather than passed in by the caller. */
int ownpage = 0;
|
||||
|
||||
/* Page index in the mapping, and the [start, end) span inside that page. */
index = pos >> PAGE_CACHE_SHIFT;
|
||||
start = pos & (PAGE_CACHE_SIZE - 1);
|
||||
end = start + len;
|
||||
|
||||
page = *pagep;
|
||||
if (page == NULL) {
|
||||
ownpage = 1;
|
||||
/* NOTE(review): presumably returns the page locked - confirm against __grab_cache_page(). */
page = __grab_cache_page(mapping, index);
|
||||
if (!page) {
|
||||
status = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
*pagep = page;
|
||||
} else
|
||||
/* A caller-supplied page must already be locked. */
BUG_ON(!PageLocked(page));
|
||||
|
||||
status = __block_prepare_write(inode, page, start, end, get_block);
|
||||
if (unlikely(status)) {
|
||||
ClearPageUptodate(page);
|
||||
|
||||
/* Only undo what this function set up; a caller-owned page stays locked. */
if (ownpage) {
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
*pagep = NULL;
|
||||
|
||||
/*
|
||||
* prepare_write() may have instantiated a few blocks
|
||||
* outside i_size. Trim these off again. Don't need
|
||||
* i_size_read because we hold i_mutex.
|
||||
*/
|
||||
if (pos + len > inode->i_size)
|
||||
vmtruncate(inode, inode->i_size);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Single exit point: status is 0 on success or a negative error code. */
out:
|
||||
return status;
|
||||
}
|
||||
EXPORT_SYMBOL(block_write_begin);
|
||||
|
||||
/*
 * block_write_end - commit the bytes copied into a page after a write_begin.
 * @file:    not referenced in this function.
 * @mapping: address space being written; mapping->host supplies the inode.
 * @pos:     file position of the write; only its offset within the page is
 *           used here.
 * @len:     number of bytes the write was prepared for.
 * @copied:  number of bytes actually copied into the page (may be < @len).
 * @page:    the page that was written into.
 * @fsdata:  not referenced in this function.
 *
 * Returns the number of bytes committed: @copied, or 0 when the copy was
 * short and the page is not uptodate.  The page is neither unlocked nor
 * released here.
 */
int block_write_end(struct file *file, struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned copied,
|
||||
struct page *page, void *fsdata)
|
||||
{
|
||||
struct inode *inode = mapping->host;
|
||||
unsigned start;
|
||||
|
||||
/* Offset of the write within the page. */
start = pos & (PAGE_CACHE_SIZE - 1);
|
||||
|
||||
if (unlikely(copied < len)) {
|
||||
/*
|
||||
* The buffers that were written will now be uptodate, so we
|
||||
* don't have to worry about a readpage reading them and
|
||||
* overwriting a partial write. However if we have encountered
|
||||
* a short write and only partially written into a buffer, it
|
||||
* will not be marked uptodate, so a readpage might come in and
|
||||
* destroy our partial write.
|
||||
*
|
||||
* Do the simplest thing, and just treat any short write to a
|
||||
* non uptodate page as a zero-length write, and force the
|
||||
* caller to redo the whole thing.
|
||||
*/
|
||||
if (!PageUptodate(page))
|
||||
copied = 0;
|
||||
|
||||
/* Handle new buffers in the part of the range that was not copied. */
page_zero_new_buffers(page, start+copied, start+len);
|
||||
}
|
||||
flush_dcache_page(page);
|
||||
|
||||
/* This could be a short (even 0-length) commit */
|
||||
__block_commit_write(inode, page, start, start+copied);
|
||||
|
||||
return copied;
|
||||
}
|
||||
EXPORT_SYMBOL(block_write_end);
|
||||
|
||||
/*
 * generic_write_end - finish a buffered write to one page.
 * @file, @mapping, @pos, @len, @copied, @page, @fsdata: forwarded unchanged
 *           to block_write_end(); @mapping->host also supplies the inode.
 *
 * Commits the copied bytes through block_write_end(), grows i_size (and
 * marks the inode dirty) when the committed data extends past it, then
 * unlocks @page and calls page_cache_release() on it.
 *
 * Returns the byte count reported by block_write_end().
 */
int generic_write_end(struct file *file, struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned copied,
|
||||
struct page *page, void *fsdata)
|
||||
{
|
||||
struct inode *inode = mapping->host;
|
||||
|
||||
/* From here on, copied is the committed length, which may be smaller than passed in. */
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
|
||||
|
||||
/*
|
||||
* No need to use i_size_read() here, the i_size
|
||||
* cannot change under us because we hold i_mutex.
|
||||
*
|
||||
* But it's important to update i_size while still holding page lock:
|
||||
* page writeout could otherwise come in and zero beyond i_size.
|
||||
*/
|
||||
if (pos+copied > inode->i_size) {
|
||||
i_size_write(inode, pos+copied);
|
||||
mark_inode_dirty(inode);
|
||||
}
|
||||
|
||||
/* The page is unlocked only after the i_size update above. */
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
|
||||
return copied;
|
||||
}
|
||||
EXPORT_SYMBOL(generic_write_end);
|
||||
|
||||
/*
|
||||
* Generic "read page" function for block devices that have the normal
|
||||
* get_block functionality. This is most of the block device filesystems.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue