mirror of
https://github.com/Fishwaldo/linux-bl808.git
synced 2025-06-17 20:25:19 +00:00
fs: introduce new truncate sequence
Introduce a new truncate calling sequence into fs/mm subsystems. Rather than setattr > vmtruncate > truncate, have filesystems call their truncate sequence from ->setattr if filesystem-specific operations are required. vmtruncate is deprecated, and truncate_pagecache and inode_newsize_ok helpers introduced previously should be used. simple_setattr is introduced for simple in-RAM filesystems to implement the new truncate sequence. Eventually all filesystems should be converted to implement a setattr, and the default code in notify_change should go away. simple_setsize is also introduced to perform just the ATTR_SIZE portion of simple_setattr (i.e. changing i_size and trimming pagecache). To implement the new truncate sequence: - filesystem-specific manipulations (e.g. freeing blocks) must be done in the setattr method rather than ->truncate. - vmtruncate cannot be used by core code to trim blocks past i_size in the event of write failure after allocation, so this must be performed in the fs code. - convert usage of helpers block_write_begin, nobh_write_begin, cont_write_begin, and *blockdev_direct_IO* to use _newtrunc postfixed variants. These avoid calling vmtruncate to trim blocks (see previous). - inode_setattr should not be used. generic_setattr is a new function to be used to copy simple attributes into the generic inode. - make use of the better opportunity to handle errors with the new sequence. The big problem with the previous calling sequence: the filesystem is not called until i_size has already changed. This means it is not allowed to fail the call, and also it does not know what the previous i_size was. Also, generic code calling vmtruncate to truncate allocated blocks in case of error had no good way to return a meaningful error (or, for example, atomically handle block deallocation). Cc: Christoph Hellwig <hch@lst.de> Acked-by: Jan Kara <jack@suse.cz> Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
7000d3c424
commit
7bb46a6734
8 changed files with 300 additions and 63 deletions
123
fs/buffer.c
123
fs/buffer.c
|
@ -1949,14 +1949,11 @@ static int __block_commit_write(struct inode *inode, struct page *page,
|
|||
}
|
||||
|
||||
/*
|
||||
* block_write_begin takes care of the basic task of block allocation and
|
||||
* bringing partial write blocks uptodate first.
|
||||
*
|
||||
* If *pagep is not NULL, then block_write_begin uses the locked page
|
||||
* at *pagep rather than allocating its own. In this case, the page will
|
||||
* not be unlocked or deallocated on failure.
|
||||
* Filesystems implementing the new truncate sequence should use the
|
||||
* _newtrunc postfix variant which won't incorrectly call vmtruncate.
|
||||
* The filesystem needs to handle block truncation upon failure.
|
||||
*/
|
||||
int block_write_begin(struct file *file, struct address_space *mapping,
|
||||
int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned flags,
|
||||
struct page **pagep, void **fsdata,
|
||||
get_block_t *get_block)
|
||||
|
@ -1992,20 +1989,50 @@ int block_write_begin(struct file *file, struct address_space *mapping,
|
|||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
*pagep = NULL;
|
||||
|
||||
/*
|
||||
* prepare_write() may have instantiated a few blocks
|
||||
* outside i_size. Trim these off again. Don't need
|
||||
* i_size_read because we hold i_mutex.
|
||||
*/
|
||||
if (pos + len > inode->i_size)
|
||||
vmtruncate(inode, inode->i_size);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
return status;
|
||||
}
|
||||
EXPORT_SYMBOL(block_write_begin_newtrunc);
|
||||
|
||||
/*
|
||||
* block_write_begin takes care of the basic task of block allocation and
|
||||
* bringing partial write blocks uptodate first.
|
||||
*
|
||||
* If *pagep is not NULL, then block_write_begin uses the locked page
|
||||
* at *pagep rather than allocating its own. In this case, the page will
|
||||
* not be unlocked or deallocated on failure.
|
||||
*/
|
||||
int block_write_begin(struct file *file, struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned flags,
|
||||
struct page **pagep, void **fsdata,
|
||||
get_block_t *get_block)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = block_write_begin_newtrunc(file, mapping, pos, len, flags,
|
||||
pagep, fsdata, get_block);
|
||||
|
||||
/*
|
||||
* prepare_write() may have instantiated a few blocks
|
||||
* outside i_size. Trim these off again. Don't need
|
||||
* i_size_read because we hold i_mutex.
|
||||
*
|
||||
* Filesystems which pass down their own page also cannot
|
||||
* call into vmtruncate here because it would lead to lock
|
||||
* inversion problems (*pagep is locked). This is a further
|
||||
* example of where the old truncate sequence is inadequate.
|
||||
*/
|
||||
if (unlikely(ret) && *pagep == NULL) {
|
||||
loff_t isize = mapping->host->i_size;
|
||||
if (pos + len > isize)
|
||||
vmtruncate(mapping->host, isize);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(block_write_begin);
|
||||
|
||||
int block_write_end(struct file *file, struct address_space *mapping,
|
||||
|
@ -2324,7 +2351,7 @@ out:
|
|||
* For moronic filesystems that do not allow holes in file.
|
||||
* We may have to extend the file.
|
||||
*/
|
||||
int cont_write_begin(struct file *file, struct address_space *mapping,
|
||||
int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned flags,
|
||||
struct page **pagep, void **fsdata,
|
||||
get_block_t *get_block, loff_t *bytes)
|
||||
|
@ -2345,11 +2372,30 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
|
|||
}
|
||||
|
||||
*pagep = NULL;
|
||||
err = block_write_begin(file, mapping, pos, len,
|
||||
err = block_write_begin_newtrunc(file, mapping, pos, len,
|
||||
flags, pagep, fsdata, get_block);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL(cont_write_begin_newtrunc);
|
||||
|
||||
int cont_write_begin(struct file *file, struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned flags,
|
||||
struct page **pagep, void **fsdata,
|
||||
get_block_t *get_block, loff_t *bytes)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
|
||||
pagep, fsdata, get_block, bytes);
|
||||
if (unlikely(ret)) {
|
||||
loff_t isize = mapping->host->i_size;
|
||||
if (pos + len > isize)
|
||||
vmtruncate(mapping->host, isize);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(cont_write_begin);
|
||||
|
||||
int block_prepare_write(struct page *page, unsigned from, unsigned to,
|
||||
|
@ -2381,7 +2427,7 @@ EXPORT_SYMBOL(block_commit_write);
|
|||
*
|
||||
* We are not allowed to take the i_mutex here so we have to play games to
|
||||
* protect against truncate races as the page could now be beyond EOF. Because
|
||||
* vmtruncate() writes the inode size before removing pages, once we have the
|
||||
* truncate writes the inode size before removing pages, once we have the
|
||||
* page lock we can determine safely if the page is beyond EOF. If it is not
|
||||
* beyond EOF, then the page is guaranteed safe against truncation until we
|
||||
* unlock the page.
|
||||
|
@ -2464,10 +2510,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
|
|||
}
|
||||
|
||||
/*
|
||||
* On entry, the page is fully not uptodate.
|
||||
* On exit the page is fully uptodate in the areas outside (from,to)
|
||||
* Filesystems implementing the new truncate sequence should use the
|
||||
* _newtrunc postfix variant which won't incorrectly call vmtruncate.
|
||||
* The filesystem needs to handle block truncation upon failure.
|
||||
*/
|
||||
int nobh_write_begin(struct file *file, struct address_space *mapping,
|
||||
int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned flags,
|
||||
struct page **pagep, void **fsdata,
|
||||
get_block_t *get_block)
|
||||
|
@ -2500,8 +2547,8 @@ int nobh_write_begin(struct file *file, struct address_space *mapping,
|
|||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
*pagep = NULL;
|
||||
return block_write_begin(file, mapping, pos, len, flags, pagep,
|
||||
fsdata, get_block);
|
||||
return block_write_begin_newtrunc(file, mapping, pos, len,
|
||||
flags, pagep, fsdata, get_block);
|
||||
}
|
||||
|
||||
if (PageMappedToDisk(page))
|
||||
|
@ -2605,8 +2652,34 @@ out_release:
|
|||
page_cache_release(page);
|
||||
*pagep = NULL;
|
||||
|
||||
if (pos + len > inode->i_size)
|
||||
vmtruncate(inode, inode->i_size);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(nobh_write_begin_newtrunc);
|
||||
|
||||
/*
|
||||
* On entry, the page is fully not uptodate.
|
||||
* On exit the page is fully uptodate in the areas outside (from,to)
|
||||
*/
|
||||
int nobh_write_begin(struct file *file, struct address_space *mapping,
|
||||
loff_t pos, unsigned len, unsigned flags,
|
||||
struct page **pagep, void **fsdata,
|
||||
get_block_t *get_block)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags,
|
||||
pagep, fsdata, get_block);
|
||||
|
||||
/*
|
||||
* prepare_write() may have instantiated a few blocks
|
||||
* outside i_size. Trim these off again. Don't need
|
||||
* i_size_read because we hold i_mutex.
|
||||
*/
|
||||
if (unlikely(ret)) {
|
||||
loff_t isize = mapping->host->i_size;
|
||||
if (pos + len > isize)
|
||||
vmtruncate(mapping->host, isize);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue