Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2

* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (32 commits)
  [PATCH] ocfs2: zero_user_page conversion
  ocfs2: Support xfs style space reservation ioctls
  ocfs2: support for removing file regions
  ocfs2: update truncate handling of partial clusters
  ocfs2: btree support for removal of arbirtrary extents
  ocfs2: Support creation of unwritten extents
  ocfs2: support writing of unwritten extents
  ocfs2: small cleanup of ocfs2_write_begin_nolock()
  ocfs2: btree changes for unwritten extents
  ocfs2: abstract btree growing calls
  ocfs2: use all extent block suballocators
  ocfs2: plug truncate into cached dealloc routines
  ocfs2: simplify deallocation locking
  ocfs2: harden buffer check during mapping of page blocks
  ocfs2: shared writeable mmap
  ocfs2: factor out write aops into nolock variants
  ocfs2: rework ocfs2_buffered_write_cluster()
  ocfs2: take ip_alloc_sem during entire truncate
  ocfs2: Add "preferred slot" mount option
  [KJ PATCH] Replacing memset(<addr>,0,PAGE_SIZE) with clear_page() in fs/ocfs2/dlm/dlmrecovery.c
  ...
This commit is contained in:
Linus Torvalds 2007-07-16 10:52:55 -07:00
commit add096909d
39 changed files with 4650 additions and 1081 deletions

View file

@ -29,10 +29,11 @@
struct configfs_dirent {
atomic_t s_count;
int s_dependent_count;
struct list_head s_sibling;
struct list_head s_children;
struct list_head s_links;
void * s_element;
void * s_element;
int s_type;
umode_t s_mode;
struct dentry * s_dentry;
@ -41,8 +42,8 @@ struct configfs_dirent {
#define CONFIGFS_ROOT 0x0001
#define CONFIGFS_DIR 0x0002
#define CONFIGFS_ITEM_ATTR 0x0004
#define CONFIGFS_ITEM_LINK 0x0020
#define CONFIGFS_ITEM_ATTR 0x0004
#define CONFIGFS_ITEM_LINK 0x0020
#define CONFIGFS_USET_DIR 0x0040
#define CONFIGFS_USET_DEFAULT 0x0080
#define CONFIGFS_USET_DROPPING 0x0100

View file

@ -355,6 +355,10 @@ static int configfs_detach_prep(struct dentry *dentry)
/* Mark that we've taken i_mutex */
sd->s_type |= CONFIGFS_USET_DROPPING;
/*
* Yup, recursive. If there's a problem, blame
* deep nesting of default_groups
*/
ret = configfs_detach_prep(sd->s_dentry);
if (!ret)
continue;
@ -562,7 +566,7 @@ static int populate_groups(struct config_group *group)
/*
* All of link_obj/unlink_obj/link_group/unlink_group require that
* subsys->su_sem is held.
* subsys->su_mutex is held.
*/
static void unlink_obj(struct config_item *item)
@ -713,6 +717,28 @@ static void configfs_detach_group(struct config_item *item)
configfs_detach_item(item);
}
/*
* After the item has been detached from the filesystem view, we are
* ready to tear it out of the hierarchy. Notify the client before
* we do that so they can perform any cleanup that requires
* navigating the hierarchy. A client does not need to provide this
* callback. The subsystem mutex (su_mutex) MUST be held by the caller, and
* references must be valid for both items. It also assumes the
* caller has validated ci_type.
*/
static void client_disconnect_notify(struct config_item *parent_item,
				     struct config_item *item)
{
	struct config_item_type *type = parent_item->ci_type;

	/* The parent must have a type; callers are expected to have checked. */
	BUG_ON(!type);

	/* The disconnect_notify callback is optional. */
	if (!type->ct_group_ops || !type->ct_group_ops->disconnect_notify)
		return;

	type->ct_group_ops->disconnect_notify(to_config_group(parent_item),
					      item);
}
/*
* Drop the initial reference from make_item()/make_group()
* This function assumes that reference is held on item
@ -733,11 +759,244 @@ static void client_drop_item(struct config_item *parent_item,
*/
if (type->ct_group_ops && type->ct_group_ops->drop_item)
type->ct_group_ops->drop_item(to_config_group(parent_item),
item);
item);
else
config_item_put(item);
}
#ifdef DEBUG
/*
 * Debug helper: print the name of @sd followed by one line for every
 * CONFIGFS_* type flag set in sd->s_type.  @level is used as the printk
 * field width, so deeper entries are indented further.
 */
static void configfs_dump_one(struct configfs_dirent *sd, int level)
{
	printk(KERN_INFO "%*s\"%s\":\n", level, " ", configfs_get_name(sd));

	/*
	 * Wrapped in do { } while (0) so the macro behaves as a single
	 * statement and cannot capture a following "else".
	 */
#define type_print(_type)						\
	do {								\
		if (sd->s_type & (_type))				\
			printk(KERN_INFO "%*s %s\n", level, " ", #_type); \
	} while (0)

	type_print(CONFIGFS_ROOT);
	type_print(CONFIGFS_DIR);
	type_print(CONFIGFS_ITEM_ATTR);
	type_print(CONFIGFS_ITEM_LINK);
	type_print(CONFIGFS_USET_DIR);
	type_print(CONFIGFS_USET_DEFAULT);
	type_print(CONFIGFS_USET_DROPPING);
#undef type_print
}
/*
 * Debug helper: dump @sd and, if it is a directory or the root, recurse
 * into each child with the indent level increased by two.  Stops at and
 * returns the first non-zero result from a child; otherwise returns 0.
 */
static int configfs_dump(struct configfs_dirent *sd, int level)
{
	struct configfs_dirent *child_sd;

	configfs_dump_one(sd, level);

	/* Only directories (and the root) have children to walk. */
	if (!(sd->s_type & (CONFIGFS_DIR|CONFIGFS_ROOT)))
		return 0;

	list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
		int err = configfs_dump(child_sd, level + 2);

		if (err)
			return err;
	}

	return 0;
}
#endif
/*
* configfs_depend_item() and configfs_undepend_item()
*
* WARNING: Do not call these from a configfs callback!
*
* This describes these functions and their helpers.
*
* Allow another kernel system to depend on a config_item. If this
* happens, the item cannot go away until the dependant can live without
* it. The idea is to give client modules as simple an interface as
* possible. When a system asks them to depend on an item, they just
* call configfs_depend_item(). If the item is live and the client
* driver is in good shape, we'll happily do the work for them.
*
* Why is the locking complex? Because configfs uses the VFS to handle
* all locking, but this function is called outside the normal
* VFS->configfs path. So it must take VFS locks to prevent the
* VFS->configfs stuff (configfs_mkdir(), configfs_rmdir(), etc). This is
* why you can't call these functions underneath configfs callbacks.
*
* Note, btw, that this can be called at *any* time, even when a configfs
* subsystem isn't registered, or when configfs is loading or unloading.
* Just like configfs_register_subsystem(). So we take the same
* precautions. We pin the filesystem. We lock each i_mutex _in_order_
* on our way down the tree. If we can find the target item in the
* configfs tree, it must be part of the subsystem tree as well, so we
* do not need the subsystem mutex. Holding the i_mutex chain locks
* out mkdir() and rmdir(), who might be racing us.
*/
/*
* configfs_depend_prep()
*
* Only subdirectories count here. Files (CONFIGFS_NOT_PINNED) are
* attributes. This is similar to, but not the same as, configfs_detach_prep().
* Note that configfs_detach_prep() expects the parent to be locked when it
* is called, but we lock the parent *inside* configfs_depend_prep(). We
* do that so we can unlock it if we find nothing.
*
* Here we do a depth-first search of the dentry hierarchy looking for
* our object. We take i_mutex on each step of the way down. IT IS
* ESSENTIAL THAT i_mutex LOCKING IS ORDERED. If we come back up a branch,
* we'll drop the i_mutex.
*
* If the target is not found, -ENOENT is bubbled up and we have released
* all locks. If the target was found, the locks will be cleared by
* configfs_depend_rollback().
*
* This adds a requirement that all config_items be unique!
*
* This is recursive because the locking traversal is tricky. There isn't
* much on the stack, though, so folks that need this function - be careful
* about your stack! Patches will be accepted to make it iterative.
*/
static int configfs_depend_prep(struct dentry *origin,
				struct config_item *target)
{
	struct configfs_dirent *child_sd, *sd;
	int ret = 0;

	/*
	 * Validate origin before reading origin->d_fsdata; checking it
	 * after the dereference would never catch a NULL origin.
	 */
	BUG_ON(!origin);
	sd = origin->d_fsdata;
	BUG_ON(!sd);

	/* Lock this guy on the way down */
	mutex_lock(&sd->s_dentry->d_inode->i_mutex);
	if (sd->s_element == target)  /* Boo-yah */
		goto out;

	list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
		if (child_sd->s_type & CONFIGFS_DIR) {
			ret = configfs_depend_prep(child_sd->s_dentry,
						   target);
			if (!ret)
				goto out;  /* Child path boo-yah */
		}
	}

	/*
	 * We looped all our children and didn't find target: drop the
	 * lock taken above and report -ENOENT to the caller.
	 */
	mutex_unlock(&sd->s_dentry->d_inode->i_mutex);
	ret = -ENOENT;

out:
	/* On success (0) this level's i_mutex is intentionally still held. */
	return ret;
}
/*
* This is ONLY called if configfs_depend_prep() did its job. So we can
* trust the entire path from item back up to origin.
*
* We walk backwards from item, unlocking each i_mutex. We finish by
* unlocking origin.
*/
static void configfs_depend_rollback(struct dentry *origin,
				     struct config_item *item)
{
	struct dentry *cur;

	/* Climb from the item's dentry towards origin, unlocking as we go. */
	for (cur = item->ci_dentry; cur != origin; cur = cur->d_parent)
		mutex_unlock(&cur->d_inode->i_mutex);

	/* Finish by unlocking origin itself. */
	mutex_unlock(&origin->d_inode->i_mutex);
}
int configfs_depend_item(struct configfs_subsystem *subsys,
			 struct config_item *target)
{
	struct config_item *subsys_item = &subsys->su_group.cg_item;
	struct configfs_dirent *cursor, *root_sd, *subsys_sd = NULL;
	struct configfs_dirent *target_sd;
	int ret;

	/*
	 * Pin the configfs filesystem so that its root can be accessed
	 * safely.
	 */
	ret = configfs_pin_fs();
	if (ret)
		return ret;

	/*
	 * Take the root directory's i_mutex while we check that the
	 * subsystem is really registered; this locks out
	 * configfs_[un]register_subsystem().
	 */
	mutex_lock(&configfs_sb->s_root->d_inode->i_mutex);

	/* Look for the subsystem's directory among the root's children. */
	root_sd = configfs_sb->s_root->d_fsdata;
	list_for_each_entry(cursor, &root_sd->s_children, s_sibling) {
		if ((cursor->s_type & CONFIGFS_DIR) &&
		    cursor->s_element == subsys_item) {
			subsys_sd = cursor;
			break;
		}
	}

	if (!subsys_sd) {
		ret = -ENOENT;
		goto unlock_root;
	}

	/* From here on subsys/subsys_item can be trusted. */

	/* Walk the tree taking i_mutex at each level; 0 means target found. */
	ret = configfs_depend_prep(subsys_sd->s_dentry, target);
	if (ret)
		goto unlock_root;

	/* Every i_mutex from the subsystem down to the target is held. */
	target_sd = target->ci_dentry->d_fsdata;
	target_sd->s_dependent_count++;

	configfs_depend_rollback(subsys_sd->s_dentry, target);

unlock_root:
	mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);

	/*
	 * On success the fs stays pinned by other means; on failure we no
	 * longer need it.  Either way our own pin is released here.
	 */
	configfs_release_fs();

	return ret;
}
EXPORT_SYMBOL(configfs_depend_item);
/*
* Release the dependent linkage. This is much simpler than
* configfs_depend_item() because we know that the client driver is
* pinned, thus the subsystem is pinned, and therefore configfs is pinned.
*/
void configfs_undepend_item(struct configfs_subsystem *subsys,
struct config_item *target)
{
struct configfs_dirent *sd;
/*
* Since we can trust everything is pinned, we just need i_mutex
* on the item.
*/
mutex_lock(&target->ci_dentry->d_inode->i_mutex);
sd = target->ci_dentry->d_fsdata;
BUG_ON(sd->s_dependent_count < 1);
sd->s_dependent_count -= 1;
/*
* After this unlock, we cannot trust the item to stay alive!
* DO NOT REFERENCE item after this unlock.
*/
mutex_unlock(&target->ci_dentry->d_inode->i_mutex);
}
EXPORT_SYMBOL(configfs_undepend_item);
static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
@ -783,7 +1042,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
down(&subsys->su_sem);
mutex_lock(&subsys->su_mutex);
group = NULL;
item = NULL;
if (type->ct_group_ops->make_group) {
@ -797,7 +1056,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
if (item)
link_obj(parent_item, item);
}
up(&subsys->su_sem);
mutex_unlock(&subsys->su_mutex);
kfree(name);
if (!item) {
@ -841,13 +1100,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
out_unlink:
if (ret) {
/* Tear down everything we built up */
down(&subsys->su_sem);
mutex_lock(&subsys->su_mutex);
client_disconnect_notify(parent_item, item);
if (group)
unlink_group(group);
else
unlink_obj(item);
client_drop_item(parent_item, item);
up(&subsys->su_sem);
mutex_unlock(&subsys->su_mutex);
if (module_got)
module_put(owner);
@ -881,6 +1143,13 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
if (sd->s_type & CONFIGFS_USET_DEFAULT)
return -EPERM;
/*
* Here's where we check for dependents. We're protected by
* i_mutex.
*/
if (sd->s_dependent_count)
return -EBUSY;
/* Get a working ref until we have the child */
parent_item = configfs_get_config_item(dentry->d_parent);
subsys = to_config_group(parent_item)->cg_subsys;
@ -910,17 +1179,19 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
if (sd->s_type & CONFIGFS_USET_DIR) {
configfs_detach_group(item);
down(&subsys->su_sem);
mutex_lock(&subsys->su_mutex);
client_disconnect_notify(parent_item, item);
unlink_group(to_config_group(item));
} else {
configfs_detach_item(item);
down(&subsys->su_sem);
mutex_lock(&subsys->su_mutex);
client_disconnect_notify(parent_item, item);
unlink_obj(item);
}
client_drop_item(parent_item, item);
up(&subsys->su_sem);
mutex_unlock(&subsys->su_mutex);
/* Drop our reference from above */
config_item_put(item);

View file

@ -27,19 +27,26 @@
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
#include <linux/configfs.h>
#include "configfs_internal.h"
/*
* A simple attribute can only be 4096 characters. Why 4k? Because the
* original code limited it to PAGE_SIZE. That's a bad idea, though,
* because an attribute of 16k on ia64 won't work on x86. So we limit to
* 4k, our minimum common page size.
*/
#define SIMPLE_ATTR_SIZE 4096
struct configfs_buffer {
size_t count;
loff_t pos;
char * page;
struct configfs_item_operations * ops;
struct semaphore sem;
struct mutex mutex;
int needs_read_fill;
};
@ -69,7 +76,7 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf
count = ops->show_attribute(item,attr,buffer->page);
buffer->needs_read_fill = 0;
BUG_ON(count > (ssize_t)PAGE_SIZE);
BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE);
if (count >= 0)
buffer->count = count;
else
@ -102,7 +109,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
struct configfs_buffer * buffer = file->private_data;
ssize_t retval = 0;
down(&buffer->sem);
mutex_lock(&buffer->mutex);
if (buffer->needs_read_fill) {
if ((retval = fill_read_buffer(file->f_path.dentry,buffer)))
goto out;
@ -112,7 +119,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
buffer->count);
out:
up(&buffer->sem);
mutex_unlock(&buffer->mutex);
return retval;
}
@ -137,8 +144,8 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size
if (!buffer->page)
return -ENOMEM;
if (count >= PAGE_SIZE)
count = PAGE_SIZE - 1;
if (count >= SIMPLE_ATTR_SIZE)
count = SIMPLE_ATTR_SIZE - 1;
error = copy_from_user(buffer->page,buf,count);
buffer->needs_read_fill = 1;
/* if buf is assumed to contain a string, terminate it by \0,
@ -193,13 +200,13 @@ configfs_write_file(struct file *file, const char __user *buf, size_t count, lof
struct configfs_buffer * buffer = file->private_data;
ssize_t len;
down(&buffer->sem);
mutex_lock(&buffer->mutex);
len = fill_write_buffer(buffer, buf, count);
if (len > 0)
len = flush_write_buffer(file->f_path.dentry, buffer, count);
if (len > 0)
*ppos += len;
up(&buffer->sem);
mutex_unlock(&buffer->mutex);
return len;
}
@ -253,7 +260,7 @@ static int check_perm(struct inode * inode, struct file * file)
error = -ENOMEM;
goto Enomem;
}
init_MUTEX(&buffer->sem);
mutex_init(&buffer->mutex);
buffer->needs_read_fill = 1;
buffer->ops = ops;
file->private_data = buffer;
@ -292,6 +299,7 @@ static int configfs_release(struct inode * inode, struct file * filp)
if (buffer) {
if (buffer->page)
free_page((unsigned long)buffer->page);
mutex_destroy(&buffer->mutex);
kfree(buffer);
}
return 0;

View file

@ -62,7 +62,6 @@ void config_item_init(struct config_item * item)
* dynamically allocated string that @item->ci_name points to.
* Otherwise, use the static @item->ci_namebuf array.
*/
int config_item_set_name(struct config_item * item, const char * fmt, ...)
{
int error = 0;
@ -139,12 +138,7 @@ struct config_item * config_item_get(struct config_item * item)
return item;
}
/**
* config_item_cleanup - free config_item resources.
* @item: item.
*/
void config_item_cleanup(struct config_item * item)
static void config_item_cleanup(struct config_item * item)
{
struct config_item_type * t = item->ci_type;
struct config_group * s = item->ci_group;
@ -179,39 +173,35 @@ void config_item_put(struct config_item * item)
kref_put(&item->ci_kref, config_item_release);
}
/**
* config_group_init - initialize a group for use
* @group: group to initialize
*
* Initializes the embedded config_item with config_item_init() and sets
* up @group->cg_children as an empty list.
*/
void config_group_init(struct config_group *group)
{
config_item_init(&group->cg_item);
INIT_LIST_HEAD(&group->cg_children);
}
/**
* config_group_find_obj - search for item in group.
* config_group_find_item - search for item in group.
* @group: group we're looking in.
* @name: item's name.
*
* Lock group via @group->cg_subsys, and iterate over @group->cg_list,
* looking for a matching config_item. If matching item is found
* take a reference and return the item.
* Iterate over @group->cg_list, looking for a matching config_item.
* If matching item is found take a reference and return the item.
* Caller must have locked group via @group->cg_subsys->su_mutex.
*/
struct config_item * config_group_find_obj(struct config_group * group, const char * name)
struct config_item *config_group_find_item(struct config_group *group,
const char *name)
{
struct list_head * entry;
struct config_item * ret = NULL;
/* XXX LOCKING! */
list_for_each(entry,&group->cg_children) {
struct config_item * item = to_item(entry);
if (config_item_name(item) &&
!strcmp(config_item_name(item), name)) {
!strcmp(config_item_name(item), name)) {
ret = config_item_get(item);
break;
}
@ -219,9 +209,8 @@ struct config_item * config_group_find_obj(struct config_group * group, const ch
return ret;
}
EXPORT_SYMBOL(config_item_init);
EXPORT_SYMBOL(config_group_init);
EXPORT_SYMBOL(config_item_get);
EXPORT_SYMBOL(config_item_put);
EXPORT_SYMBOL(config_group_find_obj);
EXPORT_SYMBOL(config_group_find_item);

View file

@ -133,14 +133,6 @@ static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
return len;
}
#define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \
.attr = { .ca_name = __stringify(_name), \
.ca_mode = _mode, \
.ca_owner = THIS_MODULE }, \
.show = _read, \
.store = _write, \
}
#define CLUSTER_ATTR(name, check_zero) \
static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \
{ \
@ -615,7 +607,7 @@ static struct clusters clusters_root = {
int dlm_config_init(void)
{
config_group_init(&clusters_root.subsys.su_group);
init_MUTEX(&clusters_root.subsys.su_sem);
mutex_init(&clusters_root.subsys.su_mutex);
return configfs_register_subsystem(&clusters_root.subsys);
}
@ -759,9 +751,9 @@ static struct space *get_space(char *name)
if (!space_list)
return NULL;
down(&space_list->cg_subsys->su_sem);
i = config_group_find_obj(space_list, name);
up(&space_list->cg_subsys->su_sem);
mutex_lock(&space_list->cg_subsys->su_mutex);
i = config_group_find_item(space_list, name);
mutex_unlock(&space_list->cg_subsys->su_mutex);
return to_space(i);
}
@ -780,7 +772,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
if (!comm_list)
return NULL;
down(&clusters_root.subsys.su_sem);
mutex_lock(&clusters_root.subsys.su_mutex);
list_for_each_entry(i, &comm_list->cg_children, ci_entry) {
cm = to_comm(i);
@ -800,7 +792,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
break;
}
}
up(&clusters_root.subsys.su_sem);
mutex_unlock(&clusters_root.subsys.su_mutex);
if (!found)
cm = NULL;

File diff suppressed because it is too large Load diff

View file

@ -34,7 +34,17 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
u32 cpos,
u64 start_blk,
u32 new_clusters,
u8 flags,
struct ocfs2_alloc_context *meta_ac);
struct ocfs2_cached_dealloc_ctxt;
int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
handle_t *handle, u32 cpos, u32 len, u32 phys,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_cached_dealloc_ctxt *dealloc);
int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
u32 cpos, u32 len, handle_t *handle,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_cached_dealloc_ctxt *dealloc);
int ocfs2_num_free_extents(struct ocfs2_super *osb,
struct inode *inode,
struct ocfs2_dinode *fe);
@ -62,17 +72,41 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
struct ocfs2_dinode **tl_copy);
int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
struct ocfs2_dinode *tl_copy);
int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb);
int ocfs2_truncate_log_append(struct ocfs2_super *osb,
handle_t *handle,
u64 start_blk,
unsigned int num_clusters);
int __ocfs2_flush_truncate_log(struct ocfs2_super *osb);
/*
* Process local structure which describes the block unlinks done
* during an operation. This is populated via
* ocfs2_cache_block_dealloc().
*
* ocfs2_run_deallocs() should be called after the potentially
* de-allocating routines. No journal handles should be open, and most
* locks should have been dropped.
*/
struct ocfs2_cached_dealloc_ctxt {
/*
 * Set to NULL by ocfs2_init_dealloc_ctxt().
 * NOTE(review): presumably the head of a chain of per-slot free
 * lists - confirm against the code that populates it.
 */
struct ocfs2_per_slot_free_list *c_first_suballocator;
};
/*
 * Initialize the dealloc context @c by setting c_first_suballocator
 * to NULL.
 */
static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
{
c->c_first_suballocator = NULL;
}
int ocfs2_run_deallocs(struct ocfs2_super *osb,
struct ocfs2_cached_dealloc_ctxt *ctxt);
struct ocfs2_truncate_context {
struct inode *tc_ext_alloc_inode;
struct buffer_head *tc_ext_alloc_bh;
struct ocfs2_cached_dealloc_ctxt tc_dealloc;
int tc_ext_alloc_locked; /* is it cluster locked? */
/* these get destroyed once it's passed to ocfs2_commit_truncate. */
struct buffer_head *tc_last_eb_bh;
};
int ocfs2_zero_tail_for_truncate(struct inode *inode, handle_t *handle,
u64 new_i_size);
int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
u64 range_start, u64 range_end);
int ocfs2_prepare_truncate(struct ocfs2_super *osb,
struct inode *inode,
struct buffer_head *fe_bh,
@ -84,6 +118,7 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
u32 cpos, struct buffer_head **leaf_bh);
int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster);
/*
* Helper function to look at the # of clusters in an extent record.

File diff suppressed because it is too large Load diff

View file

@ -42,57 +42,22 @@ int walk_page_buffers( handle_t *handle,
int (*fn)( handle_t *handle,
struct buffer_head *bh));
struct ocfs2_write_ctxt;
typedef int (ocfs2_page_writer)(struct inode *, struct ocfs2_write_ctxt *,
u64 *, unsigned int *, unsigned int *);
int ocfs2_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata);
ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos,
size_t count, ocfs2_page_writer *actor,
void *priv);
int ocfs2_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata);
struct ocfs2_write_ctxt {
size_t w_count;
loff_t w_pos;
u32 w_cpos;
unsigned int w_finished_copy;
int ocfs2_write_end_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata);
/* This is true if page_size > cluster_size */
unsigned int w_large_pages;
/* Filler callback and private data */
ocfs2_page_writer *w_write_data_page;
void *w_private;
/* Only valid for the filler callback */
struct page *w_this_page;
unsigned int w_this_page_new;
};
struct ocfs2_buffered_write_priv {
char *b_src_buf;
const struct iovec *b_cur_iov; /* Current iovec */
size_t b_cur_off; /* Offset in the
* current iovec */
};
int ocfs2_map_and_write_user_data(struct inode *inode,
struct ocfs2_write_ctxt *wc,
u64 *p_blkno,
unsigned int *ret_from,
unsigned int *ret_to);
struct ocfs2_splice_write_priv {
struct splice_desc *s_sd;
struct pipe_buffer *s_buf;
struct pipe_inode_info *s_pipe;
/* Neither offset value is ever larger than one page */
unsigned int s_offset;
unsigned int s_buf_offset;
};
int ocfs2_map_and_write_splice_data(struct inode *inode,
struct ocfs2_write_ctxt *wc,
u64 *p_blkno,
unsigned int *ret_from,
unsigned int *ret_to);
int ocfs2_write_begin_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
struct buffer_head *di_bh, struct page *mmap_page);
/* all ocfs2_dio_end_io()'s fault */
#define ocfs2_iocb_is_rw_locked(iocb) \

View file

@ -1335,6 +1335,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
ret = wait_event_interruptible(o2hb_steady_queue,
atomic_read(&reg->hr_steady_iterations) == 0);
if (ret) {
/* We got interrupted (hello ptrace!). Clean up */
spin_lock(&o2hb_live_lock);
hb_task = reg->hr_task;
reg->hr_task = NULL;
@ -1345,7 +1346,16 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
goto out;
}
ret = count;
/* Ok, we were woken. Make sure it wasn't by drop_item() */
spin_lock(&o2hb_live_lock);
hb_task = reg->hr_task;
spin_unlock(&o2hb_live_lock);
if (hb_task)
ret = count;
else
ret = -EIO;
out:
if (filp)
fput(filp);
@ -1523,6 +1533,15 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
if (hb_task)
kthread_stop(hb_task);
/*
* If we're racing a dev_write(), we need to wake them. They will
* check reg->hr_task
*/
if (atomic_read(&reg->hr_steady_iterations) != 0) {
atomic_set(&reg->hr_steady_iterations, 0);
wake_up(&o2hb_steady_queue);
}
config_item_put(item);
}
@ -1665,7 +1684,67 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc,
}
EXPORT_SYMBOL_GPL(o2hb_setup_callback);
int o2hb_register_callback(struct o2hb_callback_func *hc)
/*
 * Look up a heartbeat region on o2hb_all_regions whose config item name
 * equals @region_uuid.  The caller must hold o2hb_live_lock.  Returns the
 * first matching region, or NULL if there is none.
 */
static struct o2hb_region *o2hb_find_region(const char *region_uuid)
{
	struct o2hb_region *cur;

	assert_spin_locked(&o2hb_live_lock);

	list_for_each_entry(cur, &o2hb_all_regions, hr_all_item) {
		if (strcmp(region_uuid, config_item_name(&cur->hr_item)) == 0)
			return cur;
	}

	return NULL;
}
/*
 * Take dependencies on the local node and on the region named by
 * @region_uuid.  Returns 0 on success, -ENOENT if no such region is
 * found, or the error from the o2nm_depend_*() calls.  If depending on
 * the region fails, the local-node dependency is dropped again.
 */
static int o2hb_region_get(const char *region_uuid)
{
	struct o2hb_region *reg;
	int ret;

	spin_lock(&o2hb_live_lock);
	reg = o2hb_find_region(region_uuid);
	spin_unlock(&o2hb_live_lock);

	if (!reg)
		return -ENOENT;

	ret = o2nm_depend_this_node();
	if (ret)
		return ret;

	/*
	 * NOTE(review): reg is used here after o2hb_live_lock has been
	 * dropped - confirm what keeps the region alive at this point.
	 */
	ret = o2nm_depend_item(&reg->hr_item);
	if (ret)
		o2nm_undepend_this_node();

	return ret;
}
/*
 * Undo o2hb_region_get(): if a region named @region_uuid is found, drop
 * the dependency on it and then the dependency on the local node.  Does
 * nothing when no such region exists.
 */
static void o2hb_region_put(const char *region_uuid)
{
	struct o2hb_region *reg;

	spin_lock(&o2hb_live_lock);
	reg = o2hb_find_region(region_uuid);
	spin_unlock(&o2hb_live_lock);

	if (!reg)
		return;

	o2nm_undepend_item(&reg->hr_item);
	o2nm_undepend_this_node();
}
int o2hb_register_callback(const char *region_uuid,
struct o2hb_callback_func *hc)
{
struct o2hb_callback_func *tmp;
struct list_head *iter;
@ -1681,6 +1760,12 @@ int o2hb_register_callback(struct o2hb_callback_func *hc)
goto out;
}
if (region_uuid) {
ret = o2hb_region_get(region_uuid);
if (ret)
goto out;
}
down_write(&o2hb_callback_sem);
list_for_each(iter, &hbcall->list) {
@ -1702,16 +1787,21 @@ out:
}
EXPORT_SYMBOL_GPL(o2hb_register_callback);
void o2hb_unregister_callback(struct o2hb_callback_func *hc)
void o2hb_unregister_callback(const char *region_uuid,
struct o2hb_callback_func *hc)
{
BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n",
__builtin_return_address(0), hc);
/* XXX Can this happen _with_ a region reference? */
if (list_empty(&hc->hc_item))
return;
if (region_uuid)
o2hb_region_put(region_uuid);
down_write(&o2hb_callback_sem);
list_del_init(&hc->hc_item);

View file

@ -69,8 +69,10 @@ void o2hb_setup_callback(struct o2hb_callback_func *hc,
o2hb_cb_func *func,
void *data,
int priority);
int o2hb_register_callback(struct o2hb_callback_func *hc);
void o2hb_unregister_callback(struct o2hb_callback_func *hc);
int o2hb_register_callback(const char *region_uuid,
struct o2hb_callback_func *hc);
void o2hb_unregister_callback(const char *region_uuid,
struct o2hb_callback_func *hc);
void o2hb_fill_node_map(unsigned long *map,
unsigned bytes);
void o2hb_init(void);

View file

@ -900,6 +900,46 @@ static struct o2nm_cluster_group o2nm_cluster_group = {
},
};
/*
 * Thin wrapper: passes @item to configfs_depend_item() together with the
 * o2nm cluster group's configfs subsystem and returns its result.
 */
int o2nm_depend_item(struct config_item *item)
{
return configfs_depend_item(&o2nm_cluster_group.cs_subsys, item);
}
/*
 * Thin wrapper: passes @item to configfs_undepend_item() together with
 * the o2nm cluster group's configfs subsystem.
 */
void o2nm_undepend_item(struct config_item *item)
{
configfs_undepend_item(&o2nm_cluster_group.cs_subsys, item);
}
/*
 * Take a dependency on the local node's config item.  Returns -EINVAL
 * when the local node cannot be looked up, otherwise the result of
 * o2nm_depend_item().  The node reference obtained for the lookup is
 * dropped before returning.
 */
int o2nm_depend_this_node(void)
{
	struct o2nm_node *local_node = o2nm_get_node_by_num(o2nm_this_node());
	int ret;

	if (!local_node)
		return -EINVAL;

	ret = o2nm_depend_item(&local_node->nd_item);
	o2nm_node_put(local_node);

	return ret;
}
/*
 * Drop the dependency on the local node's config item.  The local node
 * must be resolvable here; failing to find it is treated as a bug.
 */
void o2nm_undepend_this_node(void)
{
	struct o2nm_node *local_node = o2nm_get_node_by_num(o2nm_this_node());

	BUG_ON(!local_node);

	o2nm_undepend_item(&local_node->nd_item);
	o2nm_node_put(local_node);
}
static void __exit exit_o2nm(void)
{
if (ocfs2_table_header)
@ -934,7 +974,7 @@ static int __init init_o2nm(void)
goto out_sysctl;
config_group_init(&o2nm_cluster_group.cs_subsys.su_group);
init_MUTEX(&o2nm_cluster_group.cs_subsys.su_sem);
mutex_init(&o2nm_cluster_group.cs_subsys.su_mutex);
ret = configfs_register_subsystem(&o2nm_cluster_group.cs_subsys);
if (ret) {
printk(KERN_ERR "nodemanager: Registration returned %d\n", ret);

View file

@ -77,4 +77,9 @@ struct o2nm_node *o2nm_get_node_by_ip(__be32 addr);
void o2nm_node_get(struct o2nm_node *node);
void o2nm_node_put(struct o2nm_node *node);
int o2nm_depend_item(struct config_item *item);
void o2nm_undepend_item(struct config_item *item);
int o2nm_depend_this_node(void);
void o2nm_undepend_this_node(void);
#endif /* O2CLUSTER_NODEMANAGER_H */

View file

@ -261,14 +261,12 @@ out:
static void o2net_complete_nodes_nsw(struct o2net_node *nn)
{
struct list_head *iter, *tmp;
struct o2net_status_wait *nsw, *tmp;
unsigned int num_kills = 0;
struct o2net_status_wait *nsw;
assert_spin_locked(&nn->nn_lock);
list_for_each_safe(iter, tmp, &nn->nn_status_list) {
nsw = list_entry(iter, struct o2net_status_wait, ns_node_item);
list_for_each_entry_safe(nsw, tmp, &nn->nn_status_list, ns_node_item) {
o2net_complete_nsw_locked(nn, nsw, O2NET_ERR_DIED, 0);
num_kills++;
}
@ -764,13 +762,10 @@ EXPORT_SYMBOL_GPL(o2net_register_handler);
void o2net_unregister_handler_list(struct list_head *list)
{
struct list_head *pos, *n;
struct o2net_msg_handler *nmh;
struct o2net_msg_handler *nmh, *n;
write_lock(&o2net_handler_lock);
list_for_each_safe(pos, n, list) {
nmh = list_entry(pos, struct o2net_msg_handler,
nh_unregister_item);
list_for_each_entry_safe(nmh, n, list, nh_unregister_item) {
mlog(ML_TCP, "unregistering handler func %p type %u key %08x\n",
nmh->nh_func, nmh->nh_msg_type, nmh->nh_key);
rb_erase(&nmh->nh_node, &o2net_handler_tree);
@ -1638,8 +1633,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
void o2net_unregister_hb_callbacks(void)
{
o2hb_unregister_callback(&o2net_hb_up);
o2hb_unregister_callback(&o2net_hb_down);
o2hb_unregister_callback(NULL, &o2net_hb_up);
o2hb_unregister_callback(NULL, &o2net_hb_down);
}
int o2net_register_hb_callbacks(void)
@ -1651,9 +1646,9 @@ int o2net_register_hb_callbacks(void)
o2hb_setup_callback(&o2net_hb_up, O2HB_NODE_UP_CB,
o2net_hb_node_up_cb, NULL, O2NET_HB_PRI);
ret = o2hb_register_callback(&o2net_hb_up);
ret = o2hb_register_callback(NULL, &o2net_hb_up);
if (ret == 0)
ret = o2hb_register_callback(&o2net_hb_down);
ret = o2hb_register_callback(NULL, &o2net_hb_down);
if (ret)
o2net_unregister_hb_callbacks();

View file

@ -368,7 +368,7 @@ int ocfs2_do_extend_dir(struct super_block *sb,
u32 offset = OCFS2_I(dir)->ip_clusters;
status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset,
1, parent_fe_bh, handle,
1, 0, parent_fe_bh, handle,
data_ac, meta_ac, NULL);
BUG_ON(status == -EAGAIN);
if (status < 0) {

View file

@ -1128,8 +1128,8 @@ bail:
static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm)
{
o2hb_unregister_callback(&dlm->dlm_hb_up);
o2hb_unregister_callback(&dlm->dlm_hb_down);
o2hb_unregister_callback(NULL, &dlm->dlm_hb_up);
o2hb_unregister_callback(NULL, &dlm->dlm_hb_down);
o2net_unregister_handler_list(&dlm->dlm_domain_handlers);
}
@ -1141,13 +1141,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB,
dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI);
status = o2hb_register_callback(&dlm->dlm_hb_down);
status = o2hb_register_callback(NULL, &dlm->dlm_hb_down);
if (status)
goto bail;
o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB,
dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI);
status = o2hb_register_callback(&dlm->dlm_hb_up);
status = o2hb_register_callback(NULL, &dlm->dlm_hb_up);
if (status)
goto bail;

View file

@ -192,25 +192,20 @@ static void dlm_print_one_mle(struct dlm_master_list_entry *mle)
static void dlm_dump_mles(struct dlm_ctxt *dlm)
{
struct dlm_master_list_entry *mle;
struct list_head *iter;
mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name);
spin_lock(&dlm->master_lock);
list_for_each(iter, &dlm->master_list) {
mle = list_entry(iter, struct dlm_master_list_entry, list);
list_for_each_entry(mle, &dlm->master_list, list)
dlm_print_one_mle(mle);
}
spin_unlock(&dlm->master_lock);
}
int dlm_dump_all_mles(const char __user *data, unsigned int len)
{
struct list_head *iter;
struct dlm_ctxt *dlm;
spin_lock(&dlm_domain_lock);
list_for_each(iter, &dlm_domains) {
dlm = list_entry (iter, struct dlm_ctxt, list);
list_for_each_entry(dlm, &dlm_domains, list) {
mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name);
dlm_dump_mles(dlm);
}
@ -454,12 +449,10 @@ static int dlm_find_mle(struct dlm_ctxt *dlm,
char *name, unsigned int namelen)
{
struct dlm_master_list_entry *tmpmle;
struct list_head *iter;
assert_spin_locked(&dlm->master_lock);
list_for_each(iter, &dlm->master_list) {
tmpmle = list_entry(iter, struct dlm_master_list_entry, list);
list_for_each_entry(tmpmle, &dlm->master_list, list) {
if (!dlm_mle_equal(dlm, tmpmle, name, namelen))
continue;
dlm_get_mle(tmpmle);
@ -472,13 +465,10 @@ static int dlm_find_mle(struct dlm_ctxt *dlm,
void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up)
{
struct dlm_master_list_entry *mle;
struct list_head *iter;
assert_spin_locked(&dlm->spinlock);
list_for_each(iter, &dlm->mle_hb_events) {
mle = list_entry(iter, struct dlm_master_list_entry,
hb_events);
list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) {
if (node_up)
dlm_mle_node_up(dlm, mle, NULL, idx);
else
@ -2434,7 +2424,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
int ret;
int i;
int count = 0;
struct list_head *queue, *iter;
struct list_head *queue;
struct dlm_lock *lock;
assert_spin_locked(&res->spinlock);
@ -2453,8 +2443,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
ret = 0;
queue = &res->granted;
for (i = 0; i < 3; i++) {
list_for_each(iter, queue) {
lock = list_entry(iter, struct dlm_lock, list);
list_for_each_entry(lock, queue, list) {
++count;
if (lock->ml.node == dlm->node_num) {
mlog(0, "found a lock owned by this node still "
@ -2923,18 +2912,16 @@ again:
static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
struct list_head *iter, *iter2;
struct list_head *queue = &res->granted;
int i, bit;
struct dlm_lock *lock;
struct dlm_lock *lock, *next;
assert_spin_locked(&res->spinlock);
BUG_ON(res->owner == dlm->node_num);
for (i=0; i<3; i++) {
list_for_each_safe(iter, iter2, queue) {
lock = list_entry (iter, struct dlm_lock, list);
list_for_each_entry_safe(lock, next, queue, list) {
if (lock->ml.node != dlm->node_num) {
mlog(0, "putting lock for node %u\n",
lock->ml.node);
@ -2976,7 +2963,6 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
{
int i;
struct list_head *queue = &res->granted;
struct list_head *iter;
struct dlm_lock *lock;
int nodenum;
@ -2984,10 +2970,9 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
spin_lock(&res->spinlock);
for (i=0; i<3; i++) {
list_for_each(iter, queue) {
list_for_each_entry(lock, queue, list) {
/* up to the caller to make sure this node
* is alive */
lock = list_entry (iter, struct dlm_lock, list);
if (lock->ml.node != dlm->node_num) {
spin_unlock(&res->spinlock);
return lock->ml.node;
@ -3234,8 +3219,7 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
{
struct list_head *iter, *iter2;
struct dlm_master_list_entry *mle;
struct dlm_master_list_entry *mle, *next;
struct dlm_lock_resource *res;
unsigned int hash;
@ -3245,9 +3229,7 @@ top:
/* clean the master list */
spin_lock(&dlm->master_lock);
list_for_each_safe(iter, iter2, &dlm->master_list) {
mle = list_entry(iter, struct dlm_master_list_entry, list);
list_for_each_entry_safe(mle, next, &dlm->master_list, list) {
BUG_ON(mle->type != DLM_MLE_BLOCK &&
mle->type != DLM_MLE_MASTER &&
mle->type != DLM_MLE_MIGRATION);

View file

@ -158,8 +158,7 @@ void dlm_dispatch_work(struct work_struct *work)
struct dlm_ctxt *dlm =
container_of(work, struct dlm_ctxt, dispatched_work);
LIST_HEAD(tmp_list);
struct list_head *iter, *iter2;
struct dlm_work_item *item;
struct dlm_work_item *item, *next;
dlm_workfunc_t *workfunc;
int tot=0;
@ -167,13 +166,12 @@ void dlm_dispatch_work(struct work_struct *work)
list_splice_init(&dlm->work_list, &tmp_list);
spin_unlock(&dlm->work_lock);
list_for_each_safe(iter, iter2, &tmp_list) {
list_for_each_entry(item, &tmp_list, list) {
tot++;
}
mlog(0, "%s: work thread has %d work items\n", dlm->name, tot);
list_for_each_safe(iter, iter2, &tmp_list) {
item = list_entry(iter, struct dlm_work_item, list);
list_for_each_entry_safe(item, next, &tmp_list, list) {
workfunc = item->func;
list_del_init(&item->list);
@ -549,7 +547,6 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
{
int status = 0;
struct dlm_reco_node_data *ndata;
struct list_head *iter;
int all_nodes_done;
int destroy = 0;
int pass = 0;
@ -567,8 +564,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
/* safe to access the node data list without a lock, since this
* process is the only one to change the list */
list_for_each(iter, &dlm->reco.node_data) {
ndata = list_entry (iter, struct dlm_reco_node_data, list);
list_for_each_entry(ndata, &dlm->reco.node_data, list) {
BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT);
ndata->state = DLM_RECO_NODE_DATA_REQUESTING;
@ -655,9 +651,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
* done, or if anyone died */
all_nodes_done = 1;
spin_lock(&dlm_reco_state_lock);
list_for_each(iter, &dlm->reco.node_data) {
ndata = list_entry (iter, struct dlm_reco_node_data, list);
list_for_each_entry(ndata, &dlm->reco.node_data, list) {
mlog(0, "checking recovery state of node %u\n",
ndata->node_num);
switch (ndata->state) {
@ -774,16 +768,14 @@ static int dlm_init_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
{
struct list_head *iter, *iter2;
struct dlm_reco_node_data *ndata;
struct dlm_reco_node_data *ndata, *next;
LIST_HEAD(tmplist);
spin_lock(&dlm_reco_state_lock);
list_splice_init(&dlm->reco.node_data, &tmplist);
spin_unlock(&dlm_reco_state_lock);
list_for_each_safe(iter, iter2, &tmplist) {
ndata = list_entry (iter, struct dlm_reco_node_data, list);
list_for_each_entry_safe(ndata, next, &tmplist, list) {
list_del_init(&ndata->list);
kfree(ndata);
}
@ -876,7 +868,6 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
struct dlm_lock_resource *res;
struct dlm_ctxt *dlm;
LIST_HEAD(resources);
struct list_head *iter;
int ret;
u8 dead_node, reco_master;
int skip_all_done = 0;
@ -920,8 +911,7 @@ static void dlm_request_all_locks_worker(struct dlm_work_item *item, void *data)
/* any errors returned will be due to the new_master dying,
* the dlm_reco_thread should detect this */
list_for_each(iter, &resources) {
res = list_entry (iter, struct dlm_lock_resource, recovering);
list_for_each_entry(res, &resources, recovering) {
ret = dlm_send_one_lockres(dlm, res, mres, reco_master,
DLM_MRES_RECOVERY);
if (ret < 0) {
@ -983,7 +973,6 @@ int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data,
{
struct dlm_ctxt *dlm = data;
struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf;
struct list_head *iter;
struct dlm_reco_node_data *ndata = NULL;
int ret = -EINVAL;
@ -1000,8 +989,7 @@ int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data,
dlm->reco.dead_node, done->node_idx, dlm->node_num);
spin_lock(&dlm_reco_state_lock);
list_for_each(iter, &dlm->reco.node_data) {
ndata = list_entry (iter, struct dlm_reco_node_data, list);
list_for_each_entry(ndata, &dlm->reco.node_data, list) {
if (ndata->node_num != done->node_idx)
continue;
@ -1049,13 +1037,11 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
struct list_head *list,
u8 dead_node)
{
struct dlm_lock_resource *res;
struct list_head *iter, *iter2;
struct dlm_lock_resource *res, *next;
struct dlm_lock *lock;
spin_lock(&dlm->spinlock);
list_for_each_safe(iter, iter2, &dlm->reco.resources) {
res = list_entry (iter, struct dlm_lock_resource, recovering);
list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
/* always prune any $RECOVERY entries for dead nodes,
* otherwise hangs can occur during later recovery */
if (dlm_is_recovery_lock(res->lockname.name,
@ -1169,7 +1155,7 @@ static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres,
u8 flags, u8 master)
{
/* mres here is one full page */
memset(mres, 0, PAGE_SIZE);
clear_page(mres);
mres->lockname_len = namelen;
memcpy(mres->lockname, lockname, namelen);
mres->num_locks = 0;
@ -1252,7 +1238,7 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
struct dlm_migratable_lockres *mres,
u8 send_to, u8 flags)
{
struct list_head *queue, *iter;
struct list_head *queue;
int total_locks, i;
u64 mig_cookie = 0;
struct dlm_lock *lock;
@ -1278,9 +1264,7 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
total_locks = 0;
for (i=DLM_GRANTED_LIST; i<=DLM_BLOCKED_LIST; i++) {
queue = dlm_list_idx_to_ptr(res, i);
list_for_each(iter, queue) {
lock = list_entry (iter, struct dlm_lock, list);
list_for_each_entry(lock, queue, list) {
/* add another lock. */
total_locks++;
if (!dlm_add_lock_to_array(lock, mres, i))
@ -1717,7 +1701,6 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
struct dlm_lockstatus *lksb = NULL;
int ret = 0;
int i, j, bad;
struct list_head *iter;
struct dlm_lock *lock = NULL;
u8 from = O2NM_MAX_NODES;
unsigned int added = 0;
@ -1755,8 +1738,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
spin_lock(&res->spinlock);
for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
tmpq = dlm_list_idx_to_ptr(res, j);
list_for_each(iter, tmpq) {
lock = list_entry (iter, struct dlm_lock, list);
list_for_each_entry(lock, tmpq, list) {
if (lock->ml.cookie != ml->cookie)
lock = NULL;
else
@ -1930,8 +1912,8 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
int i;
struct list_head *queue, *iter, *iter2;
struct dlm_lock *lock;
struct list_head *queue;
struct dlm_lock *lock, *next;
res->state |= DLM_LOCK_RES_RECOVERING;
if (!list_empty(&res->recovering)) {
@ -1947,8 +1929,7 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
/* find any pending locks and put them back on proper list */
for (i=DLM_BLOCKED_LIST; i>=DLM_GRANTED_LIST; i--) {
queue = dlm_list_idx_to_ptr(res, i);
list_for_each_safe(iter, iter2, queue) {
lock = list_entry (iter, struct dlm_lock, list);
list_for_each_entry_safe(lock, next, queue, list) {
dlm_lock_get(lock);
if (lock->convert_pending) {
/* move converting lock back to granted */
@ -2013,18 +1994,15 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
u8 dead_node, u8 new_master)
{
int i;
struct list_head *iter, *iter2;
struct hlist_node *hash_iter;
struct hlist_head *bucket;
struct dlm_lock_resource *res;
struct dlm_lock_resource *res, *next;
mlog_entry_void();
assert_spin_locked(&dlm->spinlock);
list_for_each_safe(iter, iter2, &dlm->reco.resources) {
res = list_entry (iter, struct dlm_lock_resource, recovering);
list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
if (res->owner == dead_node) {
list_del_init(&res->recovering);
spin_lock(&res->spinlock);
@ -2099,7 +2077,7 @@ static inline int dlm_lvb_needs_invalidation(struct dlm_lock *lock, int local)
static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res, u8 dead_node)
{
struct list_head *iter, *queue;
struct list_head *queue;
struct dlm_lock *lock;
int blank_lvb = 0, local = 0;
int i;
@ -2121,8 +2099,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
for (i=DLM_GRANTED_LIST; i<=DLM_CONVERTING_LIST; i++) {
queue = dlm_list_idx_to_ptr(res, i);
list_for_each(iter, queue) {
lock = list_entry (iter, struct dlm_lock, list);
list_for_each_entry(lock, queue, list) {
if (lock->ml.node == search_node) {
if (dlm_lvb_needs_invalidation(lock, local)) {
/* zero the lksb lvb and lockres lvb */
@ -2143,8 +2120,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res, u8 dead_node)
{
struct list_head *iter, *tmpiter;
struct dlm_lock *lock;
struct dlm_lock *lock, *next;
unsigned int freed = 0;
/* this node is the lockres master:
@ -2155,24 +2131,21 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
assert_spin_locked(&res->spinlock);
/* TODO: check pending_asts, pending_basts here */
list_for_each_safe(iter, tmpiter, &res->granted) {
lock = list_entry (iter, struct dlm_lock, list);
list_for_each_entry_safe(lock, next, &res->granted, list) {
if (lock->ml.node == dead_node) {
list_del_init(&lock->list);
dlm_lock_put(lock);
freed++;
}
}
list_for_each_safe(iter, tmpiter, &res->converting) {
lock = list_entry (iter, struct dlm_lock, list);
list_for_each_entry_safe(lock, next, &res->converting, list) {
if (lock->ml.node == dead_node) {
list_del_init(&lock->list);
dlm_lock_put(lock);
freed++;
}
}
list_for_each_safe(iter, tmpiter, &res->blocked) {
lock = list_entry (iter, struct dlm_lock, list);
list_for_each_entry_safe(lock, next, &res->blocked, list) {
if (lock->ml.node == dead_node) {
list_del_init(&lock->list);
dlm_lock_put(lock);

View file

@ -600,15 +600,13 @@ static inline int ocfs2_highest_compat_lock_level(int level)
static void lockres_set_flags(struct ocfs2_lock_res *lockres,
unsigned long newflags)
{
struct list_head *pos, *tmp;
struct ocfs2_mask_waiter *mw;
struct ocfs2_mask_waiter *mw, *tmp;
assert_spin_locked(&lockres->l_lock);
lockres->l_flags = newflags;
list_for_each_safe(pos, tmp, &lockres->l_mask_waiters) {
mw = list_entry(pos, struct ocfs2_mask_waiter, mw_item);
list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
continue;

View file

@ -32,6 +32,11 @@ static inline void le32_add_cpu(__le32 *var, u32 val)
*var = cpu_to_le32(le32_to_cpu(*var) + val);
}
/*
 * Add the CPU-endian value 'val' to the little-endian 64-bit quantity
 * pointed to by 'var', storing the result back in little-endian form.
 */
static inline void le64_add_cpu(__le64 *var, u64 val)
{
	u64 sum = le64_to_cpu(*var) + val;

	*var = cpu_to_le64(sum);
}
static inline void le32_and_cpu(__le32 *var, u32 val)
{
*var = cpu_to_le32(le32_to_cpu(*var) & val);

View file

@ -109,17 +109,14 @@ static int ocfs2_extent_map_lookup(struct inode *inode, unsigned int cpos,
*/
void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
{
struct list_head *p, *n;
struct ocfs2_extent_map_item *emi;
struct ocfs2_extent_map_item *emi, *n;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_extent_map *em = &oi->ip_extent_map;
LIST_HEAD(tmp_list);
unsigned int range;
spin_lock(&oi->ip_lock);
list_for_each_safe(p, n, &em->em_list) {
emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
if (emi->ei_cpos >= cpos) {
/* Full truncate of this record. */
list_move(&emi->ei_list, &tmp_list);
@ -136,8 +133,7 @@ void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
}
spin_unlock(&oi->ip_lock);
list_for_each_safe(p, n, &tmp_list) {
emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
list_del(&emi->ei_list);
kfree(emi);
}
@ -377,37 +373,6 @@ out:
return ret;
}
/*
* Return the index of the extent record which contains cluster #v_cluster.
* -1 is returned if it was not found.
*
* Should work fine on interior and exterior nodes.
*/
static int ocfs2_search_extent_list(struct ocfs2_extent_list *el,
u32 v_cluster)
{
int ret = -1;
int i;
struct ocfs2_extent_rec *rec;
u32 rec_end, rec_start, clusters;
for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
rec = &el->l_recs[i];
rec_start = le32_to_cpu(rec->e_cpos);
clusters = ocfs2_rec_clusters(el, rec);
rec_end = rec_start + clusters;
if (v_cluster >= rec_start && v_cluster < rec_end) {
ret = i;
break;
}
}
return ret;
}
int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
u32 *p_cluster, u32 *num_clusters,
unsigned int *extent_flags)

View file

@ -263,6 +263,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
int status;
handle_t *handle;
struct ocfs2_dinode *di;
u64 cluster_bytes;
mlog_entry_void();
@ -286,7 +287,9 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
/*
* Do this before setting i_size.
*/
status = ocfs2_zero_tail_for_truncate(inode, handle, new_i_size);
cluster_bytes = ocfs2_align_bytes_to_clusters(inode->i_sb, new_i_size);
status = ocfs2_zero_range_for_truncate(inode, handle, new_i_size,
cluster_bytes);
if (status) {
mlog_errno(status);
goto out_commit;
@ -326,9 +329,6 @@ static int ocfs2_truncate_file(struct inode *inode,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(unsigned long long)new_i_size);
unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
truncate_inode_pages(inode->i_mapping, new_i_size);
fe = (struct ocfs2_dinode *) di_bh->b_data;
if (!OCFS2_IS_VALID_DINODE(fe)) {
OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
@ -363,16 +363,23 @@ static int ocfs2_truncate_file(struct inode *inode,
if (new_i_size == le64_to_cpu(fe->i_size))
goto bail;
down_write(&OCFS2_I(inode)->ip_alloc_sem);
/* This forces other nodes to sync and drop their pages. Do
* this even if we have a truncate without allocation change -
* ocfs2 cluster sizes can be much greater than page size, so
* we have to truncate them anyway. */
status = ocfs2_data_lock(inode, 1);
if (status < 0) {
up_write(&OCFS2_I(inode)->ip_alloc_sem);
mlog_errno(status);
goto bail;
}
unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
truncate_inode_pages(inode->i_mapping, new_i_size);
/* alright, we're going to need to do a full blown alloc size
* change. Orphan the inode so that recovery can complete the
* truncate if necessary. This does the task of marking
@ -399,6 +406,8 @@ static int ocfs2_truncate_file(struct inode *inode,
bail_unlock_data:
ocfs2_data_unlock(inode, 1);
up_write(&OCFS2_I(inode)->ip_alloc_sem);
bail:
mlog_exit(status);
@ -419,6 +428,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
struct inode *inode,
u32 *logical_offset,
u32 clusters_to_add,
int mark_unwritten,
struct buffer_head *fe_bh,
handle_t *handle,
struct ocfs2_alloc_context *data_ac,
@ -431,9 +441,13 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
enum ocfs2_alloc_restarted reason = RESTART_NONE;
u32 bit_off, num_bits;
u64 block;
u8 flags = 0;
BUG_ON(!clusters_to_add);
if (mark_unwritten)
flags = OCFS2_EXT_UNWRITTEN;
free_extents = ocfs2_num_free_extents(osb, inode, fe);
if (free_extents < 0) {
status = free_extents;
@ -483,7 +497,7 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
*logical_offset, block, num_bits,
meta_ac);
flags, meta_ac);
if (status < 0) {
mlog_errno(status);
goto leave;
@ -516,25 +530,31 @@ leave:
* For a given allocation, determine which allocators will need to be
* accessed, and lock them, reserving the appropriate number of bits.
*
* Called from ocfs2_extend_allocation() for file systems which don't
* support holes, and from ocfs2_write() for file systems which
* understand sparse inodes.
* Sparse file systems call this from ocfs2_write_begin_nolock()
* and ocfs2_allocate_unwritten_extents().
*
* File systems which don't support holes call this from
* ocfs2_extend_allocation().
*/
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
u32 clusters_to_add,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,
struct ocfs2_alloc_context **meta_ac)
{
int ret, num_free_extents;
int ret = 0, num_free_extents;
unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
*meta_ac = NULL;
*data_ac = NULL;
if (data_ac)
*data_ac = NULL;
BUG_ON(clusters_to_add != 0 && data_ac == NULL);
mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
"clusters_to_add = %u\n",
"clusters_to_add = %u, extents_to_split = %u\n",
(unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode),
le32_to_cpu(di->i_clusters), clusters_to_add);
le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split);
num_free_extents = ocfs2_num_free_extents(osb, inode, di);
if (num_free_extents < 0) {
@ -552,9 +572,12 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
*
* Most of the time we'll only be seeing this 1 cluster at a time
* anyway.
*
* Always lock for any unwritten extents - we might want to
* add blocks during a split.
*/
if (!num_free_extents ||
(ocfs2_sparse_alloc(osb) && num_free_extents < clusters_to_add)) {
(ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
ret = ocfs2_reserve_new_metadata(osb, di, meta_ac);
if (ret < 0) {
if (ret != -ENOSPC)
@ -563,6 +586,9 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
}
}
if (clusters_to_add == 0)
goto out;
ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
if (ret < 0) {
if (ret != -ENOSPC)
@ -585,14 +611,13 @@ out:
return ret;
}
static int ocfs2_extend_allocation(struct inode *inode,
u32 clusters_to_add)
static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
u32 clusters_to_add, int mark_unwritten)
{
int status = 0;
int restart_func = 0;
int drop_alloc_sem = 0;
int credits;
u32 prev_clusters, logical_start;
u32 prev_clusters;
struct buffer_head *bh = NULL;
struct ocfs2_dinode *fe = NULL;
handle_t *handle = NULL;
@ -607,7 +632,7 @@ static int ocfs2_extend_allocation(struct inode *inode,
* This function only exists for file systems which don't
* support holes.
*/
BUG_ON(ocfs2_sparse_alloc(osb));
BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
OCFS2_BH_CACHED, inode);
@ -623,19 +648,10 @@ static int ocfs2_extend_allocation(struct inode *inode,
goto leave;
}
logical_start = OCFS2_I(inode)->ip_clusters;
restart_all:
BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
/* blocks people in read/write from reading our allocation
* until we're done changing it. We depend on i_mutex to block
* other extend/truncate calls while we're here. Ordering wrt
* start_trans is important here -- always do it before! */
down_write(&OCFS2_I(inode)->ip_alloc_sem);
drop_alloc_sem = 1;
status = ocfs2_lock_allocators(inode, fe, clusters_to_add, &data_ac,
status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac,
&meta_ac);
if (status) {
mlog_errno(status);
@ -668,6 +684,7 @@ restarted_transaction:
inode,
&logical_start,
clusters_to_add,
mark_unwritten,
bh,
handle,
data_ac,
@ -720,10 +737,6 @@ restarted_transaction:
OCFS2_I(inode)->ip_clusters, i_size_read(inode));
leave:
if (drop_alloc_sem) {
up_write(&OCFS2_I(inode)->ip_alloc_sem);
drop_alloc_sem = 0;
}
if (handle) {
ocfs2_commit_trans(osb, handle);
handle = NULL;
@ -749,6 +762,25 @@ leave:
return status;
}
/*
 * Locked front end for __ocfs2_extend_allocation().
 *
 * Takes ip_alloc_sem for writing around the whole allocation change and
 * hands the arguments through unchanged; the status of the inner call is
 * returned as-is.
 */
static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
				   u32 clusters_to_add, int mark_unwritten)
{
	struct rw_semaphore *alloc_sem = &OCFS2_I(inode)->ip_alloc_sem;
	int status;

	/*
	 * Holding the alloc sem keeps people in read/write from looking
	 * at our allocation until we have finished changing it. Other
	 * extend/truncate callers are expected to be held off by
	 * i_mutex while we are in here.
	 */
	down_write(alloc_sem);
	status = __ocfs2_extend_allocation(inode, logical_start,
					   clusters_to_add, mark_unwritten);
	up_write(alloc_sem);

	return status;
}
/* Some parts of this taken from generic_cont_expand, which turned out
* to be too fragile to do exactly what we need without us having to
* worry about recursive locking in ->prepare_write() and
@ -890,7 +922,9 @@ static int ocfs2_extend_file(struct inode *inode,
}
if (clusters_to_add) {
ret = ocfs2_extend_allocation(inode, clusters_to_add);
ret = ocfs2_extend_allocation(inode,
OCFS2_I(inode)->ip_clusters,
clusters_to_add, 0);
if (ret < 0) {
mlog_errno(ret);
goto out_unlock;
@ -995,6 +1029,13 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
goto bail_unlock;
}
/*
* This will intentionally not wind up calling vmtruncate(),
* since all the work for a size change has been done above.
* Otherwise, we could get into problems with truncate as
* ip_alloc_sem is used there to protect against i_size
* changes.
*/
status = inode_setattr(inode, attr);
if (status < 0) {
mlog_errno(status);
@ -1070,17 +1111,16 @@ out:
return ret;
}
static int ocfs2_write_remove_suid(struct inode *inode)
static int __ocfs2_write_remove_suid(struct inode *inode,
struct buffer_head *bh)
{
int ret;
struct buffer_head *bh = NULL;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
handle_t *handle;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di;
mlog_entry("(Inode %llu, mode 0%o)\n",
(unsigned long long)oi->ip_blkno, inode->i_mode);
(unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode);
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (handle == NULL) {
@ -1089,17 +1129,11 @@ static int ocfs2_write_remove_suid(struct inode *inode)
goto out;
}
ret = ocfs2_read_block(osb, oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
if (ret < 0) {
mlog_errno(ret);
goto out_trans;
}
ret = ocfs2_journal_access(handle, inode, bh,
OCFS2_JOURNAL_ACCESS_WRITE);
if (ret < 0) {
mlog_errno(ret);
goto out_bh;
goto out_trans;
}
inode->i_mode &= ~S_ISUID;
@ -1112,8 +1146,7 @@ static int ocfs2_write_remove_suid(struct inode *inode)
ret = ocfs2_journal_dirty(handle, bh);
if (ret < 0)
mlog_errno(ret);
out_bh:
brelse(bh);
out_trans:
ocfs2_commit_trans(osb, handle);
out:
@ -1159,6 +1192,460 @@ out:
return ret;
}
static int ocfs2_write_remove_suid(struct inode *inode)
{
int ret;
struct buffer_head *bh = NULL;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
if (ret < 0) {
mlog_errno(ret);
goto out;
}
ret = __ocfs2_write_remove_suid(inode, bh);
out:
brelse(bh);
return ret;
}
/*
 * Make sure the byte range [start, start + len) is backed by allocation.
 *
 * The range is walked one extent (or hole) at a time. Regions which
 * already have a physical mapping are left alone; holes are filled via
 * __ocfs2_extend_allocation() with the "mark unwritten" flag set.
 *
 * Returns 0 once the whole range has been covered, or the first error
 * status encountered.
 */
static int ocfs2_allocate_unwritten_extents(struct inode *inode,
					    u64 start, u64 len)
{
	u32 cpos, remaining, p_cluster, run;
	int status;

	/*
	 * Both start and len are treated as inclusive: begin at the
	 * cluster containing 'start' and round the end up to a whole
	 * cluster.
	 */
	cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
	remaining = ocfs2_clusters_for_bytes(inode->i_sb, start + len);
	remaining -= cpos;

	for (; remaining != 0; cpos += run, remaining -= run) {
		status = ocfs2_get_clusters(inode, cpos, &p_cluster, &run,
					    NULL);
		if (status) {
			mlog_errno(status);
			return status;
		}

		/*
		 * The hole or extent we were told about may run past
		 * the end of our request, so clamp it.
		 */
		if (run > remaining)
			run = remaining;

		/* Already allocated here - nothing to add. */
		if (p_cluster != 0)
			continue;

		status = __ocfs2_extend_allocation(inode, cpos, run, 1);
		if (status) {
			if (status != -ENOSPC)
				mlog_errno(status);
			return status;
		}
	}

	return 0;
}
/*
 * Remove a single run of 'len' clusters from an inode.
 *
 * @inode:     inode losing the clusters
 * @di_bh:     buffer holding the inode's on-disk dinode
 * @cpos:      logical cluster offset of the run within the file
 * @phys_cpos: physical cluster at which the run starts
 * @len:       number of clusters in the run
 * @dealloc:   cached dealloc context handed on to ocfs2_remove_extent()
 *
 * Inside one transaction this removes the extent from the inode's tree,
 * decrements the in-memory and on-disk cluster counts by 'len' and appends
 * the physical range to the truncate log. The truncate log inode's i_mutex
 * is held for the duration.
 *
 * Returns 0 on success, otherwise the error status of the step that failed.
 */
static int __ocfs2_remove_inode_range(struct inode *inode,
				      struct buffer_head *di_bh,
				      u32 cpos, u32 phys_cpos, u32 len,
				      struct ocfs2_cached_dealloc_ctxt *dealloc)
{
	int ret;
	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct inode *tl_inode = osb->osb_tl_inode;
	handle_t *handle;
	struct ocfs2_alloc_context *meta_ac = NULL;
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;

	/*
	 * No clusters are being added, but removing from the middle of
	 * an extent may split it, so ask for room for one split. With
	 * zero clusters to add no data allocator is needed.
	 */
	ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac);
	if (ret) {
		mlog_errno(ret);
		return ret;
	}

	mutex_lock(&tl_inode->i_mutex);

	if (ocfs2_truncate_log_needs_flush(osb)) {
		ret = __ocfs2_flush_truncate_log(osb);
		if (ret < 0) {
			mlog_errno(ret);
			goto out;
		}
	}

	/*
	 * NOTE(review): this assumes ocfs2_start_trans() reports failure
	 * by returning NULL - confirm it cannot return an ERR_PTR value.
	 */
	handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
	if (handle == NULL) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	ret = ocfs2_journal_access(handle, inode, di_bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		/*
		 * The transaction is already running at this point, so
		 * it has to be committed on the way out or the handle
		 * is leaked.
		 */
		goto out_commit;
	}

	ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac,
				  dealloc);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	/* Keep the in-memory and on-disk cluster counts in step. */
	OCFS2_I(inode)->ip_clusters -= len;
	di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);

	ret = ocfs2_journal_dirty(handle, di_bh);
	if (ret) {
		mlog_errno(ret);
		goto out_commit;
	}

	ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
	if (ret)
		mlog_errno(ret);

out_commit:
	ocfs2_commit_trans(osb, handle);
out:
	mutex_unlock(&tl_inode->i_mutex);

	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);

	return ret;
}
/*
 * Drop page cache for a byte range, but only for the whole clusters it
 * contains. Pages belonging to a partial cluster at either edge are kept
 * so that the zeroing code still has them to write to.
 */
static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start,
					 u64 byte_len)
{
	struct address_space *mapping = inode->i_mapping;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	loff_t first, last;

	/* Round the start up to a cluster boundary ... */
	first = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start);

	/* ... and the end down to one. */
	last = byte_start + byte_len;
	last &= ~(osb->s_clustersize - 1);

	/* No complete cluster inside the range: nothing to drop. */
	if (first >= last)
		return;

	unmap_mapping_range(mapping, first, last - first, 0);
	truncate_inode_pages_range(mapping, first, last - 1);
}
/*
 * Zero the partial clusters at the edges of the byte range
 * [start, start + len).
 *
 * At most two sub-ranges are zeroed, both in the same transaction: from
 * 'start' to the end of the cluster containing it (capped at the end of
 * the request), and from the beginning of the cluster containing the end
 * of the request up to that end.
 *
 * Both sub-ranges are always attempted. If either fails, the first error
 * seen is returned so that a later success cannot hide it. Returns 0 if
 * nothing needed zeroing or all zeroing succeeded.
 */
static int ocfs2_zero_partial_clusters(struct inode *inode,
				       u64 start, u64 len)
{
	int ret = 0;
	u64 tmpend, end = start + len;
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	unsigned int csize = osb->s_clustersize;
	handle_t *handle;

	/*
	 * The "start" and "end" values are NOT necessarily part of
	 * the range whose allocation is being deleted. Rather, this
	 * is what the user passed in with the request. We must zero
	 * partial clusters here. There's no need to worry about
	 * physical allocation - the zeroing code knows to skip holes.
	 */
	mlog(0, "byte start: %llu, end: %llu\n",
	     (unsigned long long)start, (unsigned long long)end);

	/*
	 * If both edges are on a cluster boundary then there's no
	 * zeroing required as the region is part of the allocation to
	 * be truncated.
	 */
	if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0)
		goto out;

	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
	if (handle == NULL) {
		ret = -ENOMEM;
		mlog_errno(ret);
		goto out;
	}

	/*
	 * We want to get the byte offset of the end of the 1st cluster.
	 */
	tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1));
	if (tmpend > end)
		tmpend = end;

	mlog(0, "1st range: start: %llu, tmpend: %llu\n",
	     (unsigned long long)start, (unsigned long long)tmpend);

	ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
	if (ret)
		mlog_errno(ret);

	if (tmpend < end) {
		int ret2;

		/*
		 * This may make start and end equal, but the zeroing
		 * code will skip any work in that case so there's no
		 * need to catch it up here.
		 */
		start = end & ~(osb->s_clustersize - 1);

		mlog(0, "2nd range: start: %llu, end: %llu\n",
		     (unsigned long long)start, (unsigned long long)end);

		ret2 = ocfs2_zero_range_for_truncate(inode, handle, start, end);
		if (ret2) {
			mlog_errno(ret2);
			/* Don't let this result mask an earlier failure. */
			if (!ret)
				ret = ret2;
		}
	}

	ocfs2_commit_trans(osb, handle);
out:
	return ret;
}
/*
 * Remove the allocation backing the byte range
 * [byte_start, byte_start + byte_len) of an inode.
 *
 * Partial clusters at the edges of the request are zeroed first. The
 * whole clusters inside the range are then walked extent by extent and
 * every non-hole run is removed via __ocfs2_remove_inode_range(). Once
 * all runs are gone the page cache over the whole clusters is dropped.
 *
 * A truncate log flush is scheduled and the cached deallocs are run on
 * the way out, on success and on failure alike. A zero-length request
 * returns 0 straight away.
 */
static int ocfs2_remove_inode_range(struct inode *inode,
				    struct buffer_head *di_bh, u64 byte_start,
				    u64 byte_len)
{
	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
	struct ocfs2_cached_dealloc_ctxt dealloc;
	u32 first_cluster, remaining, cpos, p_cluster, run;
	int status = 0;

	ocfs2_init_dealloc_ctxt(&dealloc);

	if (!byte_len)
		return 0;

	/*
	 * Only whole clusters are removed: round the start up and the
	 * end down. If the two cross there is nothing to remove.
	 */
	first_cluster = ocfs2_clusters_for_bytes(osb->sb, byte_start);
	remaining = (byte_start + byte_len) >> osb->s_clustersize_bits;
	remaining = (remaining >= first_cluster) ?
			remaining - first_cluster : 0;

	mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno,
	     (unsigned long long)byte_start,
	     (unsigned long long)byte_len, first_cluster, remaining);

	status = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
	if (status) {
		mlog_errno(status);
		goto out;
	}

	for (cpos = first_cluster; remaining;
	     cpos += run, remaining -= run) {
		status = ocfs2_get_clusters(inode, cpos, &p_cluster, &run,
					    NULL);
		if (status) {
			mlog_errno(status);
			goto out;
		}

		/* Never go past the end of what was asked for. */
		if (run > remaining)
			run = remaining;

		/* Holes have no allocation to give back. */
		if (p_cluster == 0)
			continue;

		status = __ocfs2_remove_inode_range(inode, di_bh, cpos,
						    p_cluster, run, &dealloc);
		if (status) {
			mlog_errno(status);
			goto out;
		}
	}

	ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);

out:
	ocfs2_schedule_truncate_log_flush(osb, 1);
	ocfs2_run_deallocs(osb, &dealloc);

	return status;
}
/*
* Parts of this function taken from xfs_change_file_space()
*/
int ocfs2_change_file_space(struct file *file, unsigned int cmd,
struct ocfs2_space_resv *sr)
{
int ret;
s64 llen;
struct inode *inode = file->f_path.dentry->d_inode;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct buffer_head *di_bh = NULL;
handle_t *handle;
unsigned long long max_off = ocfs2_max_file_offset(inode->i_sb->s_blocksize_bits);
if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) &&
!ocfs2_writes_unwritten_extents(osb))
return -ENOTTY;
else if ((cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) &&
!ocfs2_sparse_alloc(osb))
return -ENOTTY;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
return -EROFS;
mutex_lock(&inode->i_mutex);
/*
* This prevents concurrent writes on other nodes
*/
ret = ocfs2_rw_lock(inode, 1);
if (ret) {
mlog_errno(ret);
goto out;
}
ret = ocfs2_meta_lock(inode, &di_bh, 1);
if (ret) {
mlog_errno(ret);
goto out_rw_unlock;
}
if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
ret = -EPERM;
goto out_meta_unlock;
}
switch (sr->l_whence) {
case 0: /*SEEK_SET*/
break;
case 1: /*SEEK_CUR*/
sr->l_start += file->f_pos;
break;
case 2: /*SEEK_END*/
sr->l_start += i_size_read(inode);
break;
default:
ret = -EINVAL;
goto out_meta_unlock;
}
sr->l_whence = 0;
llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len;
if (sr->l_start < 0
|| sr->l_start > max_off
|| (sr->l_start + llen) < 0
|| (sr->l_start + llen) > max_off) {
ret = -EINVAL;
goto out_meta_unlock;
}
if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) {
if (sr->l_len <= 0) {
ret = -EINVAL;
goto out_meta_unlock;
}
}
if (should_remove_suid(file->f_path.dentry)) {
ret = __ocfs2_write_remove_suid(inode, di_bh);
if (ret) {
mlog_errno(ret);
goto out_meta_unlock;
}
}
down_write(&OCFS2_I(inode)->ip_alloc_sem);
switch (cmd) {
case OCFS2_IOC_RESVSP:
case OCFS2_IOC_RESVSP64:
/*
* This takes unsigned offsets, but the signed ones we
* pass have been checked against overflow above.
*/
ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start,
sr->l_len);
break;
case OCFS2_IOC_UNRESVSP:
case OCFS2_IOC_UNRESVSP64:
ret = ocfs2_remove_inode_range(inode, di_bh, sr->l_start,
sr->l_len);
break;
default:
ret = -EINVAL;
}
up_write(&OCFS2_I(inode)->ip_alloc_sem);
if (ret) {
mlog_errno(ret);
goto out_meta_unlock;
}
/*
* We update c/mtime for these changes
*/
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
goto out_meta_unlock;
}
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
if (ret < 0)
mlog_errno(ret);
ocfs2_commit_trans(osb, handle);
out_meta_unlock:
brelse(di_bh);
ocfs2_meta_unlock(inode, 1);
out_rw_unlock:
ocfs2_rw_unlock(inode, 1);
mutex_unlock(&inode->i_mutex);
out:
return ret;
}
static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
loff_t *ppos,
size_t count,
@ -1329,15 +1816,16 @@ ocfs2_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
*basep = base;
}
static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp,
static struct page * ocfs2_get_write_source(char **ret_src_buf,
const struct iovec *cur_iov,
size_t iov_offset)
{
int ret;
char *buf;
char *buf = cur_iov->iov_base + iov_offset;
struct page *src_page = NULL;
unsigned long off;
buf = cur_iov->iov_base + iov_offset;
off = (unsigned long)(buf) & ~PAGE_CACHE_MASK;
if (!segment_eq(get_fs(), KERNEL_DS)) {
/*
@ -1349,18 +1837,17 @@ static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp
(unsigned long)buf & PAGE_CACHE_MASK, 1,
0, 0, &src_page, NULL);
if (ret == 1)
bp->b_src_buf = kmap(src_page);
*ret_src_buf = kmap(src_page) + off;
else
src_page = ERR_PTR(-EFAULT);
} else {
bp->b_src_buf = buf;
*ret_src_buf = buf;
}
return src_page;
}
static void ocfs2_put_write_source(struct ocfs2_buffered_write_priv *bp,
struct page *page)
static void ocfs2_put_write_source(struct page *page)
{
if (page) {
kunmap(page);
@ -1376,10 +1863,12 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
{
int ret = 0;
ssize_t copied, total = 0;
size_t iov_offset = 0;
size_t iov_offset = 0, bytes;
loff_t pos;
const struct iovec *cur_iov = iov;
struct ocfs2_buffered_write_priv bp;
struct page *page;
struct page *user_page, *page;
char *buf, *dst;
void *fsdata;
/*
* handle partial DIO write. Adjust cur_iov if needed.
@ -1387,21 +1876,38 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written);
do {
bp.b_cur_off = iov_offset;
bp.b_cur_iov = cur_iov;
pos = *ppos;
page = ocfs2_get_write_source(&bp, cur_iov, iov_offset);
if (IS_ERR(page)) {
ret = PTR_ERR(page);
user_page = ocfs2_get_write_source(&buf, cur_iov, iov_offset);
if (IS_ERR(user_page)) {
ret = PTR_ERR(user_page);
goto out;
}
copied = ocfs2_buffered_write_cluster(file, *ppos, count,
ocfs2_map_and_write_user_data,
&bp);
/* Stay within our page boundaries */
bytes = min((PAGE_CACHE_SIZE - ((unsigned long)pos & ~PAGE_CACHE_MASK)),
(PAGE_CACHE_SIZE - ((unsigned long)buf & ~PAGE_CACHE_MASK)));
/* Stay within the vector boundary */
bytes = min_t(size_t, bytes, cur_iov->iov_len - iov_offset);
/* Stay within count */
bytes = min(bytes, count);
ocfs2_put_write_source(&bp, page);
page = NULL;
ret = ocfs2_write_begin(file, file->f_mapping, pos, bytes, 0,
&page, &fsdata);
if (ret) {
mlog_errno(ret);
goto out;
}
dst = kmap_atomic(page, KM_USER0);
memcpy(dst + (pos & (PAGE_CACHE_SIZE - 1)), buf, bytes);
kunmap_atomic(dst, KM_USER0);
flush_dcache_page(page);
ocfs2_put_write_source(user_page);
copied = ocfs2_write_end(file, file->f_mapping, pos, bytes,
bytes, page, fsdata);
if (copied < 0) {
mlog_errno(copied);
ret = copied;
@ -1409,7 +1915,7 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
}
total += copied;
*ppos = *ppos + copied;
*ppos = pos + copied;
count -= copied;
ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied);
@ -1579,52 +2085,46 @@ static int ocfs2_splice_write_actor(struct pipe_inode_info *pipe,
struct pipe_buffer *buf,
struct splice_desc *sd)
{
int ret, count, total = 0;
int ret, count;
ssize_t copied = 0;
struct ocfs2_splice_write_priv sp;
struct file *file = sd->u.file;
unsigned int offset;
struct page *page = NULL;
void *fsdata;
char *src, *dst;
ret = buf->ops->confirm(pipe, buf);
if (ret)
goto out;
sp.s_sd = sd;
sp.s_buf = buf;
sp.s_pipe = pipe;
sp.s_offset = sd->pos & ~PAGE_CACHE_MASK;
sp.s_buf_offset = buf->offset;
offset = sd->pos & ~PAGE_CACHE_MASK;
count = sd->len;
if (count + sp.s_offset > PAGE_CACHE_SIZE)
count = PAGE_CACHE_SIZE - sp.s_offset;
if (count + offset > PAGE_CACHE_SIZE)
count = PAGE_CACHE_SIZE - offset;
do {
/*
* splice wants us to copy up to one page at a
* time. For pagesize > cluster size, this means we
* might enter ocfs2_buffered_write_cluster() more
* than once, so keep track of our progress here.
*/
copied = ocfs2_buffered_write_cluster(sd->u.file,
(loff_t)sd->pos + total,
count,
ocfs2_map_and_write_splice_data,
&sp);
if (copied < 0) {
mlog_errno(copied);
ret = copied;
goto out;
}
ret = ocfs2_write_begin(file, file->f_mapping, sd->pos, count, 0,
&page, &fsdata);
if (ret) {
mlog_errno(ret);
goto out;
}
count -= copied;
sp.s_offset += copied;
sp.s_buf_offset += copied;
total += copied;
} while (count);
src = buf->ops->map(pipe, buf, 1);
dst = kmap_atomic(page, KM_USER1);
memcpy(dst + offset, src + buf->offset, count);
kunmap_atomic(page, KM_USER1);
buf->ops->unmap(pipe, buf, src);
ret = 0;
copied = ocfs2_write_end(file, file->f_mapping, sd->pos, count, count,
page, fsdata);
if (copied < 0) {
mlog_errno(copied);
ret = copied;
goto out;
}
out:
return total ? total : ret;
return copied ? copied : ret;
}
static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,

View file

@ -39,15 +39,16 @@ enum ocfs2_alloc_restarted {
};
int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
struct inode *inode,
u32 *cluster_start,
u32 *logical_offset,
u32 clusters_to_add,
int mark_unwritten,
struct buffer_head *fe_bh,
handle_t *handle,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
enum ocfs2_alloc_restarted *reason);
enum ocfs2_alloc_restarted *reason_ret);
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
u32 clusters_to_add,
u32 clusters_to_add, u32 extents_to_split,
struct ocfs2_alloc_context **data_ac,
struct ocfs2_alloc_context **meta_ac);
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
@ -61,4 +62,7 @@ int ocfs2_should_update_atime(struct inode *inode,
int ocfs2_update_inode_atime(struct inode *inode,
struct buffer_head *bh);
int ocfs2_change_file_space(struct file *file, unsigned int cmd,
struct ocfs2_space_resv *sr);
#endif /* OCFS2_FILE_H */

View file

@ -157,16 +157,16 @@ int ocfs2_register_hb_callbacks(struct ocfs2_super *osb)
if (ocfs2_mount_local(osb))
return 0;
status = o2hb_register_callback(&osb->osb_hb_down);
status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_down);
if (status < 0) {
mlog_errno(status);
goto bail;
}
status = o2hb_register_callback(&osb->osb_hb_up);
status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_up);
if (status < 0) {
mlog_errno(status);
o2hb_unregister_callback(&osb->osb_hb_down);
o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
}
bail:
@ -178,8 +178,8 @@ void ocfs2_clear_hb_callbacks(struct ocfs2_super *osb)
if (ocfs2_mount_local(osb))
return;
o2hb_unregister_callback(&osb->osb_hb_down);
o2hb_unregister_callback(&osb->osb_hb_up);
o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_up);
}
void ocfs2_stop_heartbeat(struct ocfs2_super *osb)

View file

@ -14,6 +14,7 @@
#include "ocfs2.h"
#include "alloc.h"
#include "dlmglue.h"
#include "file.h"
#include "inode.h"
#include "journal.h"
@ -115,6 +116,7 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp,
{
unsigned int flags;
int status;
struct ocfs2_space_resv sr;
switch (cmd) {
case OCFS2_IOC_GETFLAGS:
@ -130,6 +132,14 @@ int ocfs2_ioctl(struct inode * inode, struct file * filp,
return ocfs2_set_inode_attr(inode, flags,
OCFS2_FL_MODIFIABLE);
case OCFS2_IOC_RESVSP:
case OCFS2_IOC_RESVSP64:
case OCFS2_IOC_UNRESVSP:
case OCFS2_IOC_UNRESVSP64:
if (copy_from_user(&sr, (int __user *) arg, sizeof(sr)))
return -EFAULT;
return ocfs2_change_file_space(filp, cmd, &sr);
default:
return -ENOTTY;
}
@ -148,6 +158,11 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
case OCFS2_IOC32_SETFLAGS:
cmd = OCFS2_IOC_SETFLAGS;
break;
case OCFS2_IOC_RESVSP:
case OCFS2_IOC_RESVSP64:
case OCFS2_IOC_UNRESVSP:
case OCFS2_IOC_UNRESVSP64:
break;
default:
return -ENOIOCTLCMD;
}

View file

@ -722,8 +722,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
container_of(work, struct ocfs2_journal, j_recovery_work);
struct ocfs2_super *osb = journal->j_osb;
struct ocfs2_dinode *la_dinode, *tl_dinode;
struct ocfs2_la_recovery_item *item;
struct list_head *p, *n;
struct ocfs2_la_recovery_item *item, *n;
LIST_HEAD(tmp_la_list);
mlog_entry_void();
@ -734,8 +733,7 @@ void ocfs2_complete_recovery(struct work_struct *work)
list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
spin_unlock(&journal->j_lock);
list_for_each_safe(p, n, &tmp_la_list) {
item = list_entry(p, struct ocfs2_la_recovery_item, lri_list);
list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
list_del_init(&item->lri_list);
mlog(0, "Complete recovery for slot %d\n", item->lri_slot);

View file

@ -289,6 +289,8 @@ int ocfs2_journal_dirty_data(handle_t *handle,
#define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \
+ OCFS2_TRUNCATE_LOG_UPDATE)
#define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS)
/* data block for new dir/symlink, 2 for bitmap updates (bitmap fe +
* bitmap block for the new bit) */
#define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2)

View file

@ -37,11 +37,29 @@
#include "ocfs2.h"
#include "aops.h"
#include "dlmglue.h"
#include "file.h"
#include "inode.h"
#include "mmap.h"
/*
 * Block every signal for the duration of a vm operation.
 *
 * @blocked is filled with the full signal set and installed with
 * SIG_BLOCK; the previous mask is stored in @oldset so that it can be
 * restored afterwards. Returns the result of sigprocmask().
 */
static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset)
{
	int rc;

	/* Blocking signals upfront is the best way to deal with them
	 * in the vm path; the alternative would be locking paths that
	 * return -ERESTARTSYS. */
	sigfillset(blocked);

	/* Technically sigprocmask should never hand back a bad
	 * return value here. */
	rc = sigprocmask(SIG_BLOCK, blocked, oldset);

	return rc;
}
/*
 * Reinstall the signal mask saved in @oldset (SIG_SETMASK).
 * Returns the result of sigprocmask().
 */
static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset)
{
	int rc = sigprocmask(SIG_SETMASK, oldset, NULL);

	return rc;
}
static struct page *ocfs2_nopage(struct vm_area_struct * area,
unsigned long address,
int *type)
@ -53,14 +71,7 @@ static struct page *ocfs2_nopage(struct vm_area_struct * area,
mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address,
type);
/* The best way to deal with signals in this path is
* to block them upfront, rather than allowing the
* locking paths to return -ERESTARTSYS. */
sigfillset(&blocked);
/* We should technically never get a bad ret return
* from sigprocmask */
ret = sigprocmask(SIG_BLOCK, &blocked, &oldset);
ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
if (ret < 0) {
mlog_errno(ret);
goto out;
@ -68,7 +79,7 @@ static struct page *ocfs2_nopage(struct vm_area_struct * area,
page = filemap_nopage(area, address, type);
ret = sigprocmask(SIG_SETMASK, &oldset, NULL);
ret = ocfs2_vm_op_unblock_sigs(&oldset);
if (ret < 0)
mlog_errno(ret);
out:
@ -76,27 +87,135 @@ out:
return page;
}
static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
struct page *page)
{
int ret;
struct address_space *mapping = inode->i_mapping;
loff_t pos = page->index << PAGE_CACHE_SHIFT;
unsigned int len = PAGE_CACHE_SIZE;
pgoff_t last_index;
struct page *locked_page = NULL;
void *fsdata;
loff_t size = i_size_read(inode);
/*
* Another node might have truncated while we were waiting on
* cluster locks.
*/
last_index = size >> PAGE_CACHE_SHIFT;
if (page->index > last_index) {
ret = -EINVAL;
goto out;
}
/*
* The i_size check above doesn't catch the case where nodes
* truncated and then re-extended the file. We'll re-check the
* page mapping after taking the page lock inside of
* ocfs2_write_begin_nolock().
*/
if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
ret = -EINVAL;
goto out;
}
/*
* Call ocfs2_write_begin() and ocfs2_write_end() to take
* advantage of the allocation code there. We pass a write
* length of the whole page (chopped to i_size) to make sure
* the whole thing is allocated.
*
* Since we know the page is up to date, we don't have to
* worry about ocfs2_write_begin() skipping some buffer reads
* because the "write" would invalidate their data.
*/
if (page->index == last_index)
len = size & ~PAGE_CACHE_MASK;
ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page,
&fsdata, di_bh, page);
if (ret) {
if (ret != -ENOSPC)
mlog_errno(ret);
goto out;
}
ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page,
fsdata);
if (ret < 0) {
mlog_errno(ret);
goto out;
}
BUG_ON(ret != len);
ret = 0;
out:
return ret;
}
/*
 * page_mkwrite handler for ocfs2 file mappings.
 *
 * With all signals blocked, takes the meta lock, ip_alloc_sem and the
 * data lock (in that order), lets __ocfs2_page_mkwrite() do the work
 * and then unwinds everything in reverse. Returns 0 on success or a
 * negative errno; a failure to restore the signal mask is only logged.
 */
static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
{
	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
	struct buffer_head *di_bh = NULL;
	sigset_t blocked, oldset;
	int status, sig_status;

	status = ocfs2_vm_op_block_sigs(&blocked, &oldset);
	if (status < 0) {
		mlog_errno(status);
		return status;
	}

	/*
	 * The cluster locks taken will block a truncate from another
	 * node. Taking the data lock will also ensure that we don't
	 * attempt page truncation as part of a downconvert.
	 */
	status = ocfs2_meta_lock(inode, &di_bh, 1);
	if (status < 0) {
		mlog_errno(status);
	} else {
		/*
		 * The alloc sem should be enough to serialize with
		 * ocfs2_truncate_file() changing i_size as well as any
		 * thread modifying the inode btree.
		 */
		down_write(&OCFS2_I(inode)->ip_alloc_sem);

		status = ocfs2_data_lock(inode, 1);
		if (status < 0) {
			mlog_errno(status);
		} else {
			status = __ocfs2_page_mkwrite(inode, di_bh, page);
			ocfs2_data_unlock(inode, 1);
		}

		up_write(&OCFS2_I(inode)->ip_alloc_sem);

		brelse(di_bh);
		ocfs2_meta_unlock(inode, 1);
	}

	/* Always restore the caller's signal mask. */
	sig_status = ocfs2_vm_op_unblock_sigs(&oldset);
	if (sig_status < 0)
		mlog_errno(sig_status);

	return status;
}
static struct vm_operations_struct ocfs2_file_vm_ops = {
.nopage = ocfs2_nopage,
.nopage = ocfs2_nopage,
.page_mkwrite = ocfs2_page_mkwrite,
};
int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
{
int ret = 0, lock_level = 0;
struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb);
/*
* Only support shared writeable mmap for local mounts which
* don't know about holes.
*/
if ((!ocfs2_mount_local(osb) || ocfs2_sparse_alloc(osb)) &&
((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) &&
((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) {
mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags);
/* This is -EINVAL because generic_file_readonly_mmap
* returns it in a similar situation. */
return -EINVAL;
}
ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode,
file->f_vfsmnt, &lock_level);

View file

@ -1674,7 +1674,7 @@ static int ocfs2_symlink(struct inode *dir,
u32 offset = 0;
inode->i_op = &ocfs2_symlink_inode_operations;
status = ocfs2_do_extend_allocation(osb, inode, &offset, 1,
status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0,
new_fe_bh,
handle, data_ac, NULL,
NULL);

View file

@ -219,6 +219,7 @@ struct ocfs2_super
u16 max_slots;
s16 node_num;
s16 slot_num;
s16 preferred_slot;
int s_sectsize_bits;
int s_clustersize;
int s_clustersize_bits;
@ -305,6 +306,19 @@ static inline int ocfs2_sparse_alloc(struct ocfs2_super *osb)
return 0;
}
/*
 * Returns 1 if the volume supports unwritten extents, 0 otherwise.
 */
static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
{
	/*
	 * Sparse file support is a pre-requisite; on top of that the
	 * unwritten ro-compat feature bit has to be set.
	 */
	return ocfs2_sparse_alloc(osb) &&
	       (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_UNWRITTEN);
}
/* set / clear functions because cluster events can make these happen
* in parallel so we want the transitions to be atomic. this also
* means that any future flags osb_flags must be protected by spinlock

View file

@ -88,7 +88,7 @@
#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB
#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \
| OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC)
#define OCFS2_FEATURE_RO_COMPAT_SUPP 0
#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
/*
* Heartbeat-only devices are missing journals and other files. The
@ -116,6 +116,11 @@
*/
#define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001
/*
* Unwritten extents support.
*/
#define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001
/* The byte offset of the first backup block will be 1G.
* The following will be 4G, 16G, 64G, 256G and 1T.
*/
@ -169,6 +174,32 @@
#define OCFS2_IOC32_GETFLAGS _IOR('f', 1, int)
#define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int)
/*
* Space reservation / allocation / free ioctls and argument structure
* are designed to be compatible with XFS.
*
* ALLOCSP* and FREESP* are not and will never be supported, but are
* included here for completeness.
*/
struct ocfs2_space_resv {
/* NOTE(review): not read by the ioctl code visible here; presumably
 * present only to keep the XFS-compatible layout - confirm */
__s16 l_type;
/* How l_start is interpreted; ocfs2_change_file_space() accepts
 * 0 (SEEK_SET), 1 (SEEK_CUR) and 2 (SEEK_END) and rejects anything
 * else with -EINVAL */
__s16 l_whence;
/* Start offset of the range in bytes, relative to l_whence */
__s64 l_start;
__s64 l_len; /* len == 0 means until end of file */
/* NOTE(review): l_sysid and l_pid are not read by the ioctl code
 * visible here - confirm they are unused */
__s32 l_sysid;
__u32 l_pid;
__s32 l_pad[4]; /* reserve area */
};
#define OCFS2_IOC_ALLOCSP _IOW ('X', 10, struct ocfs2_space_resv)
#define OCFS2_IOC_FREESP _IOW ('X', 11, struct ocfs2_space_resv)
#define OCFS2_IOC_RESVSP _IOW ('X', 40, struct ocfs2_space_resv)
#define OCFS2_IOC_UNRESVSP _IOW ('X', 41, struct ocfs2_space_resv)
#define OCFS2_IOC_ALLOCSP64 _IOW ('X', 36, struct ocfs2_space_resv)
#define OCFS2_IOC_FREESP64 _IOW ('X', 37, struct ocfs2_space_resv)
#define OCFS2_IOC_RESVSP64 _IOW ('X', 42, struct ocfs2_space_resv)
#define OCFS2_IOC_UNRESVSP64 _IOW ('X', 43, struct ocfs2_space_resv)
/*
* Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
*/

View file

@ -121,17 +121,25 @@ static s16 __ocfs2_node_num_to_slot(struct ocfs2_slot_info *si,
return ret;
}
static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si)
/*
 * Pick an unclaimed slot from the slot map.
 *
 * @preferred is returned if it is a valid index whose entry is
 * unclaimed; otherwise the lowest numbered unclaimed slot is returned.
 * OCFS2_INVALID_SLOT is returned when every slot is taken.
 */
static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, s16 preferred)
{
	int idx;

	/* Honour the preference only if it is in range and still free. */
	if (preferred >= 0 && preferred < si->si_num_slots &&
	    si->si_global_node_nums[preferred] == OCFS2_INVALID_SLOT)
		return preferred;

	/* Fall back to the first free slot. */
	for (idx = 0; idx < si->si_num_slots; idx++) {
		if (si->si_global_node_nums[idx] == OCFS2_INVALID_SLOT)
			return (s16) idx;
	}

	return OCFS2_INVALID_SLOT;
}
@ -248,7 +256,7 @@ int ocfs2_find_slot(struct ocfs2_super *osb)
if (slot == OCFS2_INVALID_SLOT) {
/* if no slot yet, then just take 1st available
* one. */
slot = __ocfs2_find_empty_slot(si);
slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
if (slot == OCFS2_INVALID_SLOT) {
spin_unlock(&si->si_lock);
mlog(ML_ERROR, "no free slots available!\n");

View file

@ -98,14 +98,6 @@ static int ocfs2_relink_block_group(handle_t *handle,
u16 chain);
static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
u32 wanted);
static int ocfs2_free_suballoc_bits(handle_t *handle,
struct inode *alloc_inode,
struct buffer_head *alloc_bh,
unsigned int start_bit,
u64 bg_blkno,
unsigned int count);
static inline u64 ocfs2_which_suballoc_group(u64 block,
unsigned int bit);
static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
u64 bg_blkno,
u16 bg_bit_off);
@ -496,13 +488,7 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
(*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe);
(*ac)->ac_which = OCFS2_AC_USE_META;
#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
slot = 0;
#else
slot = osb->slot_num;
#endif
(*ac)->ac_group_search = ocfs2_block_group_search;
status = ocfs2_reserve_suballoc_bits(osb, (*ac),
@ -1626,12 +1612,12 @@ bail:
/*
* expects the suballoc inode to already be locked.
*/
static int ocfs2_free_suballoc_bits(handle_t *handle,
struct inode *alloc_inode,
struct buffer_head *alloc_bh,
unsigned int start_bit,
u64 bg_blkno,
unsigned int count)
int ocfs2_free_suballoc_bits(handle_t *handle,
struct inode *alloc_inode,
struct buffer_head *alloc_bh,
unsigned int start_bit,
u64 bg_blkno,
unsigned int count)
{
int status = 0;
u32 tmp_used;
@ -1703,13 +1689,6 @@ bail:
return status;
}
static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
{
u64 group = block - (u64) bit;
return group;
}
int ocfs2_free_dinode(handle_t *handle,
struct inode *inode_alloc_inode,
struct buffer_head *inode_alloc_bh,
@ -1723,19 +1702,6 @@ int ocfs2_free_dinode(handle_t *handle,
inode_alloc_bh, bit, bg_blkno, 1);
}
int ocfs2_free_extent_block(handle_t *handle,
struct inode *eb_alloc_inode,
struct buffer_head *eb_alloc_bh,
struct ocfs2_extent_block *eb)
{
u64 blk = le64_to_cpu(eb->h_blkno);
u16 bit = le16_to_cpu(eb->h_suballoc_bit);
u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
return ocfs2_free_suballoc_bits(handle, eb_alloc_inode, eb_alloc_bh,
bit, bg_blkno, 1);
}
int ocfs2_free_clusters(handle_t *handle,
struct inode *bitmap_inode,
struct buffer_head *bitmap_bh,

View file

@ -86,20 +86,29 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
u32 *cluster_start,
u32 *num_clusters);
int ocfs2_free_suballoc_bits(handle_t *handle,
struct inode *alloc_inode,
struct buffer_head *alloc_bh,
unsigned int start_bit,
u64 bg_blkno,
unsigned int count);
int ocfs2_free_dinode(handle_t *handle,
struct inode *inode_alloc_inode,
struct buffer_head *inode_alloc_bh,
struct ocfs2_dinode *di);
int ocfs2_free_extent_block(handle_t *handle,
struct inode *eb_alloc_inode,
struct buffer_head *eb_alloc_bh,
struct ocfs2_extent_block *eb);
int ocfs2_free_clusters(handle_t *handle,
struct inode *bitmap_inode,
struct buffer_head *bitmap_bh,
u64 start_blk,
unsigned int num_clusters);
/*
 * Compute a suballocator group block number from a block number and
 * its bit offset: the result is simply block - bit.
 */
static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
{
	return block - (u64) bit;
}
static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb,
u64 bg_blkno)
{

View file

@ -82,7 +82,8 @@ MODULE_AUTHOR("Oracle");
MODULE_LICENSE("GPL");
static int ocfs2_parse_options(struct super_block *sb, char *options,
unsigned long *mount_opt, int is_remount);
unsigned long *mount_opt, s16 *slot,
int is_remount);
static void ocfs2_put_super(struct super_block *sb);
static int ocfs2_mount_volume(struct super_block *sb);
static int ocfs2_remount(struct super_block *sb, int *flags, char *data);
@ -114,8 +115,6 @@ static void ocfs2_write_super(struct super_block *sb);
static struct inode *ocfs2_alloc_inode(struct super_block *sb);
static void ocfs2_destroy_inode(struct inode *inode);
static unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
static const struct super_operations ocfs2_sops = {
.statfs = ocfs2_statfs,
.alloc_inode = ocfs2_alloc_inode,
@ -140,6 +139,7 @@ enum {
Opt_data_ordered,
Opt_data_writeback,
Opt_atime_quantum,
Opt_slot,
Opt_err,
};
@ -154,6 +154,7 @@ static match_table_t tokens = {
{Opt_data_ordered, "data=ordered"},
{Opt_data_writeback, "data=writeback"},
{Opt_atime_quantum, "atime_quantum=%u"},
{Opt_slot, "preferred_slot=%u"},
{Opt_err, NULL}
};
@ -318,7 +319,7 @@ static void ocfs2_destroy_inode(struct inode *inode)
/* From xfs_super.c:xfs_max_file_offset
* Copyright (c) 2000-2004 Silicon Graphics, Inc.
*/
static unsigned long long ocfs2_max_file_offset(unsigned int blockshift)
unsigned long long ocfs2_max_file_offset(unsigned int blockshift)
{
unsigned int pagefactor = 1;
unsigned int bitshift = BITS_PER_LONG - 1;
@ -355,9 +356,10 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
int incompat_features;
int ret = 0;
unsigned long parsed_options;
s16 slot;
struct ocfs2_super *osb = OCFS2_SB(sb);
if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) {
if (!ocfs2_parse_options(sb, data, &parsed_options, &slot, 1)) {
ret = -EINVAL;
goto out;
}
@ -534,6 +536,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
struct dentry *root;
int status, sector_size;
unsigned long parsed_opt;
s16 slot;
struct inode *inode = NULL;
struct ocfs2_super *osb = NULL;
struct buffer_head *bh = NULL;
@ -541,7 +544,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
mlog_entry("%p, %p, %i", sb, data, silent);
if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) {
if (!ocfs2_parse_options(sb, data, &parsed_opt, &slot, 0)) {
status = -EINVAL;
goto read_super_error;
}
@ -571,6 +574,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
brelse(bh);
bh = NULL;
osb->s_mount_opt = parsed_opt;
osb->preferred_slot = slot;
sb->s_magic = OCFS2_SUPER_MAGIC;
@ -713,6 +717,7 @@ static struct file_system_type ocfs2_fs_type = {
static int ocfs2_parse_options(struct super_block *sb,
char *options,
unsigned long *mount_opt,
s16 *slot,
int is_remount)
{
int status;
@ -722,6 +727,7 @@ static int ocfs2_parse_options(struct super_block *sb,
options ? options : "(none)");
*mount_opt = 0;
*slot = OCFS2_INVALID_SLOT;
if (!options) {
status = 1;
@ -782,6 +788,15 @@ static int ocfs2_parse_options(struct super_block *sb,
else
osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
break;
case Opt_slot:
option = 0;
if (match_int(&args[0], &option)) {
status = 0;
goto bail;
}
if (option)
*slot = (s16)option;
break;
default:
mlog(ML_ERROR,
"Unrecognized mount option \"%s\" "

View file

@ -45,4 +45,6 @@ void __ocfs2_abort(struct super_block *sb,
#define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
#endif /* OCFS2_SUPER_H */