NFS client updates for Linux 3.17

Highlights include:
 
 - Stable fix for a bug in nfs3_list_one_acl()
 - Speed up NFS path walks by supporting LOOKUP_RCU
 - More read/write code cleanups
 - pNFS fixes for layout return on close
 - Fixes for the RCU handling in the rpcsec_gss code
 - More NFS/RDMA fixes
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJT65zoAAoJEGcL54qWCgDyvq8QAJ+OKuC5dpngrZ13i4ZJIcK1
 TJSkWCr44FhYPlrmkLCntsGX6C0376oFEtJ5uqloqK0+/QtvwRNVSQMKaJopKIVY
 mR4En0WwpigxVQdW2lgto6bfOhzMVO+llVdmicEVrU8eeSThATxGNv7rxRzWorvL
 RX3TwBkWSc0kLtPi66VRFQ1z+gg5I0kngyyhsKnLOaHHtpTYP2JDZlRPRkokXPUg
 nmNedmC3JrFFkarroFIfYr54Qit2GW/eI2zVhOwHGCb45j4b2wntZ6wr7LpUdv3A
 OGDBzw59cTpcx3Hij9CFvLYVV9IJJHBNd2MJqdQRtgWFfs+aTkZdk4uilUJCIzZh
 f4BujQAlm/4X1HbPxsSvkCRKga7mesGM7e0sBDPHC1vu0mSaY1cakcj2kQLTpbQ7
 gqa1cR3pZ+4shCq37cLwWU0w1yElYe1c4otjSCttPCrAjXbXJZSFzYnHm8DwKROR
 t+yEDRL5BIXPu1nEtSnD2+xTQ3vUIYXooZWEmqLKgRtBTtPmgSn9Vd8P1OQXmMNo
 VJyFXyjNx5WH06Wbc/jLzQ1/cyhuPmJWWyWMJlVROyv+FXk9DJUFBZuTkpMrIPcF
 NlBXLV1GnA7PzMD9Xt9bwqteERZl6fOUDJLWS9P74kTk5c2kD+m+GaqC/rBTKKXc
 ivr2s7aIDV48jhnwBSVL
 =KE07
 -----END PGP SIGNATURE-----

Merge tag 'nfs-for-3.17-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

   - stable fix for a bug in nfs3_list_one_acl()
   - speed up NFS path walks by supporting LOOKUP_RCU
   - more read/write code cleanups
   - pNFS fixes for layout return on close
   - fixes for the RCU handling in the rpcsec_gss code
   - more NFS/RDMA fixes"

* tag 'nfs-for-3.17-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (79 commits)
  nfs: reject changes to resvport and sharecache during remount
  NFS: Avoid infinite loop when RELEASE_LOCKOWNER getting expired error
  SUNRPC: remove all refcounting of groupinfo from rpcauth_lookupcred
  NFS: fix two problems in lookup_revalidate in RCU-walk
  NFS: allow lockless access to access_cache
  NFS: teach nfs_lookup_verify_inode to handle LOOKUP_RCU
  NFS: teach nfs_neg_need_reval to understand LOOKUP_RCU
  NFS: support RCU_WALK in nfs_permission()
  sunrpc/auth: allow lockless (rcu) lookup of credential cache.
  NFS: prepare for RCU-walk support but pushing tests later in code.
  NFS: nfs4_lookup_revalidate: only evaluate parent if it will be used.
  NFS: add checks for returned value of try_module_get()
  nfs: clear_request_commit while holding i_lock
  pnfs: add pnfs_put_lseg_async
  pnfs: find swapped pages on pnfs commit lists too
  nfs: fix comment and add warn_on for PG_INODE_REF
  nfs: check wait_on_bit_lock err in page_group_lock
  sunrpc: remove "ec" argument from encrypt_v2 operation
  sunrpc: clean up sparse endianness warnings in gss_krb5_wrap.c
  sunrpc: clean up sparse endianness warnings in gss_krb5_seal.c
  ...
This commit is contained in:
Linus Torvalds 2014-08-13 18:13:19 -06:00
commit 06b8ab5528
54 changed files with 1968 additions and 1289 deletions

View file

@ -210,8 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err)
SetPageUptodate(bvec->bv_page); SetPageUptodate(bvec->bv_page);
if (err) { if (err) {
struct nfs_pgio_data *rdata = par->data; struct nfs_pgio_header *header = par->data;
struct nfs_pgio_header *header = rdata->header;
if (!header->pnfs_error) if (!header->pnfs_error)
header->pnfs_error = -EIO; header->pnfs_error = -EIO;
@ -224,43 +223,44 @@ static void bl_end_io_read(struct bio *bio, int err)
static void bl_read_cleanup(struct work_struct *work) static void bl_read_cleanup(struct work_struct *work)
{ {
struct rpc_task *task; struct rpc_task *task;
struct nfs_pgio_data *rdata; struct nfs_pgio_header *hdr;
dprintk("%s enter\n", __func__); dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work); task = container_of(work, struct rpc_task, u.tk_work);
rdata = container_of(task, struct nfs_pgio_data, task); hdr = container_of(task, struct nfs_pgio_header, task);
pnfs_ld_read_done(rdata); pnfs_ld_read_done(hdr);
} }
static void static void
bl_end_par_io_read(void *data, int unused) bl_end_par_io_read(void *data, int unused)
{ {
struct nfs_pgio_data *rdata = data; struct nfs_pgio_header *hdr = data;
rdata->task.tk_status = rdata->header->pnfs_error; hdr->task.tk_status = hdr->pnfs_error;
INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup); INIT_WORK(&hdr->task.u.tk_work, bl_read_cleanup);
schedule_work(&rdata->task.u.tk_work); schedule_work(&hdr->task.u.tk_work);
} }
static enum pnfs_try_status static enum pnfs_try_status
bl_read_pagelist(struct nfs_pgio_data *rdata) bl_read_pagelist(struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *header = rdata->header; struct nfs_pgio_header *header = hdr;
int i, hole; int i, hole;
struct bio *bio = NULL; struct bio *bio = NULL;
struct pnfs_block_extent *be = NULL, *cow_read = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL;
sector_t isect, extent_length = 0; sector_t isect, extent_length = 0;
struct parallel_io *par; struct parallel_io *par;
loff_t f_offset = rdata->args.offset; loff_t f_offset = hdr->args.offset;
size_t bytes_left = rdata->args.count; size_t bytes_left = hdr->args.count;
unsigned int pg_offset, pg_len; unsigned int pg_offset, pg_len;
struct page **pages = rdata->args.pages; struct page **pages = hdr->args.pages;
int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; int pg_index = hdr->args.pgbase >> PAGE_CACHE_SHIFT;
const bool is_dio = (header->dreq != NULL); const bool is_dio = (header->dreq != NULL);
dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); hdr->page_array.npages, f_offset,
(unsigned int)hdr->args.count);
par = alloc_parallel(rdata); par = alloc_parallel(hdr);
if (!par) if (!par)
goto use_mds; goto use_mds;
par->pnfs_callback = bl_end_par_io_read; par->pnfs_callback = bl_end_par_io_read;
@ -268,7 +268,7 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
isect = (sector_t) (f_offset >> SECTOR_SHIFT); isect = (sector_t) (f_offset >> SECTOR_SHIFT);
/* Code assumes extents are page-aligned */ /* Code assumes extents are page-aligned */
for (i = pg_index; i < rdata->pages.npages; i++) { for (i = pg_index; i < hdr->page_array.npages; i++) {
if (!extent_length) { if (!extent_length) {
/* We've used up the previous extent */ /* We've used up the previous extent */
bl_put_extent(be); bl_put_extent(be);
@ -317,7 +317,8 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
struct pnfs_block_extent *be_read; struct pnfs_block_extent *be_read;
be_read = (hole && cow_read) ? cow_read : be; be_read = (hole && cow_read) ? cow_read : be;
bio = do_add_page_to_bio(bio, rdata->pages.npages - i, bio = do_add_page_to_bio(bio,
hdr->page_array.npages - i,
READ, READ,
isect, pages[i], be_read, isect, pages[i], be_read,
bl_end_io_read, par, bl_end_io_read, par,
@ -332,10 +333,10 @@ bl_read_pagelist(struct nfs_pgio_data *rdata)
extent_length -= PAGE_CACHE_SECTORS; extent_length -= PAGE_CACHE_SECTORS;
} }
if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
rdata->res.eof = 1; hdr->res.eof = 1;
rdata->res.count = header->inode->i_size - rdata->args.offset; hdr->res.count = header->inode->i_size - hdr->args.offset;
} else { } else {
rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset; hdr->res.count = (isect << SECTOR_SHIFT) - hdr->args.offset;
} }
out: out:
bl_put_extent(be); bl_put_extent(be);
@ -390,8 +391,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
} }
if (unlikely(err)) { if (unlikely(err)) {
struct nfs_pgio_data *data = par->data; struct nfs_pgio_header *header = par->data;
struct nfs_pgio_header *header = data->header;
if (!header->pnfs_error) if (!header->pnfs_error)
header->pnfs_error = -EIO; header->pnfs_error = -EIO;
@ -405,8 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
{ {
struct parallel_io *par = bio->bi_private; struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct nfs_pgio_data *data = par->data; struct nfs_pgio_header *header = par->data;
struct nfs_pgio_header *header = data->header;
if (!uptodate) { if (!uptodate) {
if (!header->pnfs_error) if (!header->pnfs_error)
@ -423,32 +422,32 @@ static void bl_end_io_write(struct bio *bio, int err)
static void bl_write_cleanup(struct work_struct *work) static void bl_write_cleanup(struct work_struct *work)
{ {
struct rpc_task *task; struct rpc_task *task;
struct nfs_pgio_data *wdata; struct nfs_pgio_header *hdr;
dprintk("%s enter\n", __func__); dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work); task = container_of(work, struct rpc_task, u.tk_work);
wdata = container_of(task, struct nfs_pgio_data, task); hdr = container_of(task, struct nfs_pgio_header, task);
if (likely(!wdata->header->pnfs_error)) { if (likely(!hdr->pnfs_error)) {
/* Marks for LAYOUTCOMMIT */ /* Marks for LAYOUTCOMMIT */
mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg), mark_extents_written(BLK_LSEG2EXT(hdr->lseg),
wdata->args.offset, wdata->args.count); hdr->args.offset, hdr->args.count);
} }
pnfs_ld_write_done(wdata); pnfs_ld_write_done(hdr);
} }
/* Called when last of bios associated with a bl_write_pagelist call finishes */ /* Called when last of bios associated with a bl_write_pagelist call finishes */
static void bl_end_par_io_write(void *data, int num_se) static void bl_end_par_io_write(void *data, int num_se)
{ {
struct nfs_pgio_data *wdata = data; struct nfs_pgio_header *hdr = data;
if (unlikely(wdata->header->pnfs_error)) { if (unlikely(hdr->pnfs_error)) {
bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval, bl_free_short_extents(&BLK_LSEG2EXT(hdr->lseg)->bl_inval,
num_se); num_se);
} }
wdata->task.tk_status = wdata->header->pnfs_error; hdr->task.tk_status = hdr->pnfs_error;
wdata->verf.committed = NFS_FILE_SYNC; hdr->verf.committed = NFS_FILE_SYNC;
INIT_WORK(&wdata->task.u.tk_work, bl_write_cleanup); INIT_WORK(&hdr->task.u.tk_work, bl_write_cleanup);
schedule_work(&wdata->task.u.tk_work); schedule_work(&hdr->task.u.tk_work);
} }
/* FIXME STUB - mark intersection of layout and page as bad, so is not /* FIXME STUB - mark intersection of layout and page as bad, so is not
@ -673,18 +672,17 @@ check_page:
} }
static enum pnfs_try_status static enum pnfs_try_status
bl_write_pagelist(struct nfs_pgio_data *wdata, int sync) bl_write_pagelist(struct nfs_pgio_header *header, int sync)
{ {
struct nfs_pgio_header *header = wdata->header;
int i, ret, npg_zero, pg_index, last = 0; int i, ret, npg_zero, pg_index, last = 0;
struct bio *bio = NULL; struct bio *bio = NULL;
struct pnfs_block_extent *be = NULL, *cow_read = NULL; struct pnfs_block_extent *be = NULL, *cow_read = NULL;
sector_t isect, last_isect = 0, extent_length = 0; sector_t isect, last_isect = 0, extent_length = 0;
struct parallel_io *par = NULL; struct parallel_io *par = NULL;
loff_t offset = wdata->args.offset; loff_t offset = header->args.offset;
size_t count = wdata->args.count; size_t count = header->args.count;
unsigned int pg_offset, pg_len, saved_len; unsigned int pg_offset, pg_len, saved_len;
struct page **pages = wdata->args.pages; struct page **pages = header->args.pages;
struct page *page; struct page *page;
pgoff_t index; pgoff_t index;
u64 temp; u64 temp;
@ -699,11 +697,11 @@ bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n"); dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n");
goto out_mds; goto out_mds;
} }
/* At this point, wdata->pages is a (sequential) list of nfs_pages. /* At this point, header->page_array is a (sequential) list of nfs_pages.
* We want to write each, and if there is an error set pnfs_error * We want to write each, and if there is an error set pnfs_error
* to have it redone using nfs. * to have it redone using nfs.
*/ */
par = alloc_parallel(wdata); par = alloc_parallel(header);
if (!par) if (!par)
goto out_mds; goto out_mds;
par->pnfs_callback = bl_end_par_io_write; par->pnfs_callback = bl_end_par_io_write;
@ -790,8 +788,8 @@ next_page:
bio = bl_submit_bio(WRITE, bio); bio = bl_submit_bio(WRITE, bio);
/* Middle pages */ /* Middle pages */
pg_index = wdata->args.pgbase >> PAGE_CACHE_SHIFT; pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
for (i = pg_index; i < wdata->pages.npages; i++) { for (i = pg_index; i < header->page_array.npages; i++) {
if (!extent_length) { if (!extent_length) {
/* We've used up the previous extent */ /* We've used up the previous extent */
bl_put_extent(be); bl_put_extent(be);
@ -862,7 +860,8 @@ next_page:
} }
bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, bio = do_add_page_to_bio(bio, header->page_array.npages - i,
WRITE,
isect, pages[i], be, isect, pages[i], be,
bl_end_io_write, par, bl_end_io_write, par,
pg_offset, pg_len); pg_offset, pg_len);
@ -890,7 +889,7 @@ next_page:
} }
write_done: write_done:
wdata->res.count = wdata->args.count; header->res.count = header->args.count;
out: out:
bl_put_extent(be); bl_put_extent(be);
bl_put_extent(cow_read); bl_put_extent(cow_read);
@ -1063,7 +1062,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh,
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
pages = kzalloc(max_pages * sizeof(struct page *), GFP_NOFS); pages = kcalloc(max_pages, sizeof(struct page *), GFP_NOFS);
if (pages == NULL) { if (pages == NULL) {
kfree(dev); kfree(dev);
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);

View file

@ -428,6 +428,18 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
if (p == NULL) if (p == NULL)
return 0; return 0;
/*
* Did we get the acceptor from userland during the SETCLIENTID
* negotiation?
*/
if (clp->cl_acceptor)
return !strcmp(p, clp->cl_acceptor);
/*
* Otherwise try to verify it using the cl_hostname. Note that this
* doesn't work if a non-canonical hostname was used in the devname.
*/
/* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */ /* Expect a GSS_C_NT_HOSTBASED_NAME like "nfs@serverhostname" */
if (memcmp(p, "nfs@", 4) != 0) if (memcmp(p, "nfs@", 4) != 0)

View file

@ -110,8 +110,8 @@ struct nfs_subversion *get_nfs_version(unsigned int version)
mutex_unlock(&nfs_version_mutex); mutex_unlock(&nfs_version_mutex);
} }
if (!IS_ERR(nfs)) if (!IS_ERR(nfs) && !try_module_get(nfs->owner))
try_module_get(nfs->owner); return ERR_PTR(-EAGAIN);
return nfs; return nfs;
} }
@ -158,7 +158,8 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
goto error_0; goto error_0;
clp->cl_nfs_mod = cl_init->nfs_mod; clp->cl_nfs_mod = cl_init->nfs_mod;
try_module_get(clp->cl_nfs_mod->owner); if (!try_module_get(clp->cl_nfs_mod->owner))
goto error_dealloc;
clp->rpc_ops = clp->cl_nfs_mod->rpc_ops; clp->rpc_ops = clp->cl_nfs_mod->rpc_ops;
@ -190,6 +191,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
error_cleanup: error_cleanup:
put_nfs_version(clp->cl_nfs_mod); put_nfs_version(clp->cl_nfs_mod);
error_dealloc:
kfree(clp); kfree(clp);
error_0: error_0:
return ERR_PTR(err); return ERR_PTR(err);
@ -252,6 +254,7 @@ void nfs_free_client(struct nfs_client *clp)
put_net(clp->cl_net); put_net(clp->cl_net);
put_nfs_version(clp->cl_nfs_mod); put_nfs_version(clp->cl_nfs_mod);
kfree(clp->cl_hostname); kfree(clp->cl_hostname);
kfree(clp->cl_acceptor);
kfree(clp); kfree(clp);
dprintk("<-- nfs_free_client()\n"); dprintk("<-- nfs_free_client()\n");
@ -482,8 +485,13 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id); struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops; const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops;
if (cl_init->hostname == NULL) {
WARN_ON(1);
return NULL;
}
dprintk("--> nfs_get_client(%s,v%u)\n", dprintk("--> nfs_get_client(%s,v%u)\n",
cl_init->hostname ?: "", rpc_ops->version); cl_init->hostname, rpc_ops->version);
/* see if the client already exists */ /* see if the client already exists */
do { do {
@ -510,7 +518,7 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
} while (!IS_ERR(new)); } while (!IS_ERR(new));
dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n", dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
cl_init->hostname ?: "", PTR_ERR(new)); cl_init->hostname, PTR_ERR(new));
return new; return new;
} }
EXPORT_SYMBOL_GPL(nfs_get_client); EXPORT_SYMBOL_GPL(nfs_get_client);

View file

@ -41,14 +41,8 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags); set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
} }
/** static int
* nfs_have_delegation - check if inode has a delegation nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
* @inode: inode to check
* @flags: delegation types to check for
*
* Returns one if inode has the indicated delegation, otherwise zero.
*/
int nfs4_have_delegation(struct inode *inode, fmode_t flags)
{ {
struct nfs_delegation *delegation; struct nfs_delegation *delegation;
int ret = 0; int ret = 0;
@ -58,12 +52,34 @@ int nfs4_have_delegation(struct inode *inode, fmode_t flags)
delegation = rcu_dereference(NFS_I(inode)->delegation); delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation != NULL && (delegation->type & flags) == flags && if (delegation != NULL && (delegation->type & flags) == flags &&
!test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
nfs_mark_delegation_referenced(delegation); if (mark)
nfs_mark_delegation_referenced(delegation);
ret = 1; ret = 1;
} }
rcu_read_unlock(); rcu_read_unlock();
return ret; return ret;
} }
/**
* nfs4_have_delegation - check if inode has a delegation, mark it
* NFS_DELEGATION_REFERENCED if there is one.
* @inode: inode to check
* @flags: delegation types to check for
*
* Returns one if inode has the indicated delegation, otherwise zero.
*/
int nfs4_have_delegation(struct inode *inode, fmode_t flags)
{
return nfs4_do_check_delegation(inode, flags, true);
}
/*
* nfs4_check_delegation - check if inode has a delegation, do not mark
* NFS_DELEGATION_REFERENCED if it has one.
*/
int nfs4_check_delegation(struct inode *inode, fmode_t flags)
{
return nfs4_do_check_delegation(inode, flags, false);
}
static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
{ {

View file

@ -59,6 +59,7 @@ bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
int nfs4_have_delegation(struct inode *inode, fmode_t flags); int nfs4_have_delegation(struct inode *inode, fmode_t flags);
int nfs4_check_delegation(struct inode *inode, fmode_t flags);
#endif #endif

View file

@ -988,9 +988,13 @@ EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
* A check for whether or not the parent directory has changed. * A check for whether or not the parent directory has changed.
* In the case it has, we assume that the dentries are untrustworthy * In the case it has, we assume that the dentries are untrustworthy
* and may need to be looked up again. * and may need to be looked up again.
* If rcu_walk prevents us from performing a full check, return 0.
*/ */
static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
int rcu_walk)
{ {
int ret;
if (IS_ROOT(dentry)) if (IS_ROOT(dentry))
return 1; return 1;
if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
@ -998,7 +1002,11 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
if (!nfs_verify_change_attribute(dir, dentry->d_time)) if (!nfs_verify_change_attribute(dir, dentry->d_time))
return 0; return 0;
/* Revalidate nfsi->cache_change_attribute before we declare a match */ /* Revalidate nfsi->cache_change_attribute before we declare a match */
if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) if (rcu_walk)
ret = nfs_revalidate_inode_rcu(NFS_SERVER(dir), dir);
else
ret = nfs_revalidate_inode(NFS_SERVER(dir), dir);
if (ret < 0)
return 0; return 0;
if (!nfs_verify_change_attribute(dir, dentry->d_time)) if (!nfs_verify_change_attribute(dir, dentry->d_time))
return 0; return 0;
@ -1042,6 +1050,8 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
out: out:
return (inode->i_nlink == 0) ? -ENOENT : 0; return (inode->i_nlink == 0) ? -ENOENT : 0;
out_force: out_force:
if (flags & LOOKUP_RCU)
return -ECHILD;
ret = __nfs_revalidate_inode(server, inode); ret = __nfs_revalidate_inode(server, inode);
if (ret != 0) if (ret != 0)
return ret; return ret;
@ -1054,6 +1064,9 @@ out_force:
* *
* If parent mtime has changed, we revalidate, else we wait for a * If parent mtime has changed, we revalidate, else we wait for a
* period corresponding to the parent's attribute cache timeout value. * period corresponding to the parent's attribute cache timeout value.
*
* If LOOKUP_RCU prevents us from performing a full check, return 1
* suggesting a reval is needed.
*/ */
static inline static inline
int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
@ -1064,7 +1077,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
return 0; return 0;
if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
return 1; return 1;
return !nfs_check_verifier(dir, dentry); return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
} }
/* /*
@ -1088,21 +1101,30 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
struct nfs4_label *label = NULL; struct nfs4_label *label = NULL;
int error; int error;
if (flags & LOOKUP_RCU) if (flags & LOOKUP_RCU) {
return -ECHILD; parent = ACCESS_ONCE(dentry->d_parent);
dir = ACCESS_ONCE(parent->d_inode);
parent = dget_parent(dentry); if (!dir)
dir = parent->d_inode; return -ECHILD;
} else {
parent = dget_parent(dentry);
dir = parent->d_inode;
}
nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
inode = dentry->d_inode; inode = dentry->d_inode;
if (!inode) { if (!inode) {
if (nfs_neg_need_reval(dir, dentry, flags)) if (nfs_neg_need_reval(dir, dentry, flags)) {
if (flags & LOOKUP_RCU)
return -ECHILD;
goto out_bad; goto out_bad;
}
goto out_valid_noent; goto out_valid_noent;
} }
if (is_bad_inode(inode)) { if (is_bad_inode(inode)) {
if (flags & LOOKUP_RCU)
return -ECHILD;
dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
__func__, dentry); __func__, dentry);
goto out_bad; goto out_bad;
@ -1112,12 +1134,20 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
goto out_set_verifier; goto out_set_verifier;
/* Force a full look up iff the parent directory has changed */ /* Force a full look up iff the parent directory has changed */
if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) { if (!nfs_is_exclusive_create(dir, flags) &&
if (nfs_lookup_verify_inode(inode, flags)) nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
if (nfs_lookup_verify_inode(inode, flags)) {
if (flags & LOOKUP_RCU)
return -ECHILD;
goto out_zap_parent; goto out_zap_parent;
}
goto out_valid; goto out_valid;
} }
if (flags & LOOKUP_RCU)
return -ECHILD;
if (NFS_STALE(inode)) if (NFS_STALE(inode))
goto out_bad; goto out_bad;
@ -1153,13 +1183,18 @@ out_set_verifier:
/* Success: notify readdir to use READDIRPLUS */ /* Success: notify readdir to use READDIRPLUS */
nfs_advise_use_readdirplus(dir); nfs_advise_use_readdirplus(dir);
out_valid_noent: out_valid_noent:
dput(parent); if (flags & LOOKUP_RCU) {
if (parent != ACCESS_ONCE(dentry->d_parent))
return -ECHILD;
} else
dput(parent);
dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
__func__, dentry); __func__, dentry);
return 1; return 1;
out_zap_parent: out_zap_parent:
nfs_zap_caches(dir); nfs_zap_caches(dir);
out_bad: out_bad:
WARN_ON(flags & LOOKUP_RCU);
nfs_free_fattr(fattr); nfs_free_fattr(fattr);
nfs_free_fhandle(fhandle); nfs_free_fhandle(fhandle);
nfs4_label_free(label); nfs4_label_free(label);
@ -1185,6 +1220,7 @@ out_zap_parent:
__func__, dentry); __func__, dentry);
return 0; return 0;
out_error: out_error:
WARN_ON(flags & LOOKUP_RCU);
nfs_free_fattr(fattr); nfs_free_fattr(fattr);
nfs_free_fhandle(fhandle); nfs_free_fhandle(fhandle);
nfs4_label_free(label); nfs4_label_free(label);
@ -1529,14 +1565,9 @@ EXPORT_SYMBOL_GPL(nfs_atomic_open);
static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
{ {
struct dentry *parent = NULL;
struct inode *inode; struct inode *inode;
struct inode *dir;
int ret = 0; int ret = 0;
if (flags & LOOKUP_RCU)
return -ECHILD;
if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
goto no_open; goto no_open;
if (d_mountpoint(dentry)) if (d_mountpoint(dentry))
@ -1545,34 +1576,47 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
goto no_open; goto no_open;
inode = dentry->d_inode; inode = dentry->d_inode;
parent = dget_parent(dentry);
dir = parent->d_inode;
/* We can't create new files in nfs_open_revalidate(), so we /* We can't create new files in nfs_open_revalidate(), so we
* optimize away revalidation of negative dentries. * optimize away revalidation of negative dentries.
*/ */
if (inode == NULL) { if (inode == NULL) {
struct dentry *parent;
struct inode *dir;
if (flags & LOOKUP_RCU) {
parent = ACCESS_ONCE(dentry->d_parent);
dir = ACCESS_ONCE(parent->d_inode);
if (!dir)
return -ECHILD;
} else {
parent = dget_parent(dentry);
dir = parent->d_inode;
}
if (!nfs_neg_need_reval(dir, dentry, flags)) if (!nfs_neg_need_reval(dir, dentry, flags))
ret = 1; ret = 1;
else if (flags & LOOKUP_RCU)
ret = -ECHILD;
if (!(flags & LOOKUP_RCU))
dput(parent);
else if (parent != ACCESS_ONCE(dentry->d_parent))
return -ECHILD;
goto out; goto out;
} }
/* NFS only supports OPEN on regular files */ /* NFS only supports OPEN on regular files */
if (!S_ISREG(inode->i_mode)) if (!S_ISREG(inode->i_mode))
goto no_open_dput; goto no_open;
/* We cannot do exclusive creation on a positive dentry */ /* We cannot do exclusive creation on a positive dentry */
if (flags & LOOKUP_EXCL) if (flags & LOOKUP_EXCL)
goto no_open_dput; goto no_open;
/* Let f_op->open() actually open (and revalidate) the file */ /* Let f_op->open() actually open (and revalidate) the file */
ret = 1; ret = 1;
out: out:
dput(parent);
return ret; return ret;
no_open_dput:
dput(parent);
no_open: no_open:
return nfs_lookup_revalidate(dentry, flags); return nfs_lookup_revalidate(dentry, flags);
} }
@ -2028,10 +2072,14 @@ static DEFINE_SPINLOCK(nfs_access_lru_lock);
static LIST_HEAD(nfs_access_lru_list); static LIST_HEAD(nfs_access_lru_list);
static atomic_long_t nfs_access_nr_entries; static atomic_long_t nfs_access_nr_entries;
static unsigned long nfs_access_max_cachesize = ULONG_MAX;
module_param(nfs_access_max_cachesize, ulong, 0644);
MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");
static void nfs_access_free_entry(struct nfs_access_entry *entry) static void nfs_access_free_entry(struct nfs_access_entry *entry)
{ {
put_rpccred(entry->cred); put_rpccred(entry->cred);
kfree(entry); kfree_rcu(entry, rcu_head);
smp_mb__before_atomic(); smp_mb__before_atomic();
atomic_long_dec(&nfs_access_nr_entries); atomic_long_dec(&nfs_access_nr_entries);
smp_mb__after_atomic(); smp_mb__after_atomic();
@ -2048,19 +2096,14 @@ static void nfs_access_free_list(struct list_head *head)
} }
} }
unsigned long static unsigned long
nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc) nfs_do_access_cache_scan(unsigned int nr_to_scan)
{ {
LIST_HEAD(head); LIST_HEAD(head);
struct nfs_inode *nfsi, *next; struct nfs_inode *nfsi, *next;
struct nfs_access_entry *cache; struct nfs_access_entry *cache;
int nr_to_scan = sc->nr_to_scan;
gfp_t gfp_mask = sc->gfp_mask;
long freed = 0; long freed = 0;
if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
return SHRINK_STOP;
spin_lock(&nfs_access_lru_lock); spin_lock(&nfs_access_lru_lock);
list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
struct inode *inode; struct inode *inode;
@ -2093,12 +2136,40 @@ remove_lru_entry:
return freed; return freed;
} }
unsigned long
nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
int nr_to_scan = sc->nr_to_scan;
gfp_t gfp_mask = sc->gfp_mask;
if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
return SHRINK_STOP;
return nfs_do_access_cache_scan(nr_to_scan);
}
unsigned long unsigned long
nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc) nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{ {
return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries)); return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
} }
static void
nfs_access_cache_enforce_limit(void)
{
long nr_entries = atomic_long_read(&nfs_access_nr_entries);
unsigned long diff;
unsigned int nr_to_scan;
if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize)
return;
nr_to_scan = 100;
diff = nr_entries - nfs_access_max_cachesize;
if (diff < nr_to_scan)
nr_to_scan = diff;
nfs_do_access_cache_scan(nr_to_scan);
}
static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head) static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
{ {
struct rb_root *root_node = &nfsi->access_cache; struct rb_root *root_node = &nfsi->access_cache;
@ -2186,6 +2257,38 @@ out_zap:
return -ENOENT; return -ENOENT;
} }
static int nfs_access_get_cached_rcu(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res)
{
/* Only check the most recently returned cache entry,
* but do it without locking.
*/
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_access_entry *cache;
int err = -ECHILD;
struct list_head *lh;
rcu_read_lock();
if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
goto out;
lh = rcu_dereference(nfsi->access_cache_entry_lru.prev);
cache = list_entry(lh, struct nfs_access_entry, lru);
if (lh == &nfsi->access_cache_entry_lru ||
cred != cache->cred)
cache = NULL;
if (cache == NULL)
goto out;
if (!nfs_have_delegated_attributes(inode) &&
!time_in_range_open(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo))
goto out;
res->jiffies = cache->jiffies;
res->cred = cache->cred;
res->mask = cache->mask;
err = 0;
out:
rcu_read_unlock();
return err;
}
static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
{ {
struct nfs_inode *nfsi = NFS_I(inode); struct nfs_inode *nfsi = NFS_I(inode);
@ -2229,6 +2332,11 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
cache->cred = get_rpccred(set->cred); cache->cred = get_rpccred(set->cred);
cache->mask = set->mask; cache->mask = set->mask;
/* The above field assignments must be visible
* before this item appears on the lru. We cannot easily
* use rcu_assign_pointer, so just force the memory barrier.
*/
smp_wmb();
nfs_access_add_rbtree(inode, cache); nfs_access_add_rbtree(inode, cache);
/* Update accounting */ /* Update accounting */
@ -2244,6 +2352,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
&nfs_access_lru_list); &nfs_access_lru_list);
spin_unlock(&nfs_access_lru_lock); spin_unlock(&nfs_access_lru_lock);
} }
nfs_access_cache_enforce_limit();
} }
EXPORT_SYMBOL_GPL(nfs_access_add_cache); EXPORT_SYMBOL_GPL(nfs_access_add_cache);
@ -2267,10 +2376,16 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
trace_nfs_access_enter(inode); trace_nfs_access_enter(inode);
status = nfs_access_get_cached(inode, cred, &cache); status = nfs_access_get_cached_rcu(inode, cred, &cache);
if (status != 0)
status = nfs_access_get_cached(inode, cred, &cache);
if (status == 0) if (status == 0)
goto out_cached; goto out_cached;
status = -ECHILD;
if (mask & MAY_NOT_BLOCK)
goto out;
/* Be clever: ask server to check for all possible rights */ /* Be clever: ask server to check for all possible rights */
cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ;
cache.cred = cred; cache.cred = cred;
@ -2321,9 +2436,6 @@ int nfs_permission(struct inode *inode, int mask)
struct rpc_cred *cred; struct rpc_cred *cred;
int res = 0; int res = 0;
if (mask & MAY_NOT_BLOCK)
return -ECHILD;
nfs_inc_stats(inode, NFSIOS_VFSACCESS); nfs_inc_stats(inode, NFSIOS_VFSACCESS);
if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
@ -2350,12 +2462,23 @@ force_lookup:
if (!NFS_PROTO(inode)->access) if (!NFS_PROTO(inode)->access)
goto out_notsup; goto out_notsup;
cred = rpc_lookup_cred(); /* Always try fast lookups first */
if (!IS_ERR(cred)) { rcu_read_lock();
res = nfs_do_access(inode, cred, mask); cred = rpc_lookup_cred_nonblock();
put_rpccred(cred); if (!IS_ERR(cred))
} else res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK);
else
res = PTR_ERR(cred); res = PTR_ERR(cred);
rcu_read_unlock();
if (res == -ECHILD && !(mask & MAY_NOT_BLOCK)) {
/* Fast lookup failed, try the slow way */
cred = rpc_lookup_cred();
if (!IS_ERR(cred)) {
res = nfs_do_access(inode, cred, mask);
put_rpccred(cred);
} else
res = PTR_ERR(cred);
}
out: out:
if (!res && (mask & MAY_EXEC) && !execute_ok(inode)) if (!res && (mask & MAY_EXEC) && !execute_ok(inode))
res = -EACCES; res = -EACCES;
@ -2364,6 +2487,9 @@ out:
inode->i_sb->s_id, inode->i_ino, mask, res); inode->i_sb->s_id, inode->i_ino, mask, res);
return res; return res;
out_notsup: out_notsup:
if (mask & MAY_NOT_BLOCK)
return -ECHILD;
res = nfs_revalidate_inode(NFS_SERVER(inode), inode); res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (res == 0) if (res == 0)
res = generic_permission(inode, mask); res = generic_permission(inode, mask);

View file

@ -148,8 +148,8 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
{ {
struct nfs_writeverf *verfp; struct nfs_writeverf *verfp;
verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, verfp = nfs_direct_select_verf(dreq, hdr->ds_clp,
hdr->data->ds_idx); hdr->ds_idx);
WARN_ON_ONCE(verfp->committed >= 0); WARN_ON_ONCE(verfp->committed >= 0);
memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
WARN_ON_ONCE(verfp->committed < 0); WARN_ON_ONCE(verfp->committed < 0);
@ -169,8 +169,8 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
{ {
struct nfs_writeverf *verfp; struct nfs_writeverf *verfp;
verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, verfp = nfs_direct_select_verf(dreq, hdr->ds_clp,
hdr->data->ds_idx); hdr->ds_idx);
if (verfp->committed < 0) { if (verfp->committed < 0) {
nfs_direct_set_hdr_verf(dreq, hdr); nfs_direct_set_hdr_verf(dreq, hdr);
return 0; return 0;
@ -715,7 +715,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
{ {
struct nfs_direct_req *dreq = hdr->dreq; struct nfs_direct_req *dreq = hdr->dreq;
struct nfs_commit_info cinfo; struct nfs_commit_info cinfo;
int bit = -1; bool request_commit = false;
struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct nfs_page *req = nfs_list_entry(hdr->pages.next);
if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
@ -729,27 +729,20 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
dreq->flags = 0; dreq->flags = 0;
dreq->error = hdr->error; dreq->error = hdr->error;
} }
if (dreq->error != 0) if (dreq->error == 0) {
bit = NFS_IOHDR_ERROR;
else {
dreq->count += hdr->good_bytes; dreq->count += hdr->good_bytes;
if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { if (nfs_write_need_commit(hdr)) {
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
bit = NFS_IOHDR_NEED_RESCHED;
} else if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
bit = NFS_IOHDR_NEED_RESCHED; request_commit = true;
else if (dreq->flags == 0) { else if (dreq->flags == 0) {
nfs_direct_set_hdr_verf(dreq, hdr); nfs_direct_set_hdr_verf(dreq, hdr);
bit = NFS_IOHDR_NEED_COMMIT; request_commit = true;
dreq->flags = NFS_ODIRECT_DO_COMMIT; dreq->flags = NFS_ODIRECT_DO_COMMIT;
} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) { request_commit = true;
if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr))
dreq->flags = dreq->flags =
NFS_ODIRECT_RESCHED_WRITES; NFS_ODIRECT_RESCHED_WRITES;
bit = NFS_IOHDR_NEED_RESCHED;
} else
bit = NFS_IOHDR_NEED_COMMIT;
} }
} }
} }
@ -759,9 +752,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
req = nfs_list_entry(hdr->pages.next); req = nfs_list_entry(hdr->pages.next);
nfs_list_remove_request(req); nfs_list_remove_request(req);
switch (bit) { if (request_commit) {
case NFS_IOHDR_NEED_RESCHED:
case NFS_IOHDR_NEED_COMMIT:
kref_get(&req->wb_kref); kref_get(&req->wb_kref);
nfs_mark_request_commit(req, hdr->lseg, &cinfo); nfs_mark_request_commit(req, hdr->lseg, &cinfo);
} }

View file

@ -84,45 +84,37 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
BUG(); BUG();
} }
static void filelayout_reset_write(struct nfs_pgio_data *data) static void filelayout_reset_write(struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *hdr = data->header; struct rpc_task *task = &hdr->task;
struct rpc_task *task = &data->task;
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
dprintk("%s Reset task %5u for i/o through MDS " dprintk("%s Reset task %5u for i/o through MDS "
"(req %s/%llu, %u bytes @ offset %llu)\n", __func__, "(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
data->task.tk_pid, hdr->task.tk_pid,
hdr->inode->i_sb->s_id, hdr->inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(hdr->inode), (unsigned long long)NFS_FILEID(hdr->inode),
data->args.count, hdr->args.count,
(unsigned long long)data->args.offset); (unsigned long long)hdr->args.offset);
task->tk_status = pnfs_write_done_resend_to_mds(hdr->inode, task->tk_status = pnfs_write_done_resend_to_mds(hdr);
&hdr->pages,
hdr->completion_ops,
hdr->dreq);
} }
} }
static void filelayout_reset_read(struct nfs_pgio_data *data) static void filelayout_reset_read(struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *hdr = data->header; struct rpc_task *task = &hdr->task;
struct rpc_task *task = &data->task;
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
dprintk("%s Reset task %5u for i/o through MDS " dprintk("%s Reset task %5u for i/o through MDS "
"(req %s/%llu, %u bytes @ offset %llu)\n", __func__, "(req %s/%llu, %u bytes @ offset %llu)\n", __func__,
data->task.tk_pid, hdr->task.tk_pid,
hdr->inode->i_sb->s_id, hdr->inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(hdr->inode), (unsigned long long)NFS_FILEID(hdr->inode),
data->args.count, hdr->args.count,
(unsigned long long)data->args.offset); (unsigned long long)hdr->args.offset);
task->tk_status = pnfs_read_done_resend_to_mds(hdr->inode, task->tk_status = pnfs_read_done_resend_to_mds(hdr);
&hdr->pages,
hdr->completion_ops,
hdr->dreq);
} }
} }
@ -243,18 +235,17 @@ wait_on_recovery:
/* NFS_PROTO call done callback routines */ /* NFS_PROTO call done callback routines */
static int filelayout_read_done_cb(struct rpc_task *task, static int filelayout_read_done_cb(struct rpc_task *task,
struct nfs_pgio_data *data) struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *hdr = data->header;
int err; int err;
trace_nfs4_pnfs_read(data, task->tk_status); trace_nfs4_pnfs_read(hdr, task->tk_status);
err = filelayout_async_handle_error(task, data->args.context->state, err = filelayout_async_handle_error(task, hdr->args.context->state,
data->ds_clp, hdr->lseg); hdr->ds_clp, hdr->lseg);
switch (err) { switch (err) {
case -NFS4ERR_RESET_TO_MDS: case -NFS4ERR_RESET_TO_MDS:
filelayout_reset_read(data); filelayout_reset_read(hdr);
return task->tk_status; return task->tk_status;
case -EAGAIN: case -EAGAIN:
rpc_restart_call_prepare(task); rpc_restart_call_prepare(task);
@ -270,15 +261,14 @@ static int filelayout_read_done_cb(struct rpc_task *task,
* rfc5661 is not clear about which credential should be used. * rfc5661 is not clear about which credential should be used.
*/ */
static void static void
filelayout_set_layoutcommit(struct nfs_pgio_data *wdata) filelayout_set_layoutcommit(struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *hdr = wdata->header;
if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds || if (FILELAYOUT_LSEG(hdr->lseg)->commit_through_mds ||
wdata->res.verf->committed == NFS_FILE_SYNC) hdr->res.verf->committed == NFS_FILE_SYNC)
return; return;
pnfs_set_layoutcommit(wdata); pnfs_set_layoutcommit(hdr);
dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
} }
@ -305,83 +295,82 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
*/ */
static void filelayout_read_prepare(struct rpc_task *task, void *data) static void filelayout_read_prepare(struct rpc_task *task, void *data)
{ {
struct nfs_pgio_data *rdata = data; struct nfs_pgio_header *hdr = data;
if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) { if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
rpc_exit(task, -EIO); rpc_exit(task, -EIO);
return; return;
} }
if (filelayout_reset_to_mds(rdata->header->lseg)) { if (filelayout_reset_to_mds(hdr->lseg)) {
dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
filelayout_reset_read(rdata); filelayout_reset_read(hdr);
rpc_exit(task, 0); rpc_exit(task, 0);
return; return;
} }
rdata->pgio_done_cb = filelayout_read_done_cb; hdr->pgio_done_cb = filelayout_read_done_cb;
if (nfs41_setup_sequence(rdata->ds_clp->cl_session, if (nfs41_setup_sequence(hdr->ds_clp->cl_session,
&rdata->args.seq_args, &hdr->args.seq_args,
&rdata->res.seq_res, &hdr->res.seq_res,
task)) task))
return; return;
if (nfs4_set_rw_stateid(&rdata->args.stateid, rdata->args.context, if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
rdata->args.lock_context, FMODE_READ) == -EIO) hdr->args.lock_context, FMODE_READ) == -EIO)
rpc_exit(task, -EIO); /* lost lock, terminate I/O */ rpc_exit(task, -EIO); /* lost lock, terminate I/O */
} }
static void filelayout_read_call_done(struct rpc_task *task, void *data) static void filelayout_read_call_done(struct rpc_task *task, void *data)
{ {
struct nfs_pgio_data *rdata = data; struct nfs_pgio_header *hdr = data;
dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
if (test_bit(NFS_IOHDR_REDO, &rdata->header->flags) && if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
task->tk_status == 0) { task->tk_status == 0) {
nfs41_sequence_done(task, &rdata->res.seq_res); nfs41_sequence_done(task, &hdr->res.seq_res);
return; return;
} }
/* Note this may cause RPC to be resent */ /* Note this may cause RPC to be resent */
rdata->header->mds_ops->rpc_call_done(task, data); hdr->mds_ops->rpc_call_done(task, data);
} }
static void filelayout_read_count_stats(struct rpc_task *task, void *data) static void filelayout_read_count_stats(struct rpc_task *task, void *data)
{ {
struct nfs_pgio_data *rdata = data; struct nfs_pgio_header *hdr = data;
rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics); rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
} }
static void filelayout_read_release(void *data) static void filelayout_read_release(void *data)
{ {
struct nfs_pgio_data *rdata = data; struct nfs_pgio_header *hdr = data;
struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout; struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout;
filelayout_fenceme(lo->plh_inode, lo); filelayout_fenceme(lo->plh_inode, lo);
nfs_put_client(rdata->ds_clp); nfs_put_client(hdr->ds_clp);
rdata->header->mds_ops->rpc_release(data); hdr->mds_ops->rpc_release(data);
} }
static int filelayout_write_done_cb(struct rpc_task *task, static int filelayout_write_done_cb(struct rpc_task *task,
struct nfs_pgio_data *data) struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *hdr = data->header;
int err; int err;
trace_nfs4_pnfs_write(data, task->tk_status); trace_nfs4_pnfs_write(hdr, task->tk_status);
err = filelayout_async_handle_error(task, data->args.context->state, err = filelayout_async_handle_error(task, hdr->args.context->state,
data->ds_clp, hdr->lseg); hdr->ds_clp, hdr->lseg);
switch (err) { switch (err) {
case -NFS4ERR_RESET_TO_MDS: case -NFS4ERR_RESET_TO_MDS:
filelayout_reset_write(data); filelayout_reset_write(hdr);
return task->tk_status; return task->tk_status;
case -EAGAIN: case -EAGAIN:
rpc_restart_call_prepare(task); rpc_restart_call_prepare(task);
return -EAGAIN; return -EAGAIN;
} }
filelayout_set_layoutcommit(data); filelayout_set_layoutcommit(hdr);
return 0; return 0;
} }
@ -419,57 +408,57 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
static void filelayout_write_prepare(struct rpc_task *task, void *data) static void filelayout_write_prepare(struct rpc_task *task, void *data)
{ {
struct nfs_pgio_data *wdata = data; struct nfs_pgio_header *hdr = data;
if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) { if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) {
rpc_exit(task, -EIO); rpc_exit(task, -EIO);
return; return;
} }
if (filelayout_reset_to_mds(wdata->header->lseg)) { if (filelayout_reset_to_mds(hdr->lseg)) {
dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid);
filelayout_reset_write(wdata); filelayout_reset_write(hdr);
rpc_exit(task, 0); rpc_exit(task, 0);
return; return;
} }
if (nfs41_setup_sequence(wdata->ds_clp->cl_session, if (nfs41_setup_sequence(hdr->ds_clp->cl_session,
&wdata->args.seq_args, &hdr->args.seq_args,
&wdata->res.seq_res, &hdr->res.seq_res,
task)) task))
return; return;
if (nfs4_set_rw_stateid(&wdata->args.stateid, wdata->args.context, if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
wdata->args.lock_context, FMODE_WRITE) == -EIO) hdr->args.lock_context, FMODE_WRITE) == -EIO)
rpc_exit(task, -EIO); /* lost lock, terminate I/O */ rpc_exit(task, -EIO); /* lost lock, terminate I/O */
} }
static void filelayout_write_call_done(struct rpc_task *task, void *data) static void filelayout_write_call_done(struct rpc_task *task, void *data)
{ {
struct nfs_pgio_data *wdata = data; struct nfs_pgio_header *hdr = data;
if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) && if (test_bit(NFS_IOHDR_REDO, &hdr->flags) &&
task->tk_status == 0) { task->tk_status == 0) {
nfs41_sequence_done(task, &wdata->res.seq_res); nfs41_sequence_done(task, &hdr->res.seq_res);
return; return;
} }
/* Note this may cause RPC to be resent */ /* Note this may cause RPC to be resent */
wdata->header->mds_ops->rpc_call_done(task, data); hdr->mds_ops->rpc_call_done(task, data);
} }
static void filelayout_write_count_stats(struct rpc_task *task, void *data) static void filelayout_write_count_stats(struct rpc_task *task, void *data)
{ {
struct nfs_pgio_data *wdata = data; struct nfs_pgio_header *hdr = data;
rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics); rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics);
} }
static void filelayout_write_release(void *data) static void filelayout_write_release(void *data)
{ {
struct nfs_pgio_data *wdata = data; struct nfs_pgio_header *hdr = data;
struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout; struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout;
filelayout_fenceme(lo->plh_inode, lo); filelayout_fenceme(lo->plh_inode, lo);
nfs_put_client(wdata->ds_clp); nfs_put_client(hdr->ds_clp);
wdata->header->mds_ops->rpc_release(data); hdr->mds_ops->rpc_release(data);
} }
static void filelayout_commit_prepare(struct rpc_task *task, void *data) static void filelayout_commit_prepare(struct rpc_task *task, void *data)
@ -529,19 +518,18 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
}; };
static enum pnfs_try_status static enum pnfs_try_status
filelayout_read_pagelist(struct nfs_pgio_data *data) filelayout_read_pagelist(struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *hdr = data->header;
struct pnfs_layout_segment *lseg = hdr->lseg; struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds; struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt; struct rpc_clnt *ds_clnt;
loff_t offset = data->args.offset; loff_t offset = hdr->args.offset;
u32 j, idx; u32 j, idx;
struct nfs_fh *fh; struct nfs_fh *fh;
dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
__func__, hdr->inode->i_ino, __func__, hdr->inode->i_ino,
data->args.pgbase, (size_t)data->args.count, offset); hdr->args.pgbase, (size_t)hdr->args.count, offset);
/* Retrieve the correct rpc_client for the byte range */ /* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index(lseg, offset); j = nfs4_fl_calc_j_index(lseg, offset);
@ -559,30 +547,29 @@ filelayout_read_pagelist(struct nfs_pgio_data *data)
/* No multipath support. Use first DS */ /* No multipath support. Use first DS */
atomic_inc(&ds->ds_clp->cl_count); atomic_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp; hdr->ds_clp = ds->ds_clp;
data->ds_idx = idx; hdr->ds_idx = idx;
fh = nfs4_fl_select_ds_fh(lseg, j); fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh) if (fh)
data->args.fh = fh; hdr->args.fh = fh;
data->args.offset = filelayout_get_dserver_offset(lseg, offset); hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);
data->mds_offset = offset; hdr->mds_offset = offset;
/* Perform an asynchronous read to ds */ /* Perform an asynchronous read to ds */
nfs_initiate_pgio(ds_clnt, data, nfs_initiate_pgio(ds_clnt, hdr,
&filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN);
return PNFS_ATTEMPTED; return PNFS_ATTEMPTED;
} }
/* Perform async writes. */ /* Perform async writes. */
static enum pnfs_try_status static enum pnfs_try_status
filelayout_write_pagelist(struct nfs_pgio_data *data, int sync) filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
{ {
struct nfs_pgio_header *hdr = data->header;
struct pnfs_layout_segment *lseg = hdr->lseg; struct pnfs_layout_segment *lseg = hdr->lseg;
struct nfs4_pnfs_ds *ds; struct nfs4_pnfs_ds *ds;
struct rpc_clnt *ds_clnt; struct rpc_clnt *ds_clnt;
loff_t offset = data->args.offset; loff_t offset = hdr->args.offset;
u32 j, idx; u32 j, idx;
struct nfs_fh *fh; struct nfs_fh *fh;
@ -598,21 +585,20 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
return PNFS_NOT_ATTEMPTED; return PNFS_NOT_ATTEMPTED;
dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n", dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d\n",
__func__, hdr->inode->i_ino, sync, (size_t) data->args.count, __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count,
offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count)); offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
data->pgio_done_cb = filelayout_write_done_cb; hdr->pgio_done_cb = filelayout_write_done_cb;
atomic_inc(&ds->ds_clp->cl_count); atomic_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp; hdr->ds_clp = ds->ds_clp;
data->ds_idx = idx; hdr->ds_idx = idx;
fh = nfs4_fl_select_ds_fh(lseg, j); fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh) if (fh)
data->args.fh = fh; hdr->args.fh = fh;
hdr->args.offset = filelayout_get_dserver_offset(lseg, offset);
data->args.offset = filelayout_get_dserver_offset(lseg, offset);
/* Perform an asynchronous write */ /* Perform an asynchronous write */
nfs_initiate_pgio(ds_clnt, data, nfs_initiate_pgio(ds_clnt, hdr,
&filelayout_write_call_ops, sync, &filelayout_write_call_ops, sync,
RPC_TASK_SOFTCONN); RPC_TASK_SOFTCONN);
return PNFS_ATTEMPTED; return PNFS_ATTEMPTED;
@ -1023,6 +1009,7 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
/* The generic layer is about to remove the req from the commit list. /* The generic layer is about to remove the req from the commit list.
* If this will make the bucket empty, it will need to put the lseg reference. * If this will make the bucket empty, it will need to put the lseg reference.
* Note this must be called holding the inode (/cinfo) lock
*/ */
static void static void
filelayout_clear_request_commit(struct nfs_page *req, filelayout_clear_request_commit(struct nfs_page *req,
@ -1030,7 +1017,6 @@ filelayout_clear_request_commit(struct nfs_page *req,
{ {
struct pnfs_layout_segment *freeme = NULL; struct pnfs_layout_segment *freeme = NULL;
spin_lock(cinfo->lock);
if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
goto out; goto out;
cinfo->ds->nwritten--; cinfo->ds->nwritten--;
@ -1045,22 +1031,25 @@ filelayout_clear_request_commit(struct nfs_page *req,
} }
out: out:
nfs_request_remove_commit_list(req, cinfo); nfs_request_remove_commit_list(req, cinfo);
spin_unlock(cinfo->lock); pnfs_put_lseg_async(freeme);
pnfs_put_lseg(freeme);
} }
static struct list_head * static void
filelayout_choose_commit_list(struct nfs_page *req, filelayout_mark_request_commit(struct nfs_page *req,
struct pnfs_layout_segment *lseg, struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo) struct nfs_commit_info *cinfo)
{ {
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
u32 i, j; u32 i, j;
struct list_head *list; struct list_head *list;
struct pnfs_commit_bucket *buckets; struct pnfs_commit_bucket *buckets;
if (fl->commit_through_mds) if (fl->commit_through_mds) {
return &cinfo->mds->list; list = &cinfo->mds->list;
spin_lock(cinfo->lock);
goto mds_commit;
}
/* Note that we are calling nfs4_fl_calc_j_index on each page /* Note that we are calling nfs4_fl_calc_j_index on each page
* that ends up being committed to a data server. An attractive * that ends up being committed to a data server. An attractive
@ -1084,19 +1073,22 @@ filelayout_choose_commit_list(struct nfs_page *req,
} }
set_bit(PG_COMMIT_TO_DS, &req->wb_flags); set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
cinfo->ds->nwritten++; cinfo->ds->nwritten++;
mds_commit:
/* nfs_request_add_commit_list(). We need to add req to list without
* dropping cinfo lock.
*/
set_bit(PG_CLEAN, &(req)->wb_flags);
nfs_list_add_request(req, list);
cinfo->mds->ncommit++;
spin_unlock(cinfo->lock); spin_unlock(cinfo->lock);
return list; if (!cinfo->dreq) {
} inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info,
static void BDI_RECLAIMABLE);
filelayout_mark_request_commit(struct nfs_page *req, __mark_inode_dirty(req->wb_context->dentry->d_inode,
struct pnfs_layout_segment *lseg, I_DIRTY_DATASYNC);
struct nfs_commit_info *cinfo) }
{
struct list_head *list;
list = filelayout_choose_commit_list(req, lseg, cinfo);
nfs_request_add_commit_list(req, list, cinfo);
} }
static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@ -1244,15 +1236,63 @@ restart:
spin_unlock(cinfo->lock); spin_unlock(cinfo->lock);
} }
/* filelayout_search_commit_reqs - Search lists in @cinfo for the head reqest
* for @page
* @cinfo - commit info for current inode
* @page - page to search for matching head request
*
* Returns a the head request if one is found, otherwise returns NULL.
*/
static struct nfs_page *
filelayout_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page)
{
struct nfs_page *freq, *t;
struct pnfs_commit_bucket *b;
int i;
/* Linearly search the commit lists for each bucket until a matching
* request is found */
for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
list_for_each_entry_safe(freq, t, &b->written, wb_list) {
if (freq->wb_page == page)
return freq->wb_head;
}
list_for_each_entry_safe(freq, t, &b->committing, wb_list) {
if (freq->wb_page == page)
return freq->wb_head;
}
}
return NULL;
}
/* filelayout_retry_commit - retry the pending commits of buckets @idx onward
 * @cinfo - commit info for current inode
 * @idx - index of the first bucket to process
 *
 * For each bucket in [@idx, nbuckets) whose committing list is not empty,
 * passes that list to nfs_retry_commit() and then clears the bucket's
 * clseg pointer under cinfo->lock, releasing it with pnfs_put_lseg() once
 * the lock has been dropped.
 */
static void filelayout_retry_commit(struct nfs_commit_info *cinfo, int idx)
{
	struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
	struct pnfs_commit_bucket *bucket;
	struct pnfs_layout_segment *freeme;
	int i;

	for (i = idx; i < fl_cinfo->nbuckets; i++) {
		/* Index by i so the walk really starts at bucket @idx; a
		 * pointer initialised to buckets[0] and merely incremented
		 * would visit the wrong buckets whenever idx > 0.
		 */
		bucket = &fl_cinfo->buckets[i];
		if (list_empty(&bucket->committing))
			continue;
		nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
		/* Detach clseg while holding the lock, put it afterwards. */
		spin_lock(cinfo->lock);
		freeme = bucket->clseg;
		bucket->clseg = NULL;
		spin_unlock(cinfo->lock);
		pnfs_put_lseg(freeme);
	}
}
static unsigned int static unsigned int
alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
{ {
struct pnfs_ds_commit_info *fl_cinfo; struct pnfs_ds_commit_info *fl_cinfo;
struct pnfs_commit_bucket *bucket; struct pnfs_commit_bucket *bucket;
struct nfs_commit_data *data; struct nfs_commit_data *data;
int i, j; int i;
unsigned int nreq = 0; unsigned int nreq = 0;
struct pnfs_layout_segment *freeme;
fl_cinfo = cinfo->ds; fl_cinfo = cinfo->ds;
bucket = fl_cinfo->buckets; bucket = fl_cinfo->buckets;
@ -1272,16 +1312,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
} }
/* Clean up on error */ /* Clean up on error */
for (j = i; j < fl_cinfo->nbuckets; j++, bucket++) { filelayout_retry_commit(cinfo, i);
if (list_empty(&bucket->committing))
continue;
nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
spin_lock(cinfo->lock);
freeme = bucket->clseg;
bucket->clseg = NULL;
spin_unlock(cinfo->lock);
pnfs_put_lseg(freeme);
}
/* Caller will clean up entries put on list */ /* Caller will clean up entries put on list */
return nreq; return nreq;
} }
@ -1301,8 +1332,12 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
data->lseg = NULL; data->lseg = NULL;
list_add(&data->pages, &list); list_add(&data->pages, &list);
nreq++; nreq++;
} else } else {
nfs_retry_commit(mds_pages, NULL, cinfo); nfs_retry_commit(mds_pages, NULL, cinfo);
filelayout_retry_commit(cinfo, 0);
cinfo->completion_ops->error_cleanup(NFS_I(inode));
return -ENOMEM;
}
} }
nreq += alloc_ds_commits(cinfo, &list); nreq += alloc_ds_commits(cinfo, &list);
@ -1380,6 +1415,7 @@ static struct pnfs_layoutdriver_type filelayout_type = {
.clear_request_commit = filelayout_clear_request_commit, .clear_request_commit = filelayout_clear_request_commit,
.scan_commit_lists = filelayout_scan_commit_lists, .scan_commit_lists = filelayout_scan_commit_lists,
.recover_commit_reqs = filelayout_recover_commit_reqs, .recover_commit_reqs = filelayout_recover_commit_reqs,
.search_commit_reqs = filelayout_search_commit_reqs,
.commit_pagelist = filelayout_commit_pagelist, .commit_pagelist = filelayout_commit_pagelist,
.read_pagelist = filelayout_read_pagelist, .read_pagelist = filelayout_read_pagelist,
.write_pagelist = filelayout_write_pagelist, .write_pagelist = filelayout_write_pagelist,

View file

@ -695,7 +695,7 @@ filelayout_get_device_info(struct inode *inode,
if (pdev == NULL) if (pdev == NULL)
return NULL; return NULL;
pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags);
if (pages == NULL) { if (pages == NULL) {
kfree(pdev); kfree(pdev);
return NULL; return NULL;

View file

@ -1002,6 +1002,15 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
} }
EXPORT_SYMBOL_GPL(nfs_revalidate_inode); EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
/*
 * Non-blocking check of whether the cached attributes of @inode are usable.
 *
 * Returns -ECHILD when the attribute or label cache is flagged invalid or
 * the attribute cache has expired.  Otherwise returns -ESTALE if the inode
 * is marked stale, and 0 if not.  @server is not used here.
 */
int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode)
{
	if (NFS_I(inode)->cache_validity &
	    (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
		return -ECHILD;

	if (nfs_attribute_cache_expired(inode))
		return -ECHILD;

	if (NFS_STALE(inode))
		return -ESTALE;

	return 0;
}
static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
{ {
struct nfs_inode *nfsi = NFS_I(inode); struct nfs_inode *nfsi = NFS_I(inode);

View file

@ -247,11 +247,11 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
int nfs_iocounter_wait(struct nfs_io_counter *c); int nfs_iocounter_wait(struct nfs_io_counter *c);
extern const struct nfs_pageio_ops nfs_pgio_rw_ops; extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *); struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *);
void nfs_rw_header_free(struct nfs_pgio_header *); void nfs_pgio_header_free(struct nfs_pgio_header *);
void nfs_pgio_data_release(struct nfs_pgio_data *); void nfs_pgio_data_destroy(struct nfs_pgio_header *);
int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *, int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *,
const struct rpc_call_ops *, int, int); const struct rpc_call_ops *, int, int);
void nfs_free_request(struct nfs_page *req); void nfs_free_request(struct nfs_page *req);
@ -451,6 +451,7 @@ int nfs_scan_commit(struct inode *inode, struct list_head *dst,
void nfs_mark_request_commit(struct nfs_page *req, void nfs_mark_request_commit(struct nfs_page *req,
struct pnfs_layout_segment *lseg, struct pnfs_layout_segment *lseg,
struct nfs_commit_info *cinfo); struct nfs_commit_info *cinfo);
int nfs_write_need_commit(struct nfs_pgio_header *);
int nfs_generic_commit_list(struct inode *inode, struct list_head *head, int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
int how, struct nfs_commit_info *cinfo); int how, struct nfs_commit_info *cinfo);
void nfs_retry_commit(struct list_head *page_list, void nfs_retry_commit(struct list_head *page_list,
@ -491,7 +492,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
/* nfs4proc.c */ /* nfs4proc.c */
extern void __nfs4_read_done_cb(struct nfs_pgio_data *); extern void __nfs4_read_done_cb(struct nfs_pgio_header *);
extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms, const struct rpc_timeout *timeparms,
const char *ip_addr); const char *ip_addr);

View file

@ -256,7 +256,7 @@ nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data,
char *p = data + *result; char *p = data + *result;
acl = get_acl(inode, type); acl = get_acl(inode, type);
if (!acl) if (IS_ERR_OR_NULL(acl))
return 0; return 0;
posix_acl_release(acl); posix_acl_release(acl);

View file

@ -795,41 +795,44 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return status; return status;
} }
static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data) static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{ {
struct inode *inode = data->header->inode; struct inode *inode = hdr->inode;
if (nfs3_async_handle_jukebox(task, inode)) if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN; return -EAGAIN;
nfs_invalidate_atime(inode); nfs_invalidate_atime(inode);
nfs_refresh_inode(inode, &data->fattr); nfs_refresh_inode(inode, &hdr->fattr);
return 0; return 0;
} }
static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) static void nfs3_proc_read_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{ {
msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
} }
static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{ {
rpc_call_start(task); rpc_call_start(task);
return 0; return 0;
} }
static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data) static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{ {
struct inode *inode = data->header->inode; struct inode *inode = hdr->inode;
if (nfs3_async_handle_jukebox(task, inode)) if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN; return -EAGAIN;
if (task->tk_status >= 0) if (task->tk_status >= 0)
nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
return 0; return 0;
} }
static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) static void nfs3_proc_write_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{ {
msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE]; msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
} }

View file

@ -54,7 +54,7 @@ struct nfs4_minor_version_ops {
const nfs4_stateid *); const nfs4_stateid *);
int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
struct nfs_fsinfo *); struct nfs_fsinfo *);
int (*free_lock_state)(struct nfs_server *, void (*free_lock_state)(struct nfs_server *,
struct nfs4_lock_state *); struct nfs4_lock_state *);
const struct rpc_call_ops *call_sync_ops; const struct rpc_call_ops *call_sync_ops;
const struct nfs4_state_recovery_ops *reboot_recovery_ops; const struct nfs4_state_recovery_ops *reboot_recovery_ops;
@ -129,27 +129,17 @@ enum {
* LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN)
*/ */
struct nfs4_lock_owner {
unsigned int lo_type;
#define NFS4_ANY_LOCK_TYPE (0U)
#define NFS4_FLOCK_LOCK_TYPE (1U << 0)
#define NFS4_POSIX_LOCK_TYPE (1U << 1)
union {
fl_owner_t posix_owner;
pid_t flock_owner;
} lo_u;
};
struct nfs4_lock_state { struct nfs4_lock_state {
struct list_head ls_locks; /* Other lock stateids */ struct list_head ls_locks; /* Other lock stateids */
struct nfs4_state * ls_state; /* Pointer to open state */ struct nfs4_state * ls_state; /* Pointer to open state */
#define NFS_LOCK_INITIALIZED 0 #define NFS_LOCK_INITIALIZED 0
#define NFS_LOCK_LOST 1 #define NFS_LOCK_LOST 1
unsigned long ls_flags; unsigned long ls_flags;
struct nfs_seqid_counter ls_seqid; struct nfs_seqid_counter ls_seqid;
nfs4_stateid ls_stateid; nfs4_stateid ls_stateid;
atomic_t ls_count; atomic_t ls_count;
struct nfs4_lock_owner ls_owner; fl_owner_t ls_owner;
struct work_struct ls_release;
}; };
/* bits for nfs4_state->flags */ /* bits for nfs4_state->flags */
@ -337,11 +327,11 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
*/ */
static inline void static inline void
nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
struct rpc_message *msg, struct nfs_pgio_data *wdata) struct rpc_message *msg, struct nfs_pgio_header *hdr)
{ {
if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) && if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) &&
!test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags)) !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags))
wdata->args.stable = NFS_FILE_SYNC; hdr->args.stable = NFS_FILE_SYNC;
} }
#else /* CONFIG_NFS_v4_1 */ #else /* CONFIG_NFS_v4_1 */
static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
@ -369,7 +359,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags,
static inline void static inline void
nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp, nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
struct rpc_message *msg, struct nfs_pgio_data *wdata) struct rpc_message *msg, struct nfs_pgio_header *hdr)
{ {
} }
#endif /* CONFIG_NFS_V4_1 */ #endif /* CONFIG_NFS_V4_1 */

View file

@ -855,6 +855,11 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
}; };
struct rpc_timeout ds_timeout; struct rpc_timeout ds_timeout;
struct nfs_client *clp; struct nfs_client *clp;
char buf[INET6_ADDRSTRLEN + 1];
if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0)
return ERR_PTR(-EINVAL);
cl_init.hostname = buf;
/* /*
* Set an authflavor equual to the MDS value. Use the MDS nfs_client * Set an authflavor equual to the MDS value. Use the MDS nfs_client

View file

@ -1952,6 +1952,14 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
return status; return status;
} }
/*
* Additional permission checks in order to distinguish between an
* open for read, and an open for execute. This works around the
* fact that NFSv4 OPEN treats read and execute permissions as being
* the same.
* Note that in the non-execute case, we want to turn off permission
* checking if we just created a new file (POSIX open() semantics).
*/
static int nfs4_opendata_access(struct rpc_cred *cred, static int nfs4_opendata_access(struct rpc_cred *cred,
struct nfs4_opendata *opendata, struct nfs4_opendata *opendata,
struct nfs4_state *state, fmode_t fmode, struct nfs4_state *state, fmode_t fmode,
@ -1966,14 +1974,14 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
return 0; return 0;
mask = 0; mask = 0;
/* don't check MAY_WRITE - a newly created file may not have /*
* write mode bits, but POSIX allows the creating process to write. * Use openflags to check for exec, because fmode won't
* use openflags to check for exec, because fmode won't * always have FMODE_EXEC set when file open for exec.
* always have FMODE_EXEC set when file open for exec. */ */
if (openflags & __FMODE_EXEC) { if (openflags & __FMODE_EXEC) {
/* ONLY check for exec rights */ /* ONLY check for exec rights */
mask = MAY_EXEC; mask = MAY_EXEC;
} else if (fmode & FMODE_READ) } else if ((fmode & FMODE_READ) && !opendata->file_created)
mask = MAY_READ; mask = MAY_READ;
cache.cred = cred; cache.cred = cred;
@ -2216,8 +2224,15 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
ret = _nfs4_proc_open(opendata); ret = _nfs4_proc_open(opendata);
if (ret != 0) if (ret != 0) {
if (ret == -ENOENT) {
d_drop(opendata->dentry);
d_add(opendata->dentry, NULL);
nfs_set_verifier(opendata->dentry,
nfs_save_change_attribute(opendata->dir->d_inode));
}
goto out; goto out;
}
state = nfs4_opendata_to_nfs4_state(opendata); state = nfs4_opendata_to_nfs4_state(opendata);
ret = PTR_ERR(state); ret = PTR_ERR(state);
@ -2647,6 +2662,48 @@ static const struct rpc_call_ops nfs4_close_ops = {
.rpc_release = nfs4_free_closedata, .rpc_release = nfs4_free_closedata,
}; };
static bool nfs4_state_has_opener(struct nfs4_state *state)
{
/* first check existing openers */
if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0 &&
state->n_rdonly != 0)
return true;
if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0 &&
state->n_wronly != 0)
return true;
if (test_bit(NFS_O_RDWR_STATE, &state->flags) != 0 &&
state->n_rdwr != 0)
return true;
return false;
}
static bool nfs4_roc(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_context *ctx;
struct nfs4_state *state;
spin_lock(&inode->i_lock);
list_for_each_entry(ctx, &nfsi->open_files, list) {
state = ctx->state;
if (state == NULL)
continue;
if (nfs4_state_has_opener(state)) {
spin_unlock(&inode->i_lock);
return false;
}
}
spin_unlock(&inode->i_lock);
if (nfs4_check_delegation(inode, FMODE_READ))
return false;
return pnfs_roc(inode);
}
/* /*
* It is possible for data to be read/written from a mem-mapped file * It is possible for data to be read/written from a mem-mapped file
* after the sys_close call (which hits the vfs layer as a flush). * after the sys_close call (which hits the vfs layer as a flush).
@ -2697,7 +2754,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
calldata->res.fattr = &calldata->fattr; calldata->res.fattr = &calldata->fattr;
calldata->res.seqid = calldata->arg.seqid; calldata->res.seqid = calldata->arg.seqid;
calldata->res.server = server; calldata->res.server = server;
calldata->roc = pnfs_roc(state->inode); calldata->roc = nfs4_roc(state->inode);
nfs_sb_active(calldata->inode->i_sb); nfs_sb_active(calldata->inode->i_sb);
msg.rpc_argp = &calldata->arg; msg.rpc_argp = &calldata->arg;
@ -4033,24 +4090,25 @@ static bool nfs4_error_stateid_expired(int err)
return false; return false;
} }
void __nfs4_read_done_cb(struct nfs_pgio_data *data) void __nfs4_read_done_cb(struct nfs_pgio_header *hdr)
{ {
nfs_invalidate_atime(data->header->inode); nfs_invalidate_atime(hdr->inode);
} }
static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr)
{ {
struct nfs_server *server = NFS_SERVER(data->header->inode); struct nfs_server *server = NFS_SERVER(hdr->inode);
trace_nfs4_read(data, task->tk_status); trace_nfs4_read(hdr, task->tk_status);
if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { if (nfs4_async_handle_error(task, server,
hdr->args.context->state) == -EAGAIN) {
rpc_restart_call_prepare(task); rpc_restart_call_prepare(task);
return -EAGAIN; return -EAGAIN;
} }
__nfs4_read_done_cb(data); __nfs4_read_done_cb(hdr);
if (task->tk_status > 0) if (task->tk_status > 0)
renew_lease(server, data->timestamp); renew_lease(server, hdr->timestamp);
return 0; return 0;
} }
@ -4068,54 +4126,59 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
return true; return true;
} }
static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data) static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{ {
dprintk("--> %s\n", __func__); dprintk("--> %s\n", __func__);
if (!nfs4_sequence_done(task, &data->res.seq_res)) if (!nfs4_sequence_done(task, &hdr->res.seq_res))
return -EAGAIN; return -EAGAIN;
if (nfs4_read_stateid_changed(task, &data->args)) if (nfs4_read_stateid_changed(task, &hdr->args))
return -EAGAIN; return -EAGAIN;
return data->pgio_done_cb ? data->pgio_done_cb(task, data) : return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
nfs4_read_done_cb(task, data); nfs4_read_done_cb(task, hdr);
} }
static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{ {
data->timestamp = jiffies; hdr->timestamp = jiffies;
data->pgio_done_cb = nfs4_read_done_cb; hdr->pgio_done_cb = nfs4_read_done_cb;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0);
} }
static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{ {
if (nfs4_setup_sequence(NFS_SERVER(data->header->inode), if (nfs4_setup_sequence(NFS_SERVER(hdr->inode),
&data->args.seq_args, &hdr->args.seq_args,
&data->res.seq_res, &hdr->res.seq_res,
task)) task))
return 0; return 0;
if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context, if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO) hdr->args.lock_context,
hdr->rw_ops->rw_mode) == -EIO)
return -EIO; return -EIO;
if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags))) if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags)))
return -EIO; return -EIO;
return 0; return 0;
} }
static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data) static int nfs4_write_done_cb(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{ {
struct inode *inode = data->header->inode; struct inode *inode = hdr->inode;
trace_nfs4_write(data, task->tk_status); trace_nfs4_write(hdr, task->tk_status);
if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { if (nfs4_async_handle_error(task, NFS_SERVER(inode),
hdr->args.context->state) == -EAGAIN) {
rpc_restart_call_prepare(task); rpc_restart_call_prepare(task);
return -EAGAIN; return -EAGAIN;
} }
if (task->tk_status >= 0) { if (task->tk_status >= 0) {
renew_lease(NFS_SERVER(inode), data->timestamp); renew_lease(NFS_SERVER(inode), hdr->timestamp);
nfs_post_op_update_inode_force_wcc(inode, &data->fattr); nfs_post_op_update_inode_force_wcc(inode, &hdr->fattr);
} }
return 0; return 0;
} }
@ -4134,23 +4197,21 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task,
return true; return true;
} }
static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data) static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{ {
if (!nfs4_sequence_done(task, &data->res.seq_res)) if (!nfs4_sequence_done(task, &hdr->res.seq_res))
return -EAGAIN; return -EAGAIN;
if (nfs4_write_stateid_changed(task, &data->args)) if (nfs4_write_stateid_changed(task, &hdr->args))
return -EAGAIN; return -EAGAIN;
return data->pgio_done_cb ? data->pgio_done_cb(task, data) : return hdr->pgio_done_cb ? hdr->pgio_done_cb(task, hdr) :
nfs4_write_done_cb(task, data); nfs4_write_done_cb(task, hdr);
} }
static static
bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data) bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
{ {
const struct nfs_pgio_header *hdr = data->header;
/* Don't request attributes for pNFS or O_DIRECT writes */ /* Don't request attributes for pNFS or O_DIRECT writes */
if (data->ds_clp != NULL || hdr->dreq != NULL) if (hdr->ds_clp != NULL || hdr->dreq != NULL)
return false; return false;
/* Otherwise, request attributes if and only if we don't hold /* Otherwise, request attributes if and only if we don't hold
* a delegation * a delegation
@ -4158,23 +4219,24 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0; return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
} }
static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{ {
struct nfs_server *server = NFS_SERVER(data->header->inode); struct nfs_server *server = NFS_SERVER(hdr->inode);
if (!nfs4_write_need_cache_consistency_data(data)) { if (!nfs4_write_need_cache_consistency_data(hdr)) {
data->args.bitmask = NULL; hdr->args.bitmask = NULL;
data->res.fattr = NULL; hdr->res.fattr = NULL;
} else } else
data->args.bitmask = server->cache_consistency_bitmask; hdr->args.bitmask = server->cache_consistency_bitmask;
if (!data->pgio_done_cb) if (!hdr->pgio_done_cb)
data->pgio_done_cb = nfs4_write_done_cb; hdr->pgio_done_cb = nfs4_write_done_cb;
data->res.server = server; hdr->res.server = server;
data->timestamp = jiffies; hdr->timestamp = jiffies;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1);
} }
static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data) static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
@ -4881,6 +4943,18 @@ nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len)
return scnprintf(buf, len, "tcp"); return scnprintf(buf, len, "tcp");
} }
static void nfs4_setclientid_done(struct rpc_task *task, void *calldata)
{
struct nfs4_setclientid *sc = calldata;
if (task->tk_status == 0)
sc->sc_cred = get_rpccred(task->tk_rqstp->rq_cred);
}
static const struct rpc_call_ops nfs4_setclientid_ops = {
.rpc_call_done = nfs4_setclientid_done,
};
/** /**
* nfs4_proc_setclientid - Negotiate client ID * nfs4_proc_setclientid - Negotiate client ID
* @clp: state data structure * @clp: state data structure
@ -4907,6 +4981,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
.rpc_resp = res, .rpc_resp = res,
.rpc_cred = cred, .rpc_cred = cred,
}; };
struct rpc_task *task;
struct rpc_task_setup task_setup_data = {
.rpc_client = clp->cl_rpcclient,
.rpc_message = &msg,
.callback_ops = &nfs4_setclientid_ops,
.callback_data = &setclientid,
.flags = RPC_TASK_TIMEOUT,
};
int status; int status;
/* nfs_client_id4 */ /* nfs_client_id4 */
@ -4933,7 +5015,18 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
dprintk("NFS call setclientid auth=%s, '%.*s'\n", dprintk("NFS call setclientid auth=%s, '%.*s'\n",
clp->cl_rpcclient->cl_auth->au_ops->au_name, clp->cl_rpcclient->cl_auth->au_ops->au_name,
setclientid.sc_name_len, setclientid.sc_name); setclientid.sc_name_len, setclientid.sc_name);
status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); task = rpc_run_task(&task_setup_data);
if (IS_ERR(task)) {
status = PTR_ERR(task);
goto out;
}
status = task->tk_status;
if (setclientid.sc_cred) {
clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred);
put_rpccred(setclientid.sc_cred);
}
rpc_put_task(task);
out:
trace_nfs4_setclientid(clp, status); trace_nfs4_setclientid(clp, status);
dprintk("NFS reply setclientid: %d\n", status); dprintk("NFS reply setclientid: %d\n", status);
return status; return status;
@ -4975,6 +5068,9 @@ struct nfs4_delegreturndata {
unsigned long timestamp; unsigned long timestamp;
struct nfs_fattr fattr; struct nfs_fattr fattr;
int rpc_status; int rpc_status;
struct inode *inode;
bool roc;
u32 roc_barrier;
}; };
static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
@ -4988,7 +5084,6 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
switch (task->tk_status) { switch (task->tk_status) {
case 0: case 0:
renew_lease(data->res.server, data->timestamp); renew_lease(data->res.server, data->timestamp);
break;
case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_BAD_STATEID: case -NFS4ERR_BAD_STATEID:
@ -4996,6 +5091,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED: case -NFS4ERR_EXPIRED:
task->tk_status = 0; task->tk_status = 0;
if (data->roc)
pnfs_roc_set_barrier(data->inode, data->roc_barrier);
break; break;
default: default:
if (nfs4_async_handle_error(task, data->res.server, NULL) == if (nfs4_async_handle_error(task, data->res.server, NULL) ==
@ -5009,6 +5106,10 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
static void nfs4_delegreturn_release(void *calldata) static void nfs4_delegreturn_release(void *calldata)
{ {
struct nfs4_delegreturndata *data = calldata;
if (data->roc)
pnfs_roc_release(data->inode);
kfree(calldata); kfree(calldata);
} }
@ -5018,6 +5119,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
d_data = (struct nfs4_delegreturndata *)data; d_data = (struct nfs4_delegreturndata *)data;
if (d_data->roc &&
pnfs_roc_drain(d_data->inode, &d_data->roc_barrier, task))
return;
nfs4_setup_sequence(d_data->res.server, nfs4_setup_sequence(d_data->res.server,
&d_data->args.seq_args, &d_data->args.seq_args,
&d_data->res.seq_res, &d_data->res.seq_res,
@ -5061,6 +5166,9 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
nfs_fattr_init(data->res.fattr); nfs_fattr_init(data->res.fattr);
data->timestamp = jiffies; data->timestamp = jiffies;
data->rpc_status = 0; data->rpc_status = 0;
data->inode = inode;
data->roc = list_empty(&NFS_I(inode)->open_files) ?
pnfs_roc(inode) : false;
task_setup_data.callback_data = data; task_setup_data.callback_data = data;
msg.rpc_argp = &data->args; msg.rpc_argp = &data->args;
@ -5834,8 +5942,10 @@ struct nfs_release_lockowner_data {
static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata) static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata)
{ {
struct nfs_release_lockowner_data *data = calldata; struct nfs_release_lockowner_data *data = calldata;
nfs40_setup_sequence(data->server, struct nfs_server *server = data->server;
&data->args.seq_args, &data->res.seq_res, task); nfs40_setup_sequence(server, &data->args.seq_args,
&data->res.seq_res, task);
data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
data->timestamp = jiffies; data->timestamp = jiffies;
} }
@ -5852,6 +5962,8 @@ static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata)
break; break;
case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_EXPIRED: case -NFS4ERR_EXPIRED:
nfs4_schedule_lease_recovery(server->nfs_client);
break;
case -NFS4ERR_LEASE_MOVED: case -NFS4ERR_LEASE_MOVED:
case -NFS4ERR_DELAY: case -NFS4ERR_DELAY:
if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN)
@ -5872,7 +5984,8 @@ static const struct rpc_call_ops nfs4_release_lockowner_ops = {
.rpc_release = nfs4_release_lockowner_release, .rpc_release = nfs4_release_lockowner_release,
}; };
static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp) static void
nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
{ {
struct nfs_release_lockowner_data *data; struct nfs_release_lockowner_data *data;
struct rpc_message msg = { struct rpc_message msg = {
@ -5880,11 +5993,11 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st
}; };
if (server->nfs_client->cl_mvops->minor_version != 0) if (server->nfs_client->cl_mvops->minor_version != 0)
return -EINVAL; return;
data = kmalloc(sizeof(*data), GFP_NOFS); data = kmalloc(sizeof(*data), GFP_NOFS);
if (!data) if (!data)
return -ENOMEM; return;
data->lsp = lsp; data->lsp = lsp;
data->server = server; data->server = server;
data->args.lock_owner.clientid = server->nfs_client->cl_clientid; data->args.lock_owner.clientid = server->nfs_client->cl_clientid;
@ -5895,7 +6008,6 @@ static int nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_st
msg.rpc_resp = &data->res; msg.rpc_resp = &data->res;
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0); nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data); rpc_call_async(server->client, &msg, 0, &nfs4_release_lockowner_ops, data);
return 0;
} }
#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
@ -8182,7 +8294,8 @@ static int nfs41_free_stateid(struct nfs_server *server,
return ret; return ret;
} }
static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp) static void
nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
{ {
struct rpc_task *task; struct rpc_task *task;
struct rpc_cred *cred = lsp->ls_state->owner->so_cred; struct rpc_cred *cred = lsp->ls_state->owner->so_cred;
@ -8190,9 +8303,8 @@ static int nfs41_free_lock_state(struct nfs_server *server, struct nfs4_lock_sta
task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false); task = _nfs41_free_stateid(server, &lsp->ls_stateid, cred, false);
nfs4_free_lock_state(server, lsp); nfs4_free_lock_state(server, lsp);
if (IS_ERR(task)) if (IS_ERR(task))
return PTR_ERR(task); return;
rpc_put_task(task); rpc_put_task(task);
return 0;
} }
static bool nfs41_match_stateid(const nfs4_stateid *s1, static bool nfs41_match_stateid(const nfs4_stateid *s1,

View file

@ -787,33 +787,36 @@ void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
* that is compatible with current->files * that is compatible with current->files
*/ */
static struct nfs4_lock_state * static struct nfs4_lock_state *
__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
{ {
struct nfs4_lock_state *pos; struct nfs4_lock_state *pos;
list_for_each_entry(pos, &state->lock_states, ls_locks) { list_for_each_entry(pos, &state->lock_states, ls_locks) {
if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type) if (pos->ls_owner != fl_owner)
continue; continue;
switch (pos->ls_owner.lo_type) {
case NFS4_POSIX_LOCK_TYPE:
if (pos->ls_owner.lo_u.posix_owner != fl_owner)
continue;
break;
case NFS4_FLOCK_LOCK_TYPE:
if (pos->ls_owner.lo_u.flock_owner != fl_pid)
continue;
}
atomic_inc(&pos->ls_count); atomic_inc(&pos->ls_count);
return pos; return pos;
} }
return NULL; return NULL;
} }
static void
free_lock_state_work(struct work_struct *work)
{
struct nfs4_lock_state *lsp = container_of(work,
struct nfs4_lock_state, ls_release);
struct nfs4_state *state = lsp->ls_state;
struct nfs_server *server = state->owner->so_server;
struct nfs_client *clp = server->nfs_client;
clp->cl_mvops->free_lock_state(server, lsp);
}
/* /*
* Return a compatible lock_state. If no initialized lock_state structure * Return a compatible lock_state. If no initialized lock_state structure
* exists, return an uninitialized one. * exists, return an uninitialized one.
* *
*/ */
static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type) static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner)
{ {
struct nfs4_lock_state *lsp; struct nfs4_lock_state *lsp;
struct nfs_server *server = state->owner->so_server; struct nfs_server *server = state->owner->so_server;
@ -824,21 +827,12 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
nfs4_init_seqid_counter(&lsp->ls_seqid); nfs4_init_seqid_counter(&lsp->ls_seqid);
atomic_set(&lsp->ls_count, 1); atomic_set(&lsp->ls_count, 1);
lsp->ls_state = state; lsp->ls_state = state;
lsp->ls_owner.lo_type = type; lsp->ls_owner = fl_owner;
switch (lsp->ls_owner.lo_type) {
case NFS4_FLOCK_LOCK_TYPE:
lsp->ls_owner.lo_u.flock_owner = fl_pid;
break;
case NFS4_POSIX_LOCK_TYPE:
lsp->ls_owner.lo_u.posix_owner = fl_owner;
break;
default:
goto out_free;
}
lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS); lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS);
if (lsp->ls_seqid.owner_id < 0) if (lsp->ls_seqid.owner_id < 0)
goto out_free; goto out_free;
INIT_LIST_HEAD(&lsp->ls_locks); INIT_LIST_HEAD(&lsp->ls_locks);
INIT_WORK(&lsp->ls_release, free_lock_state_work);
return lsp; return lsp;
out_free: out_free:
kfree(lsp); kfree(lsp);
@ -857,13 +851,13 @@ void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp
* exists, return an uninitialized one. * exists, return an uninitialized one.
* *
*/ */
static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type) static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner)
{ {
struct nfs4_lock_state *lsp, *new = NULL; struct nfs4_lock_state *lsp, *new = NULL;
for(;;) { for(;;) {
spin_lock(&state->state_lock); spin_lock(&state->state_lock);
lsp = __nfs4_find_lock_state(state, owner, pid, type); lsp = __nfs4_find_lock_state(state, owner);
if (lsp != NULL) if (lsp != NULL)
break; break;
if (new != NULL) { if (new != NULL) {
@ -874,7 +868,7 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
break; break;
} }
spin_unlock(&state->state_lock); spin_unlock(&state->state_lock);
new = nfs4_alloc_lock_state(state, owner, pid, type); new = nfs4_alloc_lock_state(state, owner);
if (new == NULL) if (new == NULL)
return NULL; return NULL;
} }
@ -902,13 +896,12 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
if (list_empty(&state->lock_states)) if (list_empty(&state->lock_states))
clear_bit(LK_STATE_IN_USE, &state->flags); clear_bit(LK_STATE_IN_USE, &state->flags);
spin_unlock(&state->state_lock); spin_unlock(&state->state_lock);
server = state->owner->so_server; if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags))
if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) { queue_work(nfsiod_workqueue, &lsp->ls_release);
struct nfs_client *clp = server->nfs_client; else {
server = state->owner->so_server;
clp->cl_mvops->free_lock_state(server, lsp);
} else
nfs4_free_lock_state(server, lsp); nfs4_free_lock_state(server, lsp);
}
} }
static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src) static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
@ -935,13 +928,7 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
if (fl->fl_ops != NULL) if (fl->fl_ops != NULL)
return 0; return 0;
if (fl->fl_flags & FL_POSIX) lsp = nfs4_get_lock_state(state, fl->fl_owner);
lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
else if (fl->fl_flags & FL_FLOCK)
lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid,
NFS4_FLOCK_LOCK_TYPE);
else
return -EINVAL;
if (lsp == NULL) if (lsp == NULL)
return -ENOMEM; return -ENOMEM;
fl->fl_u.nfs4_fl.owner = lsp; fl->fl_u.nfs4_fl.owner = lsp;
@ -955,7 +942,6 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
{ {
struct nfs4_lock_state *lsp; struct nfs4_lock_state *lsp;
fl_owner_t fl_owner; fl_owner_t fl_owner;
pid_t fl_pid;
int ret = -ENOENT; int ret = -ENOENT;
@ -966,9 +952,8 @@ static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
goto out; goto out;
fl_owner = lockowner->l_owner; fl_owner = lockowner->l_owner;
fl_pid = lockowner->l_pid;
spin_lock(&state->state_lock); spin_lock(&state->state_lock);
lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); lsp = __nfs4_find_lock_state(state, fl_owner);
if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags)) if (lsp && test_bit(NFS_LOCK_LOST, &lsp->ls_flags))
ret = -EIO; ret = -EIO;
else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) { else if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {

View file

@ -932,11 +932,11 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group);
DECLARE_EVENT_CLASS(nfs4_read_event, DECLARE_EVENT_CLASS(nfs4_read_event,
TP_PROTO( TP_PROTO(
const struct nfs_pgio_data *data, const struct nfs_pgio_header *hdr,
int error int error
), ),
TP_ARGS(data, error), TP_ARGS(hdr, error),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(dev_t, dev) __field(dev_t, dev)
@ -948,12 +948,12 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
), ),
TP_fast_assign( TP_fast_assign(
const struct inode *inode = data->header->inode; const struct inode *inode = hdr->inode;
__entry->dev = inode->i_sb->s_dev; __entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode); __entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
__entry->offset = data->args.offset; __entry->offset = hdr->args.offset;
__entry->count = data->args.count; __entry->count = hdr->args.count;
__entry->error = error; __entry->error = error;
), ),
@ -972,10 +972,10 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
#define DEFINE_NFS4_READ_EVENT(name) \ #define DEFINE_NFS4_READ_EVENT(name) \
DEFINE_EVENT(nfs4_read_event, name, \ DEFINE_EVENT(nfs4_read_event, name, \
TP_PROTO( \ TP_PROTO( \
const struct nfs_pgio_data *data, \ const struct nfs_pgio_header *hdr, \
int error \ int error \
), \ ), \
TP_ARGS(data, error)) TP_ARGS(hdr, error))
DEFINE_NFS4_READ_EVENT(nfs4_read); DEFINE_NFS4_READ_EVENT(nfs4_read);
#ifdef CONFIG_NFS_V4_1 #ifdef CONFIG_NFS_V4_1
DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read); DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
@ -983,11 +983,11 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
DECLARE_EVENT_CLASS(nfs4_write_event, DECLARE_EVENT_CLASS(nfs4_write_event,
TP_PROTO( TP_PROTO(
const struct nfs_pgio_data *data, const struct nfs_pgio_header *hdr,
int error int error
), ),
TP_ARGS(data, error), TP_ARGS(hdr, error),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(dev_t, dev) __field(dev_t, dev)
@ -999,12 +999,12 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
), ),
TP_fast_assign( TP_fast_assign(
const struct inode *inode = data->header->inode; const struct inode *inode = hdr->inode;
__entry->dev = inode->i_sb->s_dev; __entry->dev = inode->i_sb->s_dev;
__entry->fileid = NFS_FILEID(inode); __entry->fileid = NFS_FILEID(inode);
__entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
__entry->offset = data->args.offset; __entry->offset = hdr->args.offset;
__entry->count = data->args.count; __entry->count = hdr->args.count;
__entry->error = error; __entry->error = error;
), ),
@ -1024,10 +1024,10 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
#define DEFINE_NFS4_WRITE_EVENT(name) \ #define DEFINE_NFS4_WRITE_EVENT(name) \
DEFINE_EVENT(nfs4_write_event, name, \ DEFINE_EVENT(nfs4_write_event, name, \
TP_PROTO( \ TP_PROTO( \
const struct nfs_pgio_data *data, \ const struct nfs_pgio_header *hdr, \
int error \ int error \
), \ ), \
TP_ARGS(data, error)) TP_ARGS(hdr, error))
DEFINE_NFS4_WRITE_EVENT(nfs4_write); DEFINE_NFS4_WRITE_EVENT(nfs4_write);
#ifdef CONFIG_NFS_V4_1 #ifdef CONFIG_NFS_V4_1
DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write); DEFINE_NFS4_WRITE_EVENT(nfs4_pnfs_write);

View file

@ -7092,7 +7092,7 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp,
if (!status) if (!status)
status = decode_sequence(xdr, &res->seq_res, rqstp); status = decode_sequence(xdr, &res->seq_res, rqstp);
if (!status) if (!status)
status = decode_reclaim_complete(xdr, (void *)NULL); status = decode_reclaim_complete(xdr, NULL);
return status; return status;
} }

View file

@ -439,22 +439,21 @@ static void _read_done(struct ore_io_state *ios, void *private)
objlayout_read_done(&objios->oir, status, objios->sync); objlayout_read_done(&objios->oir, status, objios->sync);
} }
int objio_read_pagelist(struct nfs_pgio_data *rdata) int objio_read_pagelist(struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *hdr = rdata->header;
struct objio_state *objios; struct objio_state *objios;
int ret; int ret;
ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true, ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
hdr->lseg, rdata->args.pages, rdata->args.pgbase, hdr->lseg, hdr->args.pages, hdr->args.pgbase,
rdata->args.offset, rdata->args.count, rdata, hdr->args.offset, hdr->args.count, hdr,
GFP_KERNEL, &objios); GFP_KERNEL, &objios);
if (unlikely(ret)) if (unlikely(ret))
return ret; return ret;
objios->ios->done = _read_done; objios->ios->done = _read_done;
dprintk("%s: offset=0x%llx length=0x%x\n", __func__, dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
rdata->args.offset, rdata->args.count); hdr->args.offset, hdr->args.count);
ret = ore_read(objios->ios); ret = ore_read(objios->ios);
if (unlikely(ret)) if (unlikely(ret))
objio_free_result(&objios->oir); objio_free_result(&objios->oir);
@ -487,11 +486,11 @@ static void _write_done(struct ore_io_state *ios, void *private)
static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
{ {
struct objio_state *objios = priv; struct objio_state *objios = priv;
struct nfs_pgio_data *wdata = objios->oir.rpcdata; struct nfs_pgio_header *hdr = objios->oir.rpcdata;
struct address_space *mapping = wdata->header->inode->i_mapping; struct address_space *mapping = hdr->inode->i_mapping;
pgoff_t index = offset / PAGE_SIZE; pgoff_t index = offset / PAGE_SIZE;
struct page *page; struct page *page;
loff_t i_size = i_size_read(wdata->header->inode); loff_t i_size = i_size_read(hdr->inode);
if (offset >= i_size) { if (offset >= i_size) {
*uptodate = true; *uptodate = true;
@ -531,15 +530,14 @@ static const struct _ore_r4w_op _r4w_op = {
.put_page = &__r4w_put_page, .put_page = &__r4w_put_page,
}; };
int objio_write_pagelist(struct nfs_pgio_data *wdata, int how) int objio_write_pagelist(struct nfs_pgio_header *hdr, int how)
{ {
struct nfs_pgio_header *hdr = wdata->header;
struct objio_state *objios; struct objio_state *objios;
int ret; int ret;
ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false, ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
hdr->lseg, wdata->args.pages, wdata->args.pgbase, hdr->lseg, hdr->args.pages, hdr->args.pgbase,
wdata->args.offset, wdata->args.count, wdata, GFP_NOFS, hdr->args.offset, hdr->args.count, hdr, GFP_NOFS,
&objios); &objios);
if (unlikely(ret)) if (unlikely(ret))
return ret; return ret;
@ -551,7 +549,7 @@ int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
objios->ios->done = _write_done; objios->ios->done = _write_done;
dprintk("%s: offset=0x%llx length=0x%x\n", __func__, dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
wdata->args.offset, wdata->args.count); hdr->args.offset, hdr->args.count);
ret = ore_write(objios->ios); ret = ore_write(objios->ios);
if (unlikely(ret)) { if (unlikely(ret)) {
objio_free_result(&objios->oir); objio_free_result(&objios->oir);

View file

@ -229,36 +229,36 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
static void _rpc_read_complete(struct work_struct *work) static void _rpc_read_complete(struct work_struct *work)
{ {
struct rpc_task *task; struct rpc_task *task;
struct nfs_pgio_data *rdata; struct nfs_pgio_header *hdr;
dprintk("%s enter\n", __func__); dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work); task = container_of(work, struct rpc_task, u.tk_work);
rdata = container_of(task, struct nfs_pgio_data, task); hdr = container_of(task, struct nfs_pgio_header, task);
pnfs_ld_read_done(rdata); pnfs_ld_read_done(hdr);
} }
void void
objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync) objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{ {
struct nfs_pgio_data *rdata = oir->rpcdata; struct nfs_pgio_header *hdr = oir->rpcdata;
oir->status = rdata->task.tk_status = status; oir->status = hdr->task.tk_status = status;
if (status >= 0) if (status >= 0)
rdata->res.count = status; hdr->res.count = status;
else else
rdata->header->pnfs_error = status; hdr->pnfs_error = status;
objlayout_iodone(oir); objlayout_iodone(oir);
/* must not use oir after this point */ /* must not use oir after this point */
dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__, dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
status, rdata->res.eof, sync); status, hdr->res.eof, sync);
if (sync) if (sync)
pnfs_ld_read_done(rdata); pnfs_ld_read_done(hdr);
else { else {
INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete); INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete);
schedule_work(&rdata->task.u.tk_work); schedule_work(&hdr->task.u.tk_work);
} }
} }
@ -266,12 +266,11 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
* Perform sync or async reads. * Perform sync or async reads.
*/ */
enum pnfs_try_status enum pnfs_try_status
objlayout_read_pagelist(struct nfs_pgio_data *rdata) objlayout_read_pagelist(struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *hdr = rdata->header;
struct inode *inode = hdr->inode; struct inode *inode = hdr->inode;
loff_t offset = rdata->args.offset; loff_t offset = hdr->args.offset;
size_t count = rdata->args.count; size_t count = hdr->args.count;
int err; int err;
loff_t eof; loff_t eof;
@ -279,23 +278,23 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata)
if (unlikely(offset + count > eof)) { if (unlikely(offset + count > eof)) {
if (offset >= eof) { if (offset >= eof) {
err = 0; err = 0;
rdata->res.count = 0; hdr->res.count = 0;
rdata->res.eof = 1; hdr->res.eof = 1;
/*FIXME: do we need to call pnfs_ld_read_done() */ /*FIXME: do we need to call pnfs_ld_read_done() */
goto out; goto out;
} }
count = eof - offset; count = eof - offset;
} }
rdata->res.eof = (offset + count) >= eof; hdr->res.eof = (offset + count) >= eof;
_fix_verify_io_params(hdr->lseg, &rdata->args.pages, _fix_verify_io_params(hdr->lseg, &hdr->args.pages,
&rdata->args.pgbase, &hdr->args.pgbase,
rdata->args.offset, rdata->args.count); hdr->args.offset, hdr->args.count);
dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n", dprintk("%s: inode(%lx) offset 0x%llx count 0x%Zx eof=%d\n",
__func__, inode->i_ino, offset, count, rdata->res.eof); __func__, inode->i_ino, offset, count, hdr->res.eof);
err = objio_read_pagelist(rdata); err = objio_read_pagelist(hdr);
out: out:
if (unlikely(err)) { if (unlikely(err)) {
hdr->pnfs_error = err; hdr->pnfs_error = err;
@ -312,38 +311,38 @@ objlayout_read_pagelist(struct nfs_pgio_data *rdata)
static void _rpc_write_complete(struct work_struct *work) static void _rpc_write_complete(struct work_struct *work)
{ {
struct rpc_task *task; struct rpc_task *task;
struct nfs_pgio_data *wdata; struct nfs_pgio_header *hdr;
dprintk("%s enter\n", __func__); dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work); task = container_of(work, struct rpc_task, u.tk_work);
wdata = container_of(task, struct nfs_pgio_data, task); hdr = container_of(task, struct nfs_pgio_header, task);
pnfs_ld_write_done(wdata); pnfs_ld_write_done(hdr);
} }
void void
objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync) objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{ {
struct nfs_pgio_data *wdata = oir->rpcdata; struct nfs_pgio_header *hdr = oir->rpcdata;
oir->status = wdata->task.tk_status = status; oir->status = hdr->task.tk_status = status;
if (status >= 0) { if (status >= 0) {
wdata->res.count = status; hdr->res.count = status;
wdata->verf.committed = oir->committed; hdr->verf.committed = oir->committed;
} else { } else {
wdata->header->pnfs_error = status; hdr->pnfs_error = status;
} }
objlayout_iodone(oir); objlayout_iodone(oir);
/* must not use oir after this point */ /* must not use oir after this point */
dprintk("%s: Return status %zd committed %d sync=%d\n", __func__, dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
status, wdata->verf.committed, sync); status, hdr->verf.committed, sync);
if (sync) if (sync)
pnfs_ld_write_done(wdata); pnfs_ld_write_done(hdr);
else { else {
INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete); INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete);
schedule_work(&wdata->task.u.tk_work); schedule_work(&hdr->task.u.tk_work);
} }
} }
@ -351,17 +350,15 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
* Perform sync or async writes. * Perform sync or async writes.
*/ */
enum pnfs_try_status enum pnfs_try_status
objlayout_write_pagelist(struct nfs_pgio_data *wdata, objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how)
int how)
{ {
struct nfs_pgio_header *hdr = wdata->header;
int err; int err;
_fix_verify_io_params(hdr->lseg, &wdata->args.pages, _fix_verify_io_params(hdr->lseg, &hdr->args.pages,
&wdata->args.pgbase, &hdr->args.pgbase,
wdata->args.offset, wdata->args.count); hdr->args.offset, hdr->args.count);
err = objio_write_pagelist(wdata, how); err = objio_write_pagelist(hdr, how);
if (unlikely(err)) { if (unlikely(err)) {
hdr->pnfs_error = err; hdr->pnfs_error = err;
dprintk("%s: Returned Error %d\n", __func__, err); dprintk("%s: Returned Error %d\n", __func__, err);

View file

@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
*/ */
extern void objio_free_result(struct objlayout_io_res *oir); extern void objio_free_result(struct objlayout_io_res *oir);
extern int objio_read_pagelist(struct nfs_pgio_data *rdata); extern int objio_read_pagelist(struct nfs_pgio_header *rdata);
extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how); extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how);
/* /*
* callback API * callback API
@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg(
extern void objlayout_free_lseg(struct pnfs_layout_segment *); extern void objlayout_free_lseg(struct pnfs_layout_segment *);
extern enum pnfs_try_status objlayout_read_pagelist( extern enum pnfs_try_status objlayout_read_pagelist(
struct nfs_pgio_data *); struct nfs_pgio_header *);
extern enum pnfs_try_status objlayout_write_pagelist( extern enum pnfs_try_status objlayout_write_pagelist(
struct nfs_pgio_data *, struct nfs_pgio_header *,
int how); int how);
extern void objlayout_encode_layoutcommit( extern void objlayout_encode_layoutcommit(

View file

@ -141,16 +141,24 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
* @req - request in group that is to be locked * @req - request in group that is to be locked
* *
* this lock must be held if modifying the page group list * this lock must be held if modifying the page group list
*
* returns result from wait_on_bit_lock: 0 on success, < 0 on error
*/ */
void int
nfs_page_group_lock(struct nfs_page *req) nfs_page_group_lock(struct nfs_page *req, bool wait)
{ {
struct nfs_page *head = req->wb_head; struct nfs_page *head = req->wb_head;
int ret;
WARN_ON_ONCE(head != head->wb_head); WARN_ON_ONCE(head != head->wb_head);
wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, do {
ret = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
} while (wait && ret != 0);
WARN_ON_ONCE(ret > 0);
return ret;
} }
/* /*
@ -211,7 +219,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
{ {
bool ret; bool ret;
nfs_page_group_lock(req); nfs_page_group_lock(req, true);
ret = nfs_page_group_sync_on_bit_locked(req, bit); ret = nfs_page_group_sync_on_bit_locked(req, bit);
nfs_page_group_unlock(req); nfs_page_group_unlock(req);
@ -454,123 +462,72 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
} }
EXPORT_SYMBOL_GPL(nfs_generic_pg_test); EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr) struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops)
{ {
return container_of(hdr, struct nfs_rw_header, header); struct nfs_pgio_header *hdr = ops->rw_alloc_header();
}
/**
* nfs_rw_header_alloc - Allocate a header for a read or write
* @ops: Read or write function vector
*/
struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
{
struct nfs_rw_header *header = ops->rw_alloc_header();
if (header) {
struct nfs_pgio_header *hdr = &header->header;
if (hdr) {
INIT_LIST_HEAD(&hdr->pages); INIT_LIST_HEAD(&hdr->pages);
spin_lock_init(&hdr->lock); spin_lock_init(&hdr->lock);
atomic_set(&hdr->refcnt, 0);
hdr->rw_ops = ops; hdr->rw_ops = ops;
} }
return header; return hdr;
} }
EXPORT_SYMBOL_GPL(nfs_rw_header_alloc); EXPORT_SYMBOL_GPL(nfs_pgio_header_alloc);
/* /*
* nfs_rw_header_free - Free a read or write header * nfs_pgio_header_free - Free a read or write header
* @hdr: The header to free * @hdr: The header to free
*/ */
void nfs_rw_header_free(struct nfs_pgio_header *hdr) void nfs_pgio_header_free(struct nfs_pgio_header *hdr)
{ {
hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr)); hdr->rw_ops->rw_free_header(hdr);
} }
EXPORT_SYMBOL_GPL(nfs_rw_header_free); EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
/** /**
* nfs_pgio_data_alloc - Allocate pageio data * nfs_pgio_data_destroy - make @hdr suitable for reuse
* @hdr: The header making a request *
* @pagecount: Number of pages to create * Frees memory and releases refs from nfs_generic_pgio, so that it may
* be called again.
*
* @hdr: A header that has had nfs_generic_pgio called
*/ */
static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr, void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr)
unsigned int pagecount)
{ {
struct nfs_pgio_data *data, *prealloc; put_nfs_open_context(hdr->args.context);
if (hdr->page_array.pagevec != hdr->page_array.page_array)
prealloc = &NFS_RW_HEADER(hdr)->rpc_data; kfree(hdr->page_array.pagevec);
if (prealloc->header == NULL)
data = prealloc;
else
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
goto out;
if (nfs_pgarray_set(&data->pages, pagecount)) {
data->header = hdr;
atomic_inc(&hdr->refcnt);
} else {
if (data != prealloc)
kfree(data);
data = NULL;
}
out:
return data;
} }
EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy);
/**
* nfs_pgio_data_release - Properly free pageio data
* @data: The data to release
*/
void nfs_pgio_data_release(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr);
put_nfs_open_context(data->args.context);
if (data->pages.pagevec != data->pages.page_array)
kfree(data->pages.pagevec);
if (data == &pageio_header->rpc_data) {
data->header = NULL;
data = NULL;
}
if (atomic_dec_and_test(&hdr->refcnt))
hdr->completion_ops->completion(hdr);
/* Note: we only free the rpc_task after callbacks are done.
* See the comment in rpc_free_task() for why
*/
kfree(data);
}
EXPORT_SYMBOL_GPL(nfs_pgio_data_release);
/** /**
* nfs_pgio_rpcsetup - Set up arguments for a pageio call * nfs_pgio_rpcsetup - Set up arguments for a pageio call
* @data: The pageio data * @hdr: The pageio hdr
* @count: Number of bytes to read * @count: Number of bytes to read
* @offset: Initial offset * @offset: Initial offset
* @how: How to commit data (writes only) * @how: How to commit data (writes only)
* @cinfo: Commit information for the call (writes only) * @cinfo: Commit information for the call (writes only)
*/ */
static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data, static void nfs_pgio_rpcsetup(struct nfs_pgio_header *hdr,
unsigned int count, unsigned int offset, unsigned int count, unsigned int offset,
int how, struct nfs_commit_info *cinfo) int how, struct nfs_commit_info *cinfo)
{ {
struct nfs_page *req = data->header->req; struct nfs_page *req = hdr->req;
/* Set up the RPC argument and reply structs /* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */ * NB: take care not to mess about with hdr->commit et al. */
data->args.fh = NFS_FH(data->header->inode); hdr->args.fh = NFS_FH(hdr->inode);
data->args.offset = req_offset(req) + offset; hdr->args.offset = req_offset(req) + offset;
/* pnfs_set_layoutcommit needs this */ /* pnfs_set_layoutcommit needs this */
data->mds_offset = data->args.offset; hdr->mds_offset = hdr->args.offset;
data->args.pgbase = req->wb_pgbase + offset; hdr->args.pgbase = req->wb_pgbase + offset;
data->args.pages = data->pages.pagevec; hdr->args.pages = hdr->page_array.pagevec;
data->args.count = count; hdr->args.count = count;
data->args.context = get_nfs_open_context(req->wb_context); hdr->args.context = get_nfs_open_context(req->wb_context);
data->args.lock_context = req->wb_lock_context; hdr->args.lock_context = req->wb_lock_context;
data->args.stable = NFS_UNSTABLE; hdr->args.stable = NFS_UNSTABLE;
switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
case 0: case 0:
break; break;
@ -578,59 +535,59 @@ static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
if (nfs_reqs_to_commit(cinfo)) if (nfs_reqs_to_commit(cinfo))
break; break;
default: default:
data->args.stable = NFS_FILE_SYNC; hdr->args.stable = NFS_FILE_SYNC;
} }
data->res.fattr = &data->fattr; hdr->res.fattr = &hdr->fattr;
data->res.count = count; hdr->res.count = count;
data->res.eof = 0; hdr->res.eof = 0;
data->res.verf = &data->verf; hdr->res.verf = &hdr->verf;
nfs_fattr_init(&data->fattr); nfs_fattr_init(&hdr->fattr);
} }
/** /**
* nfs_pgio_prepare - Prepare pageio data to go over the wire * nfs_pgio_prepare - Prepare pageio hdr to go over the wire
* @task: The current task * @task: The current task
* @calldata: pageio data to prepare * @calldata: pageio header to prepare
*/ */
static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
{ {
struct nfs_pgio_data *data = calldata; struct nfs_pgio_header *hdr = calldata;
int err; int err;
err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data); err = NFS_PROTO(hdr->inode)->pgio_rpc_prepare(task, hdr);
if (err) if (err)
rpc_exit(task, err); rpc_exit(task, err);
} }
int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data, int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
const struct rpc_call_ops *call_ops, int how, int flags) const struct rpc_call_ops *call_ops, int how, int flags)
{ {
struct rpc_task *task; struct rpc_task *task;
struct rpc_message msg = { struct rpc_message msg = {
.rpc_argp = &data->args, .rpc_argp = &hdr->args,
.rpc_resp = &data->res, .rpc_resp = &hdr->res,
.rpc_cred = data->header->cred, .rpc_cred = hdr->cred,
}; };
struct rpc_task_setup task_setup_data = { struct rpc_task_setup task_setup_data = {
.rpc_client = clnt, .rpc_client = clnt,
.task = &data->task, .task = &hdr->task,
.rpc_message = &msg, .rpc_message = &msg,
.callback_ops = call_ops, .callback_ops = call_ops,
.callback_data = data, .callback_data = hdr,
.workqueue = nfsiod_workqueue, .workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC | flags, .flags = RPC_TASK_ASYNC | flags,
}; };
int ret = 0; int ret = 0;
data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how); hdr->rw_ops->rw_initiate(hdr, &msg, &task_setup_data, how);
dprintk("NFS: %5u initiated pgio call " dprintk("NFS: %5u initiated pgio call "
"(req %s/%llu, %u bytes @ offset %llu)\n", "(req %s/%llu, %u bytes @ offset %llu)\n",
data->task.tk_pid, hdr->task.tk_pid,
data->header->inode->i_sb->s_id, hdr->inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(data->header->inode), (unsigned long long)NFS_FILEID(hdr->inode),
data->args.count, hdr->args.count,
(unsigned long long)data->args.offset); (unsigned long long)hdr->args.offset);
task = rpc_run_task(&task_setup_data); task = rpc_run_task(&task_setup_data);
if (IS_ERR(task)) { if (IS_ERR(task)) {
@ -657,22 +614,23 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr) struct nfs_pgio_header *hdr)
{ {
set_bit(NFS_IOHDR_REDO, &hdr->flags); set_bit(NFS_IOHDR_REDO, &hdr->flags);
nfs_pgio_data_release(hdr->data); nfs_pgio_data_destroy(hdr);
hdr->data = NULL; hdr->completion_ops->completion(hdr);
desc->pg_completion_ops->error_cleanup(&desc->pg_list); desc->pg_completion_ops->error_cleanup(&desc->pg_list);
return -ENOMEM; return -ENOMEM;
} }
/** /**
* nfs_pgio_release - Release pageio data * nfs_pgio_release - Release pageio data
* @calldata: The pageio data to release * @calldata: The pageio header to release
*/ */
static void nfs_pgio_release(void *calldata) static void nfs_pgio_release(void *calldata)
{ {
struct nfs_pgio_data *data = calldata; struct nfs_pgio_header *hdr = calldata;
if (data->header->rw_ops->rw_release) if (hdr->rw_ops->rw_release)
data->header->rw_ops->rw_release(data); hdr->rw_ops->rw_release(hdr);
nfs_pgio_data_release(data); nfs_pgio_data_destroy(hdr);
hdr->completion_ops->completion(hdr);
} }
/** /**
@ -713,22 +671,22 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init);
/** /**
* nfs_pgio_result - Basic pageio error handling * nfs_pgio_result - Basic pageio error handling
* @task: The task that ran * @task: The task that ran
* @calldata: Pageio data to check * @calldata: Pageio header to check
*/ */
static void nfs_pgio_result(struct rpc_task *task, void *calldata) static void nfs_pgio_result(struct rpc_task *task, void *calldata)
{ {
struct nfs_pgio_data *data = calldata; struct nfs_pgio_header *hdr = calldata;
struct inode *inode = data->header->inode; struct inode *inode = hdr->inode;
dprintk("NFS: %s: %5u, (status %d)\n", __func__, dprintk("NFS: %s: %5u, (status %d)\n", __func__,
task->tk_pid, task->tk_status); task->tk_pid, task->tk_status);
if (data->header->rw_ops->rw_done(task, data, inode) != 0) if (hdr->rw_ops->rw_done(task, hdr, inode) != 0)
return; return;
if (task->tk_status < 0) if (task->tk_status < 0)
nfs_set_pgio_error(data->header, task->tk_status, data->args.offset); nfs_set_pgio_error(hdr, task->tk_status, hdr->args.offset);
else else
data->header->rw_ops->rw_result(task, data); hdr->rw_ops->rw_result(task, hdr);
} }
/* /*
@ -744,17 +702,16 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
{ {
struct nfs_page *req; struct nfs_page *req;
struct page **pages; struct page **pages;
struct nfs_pgio_data *data;
struct list_head *head = &desc->pg_list; struct list_head *head = &desc->pg_list;
struct nfs_commit_info cinfo; struct nfs_commit_info cinfo;
unsigned int pagecount;
data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base, pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count);
desc->pg_count)); if (!nfs_pgarray_set(&hdr->page_array, pagecount))
if (!data)
return nfs_pgio_error(desc, hdr); return nfs_pgio_error(desc, hdr);
nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
pages = data->pages.pagevec; pages = hdr->page_array.pagevec;
while (!list_empty(head)) { while (!list_empty(head)) {
req = nfs_list_entry(head->next); req = nfs_list_entry(head->next);
nfs_list_remove_request(req); nfs_list_remove_request(req);
@ -767,8 +724,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
desc->pg_ioflags &= ~FLUSH_COND_STABLE; desc->pg_ioflags &= ~FLUSH_COND_STABLE;
/* Set up the argument struct */ /* Set up the argument struct */
nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo); nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
hdr->data = data;
desc->pg_rpc_callops = &nfs_pgio_common_ops; desc->pg_rpc_callops = &nfs_pgio_common_ops;
return 0; return 0;
} }
@ -776,25 +732,20 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio);
static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
{ {
struct nfs_rw_header *rw_hdr;
struct nfs_pgio_header *hdr; struct nfs_pgio_header *hdr;
int ret; int ret;
rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops); hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
if (!rw_hdr) { if (!hdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list); desc->pg_completion_ops->error_cleanup(&desc->pg_list);
return -ENOMEM; return -ENOMEM;
} }
hdr = &rw_hdr->header; nfs_pgheader_init(desc, hdr, nfs_pgio_header_free);
nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
atomic_inc(&hdr->refcnt);
ret = nfs_generic_pgio(desc, hdr); ret = nfs_generic_pgio(desc, hdr);
if (ret == 0) if (ret == 0)
ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
hdr->data, desc->pg_rpc_callops, hdr, desc->pg_rpc_callops,
desc->pg_ioflags, 0); desc->pg_ioflags, 0);
if (atomic_dec_and_test(&hdr->refcnt))
hdr->completion_ops->completion(hdr);
return ret; return ret;
} }
@ -907,8 +858,13 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
struct nfs_page *subreq; struct nfs_page *subreq;
unsigned int bytes_left = 0; unsigned int bytes_left = 0;
unsigned int offset, pgbase; unsigned int offset, pgbase;
int ret;
nfs_page_group_lock(req); ret = nfs_page_group_lock(req, false);
if (ret < 0) {
desc->pg_error = ret;
return 0;
}
subreq = req; subreq = req;
bytes_left = subreq->wb_bytes; bytes_left = subreq->wb_bytes;
@ -930,7 +886,11 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
if (desc->pg_recoalesce) if (desc->pg_recoalesce)
return 0; return 0;
/* retry add_request for this subreq */ /* retry add_request for this subreq */
nfs_page_group_lock(req); ret = nfs_page_group_lock(req, false);
if (ret < 0) {
desc->pg_error = ret;
return 0;
}
continue; continue;
} }
@ -1005,7 +965,38 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
} while (ret); } while (ret);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(nfs_pageio_add_request);
/*
* nfs_pageio_resend - Transfer requests to new descriptor and resend
* @hdr - the pgio header to move request from
* @desc - the pageio descriptor to add requests to
*
* Try to move each request (nfs_page) from @hdr to @desc then attempt
* to send them.
*
* Returns 0 on success and < 0 on error.
*/
int nfs_pageio_resend(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr)
{
LIST_HEAD(failed);
desc->pg_dreq = hdr->dreq;
while (!list_empty(&hdr->pages)) {
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
nfs_list_remove_request(req);
if (!nfs_pageio_add_request(desc, req))
nfs_list_add_request(req, &failed);
}
nfs_pageio_complete(desc);
if (!list_empty(&failed)) {
list_move(&failed, &hdr->pages);
return -EIO;
}
return 0;
}
EXPORT_SYMBOL_GPL(nfs_pageio_resend);
/** /**
* nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
@ -1021,7 +1012,6 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
break; break;
} }
} }
EXPORT_SYMBOL_GPL(nfs_pageio_complete);
/** /**
* nfs_pageio_cond_complete - Conditional I/O completion * nfs_pageio_cond_complete - Conditional I/O completion

View file

@ -361,6 +361,23 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
} }
EXPORT_SYMBOL_GPL(pnfs_put_lseg); EXPORT_SYMBOL_GPL(pnfs_put_lseg);
static void pnfs_put_lseg_async_work(struct work_struct *work)
{
struct pnfs_layout_segment *lseg;
lseg = container_of(work, struct pnfs_layout_segment, pls_work);
pnfs_put_lseg(lseg);
}
void
pnfs_put_lseg_async(struct pnfs_layout_segment *lseg)
{
INIT_WORK(&lseg->pls_work, pnfs_put_lseg_async_work);
schedule_work(&lseg->pls_work);
}
EXPORT_SYMBOL_GPL(pnfs_put_lseg_async);
static u64 static u64
end_offset(u64 start, u64 len) end_offset(u64 start, u64 len)
{ {
@ -1470,41 +1487,19 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
} }
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
int pnfs_write_done_resend_to_mds(struct inode *inode, int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *hdr)
struct list_head *head,
const struct nfs_pgio_completion_ops *compl_ops,
struct nfs_direct_req *dreq)
{ {
struct nfs_pageio_descriptor pgio; struct nfs_pageio_descriptor pgio;
LIST_HEAD(failed);
/* Resend all requests through the MDS */ /* Resend all requests through the MDS */
nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops); nfs_pageio_init_write(&pgio, hdr->inode, FLUSH_STABLE, true,
pgio.pg_dreq = dreq; hdr->completion_ops);
while (!list_empty(head)) { return nfs_pageio_resend(&pgio, hdr);
struct nfs_page *req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
if (!nfs_pageio_add_request(&pgio, req))
nfs_list_add_request(req, &failed);
}
nfs_pageio_complete(&pgio);
if (!list_empty(&failed)) {
/* For some reason our attempt to resend pages. Mark the
* overall send request as having failed, and let
* nfs_writeback_release_full deal with the error.
*/
list_move(&failed, head);
return -EIO;
}
return 0;
} }
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds); EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data) static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *hdr = data->header;
dprintk("pnfs write error = %d\n", hdr->pnfs_error); dprintk("pnfs write error = %d\n", hdr->pnfs_error);
if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
@ -1512,50 +1507,42 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
pnfs_return_layout(hdr->inode); pnfs_return_layout(hdr->inode);
} }
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
data->task.tk_status = pnfs_write_done_resend_to_mds(hdr->inode, hdr->task.tk_status = pnfs_write_done_resend_to_mds(hdr);
&hdr->pages,
hdr->completion_ops,
hdr->dreq);
} }
/* /*
* Called by non rpc-based layout drivers * Called by non rpc-based layout drivers
*/ */
void pnfs_ld_write_done(struct nfs_pgio_data *data) void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *hdr = data->header; trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
trace_nfs4_pnfs_write(data, hdr->pnfs_error);
if (!hdr->pnfs_error) { if (!hdr->pnfs_error) {
pnfs_set_layoutcommit(data); pnfs_set_layoutcommit(hdr);
hdr->mds_ops->rpc_call_done(&data->task, data); hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
} else } else
pnfs_ld_handle_write_error(data); pnfs_ld_handle_write_error(hdr);
hdr->mds_ops->rpc_release(data); hdr->mds_ops->rpc_release(hdr);
} }
EXPORT_SYMBOL_GPL(pnfs_ld_write_done); EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
static void static void
pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_data *data) struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *hdr = data->header;
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
list_splice_tail_init(&hdr->pages, &desc->pg_list); list_splice_tail_init(&hdr->pages, &desc->pg_list);
nfs_pageio_reset_write_mds(desc); nfs_pageio_reset_write_mds(desc);
desc->pg_recoalesce = 1; desc->pg_recoalesce = 1;
} }
nfs_pgio_data_release(data); nfs_pgio_data_destroy(hdr);
} }
static enum pnfs_try_status static enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_pgio_data *wdata, pnfs_try_to_write_data(struct nfs_pgio_header *hdr,
const struct rpc_call_ops *call_ops, const struct rpc_call_ops *call_ops,
struct pnfs_layout_segment *lseg, struct pnfs_layout_segment *lseg,
int how) int how)
{ {
struct nfs_pgio_header *hdr = wdata->header;
struct inode *inode = hdr->inode; struct inode *inode = hdr->inode;
enum pnfs_try_status trypnfs; enum pnfs_try_status trypnfs;
struct nfs_server *nfss = NFS_SERVER(inode); struct nfs_server *nfss = NFS_SERVER(inode);
@ -1563,8 +1550,8 @@ pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
hdr->mds_ops = call_ops; hdr->mds_ops = call_ops;
dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
inode->i_ino, wdata->args.count, wdata->args.offset, how); inode->i_ino, hdr->args.count, hdr->args.offset, how);
trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); trypnfs = nfss->pnfs_curr_ld->write_pagelist(hdr, how);
if (trypnfs != PNFS_NOT_ATTEMPTED) if (trypnfs != PNFS_NOT_ATTEMPTED)
nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
@ -1575,139 +1562,105 @@ static void
pnfs_do_write(struct nfs_pageio_descriptor *desc, pnfs_do_write(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr, int how) struct nfs_pgio_header *hdr, int how)
{ {
struct nfs_pgio_data *data = hdr->data;
const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
struct pnfs_layout_segment *lseg = desc->pg_lseg; struct pnfs_layout_segment *lseg = desc->pg_lseg;
enum pnfs_try_status trypnfs; enum pnfs_try_status trypnfs;
desc->pg_lseg = NULL; desc->pg_lseg = NULL;
trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
if (trypnfs == PNFS_NOT_ATTEMPTED) if (trypnfs == PNFS_NOT_ATTEMPTED)
pnfs_write_through_mds(desc, data); pnfs_write_through_mds(desc, hdr);
pnfs_put_lseg(lseg); pnfs_put_lseg(lseg);
} }
static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
{ {
pnfs_put_lseg(hdr->lseg); pnfs_put_lseg(hdr->lseg);
nfs_rw_header_free(hdr); nfs_pgio_header_free(hdr);
} }
EXPORT_SYMBOL_GPL(pnfs_writehdr_free); EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
int int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{ {
struct nfs_rw_header *whdr;
struct nfs_pgio_header *hdr; struct nfs_pgio_header *hdr;
int ret; int ret;
whdr = nfs_rw_header_alloc(desc->pg_rw_ops); hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
if (!whdr) { if (!hdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list); desc->pg_completion_ops->error_cleanup(&desc->pg_list);
pnfs_put_lseg(desc->pg_lseg); pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL; desc->pg_lseg = NULL;
return -ENOMEM; return -ENOMEM;
} }
hdr = &whdr->header;
nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
hdr->lseg = pnfs_get_lseg(desc->pg_lseg); hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
atomic_inc(&hdr->refcnt);
ret = nfs_generic_pgio(desc, hdr); ret = nfs_generic_pgio(desc, hdr);
if (ret != 0) { if (ret != 0) {
pnfs_put_lseg(desc->pg_lseg); pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL; desc->pg_lseg = NULL;
} else } else
pnfs_do_write(desc, hdr, desc->pg_ioflags); pnfs_do_write(desc, hdr, desc->pg_ioflags);
if (atomic_dec_and_test(&hdr->refcnt))
hdr->completion_ops->completion(hdr);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
int pnfs_read_done_resend_to_mds(struct inode *inode, int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *hdr)
struct list_head *head,
const struct nfs_pgio_completion_ops *compl_ops,
struct nfs_direct_req *dreq)
{ {
struct nfs_pageio_descriptor pgio; struct nfs_pageio_descriptor pgio;
LIST_HEAD(failed);
/* Resend all requests through the MDS */ /* Resend all requests through the MDS */
nfs_pageio_init_read(&pgio, inode, true, compl_ops); nfs_pageio_init_read(&pgio, hdr->inode, true, hdr->completion_ops);
pgio.pg_dreq = dreq; return nfs_pageio_resend(&pgio, hdr);
while (!list_empty(head)) {
struct nfs_page *req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
if (!nfs_pageio_add_request(&pgio, req))
nfs_list_add_request(req, &failed);
}
nfs_pageio_complete(&pgio);
if (!list_empty(&failed)) {
list_move(&failed, head);
return -EIO;
}
return 0;
} }
EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds); EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data) static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *hdr = data->header;
dprintk("pnfs read error = %d\n", hdr->pnfs_error); dprintk("pnfs read error = %d\n", hdr->pnfs_error);
if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags & if (NFS_SERVER(hdr->inode)->pnfs_curr_ld->flags &
PNFS_LAYOUTRET_ON_ERROR) { PNFS_LAYOUTRET_ON_ERROR) {
pnfs_return_layout(hdr->inode); pnfs_return_layout(hdr->inode);
} }
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags))
data->task.tk_status = pnfs_read_done_resend_to_mds(hdr->inode, hdr->task.tk_status = pnfs_read_done_resend_to_mds(hdr);
&hdr->pages,
hdr->completion_ops,
hdr->dreq);
} }
/* /*
* Called by non rpc-based layout drivers * Called by non rpc-based layout drivers
*/ */
void pnfs_ld_read_done(struct nfs_pgio_data *data) void pnfs_ld_read_done(struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *hdr = data->header; trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
trace_nfs4_pnfs_read(data, hdr->pnfs_error);
if (likely(!hdr->pnfs_error)) { if (likely(!hdr->pnfs_error)) {
__nfs4_read_done_cb(data); __nfs4_read_done_cb(hdr);
hdr->mds_ops->rpc_call_done(&data->task, data); hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
} else } else
pnfs_ld_handle_read_error(data); pnfs_ld_handle_read_error(hdr);
hdr->mds_ops->rpc_release(data); hdr->mds_ops->rpc_release(hdr);
} }
EXPORT_SYMBOL_GPL(pnfs_ld_read_done); EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
static void static void
pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_data *data) struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *hdr = data->header;
if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
list_splice_tail_init(&hdr->pages, &desc->pg_list); list_splice_tail_init(&hdr->pages, &desc->pg_list);
nfs_pageio_reset_read_mds(desc); nfs_pageio_reset_read_mds(desc);
desc->pg_recoalesce = 1; desc->pg_recoalesce = 1;
} }
nfs_pgio_data_release(data); nfs_pgio_data_destroy(hdr);
} }
/* /*
* Call the appropriate parallel I/O subsystem read function. * Call the appropriate parallel I/O subsystem read function.
*/ */
static enum pnfs_try_status static enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_pgio_data *rdata, pnfs_try_to_read_data(struct nfs_pgio_header *hdr,
const struct rpc_call_ops *call_ops, const struct rpc_call_ops *call_ops,
struct pnfs_layout_segment *lseg) struct pnfs_layout_segment *lseg)
{ {
struct nfs_pgio_header *hdr = rdata->header;
struct inode *inode = hdr->inode; struct inode *inode = hdr->inode;
struct nfs_server *nfss = NFS_SERVER(inode); struct nfs_server *nfss = NFS_SERVER(inode);
enum pnfs_try_status trypnfs; enum pnfs_try_status trypnfs;
@ -1715,9 +1668,9 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
hdr->mds_ops = call_ops; hdr->mds_ops = call_ops;
dprintk("%s: Reading ino:%lu %u@%llu\n", dprintk("%s: Reading ino:%lu %u@%llu\n",
__func__, inode->i_ino, rdata->args.count, rdata->args.offset); __func__, inode->i_ino, hdr->args.count, hdr->args.offset);
trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); trypnfs = nfss->pnfs_curr_ld->read_pagelist(hdr);
if (trypnfs != PNFS_NOT_ATTEMPTED) if (trypnfs != PNFS_NOT_ATTEMPTED)
nfs_inc_stats(inode, NFSIOS_PNFS_READ); nfs_inc_stats(inode, NFSIOS_PNFS_READ);
dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
@ -1727,52 +1680,46 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
static void static void
pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_data *data = hdr->data;
const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
struct pnfs_layout_segment *lseg = desc->pg_lseg; struct pnfs_layout_segment *lseg = desc->pg_lseg;
enum pnfs_try_status trypnfs; enum pnfs_try_status trypnfs;
desc->pg_lseg = NULL; desc->pg_lseg = NULL;
trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
if (trypnfs == PNFS_NOT_ATTEMPTED) if (trypnfs == PNFS_NOT_ATTEMPTED)
pnfs_read_through_mds(desc, data); pnfs_read_through_mds(desc, hdr);
pnfs_put_lseg(lseg); pnfs_put_lseg(lseg);
} }
static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
{ {
pnfs_put_lseg(hdr->lseg); pnfs_put_lseg(hdr->lseg);
nfs_rw_header_free(hdr); nfs_pgio_header_free(hdr);
} }
EXPORT_SYMBOL_GPL(pnfs_readhdr_free); EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
int int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{ {
struct nfs_rw_header *rhdr;
struct nfs_pgio_header *hdr; struct nfs_pgio_header *hdr;
int ret; int ret;
rhdr = nfs_rw_header_alloc(desc->pg_rw_ops); hdr = nfs_pgio_header_alloc(desc->pg_rw_ops);
if (!rhdr) { if (!hdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list); desc->pg_completion_ops->error_cleanup(&desc->pg_list);
ret = -ENOMEM; ret = -ENOMEM;
pnfs_put_lseg(desc->pg_lseg); pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL; desc->pg_lseg = NULL;
return ret; return ret;
} }
hdr = &rhdr->header;
nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
hdr->lseg = pnfs_get_lseg(desc->pg_lseg); hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
atomic_inc(&hdr->refcnt);
ret = nfs_generic_pgio(desc, hdr); ret = nfs_generic_pgio(desc, hdr);
if (ret != 0) { if (ret != 0) {
pnfs_put_lseg(desc->pg_lseg); pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL; desc->pg_lseg = NULL;
} else } else
pnfs_do_read(desc, hdr); pnfs_do_read(desc, hdr);
if (atomic_dec_and_test(&hdr->refcnt))
hdr->completion_ops->completion(hdr);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
@ -1820,12 +1767,11 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
void void
pnfs_set_layoutcommit(struct nfs_pgio_data *wdata) pnfs_set_layoutcommit(struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *hdr = wdata->header;
struct inode *inode = hdr->inode; struct inode *inode = hdr->inode;
struct nfs_inode *nfsi = NFS_I(inode); struct nfs_inode *nfsi = NFS_I(inode);
loff_t end_pos = wdata->mds_offset + wdata->res.count; loff_t end_pos = hdr->mds_offset + hdr->res.count;
bool mark_as_dirty = false; bool mark_as_dirty = false;
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);

View file

@ -32,6 +32,7 @@
#include <linux/nfs_fs.h> #include <linux/nfs_fs.h>
#include <linux/nfs_page.h> #include <linux/nfs_page.h>
#include <linux/workqueue.h>
enum { enum {
NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
@ -46,6 +47,7 @@ struct pnfs_layout_segment {
atomic_t pls_refcount; atomic_t pls_refcount;
unsigned long pls_flags; unsigned long pls_flags;
struct pnfs_layout_hdr *pls_layout; struct pnfs_layout_hdr *pls_layout;
struct work_struct pls_work;
}; };
enum pnfs_try_status { enum pnfs_try_status {
@ -104,6 +106,8 @@ struct pnfs_layoutdriver_type {
int max); int max);
void (*recover_commit_reqs) (struct list_head *list, void (*recover_commit_reqs) (struct list_head *list,
struct nfs_commit_info *cinfo); struct nfs_commit_info *cinfo);
struct nfs_page * (*search_commit_reqs)(struct nfs_commit_info *cinfo,
struct page *page);
int (*commit_pagelist)(struct inode *inode, int (*commit_pagelist)(struct inode *inode,
struct list_head *mds_pages, struct list_head *mds_pages,
int how, int how,
@ -113,8 +117,8 @@ struct pnfs_layoutdriver_type {
* Return PNFS_ATTEMPTED to indicate the layout code has attempted * Return PNFS_ATTEMPTED to indicate the layout code has attempted
* I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
*/ */
enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data); enum pnfs_try_status (*read_pagelist)(struct nfs_pgio_header *);
enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how); enum pnfs_try_status (*write_pagelist)(struct nfs_pgio_header *, int);
void (*free_deviceid_node) (struct nfs4_deviceid_node *); void (*free_deviceid_node) (struct nfs4_deviceid_node *);
@ -179,6 +183,7 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
/* pnfs.c */ /* pnfs.c */
void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_put_lseg(struct pnfs_layout_segment *lseg); void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg);
void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32); void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
void unset_pnfs_layoutdriver(struct nfs_server *); void unset_pnfs_layoutdriver(struct nfs_server *);
@ -213,13 +218,13 @@ bool pnfs_roc(struct inode *ino);
void pnfs_roc_release(struct inode *ino); void pnfs_roc_release(struct inode *ino);
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata); void pnfs_set_layoutcommit(struct nfs_pgio_header *);
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
int _pnfs_return_layout(struct inode *); int _pnfs_return_layout(struct inode *);
int pnfs_commit_and_return_layout(struct inode *); int pnfs_commit_and_return_layout(struct inode *);
void pnfs_ld_write_done(struct nfs_pgio_data *); void pnfs_ld_write_done(struct nfs_pgio_header *);
void pnfs_ld_read_done(struct nfs_pgio_data *); void pnfs_ld_read_done(struct nfs_pgio_header *);
struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
struct nfs_open_context *ctx, struct nfs_open_context *ctx,
loff_t pos, loff_t pos,
@ -228,12 +233,8 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
gfp_t gfp_flags); gfp_t gfp_flags);
void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
int pnfs_read_done_resend_to_mds(struct inode *inode, struct list_head *head, int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *);
const struct nfs_pgio_completion_ops *compl_ops, int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *);
struct nfs_direct_req *dreq);
int pnfs_write_done_resend_to_mds(struct inode *inode, struct list_head *head,
const struct nfs_pgio_completion_ops *compl_ops,
struct nfs_direct_req *dreq);
struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
/* nfs4_deviceid_flags */ /* nfs4_deviceid_flags */
@ -345,6 +346,17 @@ pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo);
} }
/*
 * Ask the inode's current layout driver to look up a commit request for
 * @page within @cinfo.
 *
 * Returns whatever the driver's search_commit_reqs() hook returns, or NULL
 * when the server has no layout driver set or the driver does not provide
 * that hook.
 */
static inline struct nfs_page *
pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
			struct page *page)
{
	struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;

	if (ld != NULL && ld->search_commit_reqs != NULL)
		return ld->search_commit_reqs(cinfo, page);

	return NULL;
}
/* Should the pNFS client commit and return the layout upon a setattr */ /* Should the pNFS client commit and return the layout upon a setattr */
static inline bool static inline bool
pnfs_ld_layoutret_on_setattr(struct inode *inode) pnfs_ld_layoutret_on_setattr(struct inode *inode)
@ -410,6 +422,10 @@ static inline void pnfs_put_lseg(struct pnfs_layout_segment *lseg)
{ {
} }
/*
 * No-op variant of pnfs_put_lseg_async(): ignores @lseg entirely.
 * NOTE(review): presumably this is the stub selected when pNFS support is
 * not built in - confirm against the enclosing preprocessor conditional.
 */
static inline void pnfs_put_lseg_async(struct pnfs_layout_segment *lseg)
{
}
static inline int pnfs_return_layout(struct inode *ino) static inline int pnfs_return_layout(struct inode *ino)
{ {
return 0; return 0;
@ -496,6 +512,13 @@ pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list,
{ {
} }
/*
 * Variant of pnfs_search_commit_reqs() that never finds a request: all
 * arguments are ignored and NULL is returned unconditionally.
 * NOTE(review): presumably this is the stub selected when pNFS support is
 * not built in - confirm against the enclosing preprocessor conditional.
 */
static inline struct nfs_page *
pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo,
struct page *page)
{
return NULL;
}
static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
{ {
return 0; return 0;

View file

@ -578,46 +578,49 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return 0; return 0;
} }
static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data) static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{ {
struct inode *inode = data->header->inode; struct inode *inode = hdr->inode;
nfs_invalidate_atime(inode); nfs_invalidate_atime(inode);
if (task->tk_status >= 0) { if (task->tk_status >= 0) {
nfs_refresh_inode(inode, data->res.fattr); nfs_refresh_inode(inode, hdr->res.fattr);
/* Emulate the eof flag, which isn't normally needed in NFSv2 /* Emulate the eof flag, which isn't normally needed in NFSv2
* as it is guaranteed to always return the file attributes * as it is guaranteed to always return the file attributes
*/ */
if (data->args.offset + data->res.count >= data->res.fattr->size) if (hdr->args.offset + hdr->res.count >= hdr->res.fattr->size)
data->res.eof = 1; hdr->res.eof = 1;
} }
return 0; return 0;
} }
static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg) static void nfs_proc_read_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{ {
msg->rpc_proc = &nfs_procedures[NFSPROC_READ]; msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
} }
static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data) static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{ {
rpc_call_start(task); rpc_call_start(task);
return 0; return 0;
} }
static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data) static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{ {
struct inode *inode = data->header->inode; struct inode *inode = hdr->inode;
if (task->tk_status >= 0) if (task->tk_status >= 0)
nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); nfs_post_op_update_inode_force_wcc(inode, hdr->res.fattr);
return 0; return 0;
} }
static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg) static void nfs_proc_write_setup(struct nfs_pgio_header *hdr,
struct rpc_message *msg)
{ {
/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
data->args.stable = NFS_FILE_SYNC; hdr->args.stable = NFS_FILE_SYNC;
msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE]; msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
} }

View file

@ -33,12 +33,12 @@ static const struct nfs_rw_ops nfs_rw_read_ops;
static struct kmem_cache *nfs_rdata_cachep; static struct kmem_cache *nfs_rdata_cachep;
static struct nfs_rw_header *nfs_readhdr_alloc(void) static struct nfs_pgio_header *nfs_readhdr_alloc(void)
{ {
return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
} }
static void nfs_readhdr_free(struct nfs_rw_header *rhdr) static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
{ {
kmem_cache_free(nfs_rdata_cachep, rhdr); kmem_cache_free(nfs_rdata_cachep, rhdr);
} }
@ -115,12 +115,6 @@ static void nfs_readpage_release(struct nfs_page *req)
unlock_page(req->wb_page); unlock_page(req->wb_page);
} }
dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
req->wb_context->dentry->d_inode->i_sb->s_id,
(unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
nfs_release_request(req); nfs_release_request(req);
} }
@ -172,14 +166,15 @@ out:
hdr->release(hdr); hdr->release(hdr);
} }
static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg, static void nfs_initiate_read(struct nfs_pgio_header *hdr,
struct rpc_message *msg,
struct rpc_task_setup *task_setup_data, int how) struct rpc_task_setup *task_setup_data, int how)
{ {
struct inode *inode = data->header->inode; struct inode *inode = hdr->inode;
int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
task_setup_data->flags |= swap_flags; task_setup_data->flags |= swap_flags;
NFS_PROTO(inode)->read_setup(data, msg); NFS_PROTO(inode)->read_setup(hdr, msg);
} }
static void static void
@ -203,14 +198,15 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
* This is the callback from RPC telling us whether a reply was * This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown). * received or some error occurred (timeout or socket shutdown).
*/ */
static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data, static int nfs_readpage_done(struct rpc_task *task,
struct nfs_pgio_header *hdr,
struct inode *inode) struct inode *inode)
{ {
int status = NFS_PROTO(inode)->read_done(task, data); int status = NFS_PROTO(inode)->read_done(task, hdr);
if (status != 0) if (status != 0)
return status; return status;
nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count); nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count);
if (task->tk_status == -ESTALE) { if (task->tk_status == -ESTALE) {
set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
@ -219,34 +215,34 @@ static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
return 0; return 0;
} }
static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data) static void nfs_readpage_retry(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_args *argp = &data->args; struct nfs_pgio_args *argp = &hdr->args;
struct nfs_pgio_res *resp = &data->res; struct nfs_pgio_res *resp = &hdr->res;
/* This is a short read! */ /* This is a short read! */
nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD);
/* Has the server at least made some progress? */ /* Has the server at least made some progress? */
if (resp->count == 0) { if (resp->count == 0) {
nfs_set_pgio_error(data->header, -EIO, argp->offset); nfs_set_pgio_error(hdr, -EIO, argp->offset);
return; return;
} }
/* Yes, so retry the read at the end of the data */ /* Yes, so retry the read at the end of the hdr */
data->mds_offset += resp->count; hdr->mds_offset += resp->count;
argp->offset += resp->count; argp->offset += resp->count;
argp->pgbase += resp->count; argp->pgbase += resp->count;
argp->count -= resp->count; argp->count -= resp->count;
rpc_restart_call_prepare(task); rpc_restart_call_prepare(task);
} }
static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data) static void nfs_readpage_result(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *hdr = data->header; if (hdr->res.eof) {
if (data->res.eof) {
loff_t bound; loff_t bound;
bound = data->args.offset + data->res.count; bound = hdr->args.offset + hdr->res.count;
spin_lock(&hdr->lock); spin_lock(&hdr->lock);
if (bound < hdr->io_start + hdr->good_bytes) { if (bound < hdr->io_start + hdr->good_bytes) {
set_bit(NFS_IOHDR_EOF, &hdr->flags); set_bit(NFS_IOHDR_EOF, &hdr->flags);
@ -254,8 +250,8 @@ static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *dat
hdr->good_bytes = bound - hdr->io_start; hdr->good_bytes = bound - hdr->io_start;
} }
spin_unlock(&hdr->lock); spin_unlock(&hdr->lock);
} else if (data->res.count != data->args.count) } else if (hdr->res.count != hdr->args.count)
nfs_readpage_retry(task, data); nfs_readpage_retry(task, hdr);
} }
/* /*
@ -404,7 +400,7 @@ out:
int __init nfs_init_readpagecache(void) int __init nfs_init_readpagecache(void)
{ {
nfs_rdata_cachep = kmem_cache_create("nfs_read_data", nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
sizeof(struct nfs_rw_header), sizeof(struct nfs_pgio_header),
0, SLAB_HWCACHE_ALIGN, 0, SLAB_HWCACHE_ALIGN,
NULL); NULL);
if (nfs_rdata_cachep == NULL) if (nfs_rdata_cachep == NULL)

View file

@ -1027,8 +1027,7 @@ static bool nfs_auth_info_add(struct nfs_auth_info *auth_info,
rpc_authflavor_t flavor) rpc_authflavor_t flavor)
{ {
unsigned int i; unsigned int i;
unsigned int max_flavor_len = (sizeof(auth_info->flavors) / unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors);
sizeof(auth_info->flavors[0]));
/* make sure this flavor isn't already in the list */ /* make sure this flavor isn't already in the list */
for (i = 0; i < auth_info->flavor_len; i++) { for (i = 0; i < auth_info->flavor_len; i++) {
@ -2180,7 +2179,7 @@ out_no_address:
return -EINVAL; return -EINVAL;
} }
#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ #define NFS_REMOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
| NFS_MOUNT_SECURE \ | NFS_MOUNT_SECURE \
| NFS_MOUNT_TCP \ | NFS_MOUNT_TCP \
| NFS_MOUNT_VER3 \ | NFS_MOUNT_VER3 \
@ -2188,15 +2187,16 @@ out_no_address:
| NFS_MOUNT_NONLM \ | NFS_MOUNT_NONLM \
| NFS_MOUNT_BROKEN_SUID \ | NFS_MOUNT_BROKEN_SUID \
| NFS_MOUNT_STRICTLOCK \ | NFS_MOUNT_STRICTLOCK \
| NFS_MOUNT_UNSHARED \
| NFS_MOUNT_NORESVPORT \
| NFS_MOUNT_LEGACY_INTERFACE) | NFS_MOUNT_LEGACY_INTERFACE)
#define NFS_MOUNT_CMP_FLAGMASK (NFS_REMOUNT_CMP_FLAGMASK & \
~(NFS_MOUNT_UNSHARED | NFS_MOUNT_NORESVPORT))
static int static int
nfs_compare_remount_data(struct nfs_server *nfss, nfs_compare_remount_data(struct nfs_server *nfss,
struct nfs_parsed_mount_data *data) struct nfs_parsed_mount_data *data)
{ {
if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK || if ((data->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK ||
data->rsize != nfss->rsize || data->rsize != nfss->rsize ||
data->wsize != nfss->wsize || data->wsize != nfss->wsize ||
data->version != nfss->nfs_client->rpc_ops->version || data->version != nfss->nfs_client->rpc_ops->version ||

View file

@ -47,6 +47,8 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops; static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
static const struct nfs_rw_ops nfs_rw_write_ops; static const struct nfs_rw_ops nfs_rw_write_ops;
static void nfs_clear_request_commit(struct nfs_page *req); static void nfs_clear_request_commit(struct nfs_page *req);
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
struct inode *inode);
static struct kmem_cache *nfs_wdata_cachep; static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool; static mempool_t *nfs_wdata_mempool;
@ -71,18 +73,18 @@ void nfs_commit_free(struct nfs_commit_data *p)
} }
EXPORT_SYMBOL_GPL(nfs_commit_free); EXPORT_SYMBOL_GPL(nfs_commit_free);
static struct nfs_rw_header *nfs_writehdr_alloc(void) static struct nfs_pgio_header *nfs_writehdr_alloc(void)
{ {
struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
if (p) if (p)
memset(p, 0, sizeof(*p)); memset(p, 0, sizeof(*p));
return p; return p;
} }
static void nfs_writehdr_free(struct nfs_rw_header *whdr) static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
{ {
mempool_free(whdr, nfs_wdata_mempool); mempool_free(hdr, nfs_wdata_mempool);
} }
static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
@ -92,6 +94,38 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
} }
/*
* nfs_page_search_commits_for_head_request_locked
*
* Search through commit lists on @inode for the head request for @page.
* Must be called while holding the inode (which is cinfo) lock.
*
* Returns the head request if found, or NULL if not found.
*/
static struct nfs_page *
nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
struct page *page)
{
struct nfs_page *freq, *t;
struct nfs_commit_info cinfo;
struct inode *inode = &nfsi->vfs_inode;
nfs_init_cinfo_from_inode(&cinfo, inode);
/* search through pnfs commit lists */
freq = pnfs_search_commit_reqs(inode, &cinfo, page);
if (freq)
return freq->wb_head;
/* Linearly search the commit list for the correct request */
list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
if (freq->wb_page == page)
return freq->wb_head;
}
return NULL;
}
/* /*
* nfs_page_find_head_request_locked - find head request associated with @page * nfs_page_find_head_request_locked - find head request associated with @page
* *
@ -106,21 +140,12 @@ nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
if (PagePrivate(page)) if (PagePrivate(page))
req = (struct nfs_page *)page_private(page); req = (struct nfs_page *)page_private(page);
else if (unlikely(PageSwapCache(page))) { else if (unlikely(PageSwapCache(page)))
struct nfs_page *freq, *t; req = nfs_page_search_commits_for_head_request_locked(nfsi,
page);
/* Linearly search the commit list for the correct req */
list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) {
if (freq->wb_page == page) {
req = freq->wb_head;
break;
}
}
}
if (req) { if (req) {
WARN_ON_ONCE(req->wb_head != req); WARN_ON_ONCE(req->wb_head != req);
kref_get(&req->wb_kref); kref_get(&req->wb_kref);
} }
@ -216,7 +241,7 @@ static bool nfs_page_group_covers_page(struct nfs_page *req)
unsigned int pos = 0; unsigned int pos = 0;
unsigned int len = nfs_page_length(req->wb_page); unsigned int len = nfs_page_length(req->wb_page);
nfs_page_group_lock(req); nfs_page_group_lock(req, true);
do { do {
tmp = nfs_page_group_search_locked(req->wb_head, pos); tmp = nfs_page_group_search_locked(req->wb_head, pos);
@ -379,8 +404,6 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
subreq->wb_head = subreq; subreq->wb_head = subreq;
subreq->wb_this_page = subreq; subreq->wb_this_page = subreq;
nfs_clear_request_commit(subreq);
/* subreq is now totally disconnected from page group or any /* subreq is now totally disconnected from page group or any
* write / commit lists. last chance to wake any waiters */ * write / commit lists. last chance to wake any waiters */
nfs_unlock_request(subreq); nfs_unlock_request(subreq);
@ -456,7 +479,9 @@ try_again:
} }
/* lock each request in the page group */ /* lock each request in the page group */
nfs_page_group_lock(head); ret = nfs_page_group_lock(head, false);
if (ret < 0)
return ERR_PTR(ret);
subreq = head; subreq = head;
do { do {
/* /*
@ -488,7 +513,7 @@ try_again:
* Commit list removal accounting is done after locks are dropped */ * Commit list removal accounting is done after locks are dropped */
subreq = head; subreq = head;
do { do {
nfs_list_remove_request(subreq); nfs_clear_request_commit(subreq);
subreq = subreq->wb_this_page; subreq = subreq->wb_this_page;
} while (subreq != head); } while (subreq != head);
@ -518,15 +543,11 @@ try_again:
nfs_page_group_unlock(head); nfs_page_group_unlock(head);
/* drop lock to clear_request_commit the head req and clean up /* drop lock to clean uprequests on destroy list */
* requests on destroy list */
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
nfs_destroy_unlinked_subrequests(destroy_list, head); nfs_destroy_unlinked_subrequests(destroy_list, head);
/* clean up commit list state */
nfs_clear_request_commit(head);
/* still holds ref on head from nfs_page_find_head_request_locked /* still holds ref on head from nfs_page_find_head_request_locked
* and still has lock on head from lock loop */ * and still has lock on head from lock loop */
return head; return head;
@ -705,6 +726,8 @@ static void nfs_inode_remove_request(struct nfs_page *req)
if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
nfs_release_request(req); nfs_release_request(req);
else
WARN_ON_ONCE(1);
} }
static void static void
@ -808,6 +831,7 @@ nfs_clear_page_commit(struct page *page)
dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE); dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE);
} }
/* Called holding inode (/cinfo) lock */
static void static void
nfs_clear_request_commit(struct nfs_page *req) nfs_clear_request_commit(struct nfs_page *req)
{ {
@ -817,20 +841,17 @@ nfs_clear_request_commit(struct nfs_page *req)
nfs_init_cinfo_from_inode(&cinfo, inode); nfs_init_cinfo_from_inode(&cinfo, inode);
if (!pnfs_clear_request_commit(req, &cinfo)) { if (!pnfs_clear_request_commit(req, &cinfo)) {
spin_lock(cinfo.lock);
nfs_request_remove_commit_list(req, &cinfo); nfs_request_remove_commit_list(req, &cinfo);
spin_unlock(cinfo.lock);
} }
nfs_clear_page_commit(req->wb_page); nfs_clear_page_commit(req->wb_page);
} }
} }
static inline int nfs_write_need_commit(struct nfs_pgio_header *hdr)
int nfs_write_need_commit(struct nfs_pgio_data *data)
{ {
if (data->verf.committed == NFS_DATA_SYNC) if (hdr->verf.committed == NFS_DATA_SYNC)
return data->header->lseg == NULL; return hdr->lseg == NULL;
return data->verf.committed != NFS_FILE_SYNC; return hdr->verf.committed != NFS_FILE_SYNC;
} }
#else #else
@ -856,8 +877,7 @@ nfs_clear_request_commit(struct nfs_page *req)
{ {
} }
static inline int nfs_write_need_commit(struct nfs_pgio_header *hdr)
int nfs_write_need_commit(struct nfs_pgio_data *data)
{ {
return 0; return 0;
} }
@ -883,11 +903,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
nfs_context_set_write_error(req->wb_context, hdr->error); nfs_context_set_write_error(req->wb_context, hdr->error);
goto remove_req; goto remove_req;
} }
if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) { if (nfs_write_need_commit(hdr)) {
nfs_mark_request_dirty(req);
goto next;
}
if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
nfs_mark_request_commit(req, hdr->lseg, &cinfo); nfs_mark_request_commit(req, hdr->lseg, &cinfo);
goto next; goto next;
@ -1038,9 +1054,9 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
else else
req->wb_bytes = rqend - req->wb_offset; req->wb_bytes = rqend - req->wb_offset;
out_unlock: out_unlock:
spin_unlock(&inode->i_lock);
if (req) if (req)
nfs_clear_request_commit(req); nfs_clear_request_commit(req);
spin_unlock(&inode->i_lock);
return req; return req;
out_flushme: out_flushme:
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
@ -1241,17 +1257,18 @@ static int flush_task_priority(int how)
return RPC_PRIORITY_NORMAL; return RPC_PRIORITY_NORMAL;
} }
static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg, static void nfs_initiate_write(struct nfs_pgio_header *hdr,
struct rpc_message *msg,
struct rpc_task_setup *task_setup_data, int how) struct rpc_task_setup *task_setup_data, int how)
{ {
struct inode *inode = data->header->inode; struct inode *inode = hdr->inode;
int priority = flush_task_priority(how); int priority = flush_task_priority(how);
task_setup_data->priority = priority; task_setup_data->priority = priority;
NFS_PROTO(inode)->write_setup(data, msg); NFS_PROTO(inode)->write_setup(hdr, msg);
nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
&task_setup_data->rpc_client, msg, data); &task_setup_data->rpc_client, msg, hdr);
} }
/* If a nfs_flush_* function fails, it should remove reqs from @head and /* If a nfs_flush_* function fails, it should remove reqs from @head and
@ -1313,21 +1330,9 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_rpc_prepare(task, data); NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
} }
static void nfs_writeback_release_common(struct nfs_pgio_data *data) static void nfs_writeback_release_common(struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_header *hdr = data->header; /* do nothing! */
int status = data->task.tk_status;
if ((status >= 0) && nfs_write_need_commit(data)) {
spin_lock(&hdr->lock);
if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
; /* Do nothing */
else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf));
else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf)))
set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
spin_unlock(&hdr->lock);
}
} }
/* /*
@ -1358,7 +1363,8 @@ static int nfs_should_remove_suid(const struct inode *inode)
/* /*
* This function is called when the WRITE call is complete. * This function is called when the WRITE call is complete.
*/ */
static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data, static int nfs_writeback_done(struct rpc_task *task,
struct nfs_pgio_header *hdr,
struct inode *inode) struct inode *inode)
{ {
int status; int status;
@ -1370,13 +1376,14 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
* another writer had changed the file, but some applications * another writer had changed the file, but some applications
* depend on tighter cache coherency when writing. * depend on tighter cache coherency when writing.
*/ */
status = NFS_PROTO(inode)->write_done(task, data); status = NFS_PROTO(inode)->write_done(task, hdr);
if (status != 0) if (status != 0)
return status; return status;
nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count); nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) { if (hdr->res.verf->committed < hdr->args.stable &&
task->tk_status >= 0) {
/* We tried a write call, but the server did not /* We tried a write call, but the server did not
* commit data to stable storage even though we * commit data to stable storage even though we
* requested it. * requested it.
@ -1392,7 +1399,7 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
dprintk("NFS: faulty NFS server %s:" dprintk("NFS: faulty NFS server %s:"
" (committed = %d) != (stable = %d)\n", " (committed = %d) != (stable = %d)\n",
NFS_SERVER(inode)->nfs_client->cl_hostname, NFS_SERVER(inode)->nfs_client->cl_hostname,
data->res.verf->committed, data->args.stable); hdr->res.verf->committed, hdr->args.stable);
complain = jiffies + 300 * HZ; complain = jiffies + 300 * HZ;
} }
} }
@ -1407,16 +1414,17 @@ static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
/* /*
* This function is called when the WRITE call is complete. * This function is called when the WRITE call is complete.
*/ */
static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data) static void nfs_writeback_result(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{ {
struct nfs_pgio_args *argp = &data->args; struct nfs_pgio_args *argp = &hdr->args;
struct nfs_pgio_res *resp = &data->res; struct nfs_pgio_res *resp = &hdr->res;
if (resp->count < argp->count) { if (resp->count < argp->count) {
static unsigned long complain; static unsigned long complain;
/* This a short write! */ /* This a short write! */
nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE); nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE);
/* Has the server at least made some progress? */ /* Has the server at least made some progress? */
if (resp->count == 0) { if (resp->count == 0) {
@ -1426,14 +1434,14 @@ static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *da
argp->count); argp->count);
complain = jiffies + 300 * HZ; complain = jiffies + 300 * HZ;
} }
nfs_set_pgio_error(data->header, -EIO, argp->offset); nfs_set_pgio_error(hdr, -EIO, argp->offset);
task->tk_status = -EIO; task->tk_status = -EIO;
return; return;
} }
/* Was this an NFSv2 write or an NFSv3 stable write? */ /* Was this an NFSv2 write or an NFSv3 stable write? */
if (resp->verf->committed != NFS_UNSTABLE) { if (resp->verf->committed != NFS_UNSTABLE) {
/* Resend from where the server left off */ /* Resend from where the server left off */
data->mds_offset += resp->count; hdr->mds_offset += resp->count;
argp->offset += resp->count; argp->offset += resp->count;
argp->pgbase += resp->count; argp->pgbase += resp->count;
argp->count -= resp->count; argp->count -= resp->count;
@ -1884,7 +1892,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
int __init nfs_init_writepagecache(void) int __init nfs_init_writepagecache(void)
{ {
nfs_wdata_cachep = kmem_cache_create("nfs_write_data", nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
sizeof(struct nfs_rw_header), sizeof(struct nfs_pgio_header),
0, SLAB_HWCACHE_ALIGN, 0, SLAB_HWCACHE_ALIGN,
NULL); NULL);
if (nfs_wdata_cachep == NULL) if (nfs_wdata_cachep == NULL)

View file

@ -30,9 +30,6 @@
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
EXPORT_SYMBOL_GPL(nfsacl_encode);
EXPORT_SYMBOL_GPL(nfsacl_decode);
struct nfsacl_encode_desc { struct nfsacl_encode_desc {
struct xdr_array2_desc desc; struct xdr_array2_desc desc;
unsigned int count; unsigned int count;
@ -136,6 +133,7 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode,
nfsacl_desc.desc.array_len; nfsacl_desc.desc.array_len;
return err; return err;
} }
EXPORT_SYMBOL_GPL(nfsacl_encode);
struct nfsacl_decode_desc { struct nfsacl_decode_desc {
struct xdr_array2_desc desc; struct xdr_array2_desc desc;
@ -295,3 +293,4 @@ int nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt,
return 8 + nfsacl_desc.desc.elem_size * return 8 + nfsacl_desc.desc.elem_size *
nfsacl_desc.desc.array_len; nfsacl_desc.desc.array_len;
} }
EXPORT_SYMBOL_GPL(nfsacl_decode);

View file

@ -52,6 +52,7 @@ struct nfs_access_entry {
unsigned long jiffies; unsigned long jiffies;
struct rpc_cred * cred; struct rpc_cred * cred;
int mask; int mask;
struct rcu_head rcu_head;
}; };
struct nfs_lockowner { struct nfs_lockowner {
@ -352,6 +353,7 @@ extern int nfs_release(struct inode *, struct file *);
extern int nfs_attribute_timeout(struct inode *inode); extern int nfs_attribute_timeout(struct inode *inode);
extern int nfs_attribute_cache_expired(struct inode *inode); extern int nfs_attribute_cache_expired(struct inode *inode);
extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
extern int nfs_revalidate_inode_rcu(struct nfs_server *server, struct inode *inode);
extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
extern int nfs_setattr(struct dentry *, struct iattr *); extern int nfs_setattr(struct dentry *, struct iattr *);

View file

@ -45,6 +45,7 @@ struct nfs_client {
struct sockaddr_storage cl_addr; /* server identifier */ struct sockaddr_storage cl_addr; /* server identifier */
size_t cl_addrlen; size_t cl_addrlen;
char * cl_hostname; /* hostname of server */ char * cl_hostname; /* hostname of server */
char * cl_acceptor; /* GSSAPI acceptor name */
struct list_head cl_share_link; /* link in global client list */ struct list_head cl_share_link; /* link in global client list */
struct list_head cl_superblocks; /* List of nfs_server structs */ struct list_head cl_superblocks; /* List of nfs_server structs */

View file

@ -26,7 +26,7 @@ enum {
PG_MAPPED, /* page private set for buffered io */ PG_MAPPED, /* page private set for buffered io */
PG_CLEAN, /* write succeeded */ PG_CLEAN, /* write succeeded */
PG_COMMIT_TO_DS, /* used by pnfs layouts */ PG_COMMIT_TO_DS, /* used by pnfs layouts */
PG_INODE_REF, /* extra ref held by inode (head req only) */ PG_INODE_REF, /* extra ref held by inode when in writeback */
PG_HEADLOCK, /* page group lock of wb_head */ PG_HEADLOCK, /* page group lock of wb_head */
PG_TEARDOWN, /* page group sync for destroy */ PG_TEARDOWN, /* page group sync for destroy */
PG_UNLOCKPAGE, /* page group sync bit in read path */ PG_UNLOCKPAGE, /* page group sync bit in read path */
@ -62,12 +62,13 @@ struct nfs_pageio_ops {
struct nfs_rw_ops { struct nfs_rw_ops {
const fmode_t rw_mode; const fmode_t rw_mode;
struct nfs_rw_header *(*rw_alloc_header)(void); struct nfs_pgio_header *(*rw_alloc_header)(void);
void (*rw_free_header)(struct nfs_rw_header *); void (*rw_free_header)(struct nfs_pgio_header *);
void (*rw_release)(struct nfs_pgio_data *); void (*rw_release)(struct nfs_pgio_header *);
int (*rw_done)(struct rpc_task *, struct nfs_pgio_data *, struct inode *); int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *,
void (*rw_result)(struct rpc_task *, struct nfs_pgio_data *); struct inode *);
void (*rw_initiate)(struct nfs_pgio_data *, struct rpc_message *, void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *);
void (*rw_initiate)(struct nfs_pgio_header *, struct rpc_message *,
struct rpc_task_setup *, int); struct rpc_task_setup *, int);
}; };
@ -111,6 +112,8 @@ extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
int how); int how);
extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
struct nfs_page *); struct nfs_page *);
extern int nfs_pageio_resend(struct nfs_pageio_descriptor *,
struct nfs_pgio_header *);
extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc); extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t); extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
@ -119,7 +122,7 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
extern int nfs_wait_on_request(struct nfs_page *); extern int nfs_wait_on_request(struct nfs_page *);
extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_request(struct nfs_page *req);
extern void nfs_unlock_and_release_request(struct nfs_page *); extern void nfs_unlock_and_release_request(struct nfs_page *);
extern void nfs_page_group_lock(struct nfs_page *); extern int nfs_page_group_lock(struct nfs_page *, bool);
extern void nfs_page_group_unlock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *);
extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);

View file

@ -993,6 +993,7 @@ struct nfs4_setclientid {
unsigned int sc_uaddr_len; unsigned int sc_uaddr_len;
char sc_uaddr[RPCBIND_MAXUADDRLEN + 1]; char sc_uaddr[RPCBIND_MAXUADDRLEN + 1];
u32 sc_cb_ident; u32 sc_cb_ident;
struct rpc_cred *sc_cred;
}; };
struct nfs4_setclientid_res { struct nfs4_setclientid_res {
@ -1253,18 +1254,12 @@ enum {
NFS_IOHDR_ERROR = 0, NFS_IOHDR_ERROR = 0,
NFS_IOHDR_EOF, NFS_IOHDR_EOF,
NFS_IOHDR_REDO, NFS_IOHDR_REDO,
NFS_IOHDR_NEED_COMMIT,
NFS_IOHDR_NEED_RESCHED,
}; };
struct nfs_pgio_data;
struct nfs_pgio_header { struct nfs_pgio_header {
struct inode *inode; struct inode *inode;
struct rpc_cred *cred; struct rpc_cred *cred;
struct list_head pages; struct list_head pages;
struct nfs_pgio_data *data;
atomic_t refcnt;
struct nfs_page *req; struct nfs_page *req;
struct nfs_writeverf verf; /* Used for writes */ struct nfs_writeverf verf; /* Used for writes */
struct pnfs_layout_segment *lseg; struct pnfs_layout_segment *lseg;
@ -1281,28 +1276,22 @@ struct nfs_pgio_header {
int error; /* merge with pnfs_error */ int error; /* merge with pnfs_error */
unsigned long good_bytes; /* boundary of good data */ unsigned long good_bytes; /* boundary of good data */
unsigned long flags; unsigned long flags;
};
struct nfs_pgio_data { /*
struct nfs_pgio_header *header; * rpc data
*/
struct rpc_task task; struct rpc_task task;
struct nfs_fattr fattr; struct nfs_fattr fattr;
struct nfs_writeverf verf; /* Used for writes */
struct nfs_pgio_args args; /* argument struct */ struct nfs_pgio_args args; /* argument struct */
struct nfs_pgio_res res; /* result struct */ struct nfs_pgio_res res; /* result struct */
unsigned long timestamp; /* For lease renewal */ unsigned long timestamp; /* For lease renewal */
int (*pgio_done_cb) (struct rpc_task *task, struct nfs_pgio_data *data); int (*pgio_done_cb)(struct rpc_task *, struct nfs_pgio_header *);
__u64 mds_offset; /* Filelayout dense stripe */ __u64 mds_offset; /* Filelayout dense stripe */
struct nfs_page_array pages; struct nfs_page_array page_array;
struct nfs_client *ds_clp; /* pNFS data server */ struct nfs_client *ds_clp; /* pNFS data server */
int ds_idx; /* ds index if ds_clp is set */ int ds_idx; /* ds index if ds_clp is set */
}; };
struct nfs_rw_header {
struct nfs_pgio_header header;
struct nfs_pgio_data rpc_data;
};
struct nfs_mds_commit_info { struct nfs_mds_commit_info {
atomic_t rpcs_out; atomic_t rpcs_out;
unsigned long ncommit; unsigned long ncommit;
@ -1432,11 +1421,12 @@ struct nfs_rpc_ops {
struct nfs_pathconf *); struct nfs_pathconf *);
int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); int (*set_capabilities)(struct nfs_server *, struct nfs_fh *);
int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int); int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
int (*pgio_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *); int (*pgio_rpc_prepare)(struct rpc_task *,
void (*read_setup) (struct nfs_pgio_data *, struct rpc_message *); struct nfs_pgio_header *);
int (*read_done) (struct rpc_task *, struct nfs_pgio_data *); void (*read_setup)(struct nfs_pgio_header *, struct rpc_message *);
void (*write_setup) (struct nfs_pgio_data *, struct rpc_message *); int (*read_done)(struct rpc_task *, struct nfs_pgio_header *);
int (*write_done) (struct rpc_task *, struct nfs_pgio_data *); void (*write_setup)(struct nfs_pgio_header *, struct rpc_message *);
int (*write_done)(struct rpc_task *, struct nfs_pgio_header *);
void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *); void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *);
void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *); void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *);
int (*commit_done) (struct rpc_task *, struct nfs_commit_data *); int (*commit_done) (struct rpc_task *, struct nfs_commit_data *);

View file

@ -103,6 +103,7 @@ struct rpc_auth_create_args {
/* Flags for rpcauth_lookupcred() */ /* Flags for rpcauth_lookupcred() */
#define RPCAUTH_LOOKUP_NEW 0x01 /* Accept an uninitialised cred */ #define RPCAUTH_LOOKUP_NEW 0x01 /* Accept an uninitialised cred */
#define RPCAUTH_LOOKUP_RCU 0x02 /* lock-less lookup */
/* /*
* Client authentication ops * Client authentication ops
@ -140,6 +141,7 @@ struct rpc_credops {
void *, __be32 *, void *); void *, __be32 *, void *);
int (*crkey_timeout)(struct rpc_cred *); int (*crkey_timeout)(struct rpc_cred *);
bool (*crkey_to_expire)(struct rpc_cred *); bool (*crkey_to_expire)(struct rpc_cred *);
char * (*crstringify_acceptor)(struct rpc_cred *);
}; };
extern const struct rpc_authops authunix_ops; extern const struct rpc_authops authunix_ops;
@ -153,6 +155,7 @@ void rpc_destroy_generic_auth(void);
void rpc_destroy_authunix(void); void rpc_destroy_authunix(void);
struct rpc_cred * rpc_lookup_cred(void); struct rpc_cred * rpc_lookup_cred(void);
struct rpc_cred * rpc_lookup_cred_nonblock(void);
struct rpc_cred * rpc_lookup_machine_cred(const char *service_name); struct rpc_cred * rpc_lookup_machine_cred(const char *service_name);
int rpcauth_register(const struct rpc_authops *); int rpcauth_register(const struct rpc_authops *);
int rpcauth_unregister(const struct rpc_authops *); int rpcauth_unregister(const struct rpc_authops *);
@ -182,6 +185,7 @@ void rpcauth_clear_credcache(struct rpc_cred_cache *);
int rpcauth_key_timeout_notify(struct rpc_auth *, int rpcauth_key_timeout_notify(struct rpc_auth *,
struct rpc_cred *); struct rpc_cred *);
bool rpcauth_cred_key_to_expire(struct rpc_cred *); bool rpcauth_cred_key_to_expire(struct rpc_cred *);
char * rpcauth_stringify_acceptor(struct rpc_cred *);
static inline static inline
struct rpc_cred * get_rpccred(struct rpc_cred *cred) struct rpc_cred * get_rpccred(struct rpc_cred *cred)

View file

@ -69,8 +69,9 @@ struct gss_cl_ctx {
enum rpc_gss_proc gc_proc; enum rpc_gss_proc gc_proc;
u32 gc_seq; u32 gc_seq;
spinlock_t gc_seq_lock; spinlock_t gc_seq_lock;
struct gss_ctx __rcu *gc_gss_ctx; struct gss_ctx *gc_gss_ctx;
struct xdr_netobj gc_wire_ctx; struct xdr_netobj gc_wire_ctx;
struct xdr_netobj gc_acceptor;
u32 gc_win; u32 gc_win;
unsigned long gc_expiry; unsigned long gc_expiry;
struct rcu_head gc_rcu; struct rcu_head gc_rcu;

View file

@ -81,7 +81,7 @@ struct gss_krb5_enctype {
struct xdr_netobj *in, struct xdr_netobj *in,
struct xdr_netobj *out); /* complete key generation */ struct xdr_netobj *out); /* complete key generation */
u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset, u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset,
struct xdr_buf *buf, int ec, struct xdr_buf *buf,
struct page **pages); /* v2 encryption function */ struct page **pages); /* v2 encryption function */
u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset, u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset,
struct xdr_buf *buf, u32 *headskip, struct xdr_buf *buf, u32 *headskip,
@ -310,7 +310,7 @@ gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e,
u32 u32
gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
struct xdr_buf *buf, int ec, struct xdr_buf *buf,
struct page **pages); struct page **pages);
u32 u32

View file

@ -62,8 +62,6 @@
#define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */
/* memory registration strategies */ /* memory registration strategies */
#define RPCRDMA_PERSISTENT_REGISTRATION (1)
enum rpcrdma_memreg { enum rpcrdma_memreg {
RPCRDMA_BOUNCEBUFFERS = 0, RPCRDMA_BOUNCEBUFFERS = 0,
RPCRDMA_REGISTER, RPCRDMA_REGISTER,

View file

@ -176,7 +176,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf,
len = (buf + buflen) - delim - 1; len = (buf + buflen) - delim - 1;
p = kstrndup(delim + 1, len, GFP_KERNEL); p = kstrndup(delim + 1, len, GFP_KERNEL);
if (p) { if (p) {
unsigned long scope_id = 0; u32 scope_id = 0;
struct net_device *dev; struct net_device *dev;
dev = dev_get_by_name(net, p); dev = dev_get_by_name(net, p);
@ -184,7 +184,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf,
scope_id = dev->ifindex; scope_id = dev->ifindex;
dev_put(dev); dev_put(dev);
} else { } else {
if (strict_strtoul(p, 10, &scope_id) == 0) { if (kstrtou32(p, 10, &scope_id) == 0) {
kfree(p); kfree(p);
return 0; return 0;
} }
@ -304,7 +304,7 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags)
* @sap: buffer into which to plant socket address * @sap: buffer into which to plant socket address
* @salen: size of buffer * @salen: size of buffer
* *
* @uaddr does not have to be '\0'-terminated, but strict_strtoul() and * @uaddr does not have to be '\0'-terminated, but kstrtou8() and
* rpc_pton() require proper string termination to be successful. * rpc_pton() require proper string termination to be successful.
* *
* Returns the size of the socket address if successful; otherwise * Returns the size of the socket address if successful; otherwise
@ -315,7 +315,7 @@ size_t rpc_uaddr2sockaddr(struct net *net, const char *uaddr,
const size_t salen) const size_t salen)
{ {
char *c, buf[RPCBIND_MAXUADDRLEN + sizeof('\0')]; char *c, buf[RPCBIND_MAXUADDRLEN + sizeof('\0')];
unsigned long portlo, porthi; u8 portlo, porthi;
unsigned short port; unsigned short port;
if (uaddr_len > RPCBIND_MAXUADDRLEN) if (uaddr_len > RPCBIND_MAXUADDRLEN)
@ -327,18 +327,14 @@ size_t rpc_uaddr2sockaddr(struct net *net, const char *uaddr,
c = strrchr(buf, '.'); c = strrchr(buf, '.');
if (unlikely(c == NULL)) if (unlikely(c == NULL))
return 0; return 0;
if (unlikely(strict_strtoul(c + 1, 10, &portlo) != 0)) if (unlikely(kstrtou8(c + 1, 10, &portlo) != 0))
return 0;
if (unlikely(portlo > 255))
return 0; return 0;
*c = '\0'; *c = '\0';
c = strrchr(buf, '.'); c = strrchr(buf, '.');
if (unlikely(c == NULL)) if (unlikely(c == NULL))
return 0; return 0;
if (unlikely(strict_strtoul(c + 1, 10, &porthi) != 0)) if (unlikely(kstrtou8(c + 1, 10, &porthi) != 0))
return 0;
if (unlikely(porthi > 255))
return 0; return 0;
port = (unsigned short)((porthi << 8) | portlo); port = (unsigned short)((porthi << 8) | portlo);

View file

@ -48,7 +48,7 @@ static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp)
if (!val) if (!val)
goto out_inval; goto out_inval;
ret = strict_strtoul(val, 0, &num); ret = kstrtoul(val, 0, &num);
if (ret == -EINVAL) if (ret == -EINVAL)
goto out_inval; goto out_inval;
nbits = fls(num); nbits = fls(num);
@ -80,6 +80,10 @@ static struct kernel_param_ops param_ops_hashtbl_sz = {
module_param_named(auth_hashtable_size, auth_hashbits, hashtbl_sz, 0644); module_param_named(auth_hashtable_size, auth_hashbits, hashtbl_sz, 0644);
MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size"); MODULE_PARM_DESC(auth_hashtable_size, "RPC credential cache hashtable size");
/*
 * Ceiling for the unused-credential count: rpcauth_cache_enforce_limit()
 * compares number_cred_unused against this value and prunes when it is
 * exceeded.  Exposed as a ulong module parameter with mode 0644.
 * NOTE(review): the ULONG_MAX default presumably leaves the limit
 * effectively disabled - confirm against the type of number_cred_unused.
 */
static unsigned long auth_max_cred_cachesize = ULONG_MAX;
module_param(auth_max_cred_cachesize, ulong, 0644);
MODULE_PARM_DESC(auth_max_cred_cachesize, "RPC credential maximum total cache size");
static u32 static u32
pseudoflavor_to_flavor(u32 flavor) { pseudoflavor_to_flavor(u32 flavor) {
if (flavor > RPC_AUTH_MAXFLAVOR) if (flavor > RPC_AUTH_MAXFLAVOR)
@ -363,6 +367,15 @@ rpcauth_cred_key_to_expire(struct rpc_cred *cred)
} }
EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire); EXPORT_SYMBOL_GPL(rpcauth_cred_key_to_expire);
/*
 * rpcauth_stringify_acceptor - forward to the cred's crstringify_acceptor op
 * @cred: credential whose cr_ops table is consulted
 *
 * Returns whatever the credential's crstringify_acceptor operation returns,
 * or NULL when the ops table does not provide that operation.
 */
char *
rpcauth_stringify_acceptor(struct rpc_cred *cred)
{
	char *(*stringify)(struct rpc_cred *);

	stringify = cred->cr_ops->crstringify_acceptor;
	if (stringify == NULL)
		return NULL;
	return stringify(cred);
}
EXPORT_SYMBOL_GPL(rpcauth_stringify_acceptor);
/* /*
* Destroy a list of credentials * Destroy a list of credentials
*/ */
@ -472,6 +485,20 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
return freed; return freed;
} }
static unsigned long
rpcauth_cache_do_shrink(int nr_to_scan)
{
LIST_HEAD(free);
unsigned long freed;
spin_lock(&rpc_credcache_lock);
freed = rpcauth_prune_expired(&free, nr_to_scan);
spin_unlock(&rpc_credcache_lock);
rpcauth_destroy_credlist(&free);
return freed;
}
/* /*
* Run memory cache shrinker. * Run memory cache shrinker.
*/ */
@ -479,9 +506,6 @@ static unsigned long
rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{ {
LIST_HEAD(free);
unsigned long freed;
if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL) if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL)
return SHRINK_STOP; return SHRINK_STOP;
@ -489,12 +513,7 @@ rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
if (list_empty(&cred_unused)) if (list_empty(&cred_unused))
return SHRINK_STOP; return SHRINK_STOP;
spin_lock(&rpc_credcache_lock); return rpcauth_cache_do_shrink(sc->nr_to_scan);
freed = rpcauth_prune_expired(&free, sc->nr_to_scan);
spin_unlock(&rpc_credcache_lock);
rpcauth_destroy_credlist(&free);
return freed;
} }
static unsigned long static unsigned long
@ -504,6 +523,21 @@ rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
} }
/*
 * Shrink the credential cache when the number of unused credentials
 * exceeds auth_max_cred_cachesize. A single call scans at most 100
 * entries, or fewer when the excess is smaller than that.
 */
static void
rpcauth_cache_enforce_limit(void)
{
	unsigned long excess;
	unsigned int batch;

	/* Within the limit: nothing to do. */
	if (number_cred_unused <= auth_max_cred_cachesize)
		return;

	excess = number_cred_unused - auth_max_cred_cachesize;
	batch = (excess < 100) ? excess : 100;

	rpcauth_cache_do_shrink(batch);
}
/* /*
* Look up a process' credentials in the authentication cache * Look up a process' credentials in the authentication cache
*/ */
@ -523,6 +557,12 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) { hlist_for_each_entry_rcu(entry, &cache->hashtable[nr], cr_hash) {
if (!entry->cr_ops->crmatch(acred, entry, flags)) if (!entry->cr_ops->crmatch(acred, entry, flags))
continue; continue;
if (flags & RPCAUTH_LOOKUP_RCU) {
if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) &&
!test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags))
cred = entry;
break;
}
spin_lock(&cache->lock); spin_lock(&cache->lock);
if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) { if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) {
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
@ -537,6 +577,9 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
if (cred != NULL) if (cred != NULL)
goto found; goto found;
if (flags & RPCAUTH_LOOKUP_RCU)
return ERR_PTR(-ECHILD);
new = auth->au_ops->crcreate(auth, acred, flags); new = auth->au_ops->crcreate(auth, acred, flags);
if (IS_ERR(new)) { if (IS_ERR(new)) {
cred = new; cred = new;
@ -557,6 +600,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
} else } else
list_add_tail(&new->cr_lru, &free); list_add_tail(&new->cr_lru, &free);
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
rpcauth_cache_enforce_limit();
found: found:
if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) &&
cred->cr_ops->cr_init != NULL && cred->cr_ops->cr_init != NULL &&
@ -586,10 +630,8 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
memset(&acred, 0, sizeof(acred)); memset(&acred, 0, sizeof(acred));
acred.uid = cred->fsuid; acred.uid = cred->fsuid;
acred.gid = cred->fsgid; acred.gid = cred->fsgid;
acred.group_info = get_group_info(((struct cred *)cred)->group_info); acred.group_info = cred->group_info;
ret = auth->au_ops->lookup_cred(auth, &acred, flags); ret = auth->au_ops->lookup_cred(auth, &acred, flags);
put_group_info(acred.group_info);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(rpcauth_lookupcred); EXPORT_SYMBOL_GPL(rpcauth_lookupcred);

View file

@ -38,6 +38,12 @@ struct rpc_cred *rpc_lookup_cred(void)
} }
EXPORT_SYMBOL_GPL(rpc_lookup_cred); EXPORT_SYMBOL_GPL(rpc_lookup_cred);
/*
 * Credential lookup on generic_auth that passes RPCAUTH_LOOKUP_RCU to
 * rpcauth_lookupcred().
 *
 * NOTE(review): with that flag the lookup appears to avoid taking locks
 * or creating a new credential, and may hand back an ERR_PTR() value
 * rather than a cred - confirm against the lookup_cred implementation
 * and check the result with IS_ERR() before use.
 */
struct rpc_cred *rpc_lookup_cred_nonblock(void)
{
return rpcauth_lookupcred(&generic_auth, RPCAUTH_LOOKUP_RCU);
}
EXPORT_SYMBOL_GPL(rpc_lookup_cred_nonblock);
/* /*
* Public call interface for looking up machine creds. * Public call interface for looking up machine creds.
*/ */

View file

@ -183,8 +183,9 @@ gss_cred_get_ctx(struct rpc_cred *cred)
struct gss_cl_ctx *ctx = NULL; struct gss_cl_ctx *ctx = NULL;
rcu_read_lock(); rcu_read_lock();
if (gss_cred->gc_ctx) ctx = rcu_dereference(gss_cred->gc_ctx);
ctx = gss_get_ctx(gss_cred->gc_ctx); if (ctx)
gss_get_ctx(ctx);
rcu_read_unlock(); rcu_read_unlock();
return ctx; return ctx;
} }
@ -262,9 +263,22 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
p = ERR_PTR(ret); p = ERR_PTR(ret);
goto err; goto err;
} }
dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u\n",
__func__, ctx->gc_expiry, now, timeout); /* is there any trailing data? */
return q; if (q == end) {
p = q;
goto done;
}
/* pull in acceptor name (if there is one) */
p = simple_get_netobj(q, end, &ctx->gc_acceptor);
if (IS_ERR(p))
goto err;
done:
dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u acceptor %.*s\n",
__func__, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len,
ctx->gc_acceptor.data);
return p;
err: err:
dprintk("RPC: %s returns error %ld\n", __func__, -PTR_ERR(p)); dprintk("RPC: %s returns error %ld\n", __func__, -PTR_ERR(p));
return p; return p;
@ -1194,13 +1208,13 @@ gss_destroying_context(struct rpc_cred *cred)
{ {
struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1);
struct rpc_task *task; struct rpc_task *task;
if (gss_cred->gc_ctx == NULL || if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
return 0; return 0;
gss_cred->gc_ctx->gc_proc = RPC_GSS_PROC_DESTROY; ctx->gc_proc = RPC_GSS_PROC_DESTROY;
cred->cr_ops = &gss_nullops; cred->cr_ops = &gss_nullops;
/* Take a reference to ensure the cred will be destroyed either /* Take a reference to ensure the cred will be destroyed either
@ -1225,6 +1239,7 @@ gss_do_free_ctx(struct gss_cl_ctx *ctx)
gss_delete_sec_context(&ctx->gc_gss_ctx); gss_delete_sec_context(&ctx->gc_gss_ctx);
kfree(ctx->gc_wire_ctx.data); kfree(ctx->gc_wire_ctx.data);
kfree(ctx->gc_acceptor.data);
kfree(ctx); kfree(ctx);
} }
@ -1260,7 +1275,7 @@ gss_destroy_nullcred(struct rpc_cred *cred)
{ {
struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth);
struct gss_cl_ctx *ctx = gss_cred->gc_ctx; struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1);
RCU_INIT_POINTER(gss_cred->gc_ctx, NULL); RCU_INIT_POINTER(gss_cred->gc_ctx, NULL);
call_rcu(&cred->cr_rcu, gss_free_cred_callback); call_rcu(&cred->cr_rcu, gss_free_cred_callback);
@ -1332,6 +1347,36 @@ gss_cred_init(struct rpc_auth *auth, struct rpc_cred *cred)
return err; return err;
} }
/*
 * Return a NUL-terminated copy of the acceptor name held in the
 * credential's GSS context.
 *
 * Returns NULL when the credential has no context, the acceptor name is
 * empty, or the allocation fails. The result is allocated with
 * kmalloc(); the caller is responsible for freeing it.
 *
 * gc_ctx is RCU-protected, but kmalloc(GFP_KERNEL) may sleep and must
 * therefore not be called between rcu_read_lock() and rcu_read_unlock().
 * The length is sampled under RCU, the buffer is allocated outside the
 * read-side critical section, and the context is looked up again before
 * copying. If the context was replaced by one with a longer acceptor in
 * the meantime, the buffer is reallocated and the copy retried.
 */
static char *
gss_stringify_acceptor(struct rpc_cred *cred)
{
	char *string = NULL;
	struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
	struct gss_cl_ctx *ctx;
	unsigned int len;
	struct xdr_netobj *acceptor;

	rcu_read_lock();
	ctx = rcu_dereference(gss_cred->gc_ctx);
	if (!ctx)
		goto out;

	len = ctx->gc_acceptor.len;
	rcu_read_unlock();

	/* no point if there's no string */
	if (!len)
		return NULL;
realloc:
	/* Sleeping allocation: must happen outside the RCU read section. */
	string = kmalloc(len + 1, GFP_KERNEL);
	if (!string)
		return NULL;

	rcu_read_lock();
	ctx = rcu_dereference(gss_cred->gc_ctx);

	/* did the ctx disappear or was it replaced by one with no acceptor? */
	if (!ctx || !ctx->gc_acceptor.len) {
		kfree(string);
		string = NULL;
		goto out;
	}

	acceptor = &ctx->gc_acceptor;

	/*
	 * Did we find a new acceptor that's longer than the original?
	 * Allocate a longer buffer and try again.
	 */
	if (len < acceptor->len) {
		len = acceptor->len;
		rcu_read_unlock();
		kfree(string);
		goto realloc;
	}

	memcpy(string, acceptor->data, acceptor->len);
	string[acceptor->len] = '\0';
out:
	rcu_read_unlock();
	return string;
}
/* /*
* Returns -EACCES if GSS context is NULL or will expire within the * Returns -EACCES if GSS context is NULL or will expire within the
* timeout (miliseconds) * timeout (miliseconds)
@ -1340,15 +1385,16 @@ static int
gss_key_timeout(struct rpc_cred *rc) gss_key_timeout(struct rpc_cred *rc)
{ {
struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
struct gss_cl_ctx *ctx;
unsigned long now = jiffies; unsigned long now = jiffies;
unsigned long expire; unsigned long expire;
if (gss_cred->gc_ctx == NULL) rcu_read_lock();
return -EACCES; ctx = rcu_dereference(gss_cred->gc_ctx);
if (ctx)
expire = gss_cred->gc_ctx->gc_expiry - (gss_key_expire_timeo * HZ); expire = ctx->gc_expiry - (gss_key_expire_timeo * HZ);
rcu_read_unlock();
if (time_after(now, expire)) if (!ctx || time_after(now, expire))
return -EACCES; return -EACCES;
return 0; return 0;
} }
@ -1357,13 +1403,19 @@ static int
gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags) gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
{ {
struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
struct gss_cl_ctx *ctx;
int ret; int ret;
if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags)) if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags))
goto out; goto out;
/* Don't match with creds that have expired. */ /* Don't match with creds that have expired. */
if (time_after(jiffies, gss_cred->gc_ctx->gc_expiry)) rcu_read_lock();
ctx = rcu_dereference(gss_cred->gc_ctx);
if (!ctx || time_after(jiffies, ctx->gc_expiry)) {
rcu_read_unlock();
return 0; return 0;
}
rcu_read_unlock();
if (!test_bit(RPCAUTH_CRED_UPTODATE, &rc->cr_flags)) if (!test_bit(RPCAUTH_CRED_UPTODATE, &rc->cr_flags))
return 0; return 0;
out: out:
@ -1909,29 +1961,31 @@ static const struct rpc_authops authgss_ops = {
}; };
static const struct rpc_credops gss_credops = { static const struct rpc_credops gss_credops = {
.cr_name = "AUTH_GSS", .cr_name = "AUTH_GSS",
.crdestroy = gss_destroy_cred, .crdestroy = gss_destroy_cred,
.cr_init = gss_cred_init, .cr_init = gss_cred_init,
.crbind = rpcauth_generic_bind_cred, .crbind = rpcauth_generic_bind_cred,
.crmatch = gss_match, .crmatch = gss_match,
.crmarshal = gss_marshal, .crmarshal = gss_marshal,
.crrefresh = gss_refresh, .crrefresh = gss_refresh,
.crvalidate = gss_validate, .crvalidate = gss_validate,
.crwrap_req = gss_wrap_req, .crwrap_req = gss_wrap_req,
.crunwrap_resp = gss_unwrap_resp, .crunwrap_resp = gss_unwrap_resp,
.crkey_timeout = gss_key_timeout, .crkey_timeout = gss_key_timeout,
.crstringify_acceptor = gss_stringify_acceptor,
}; };
static const struct rpc_credops gss_nullops = { static const struct rpc_credops gss_nullops = {
.cr_name = "AUTH_GSS", .cr_name = "AUTH_GSS",
.crdestroy = gss_destroy_nullcred, .crdestroy = gss_destroy_nullcred,
.crbind = rpcauth_generic_bind_cred, .crbind = rpcauth_generic_bind_cred,
.crmatch = gss_match, .crmatch = gss_match,
.crmarshal = gss_marshal, .crmarshal = gss_marshal,
.crrefresh = gss_refresh_null, .crrefresh = gss_refresh_null,
.crvalidate = gss_validate, .crvalidate = gss_validate,
.crwrap_req = gss_wrap_req, .crwrap_req = gss_wrap_req,
.crunwrap_resp = gss_unwrap_resp, .crunwrap_resp = gss_unwrap_resp,
.crstringify_acceptor = gss_stringify_acceptor,
}; };
static const struct rpc_pipe_ops gss_upcall_ops_v0 = { static const struct rpc_pipe_ops gss_upcall_ops_v0 = {

View file

@ -641,7 +641,7 @@ out:
u32 u32
gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset, gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
struct xdr_buf *buf, int ec, struct page **pages) struct xdr_buf *buf, struct page **pages)
{ {
u32 err; u32 err;
struct xdr_netobj hmac; struct xdr_netobj hmac;
@ -684,13 +684,8 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
ecptr = buf->tail[0].iov_base; ecptr = buf->tail[0].iov_base;
} }
memset(ecptr, 'X', ec);
buf->tail[0].iov_len += ec;
buf->len += ec;
/* copy plaintext gss token header after filler (if any) */ /* copy plaintext gss token header after filler (if any) */
memcpy(ecptr + ec, buf->head[0].iov_base + offset, memcpy(ecptr, buf->head[0].iov_base + offset, GSS_KRB5_TOK_HDR_LEN);
GSS_KRB5_TOK_HDR_LEN);
buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN; buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN;
buf->len += GSS_KRB5_TOK_HDR_LEN; buf->len += GSS_KRB5_TOK_HDR_LEN;

View file

@ -70,31 +70,37 @@
DEFINE_SPINLOCK(krb5_seq_lock); DEFINE_SPINLOCK(krb5_seq_lock);
static char * static void *
setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token) setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
{ {
__be16 *ptr, *krb5_hdr; u16 *ptr;
void *krb5_hdr;
int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength; int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
token->len = g_token_size(&ctx->mech_used, body_size); token->len = g_token_size(&ctx->mech_used, body_size);
ptr = (__be16 *)token->data; ptr = (u16 *)token->data;
g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr); g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr);
/* ptr now at start of header described in rfc 1964, section 1.2.1: */ /* ptr now at start of header described in rfc 1964, section 1.2.1: */
krb5_hdr = ptr; krb5_hdr = ptr;
*ptr++ = KG_TOK_MIC_MSG; *ptr++ = KG_TOK_MIC_MSG;
*ptr++ = cpu_to_le16(ctx->gk5e->signalg); /*
* signalg is stored as if it were converted from LE to host endian, even
* though it's an opaque pair of bytes according to the RFC.
*/
*ptr++ = (__force u16)cpu_to_le16(ctx->gk5e->signalg);
*ptr++ = SEAL_ALG_NONE; *ptr++ = SEAL_ALG_NONE;
*ptr++ = 0xffff; *ptr = 0xffff;
return (char *)krb5_hdr; return krb5_hdr;
} }
static void * static void *
setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token) setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
{ {
__be16 *ptr, *krb5_hdr; u16 *ptr;
void *krb5_hdr;
u8 *p, flags = 0x00; u8 *p, flags = 0x00;
if ((ctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0) if ((ctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
@ -104,15 +110,15 @@ setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
/* Per rfc 4121, sec 4.2.6.1, there is no header, /* Per rfc 4121, sec 4.2.6.1, there is no header,
* just start the token */ * just start the token */
krb5_hdr = ptr = (__be16 *)token->data; krb5_hdr = ptr = (u16 *)token->data;
*ptr++ = KG2_TOK_MIC; *ptr++ = KG2_TOK_MIC;
p = (u8 *)ptr; p = (u8 *)ptr;
*p++ = flags; *p++ = flags;
*p++ = 0xff; *p++ = 0xff;
ptr = (__be16 *)p; ptr = (u16 *)p;
*ptr++ = 0xffff;
*ptr++ = 0xffff; *ptr++ = 0xffff;
*ptr = 0xffff;
token->len = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength; token->len = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
return krb5_hdr; return krb5_hdr;
@ -181,7 +187,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
spin_lock(&krb5_seq_lock); spin_lock(&krb5_seq_lock);
seq_send = ctx->seq_send64++; seq_send = ctx->seq_send64++;
spin_unlock(&krb5_seq_lock); spin_unlock(&krb5_seq_lock);
*((u64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send); *((__be64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send);
if (ctx->initiate) { if (ctx->initiate) {
cksumkey = ctx->initiator_sign; cksumkey = ctx->initiator_sign;

View file

@ -201,9 +201,15 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength; msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength;
*(__be16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg); /*
memset(ptr + 4, 0xff, 4); * signalg and sealalg are stored as if they were converted from LE
*(__be16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg); * to host endian, even though they're opaque pairs of bytes according
* to the RFC.
*/
*(__le16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg);
*(__le16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg);
ptr[6] = 0xff;
ptr[7] = 0xff;
gss_krb5_make_confounder(msg_start, conflen); gss_krb5_make_confounder(msg_start, conflen);
@ -438,7 +444,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
u8 *ptr, *plainhdr; u8 *ptr, *plainhdr;
s32 now; s32 now;
u8 flags = 0x00; u8 flags = 0x00;
__be16 *be16ptr, ec = 0; __be16 *be16ptr;
__be64 *be64ptr; __be64 *be64ptr;
u32 err; u32 err;
@ -468,16 +474,16 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
be16ptr = (__be16 *)ptr; be16ptr = (__be16 *)ptr;
blocksize = crypto_blkcipher_blocksize(kctx->acceptor_enc); blocksize = crypto_blkcipher_blocksize(kctx->acceptor_enc);
*be16ptr++ = cpu_to_be16(ec); *be16ptr++ = 0;
/* "inner" token header always uses 0 for RRC */ /* "inner" token header always uses 0 for RRC */
*be16ptr++ = cpu_to_be16(0); *be16ptr++ = 0;
be64ptr = (__be64 *)be16ptr; be64ptr = (__be64 *)be16ptr;
spin_lock(&krb5_seq_lock); spin_lock(&krb5_seq_lock);
*be64ptr = cpu_to_be64(kctx->seq_send64++); *be64ptr = cpu_to_be64(kctx->seq_send64++);
spin_unlock(&krb5_seq_lock); spin_unlock(&krb5_seq_lock);
err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, ec, pages); err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages);
if (err) if (err)
return err; return err;

View file

@ -35,6 +35,8 @@ nul_destroy(struct rpc_auth *auth)
static struct rpc_cred * static struct rpc_cred *
nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) nul_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{ {
if (flags & RPCAUTH_LOOKUP_RCU)
return &null_cred;
return get_rpccred(&null_cred); return get_rpccred(&null_cred);
} }

View file

@ -1746,6 +1746,7 @@ call_bind_status(struct rpc_task *task)
case -EHOSTDOWN: case -EHOSTDOWN:
case -EHOSTUNREACH: case -EHOSTUNREACH:
case -ENETUNREACH: case -ENETUNREACH:
case -ENOBUFS:
case -EPIPE: case -EPIPE:
dprintk("RPC: %5u remote rpcbind unreachable: %d\n", dprintk("RPC: %5u remote rpcbind unreachable: %d\n",
task->tk_pid, task->tk_status); task->tk_pid, task->tk_status);
@ -1812,6 +1813,8 @@ call_connect_status(struct rpc_task *task)
case -ECONNABORTED: case -ECONNABORTED:
case -ENETUNREACH: case -ENETUNREACH:
case -EHOSTUNREACH: case -EHOSTUNREACH:
case -ENOBUFS:
case -EPIPE:
if (RPC_IS_SOFTCONN(task)) if (RPC_IS_SOFTCONN(task))
break; break;
/* retry with existing socket, after a delay */ /* retry with existing socket, after a delay */
@ -1918,6 +1921,7 @@ call_transmit_status(struct rpc_task *task)
case -ECONNRESET: case -ECONNRESET:
case -ECONNABORTED: case -ECONNABORTED:
case -ENOTCONN: case -ENOTCONN:
case -ENOBUFS:
case -EPIPE: case -EPIPE:
rpc_task_force_reencode(task); rpc_task_force_reencode(task);
} }
@ -2034,6 +2038,7 @@ call_status(struct rpc_task *task)
case -ECONNRESET: case -ECONNRESET:
case -ECONNABORTED: case -ECONNABORTED:
rpc_force_rebind(clnt); rpc_force_rebind(clnt);
case -ENOBUFS:
rpc_delay(task, 3*HZ); rpc_delay(task, 3*HZ);
case -EPIPE: case -EPIPE:
case -ENOTCONN: case -ENOTCONN:

View file

@ -195,7 +195,7 @@ static struct inode *
rpc_alloc_inode(struct super_block *sb) rpc_alloc_inode(struct super_block *sb)
{ {
struct rpc_inode *rpci; struct rpc_inode *rpci;
rpci = (struct rpc_inode *)kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL); rpci = kmem_cache_alloc(rpc_inode_cachep, GFP_KERNEL);
if (!rpci) if (!rpci)
return NULL; return NULL;
return &rpci->vfs_inode; return &rpci->vfs_inode;

View file

@ -744,6 +744,7 @@ static void xprt_connect_status(struct rpc_task *task)
case -ECONNABORTED: case -ECONNABORTED:
case -ENETUNREACH: case -ENETUNREACH:
case -EHOSTUNREACH: case -EHOSTUNREACH:
case -EPIPE:
case -EAGAIN: case -EAGAIN:
dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid); dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
break; break;

View file

@ -53,14 +53,6 @@
# define RPCDBG_FACILITY RPCDBG_TRANS # define RPCDBG_FACILITY RPCDBG_TRANS
#endif #endif
enum rpcrdma_chunktype {
rpcrdma_noch = 0,
rpcrdma_readch,
rpcrdma_areadch,
rpcrdma_writech,
rpcrdma_replych
};
#ifdef RPC_DEBUG #ifdef RPC_DEBUG
static const char transfertypes[][12] = { static const char transfertypes[][12] = {
"pure inline", /* no chunks */ "pure inline", /* no chunks */
@ -279,12 +271,36 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
return (unsigned char *)iptr - (unsigned char *)headerp; return (unsigned char *)iptr - (unsigned char *)headerp;
out: out:
for (pos = 0; nchunks--;) if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) {
pos += rpcrdma_deregister_external( for (pos = 0; nchunks--;)
&req->rl_segments[pos], r_xprt); pos += rpcrdma_deregister_external(
&req->rl_segments[pos], r_xprt);
}
return n; return n;
} }
/*
 * Marshal the chunk list selected for this request.
 *
 * A read-type chunk list (rl_rtype) is built from the send buffer; if
 * there is none, a write-type list (rl_wtype) is built from the receive
 * buffer. When neither type is set, the caller-supplied result is
 * passed through untouched.
 *
 * Returns positive RPC/RDMA header size, or negative errno.
 */
ssize_t
rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result)
{
	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
	struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)req->rl_base;

	if (req->rl_rtype != rpcrdma_noch)
		return rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
					     headerp, req->rl_rtype);

	if (req->rl_wtype != rpcrdma_noch)
		return rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf,
					     headerp, req->rl_wtype);

	return result;
}
/* /*
* Copy write data inline. * Copy write data inline.
* This function is used for "small" requests. Data which is passed * This function is used for "small" requests. Data which is passed
@ -377,7 +393,6 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
char *base; char *base;
size_t rpclen, padlen; size_t rpclen, padlen;
ssize_t hdrlen; ssize_t hdrlen;
enum rpcrdma_chunktype rtype, wtype;
struct rpcrdma_msg *headerp; struct rpcrdma_msg *headerp;
/* /*
@ -415,13 +430,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* into pages; otherwise use reply chunks. * into pages; otherwise use reply chunks.
*/ */
if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
wtype = rpcrdma_noch; req->rl_wtype = rpcrdma_noch;
else if (rqst->rq_rcv_buf.page_len == 0) else if (rqst->rq_rcv_buf.page_len == 0)
wtype = rpcrdma_replych; req->rl_wtype = rpcrdma_replych;
else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) else if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
wtype = rpcrdma_writech; req->rl_wtype = rpcrdma_writech;
else else
wtype = rpcrdma_replych; req->rl_wtype = rpcrdma_replych;
/* /*
* Chunks needed for arguments? * Chunks needed for arguments?
@ -438,16 +453,16 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* TBD check NFSv4 setacl * TBD check NFSv4 setacl
*/ */
if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
rtype = rpcrdma_noch; req->rl_rtype = rpcrdma_noch;
else if (rqst->rq_snd_buf.page_len == 0) else if (rqst->rq_snd_buf.page_len == 0)
rtype = rpcrdma_areadch; req->rl_rtype = rpcrdma_areadch;
else else
rtype = rpcrdma_readch; req->rl_rtype = rpcrdma_readch;
/* The following simplification is not true forever */ /* The following simplification is not true forever */
if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych)
wtype = rpcrdma_noch; req->rl_wtype = rpcrdma_noch;
if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) { if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) {
dprintk("RPC: %s: cannot marshal multiple chunk lists\n", dprintk("RPC: %s: cannot marshal multiple chunk lists\n",
__func__); __func__);
return -EIO; return -EIO;
@ -461,7 +476,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* When padding is in use and applies to the transfer, insert * When padding is in use and applies to the transfer, insert
* it and change the message type. * it and change the message type.
*/ */
if (rtype == rpcrdma_noch) { if (req->rl_rtype == rpcrdma_noch) {
padlen = rpcrdma_inline_pullup(rqst, padlen = rpcrdma_inline_pullup(rqst,
RPCRDMA_INLINE_PAD_VALUE(rqst)); RPCRDMA_INLINE_PAD_VALUE(rqst));
@ -476,7 +491,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
if (wtype != rpcrdma_noch) { if (req->rl_wtype != rpcrdma_noch) {
dprintk("RPC: %s: invalid chunk list\n", dprintk("RPC: %s: invalid chunk list\n",
__func__); __func__);
return -EIO; return -EIO;
@ -497,30 +512,18 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* on receive. Therefore, we request a reply chunk * on receive. Therefore, we request a reply chunk
* for non-writes wherever feasible and efficient. * for non-writes wherever feasible and efficient.
*/ */
if (wtype == rpcrdma_noch) if (req->rl_wtype == rpcrdma_noch)
wtype = rpcrdma_replych; req->rl_wtype = rpcrdma_replych;
} }
} }
/* hdrlen = rpcrdma_marshal_chunks(rqst, hdrlen);
* Marshal chunks. This routine will return the header length
* consumed by marshaling.
*/
if (rtype != rpcrdma_noch) {
hdrlen = rpcrdma_create_chunks(rqst,
&rqst->rq_snd_buf, headerp, rtype);
wtype = rtype; /* simplify dprintk */
} else if (wtype != rpcrdma_noch) {
hdrlen = rpcrdma_create_chunks(rqst,
&rqst->rq_rcv_buf, headerp, wtype);
}
if (hdrlen < 0) if (hdrlen < 0)
return hdrlen; return hdrlen;
dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
" headerp 0x%p base 0x%p lkey 0x%x\n", " headerp 0x%p base 0x%p lkey 0x%x\n",
__func__, transfertypes[wtype], hdrlen, rpclen, padlen, __func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen,
headerp, base, req->rl_iov.lkey); headerp, base, req->rl_iov.lkey);
/* /*

View file

@ -296,7 +296,6 @@ xprt_setup_rdma(struct xprt_create *args)
xprt->resvport = 0; /* privileged port not needed */ xprt->resvport = 0; /* privileged port not needed */
xprt->tsh_size = 0; /* RPC-RDMA handles framing */ xprt->tsh_size = 0; /* RPC-RDMA handles framing */
xprt->max_payload = RPCRDMA_MAX_DATA_SEGS * PAGE_SIZE;
xprt->ops = &xprt_rdma_procs; xprt->ops = &xprt_rdma_procs;
/* /*
@ -382,6 +381,9 @@ xprt_setup_rdma(struct xprt_create *args)
new_ep->rep_xprt = xprt; new_ep->rep_xprt = xprt;
xprt_rdma_format_addresses(xprt); xprt_rdma_format_addresses(xprt);
xprt->max_payload = rpcrdma_max_payload(new_xprt);
dprintk("RPC: %s: transport data payload maximum: %zu bytes\n",
__func__, xprt->max_payload);
if (!try_module_get(THIS_MODULE)) if (!try_module_get(THIS_MODULE))
goto out4; goto out4;
@ -412,7 +414,7 @@ xprt_rdma_close(struct rpc_xprt *xprt)
if (r_xprt->rx_ep.rep_connected > 0) if (r_xprt->rx_ep.rep_connected > 0)
xprt->reestablish_timeout = 0; xprt->reestablish_timeout = 0;
xprt_disconnect_done(xprt); xprt_disconnect_done(xprt);
(void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
} }
static void static void
@ -595,13 +597,14 @@ xprt_rdma_send_request(struct rpc_task *task)
struct rpc_xprt *xprt = rqst->rq_xprt; struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
int rc; int rc = 0;
if (req->rl_niovs == 0) { if (req->rl_niovs == 0)
rc = rpcrdma_marshal_req(rqst); rc = rpcrdma_marshal_req(rqst);
if (rc < 0) else if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
goto failed_marshal; rc = rpcrdma_marshal_chunks(rqst, 0);
} if (rc < 0)
goto failed_marshal;
if (req->rl_reply == NULL) /* e.g. reconnection */ if (req->rl_reply == NULL) /* e.g. reconnection */
rpcrdma_recv_buffer_get(req); rpcrdma_recv_buffer_get(req);

File diff suppressed because it is too large Load diff

View file

@ -59,6 +59,7 @@
* Interface Adapter -- one per transport instance * Interface Adapter -- one per transport instance
*/ */
struct rpcrdma_ia { struct rpcrdma_ia {
rwlock_t ri_qplock;
struct rdma_cm_id *ri_id; struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd; struct ib_pd *ri_pd;
struct ib_mr *ri_bind_mem; struct ib_mr *ri_bind_mem;
@ -98,6 +99,14 @@ struct rpcrdma_ep {
#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
/*
 * Chunk list type chosen for a request; rpcrdma_marshal_req() records it
 * in rl_rtype (send side) and rl_wtype (receive side). The value is also
 * used to index the transfertypes[] debug strings, so the numbering
 * matters.
 */
enum rpcrdma_chunktype {
rpcrdma_noch = 0, /* no chunks: data fits inline */
rpcrdma_readch, /* send buffer over the inline threshold, has page data */
rpcrdma_areadch, /* send buffer over the inline threshold, no page data */
rpcrdma_writech, /* large receive buffer with page data and XDRBUF_READ set */
rpcrdma_replych /* large receive buffer otherwise */
};
/* /*
* struct rpcrdma_rep -- this structure encapsulates state required to recv * struct rpcrdma_rep -- this structure encapsulates state required to recv
* and complete a reply, asynchronously. It needs several pieces of * and complete a reply, asynchronously. It needs several pieces of
@ -136,6 +145,40 @@ struct rpcrdma_rep {
char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */
}; };
/*
* struct rpcrdma_mw - external memory region metadata
*
* An external memory region is any buffer or page that is registered
* on the fly (ie, not pre-registered).
*
* Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During
* call_allocate, rpcrdma_buffer_get() assigns one to each segment in
* an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
* track of registration metadata while each RPC is pending.
* rpcrdma_deregister_external() uses this metadata to unmap and
* release these resources when an RPC is complete.
*/
/* State of one FRMR, kept in rpcrdma_frmr.fr_state. */
enum rpcrdma_frmr_state {
FRMR_IS_INVALID, /* ready to be used */
FRMR_IS_VALID, /* in use */
FRMR_IS_STALE, /* failed completion */
};
/*
 * Per-FRMR registration metadata, embedded in struct rpcrdma_mw.
 */
struct rpcrdma_frmr {
struct ib_fast_reg_page_list *fr_pgl; /* NOTE(review): presumably the page list handed to the fast-register work request - confirm */
struct ib_mr *fr_mr; /* memory region backing this FRMR */
enum rpcrdma_frmr_state fr_state; /* one of FRMR_IS_INVALID/VALID/STALE */
};
/*
 * One external memory region descriptor; an rpcrdma_mr_seg points at it
 * through mr_chunk.rl_mw.
 */
struct rpcrdma_mw {
/* NOTE(review): which member is live presumably depends on the memory registration strategy in use - confirm */
union {
struct ib_fmr *fmr;
struct rpcrdma_frmr frmr;
} r;
struct list_head mw_list; /* linkage on the free MW list anchored in rb_mws */
struct list_head mw_all; /* NOTE(review): presumably linkage on rpcrdma_buffer.rb_all - confirm */
};
/* /*
* struct rpcrdma_req -- structure central to the request/reply sequence. * struct rpcrdma_req -- structure central to the request/reply sequence.
* *
@ -163,17 +206,7 @@ struct rpcrdma_rep {
struct rpcrdma_mr_seg { /* chunk descriptors */ struct rpcrdma_mr_seg { /* chunk descriptors */
union { /* chunk memory handles */ union { /* chunk memory handles */
struct ib_mr *rl_mr; /* if registered directly */ struct ib_mr *rl_mr; /* if registered directly */
struct rpcrdma_mw { /* if registered from region */ struct rpcrdma_mw *rl_mw; /* if registered from region */
union {
struct ib_fmr *fmr;
struct {
struct ib_fast_reg_page_list *fr_pgl;
struct ib_mr *fr_mr;
enum { FRMR_IS_INVALID, FRMR_IS_VALID } state;
} frmr;
} r;
struct list_head mw_list;
} *rl_mw;
} mr_chunk; } mr_chunk;
u64 mr_base; /* registration result */ u64 mr_base; /* registration result */
u32 mr_rkey; /* registration result */ u32 mr_rkey; /* registration result */
@ -191,6 +224,7 @@ struct rpcrdma_req {
unsigned int rl_niovs; /* 0, 2 or 4 */ unsigned int rl_niovs; /* 0, 2 or 4 */
unsigned int rl_nchunks; /* non-zero if chunks */ unsigned int rl_nchunks; /* non-zero if chunks */
unsigned int rl_connect_cookie; /* retry detection */ unsigned int rl_connect_cookie; /* retry detection */
enum rpcrdma_chunktype rl_rtype, rl_wtype;
struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
@ -214,6 +248,7 @@ struct rpcrdma_buffer {
atomic_t rb_credits; /* most recent server credits */ atomic_t rb_credits; /* most recent server credits */
int rb_max_requests;/* client max requests */ int rb_max_requests;/* client max requests */
struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
struct list_head rb_all;
int rb_send_index; int rb_send_index;
struct rpcrdma_req **rb_send_bufs; struct rpcrdma_req **rb_send_bufs;
int rb_recv_index; int rb_recv_index;
@ -306,7 +341,7 @@ int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
struct rpcrdma_create_data_internal *); struct rpcrdma_create_data_internal *);
void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
struct rpcrdma_req *); struct rpcrdma_req *);
@ -346,7 +381,9 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
/* /*
* RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
*/ */
ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *, ssize_t);
int rpcrdma_marshal_req(struct rpc_rqst *); int rpcrdma_marshal_req(struct rpc_rqst *);
size_t rpcrdma_max_payload(struct rpcrdma_xprt *);
/* Temporary NFS request map cache. Created in svc_rdma.c */ /* Temporary NFS request map cache. Created in svc_rdma.c */
extern struct kmem_cache *svc_rdma_map_cachep; extern struct kmem_cache *svc_rdma_map_cachep;

View file

@ -594,6 +594,7 @@ static int xs_local_send_request(struct rpc_task *task)
} }
switch (status) { switch (status) {
case -ENOBUFS:
case -EAGAIN: case -EAGAIN:
status = xs_nospace(task); status = xs_nospace(task);
break; break;
@ -661,6 +662,7 @@ static int xs_udp_send_request(struct rpc_task *task)
dprintk("RPC: sendmsg returned unrecognized error %d\n", dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status); -status);
case -ENETUNREACH: case -ENETUNREACH:
case -ENOBUFS:
case -EPIPE: case -EPIPE:
case -ECONNREFUSED: case -ECONNREFUSED:
/* When the server has died, an ICMP port unreachable message /* When the server has died, an ICMP port unreachable message
@ -758,6 +760,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
status = -ENOTCONN; status = -ENOTCONN;
/* Should we call xs_close() here? */ /* Should we call xs_close() here? */
break; break;
case -ENOBUFS:
case -EAGAIN: case -EAGAIN:
status = xs_nospace(task); status = xs_nospace(task);
break; break;
@ -1946,6 +1949,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
dprintk("RPC: xprt %p connected to %s\n", dprintk("RPC: xprt %p connected to %s\n",
xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
xprt_set_connected(xprt); xprt_set_connected(xprt);
case -ENOBUFS:
break; break;
case -ENOENT: case -ENOENT:
dprintk("RPC: xprt %p: socket %s does not exist\n", dprintk("RPC: xprt %p: socket %s does not exist\n",
@ -2281,6 +2285,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
case -ECONNREFUSED: case -ECONNREFUSED:
case -ECONNRESET: case -ECONNRESET:
case -ENETUNREACH: case -ENETUNREACH:
case -ENOBUFS:
/* retry with existing socket, after a delay */ /* retry with existing socket, after a delay */
goto out; goto out;
} }
@ -3054,12 +3059,12 @@ static int param_set_uint_minmax(const char *val,
const struct kernel_param *kp, const struct kernel_param *kp,
unsigned int min, unsigned int max) unsigned int min, unsigned int max)
{ {
unsigned long num; unsigned int num;
int ret; int ret;
if (!val) if (!val)
return -EINVAL; return -EINVAL;
ret = strict_strtoul(val, 0, &num); ret = kstrtouint(val, 0, &num);
if (ret == -EINVAL || num < min || num > max) if (ret == -EINVAL || num < min || num > max)
return -EINVAL; return -EINVAL;
*((unsigned int *)kp->arg) = num; *((unsigned int *)kp->arg) = num;