Initial 4.4 merge window submission

- "Checksum offload support in user space" enablement
 - Misc cxgb4 fixes, add T6 support
 - Misc usnic fixes
 - 32 bit build warning fixes
 - Misc ocrdma fixes
 - Multicast loopback prevention extension
 - Extend the GID cache to store and return attributes of GIDs
 - Misc iSER updates
 - iSER clustering update
 - Network NameSpace support for rdma CM
 - Work Request cleanup series
 - New Memory Registration API
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJWPO5UAAoJELgmozMOVy/dSCQP/iX2ImMZOS3VkOYKhLR3dSv8
 4vTEiYIoAT1JEXiPpiabuuACwotcZcMRk9kZ0dcWmBoFusTzKJmoDOkgAYd95XqY
 EsAyjqtzUGNNMjH5u5W+kdbaFdH9Ktq7IJvspRlJuvzC47Srax+qBxX01jrAkDgh
 4PoA3hEa2KkvkDjY2Mhvk9EWd/uflO9Ky6o0D8jUQkWtEvKBRyDjQLk30oW6wHX9
 pTWqww3dD0EXTrR+PDA88v2saKH1kZFU1Nt2eU8Bw+zlJM8hcX6U7PfRX0g3HT/J
 o+7ejTdLPWFDH35gJOU+KE519f1JbwfRjPJCqbOC9IttBB7iHSbhcpQLpWv4JV1x
 agdBeDA3TGQj3dHb2SkYMlWXCBp7q8UCbVGvvirTFzGSGU73sc6hhP+vCKvPQIlE
 Ah5tUqD7Y3mOBjvuDeIzKMLXILd5d3cH+m7Laytrf5e7fJPmBRZyOkcMh0QVElyl
 mKo+PFjghgeTFb405J7SDDw/vThVyN9HyIt7AGEzObaajzOOk9R1hkQr46XVy9TK
 yi58fl85yQ2n6TWV6NRnvkQoMy/N2HAEuXk/7HtO0PabV5w3Lo0zvXB9SnVrrVEm
 58FWRBYCWorVSdSacuDnPm0iz45WSRIb9G9sBlhEC93eXRq2rSBoy4RvyLeliHFH
 hllyhNNolI6FJ64j07Xm
 =bBIY
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma updates from Doug Ledford:
 "This is my initial round of 4.4 merge window patches.  There are a few
  other things I wish to get in for 4.4 that aren't in this pull, as
  this represents what has gone through merge/build/run testing and not
  what is the last few items for which testing is not yet complete.

   - "Checksum offload support in user space" enablement
   - Misc cxgb4 fixes, add T6 support
   - Misc usnic fixes
   - 32 bit build warning fixes
   - Misc ocrdma fixes
   - Multicast loopback prevention extension
   - Extend the GID cache to store and return attributes of GIDs
   - Misc iSER updates
   - iSER clustering update
   - Network NameSpace support for rdma CM
   - Work Request cleanup series
   - New Memory Registration API"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (76 commits)
  IB/core, cma: Make __attribute_const__ declarations sparse-friendly
  IB/core: Remove old fast registration API
  IB/ipath: Remove fast registration from the code
  IB/hfi1: Remove fast registration from the code
  RDMA/nes: Remove old FRWR API
  IB/qib: Remove old FRWR API
  iw_cxgb4: Remove old FRWR API
  RDMA/cxgb3: Remove old FRWR API
  RDMA/ocrdma: Remove old FRWR API
  IB/mlx4: Remove old FRWR API support
  IB/mlx5: Remove old FRWR API support
  IB/srp: Dont allocate a page vector when using fast_reg
  IB/srp: Remove srp_finish_mapping
  IB/srp: Convert to new registration API
  IB/srp: Split srp_map_sg
  RDS/IW: Convert to new memory registration API
  svcrdma: Port to new memory registration API
  xprtrdma: Port to new memory registration API
  iser-target: Port to new memory registration API
  IB/iser: Port to new fast registration API
  ...
This commit is contained in:
Linus Torvalds 2015-11-07 13:33:07 -08:00
commit ab9f2faf8f
140 changed files with 3599 additions and 3014 deletions

View file

@ -336,7 +336,7 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr)
/* Create a CMA ID and try to bind it. This catches both
* IB and iWARP capable NICs.
*/
cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);

View file

@ -75,7 +75,11 @@ struct rds_ib_connect_private {
struct rds_ib_send_work {
void *s_op;
struct ib_send_wr s_wr;
union {
struct ib_send_wr s_wr;
struct ib_rdma_wr s_rdma_wr;
struct ib_atomic_wr s_atomic_wr;
};
struct ib_sge s_sge[RDS_IB_MAX_SGE];
unsigned long s_queued;
};

View file

@ -668,7 +668,7 @@ int rds_ib_conn_connect(struct rds_connection *conn)
/* XXX I wonder what effect the port space has */
/* delegate cm event handler to rdma_transport */
ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(ic->i_cm_id)) {
ret = PTR_ERR(ic->i_cm_id);

View file

@ -777,23 +777,23 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
send->s_queued = jiffies;
if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare;
send->s_wr.wr.atomic.swap = op->op_m_cswp.swap;
send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask;
send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask;
send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
send->s_atomic_wr.compare_add = op->op_m_cswp.compare;
send->s_atomic_wr.swap = op->op_m_cswp.swap;
send->s_atomic_wr.compare_add_mask = op->op_m_cswp.compare_mask;
send->s_atomic_wr.swap_mask = op->op_m_cswp.swap_mask;
} else { /* FADD */
send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add;
send->s_wr.wr.atomic.swap = 0;
send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask;
send->s_wr.wr.atomic.swap_mask = 0;
send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
send->s_atomic_wr.compare_add = op->op_m_fadd.add;
send->s_atomic_wr.swap = 0;
send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask;
send->s_atomic_wr.swap_mask = 0;
}
nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
send->s_wr.num_sge = 1;
send->s_wr.next = NULL;
send->s_wr.wr.atomic.remote_addr = op->op_remote_addr;
send->s_wr.wr.atomic.rkey = op->op_rkey;
send->s_atomic_wr.wr.num_sge = 1;
send->s_atomic_wr.wr.next = NULL;
send->s_atomic_wr.remote_addr = op->op_remote_addr;
send->s_atomic_wr.rkey = op->op_rkey;
send->s_op = op;
rds_message_addref(container_of(send->s_op, struct rds_message, atomic));
@ -818,11 +818,11 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
if (nr_sig)
atomic_add(nr_sig, &ic->i_signaled_sends);
failed_wr = &send->s_wr;
ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr);
failed_wr = &send->s_atomic_wr.wr;
ret = ib_post_send(ic->i_cm_id->qp, &send->s_atomic_wr.wr, &failed_wr);
rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
send, &send->s_wr, ret, failed_wr);
BUG_ON(failed_wr != &send->s_wr);
send, &send->s_atomic_wr, ret, failed_wr);
BUG_ON(failed_wr != &send->s_atomic_wr.wr);
if (ret) {
printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
"returned %d\n", &conn->c_faddr, ret);
@ -831,9 +831,9 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
goto out;
}
if (unlikely(failed_wr != &send->s_wr)) {
if (unlikely(failed_wr != &send->s_atomic_wr.wr)) {
printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
BUG_ON(failed_wr != &send->s_wr);
BUG_ON(failed_wr != &send->s_atomic_wr.wr);
}
out:
@ -904,22 +904,23 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);
send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
send->s_wr.wr.rdma.remote_addr = remote_addr;
send->s_wr.wr.rdma.rkey = op->op_rkey;
send->s_rdma_wr.remote_addr = remote_addr;
send->s_rdma_wr.rkey = op->op_rkey;
if (num_sge > max_sge) {
send->s_wr.num_sge = max_sge;
send->s_rdma_wr.wr.num_sge = max_sge;
num_sge -= max_sge;
} else {
send->s_wr.num_sge = num_sge;
send->s_rdma_wr.wr.num_sge = num_sge;
}
send->s_wr.next = NULL;
send->s_rdma_wr.wr.next = NULL;
if (prev)
prev->s_wr.next = &send->s_wr;
prev->s_rdma_wr.wr.next = &send->s_rdma_wr.wr;
for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
scat != &op->op_sg[op->op_count]; j++) {
len = ib_sg_dma_len(ic->i_cm_id->device, scat);
send->s_sge[j].addr =
ib_sg_dma_address(ic->i_cm_id->device, scat);
@ -934,7 +935,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
}
rdsdebug("send %p wr %p num_sge %u next %p\n", send,
&send->s_wr, send->s_wr.num_sge, send->s_wr.next);
&send->s_rdma_wr.wr,
send->s_rdma_wr.wr.num_sge,
send->s_rdma_wr.wr.next);
prev = send;
if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
@ -955,11 +958,11 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
if (nr_sig)
atomic_add(nr_sig, &ic->i_signaled_sends);
failed_wr = &first->s_wr;
ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
failed_wr = &first->s_rdma_wr.wr;
ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
first, &first->s_wr, ret, failed_wr);
BUG_ON(failed_wr != &first->s_wr);
first, &first->s_rdma_wr.wr, ret, failed_wr);
BUG_ON(failed_wr != &first->s_rdma_wr.wr);
if (ret) {
printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
"returned %d\n", &conn->c_faddr, ret);
@ -968,9 +971,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
goto out;
}
if (unlikely(failed_wr != &first->s_wr)) {
if (unlikely(failed_wr != &first->s_rdma_wr.wr)) {
printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
BUG_ON(failed_wr != &first->s_wr);
BUG_ON(failed_wr != &first->s_rdma_wr.wr);
}

View file

@ -223,7 +223,7 @@ static int rds_iw_laddr_check(struct net *net, __be32 addr)
/* Create a CMA ID and try to bind it. This catches both
* IB and iWARP capable NICs.
*/
cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);

View file

@ -74,10 +74,13 @@ struct rds_iw_send_work {
struct rm_rdma_op *s_op;
struct rds_iw_mapping *s_mapping;
struct ib_mr *s_mr;
struct ib_fast_reg_page_list *s_page_list;
unsigned char s_remap_count;
struct ib_send_wr s_wr;
union {
struct ib_send_wr s_send_wr;
struct ib_rdma_wr s_rdma_wr;
struct ib_reg_wr s_reg_wr;
};
struct ib_sge s_sge[RDS_IW_MAX_SGE];
unsigned long s_queued;
};
@ -195,7 +198,7 @@ struct rds_iw_device {
/* Magic WR_ID for ACKs */
#define RDS_IW_ACK_WR_ID ((u64)0xffffffffffffffffULL)
#define RDS_IW_FAST_REG_WR_ID ((u64)0xefefefefefefefefULL)
#define RDS_IW_REG_WR_ID ((u64)0xefefefefefefefefULL)
#define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL)
struct rds_iw_statistics {

View file

@ -524,7 +524,7 @@ int rds_iw_conn_connect(struct rds_connection *conn)
/* XXX I wonder what effect the port space has */
/* delegate cm event handler to rdma_transport */
ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(ic->i_cm_id)) {
ret = PTR_ERR(ic->i_cm_id);

View file

@ -47,7 +47,6 @@ struct rds_iw_mr {
struct rdma_cm_id *cm_id;
struct ib_mr *mr;
struct ib_fast_reg_page_list *page_list;
struct rds_iw_mapping mapping;
unsigned char remap_count;
@ -77,8 +76,8 @@ struct rds_iw_mr_pool {
static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all);
static void rds_iw_mr_pool_flush_worker(struct work_struct *work);
static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
static int rds_iw_init_reg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
struct rds_iw_mr *ibmr,
struct scatterlist *sg, unsigned int nents);
static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
@ -258,19 +257,18 @@ static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg,
sg->bytes = 0;
}
static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
struct rds_iw_scatterlist *sg)
static int rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
struct rds_iw_scatterlist *sg)
{
struct ib_device *dev = rds_iwdev->dev;
u64 *dma_pages = NULL;
int i, j, ret;
int i, ret;
WARN_ON(sg->dma_len);
sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
if (unlikely(!sg->dma_len)) {
printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n");
return ERR_PTR(-EBUSY);
return -EBUSY;
}
sg->bytes = 0;
@ -303,31 +301,14 @@ static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
if (sg->dma_npages > fastreg_message_size)
goto out_unmap;
dma_pages = kmalloc(sizeof(u64) * sg->dma_npages, GFP_ATOMIC);
if (!dma_pages) {
ret = -ENOMEM;
goto out_unmap;
}
for (i = j = 0; i < sg->dma_len; ++i) {
unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]);
u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]);
u64 end_addr;
end_addr = dma_addr + dma_len;
dma_addr &= ~PAGE_MASK;
for (; dma_addr < end_addr; dma_addr += PAGE_SIZE)
dma_pages[j++] = dma_addr;
BUG_ON(j > sg->dma_npages);
}
return dma_pages;
return 0;
out_unmap:
ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
sg->dma_len = 0;
kfree(dma_pages);
return ERR_PTR(ret);
return ret;
}
@ -440,7 +421,7 @@ static struct rds_iw_mr *rds_iw_alloc_mr(struct rds_iw_device *rds_iwdev)
INIT_LIST_HEAD(&ibmr->mapping.m_list);
ibmr->mapping.m_mr = ibmr;
err = rds_iw_init_fastreg(pool, ibmr);
err = rds_iw_init_reg(pool, ibmr);
if (err)
goto out_no_cigar;
@ -620,7 +601,7 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
ibmr->cm_id = cm_id;
ibmr->device = rds_iwdev;
ret = rds_iw_map_fastreg(rds_iwdev->mr_pool, ibmr, sg, nents);
ret = rds_iw_map_reg(rds_iwdev->mr_pool, ibmr, sg, nents);
if (ret == 0)
*key_ret = ibmr->mr->rkey;
else
@ -636,7 +617,7 @@ out:
}
/*
* iWARP fastreg handling
* iWARP reg handling
*
* The life cycle of a fastreg registration is a bit different from
* FMRs.
@ -648,7 +629,7 @@ out:
* This creates a bit of a problem for us, as we do not have the destination
* IP in GET_MR, so the connection must be setup prior to the GET_MR call for
* RDMA to be correctly setup. If a fastreg request is present, rds_iw_xmit
* will try to queue a LOCAL_INV (if needed) and a FAST_REG_MR work request
* will try to queue a LOCAL_INV (if needed) and a REG_MR work request
* before queuing the SEND. When completions for these arrive, they are
* dispatched to the MR, which has a bit set showing that RDMA can be performed.
*
@ -657,11 +638,10 @@ out:
* The expectation there is that this invalidation step includes ALL
* PREVIOUSLY FREED MRs.
*/
static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool,
struct rds_iw_mr *ibmr)
static int rds_iw_init_reg(struct rds_iw_mr_pool *pool,
struct rds_iw_mr *ibmr)
{
struct rds_iw_device *rds_iwdev = pool->device;
struct ib_fast_reg_page_list *page_list = NULL;
struct ib_mr *mr;
int err;
@ -674,55 +654,44 @@ static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool,
return err;
}
/* FIXME - this is overkill, but mapping->m_sg.dma_len/mapping->m_sg.dma_npages
* is not filled in.
*/
page_list = ib_alloc_fast_reg_page_list(rds_iwdev->dev, pool->max_message_size);
if (IS_ERR(page_list)) {
err = PTR_ERR(page_list);
printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed (err=%d)\n", err);
ib_dereg_mr(mr);
return err;
}
ibmr->page_list = page_list;
ibmr->mr = mr;
return 0;
}
static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping)
static int rds_iw_rdma_reg_mr(struct rds_iw_mapping *mapping)
{
struct rds_iw_mr *ibmr = mapping->m_mr;
struct ib_send_wr f_wr, *failed_wr;
int ret;
struct rds_iw_scatterlist *m_sg = &mapping->m_sg;
struct ib_reg_wr reg_wr;
struct ib_send_wr *failed_wr;
int ret, n;
n = ib_map_mr_sg_zbva(ibmr->mr, m_sg->list, m_sg->len, PAGE_SIZE);
if (unlikely(n != m_sg->len))
return n < 0 ? n : -EINVAL;
reg_wr.wr.next = NULL;
reg_wr.wr.opcode = IB_WR_REG_MR;
reg_wr.wr.wr_id = RDS_IW_REG_WR_ID;
reg_wr.wr.num_sge = 0;
reg_wr.mr = ibmr->mr;
reg_wr.key = mapping->m_rkey;
reg_wr.access = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE;
/*
* Perform a WR for the fast_reg_mr. Each individual page
* Perform a WR for the reg_mr. Each individual page
* in the sg list is added to the fast reg page list and placed
* inside the fast_reg_mr WR. The key used is a rolling 8bit
* inside the reg_mr WR. The key used is a rolling 8bit
* counter, which should guarantee uniqueness.
*/
ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++);
mapping->m_rkey = ibmr->mr->rkey;
memset(&f_wr, 0, sizeof(f_wr));
f_wr.wr_id = RDS_IW_FAST_REG_WR_ID;
f_wr.opcode = IB_WR_FAST_REG_MR;
f_wr.wr.fast_reg.length = mapping->m_sg.bytes;
f_wr.wr.fast_reg.rkey = mapping->m_rkey;
f_wr.wr.fast_reg.page_list = ibmr->page_list;
f_wr.wr.fast_reg.page_list_len = mapping->m_sg.dma_len;
f_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
f_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE;
f_wr.wr.fast_reg.iova_start = 0;
f_wr.send_flags = IB_SEND_SIGNALED;
failed_wr = &f_wr;
ret = ib_post_send(ibmr->cm_id->qp, &f_wr, &failed_wr);
BUG_ON(failed_wr != &f_wr);
failed_wr = &reg_wr.wr;
ret = ib_post_send(ibmr->cm_id->qp, &reg_wr.wr, &failed_wr);
BUG_ON(failed_wr != &reg_wr.wr);
if (ret)
printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n",
__func__, __LINE__, ret);
@ -754,21 +723,20 @@ out:
return ret;
}
static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
struct rds_iw_mr *ibmr,
struct scatterlist *sg,
unsigned int sg_len)
static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
struct rds_iw_mr *ibmr,
struct scatterlist *sg,
unsigned int sg_len)
{
struct rds_iw_device *rds_iwdev = pool->device;
struct rds_iw_mapping *mapping = &ibmr->mapping;
u64 *dma_pages;
int i, ret = 0;
int ret = 0;
rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len);
dma_pages = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg);
if (IS_ERR(dma_pages)) {
ret = PTR_ERR(dma_pages);
ret = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg);
if (ret) {
dma_pages = NULL;
goto out;
}
@ -778,10 +746,7 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
goto out;
}
for (i = 0; i < mapping->m_sg.dma_npages; ++i)
ibmr->page_list->page_list[i] = dma_pages[i];
ret = rds_iw_rdma_build_fastreg(mapping);
ret = rds_iw_rdma_reg_mr(mapping);
if (ret)
goto out;
@ -867,8 +832,6 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool,
struct rds_iw_mr *ibmr)
{
if (ibmr->page_list)
ib_free_fast_reg_page_list(ibmr->page_list);
if (ibmr->mr)
ib_dereg_mr(ibmr->mr);
}

View file

@ -137,13 +137,13 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic)
send->s_op = NULL;
send->s_mapping = NULL;
send->s_wr.next = NULL;
send->s_wr.wr_id = i;
send->s_wr.sg_list = send->s_sge;
send->s_wr.num_sge = 1;
send->s_wr.opcode = IB_WR_SEND;
send->s_wr.send_flags = 0;
send->s_wr.ex.imm_data = 0;
send->s_send_wr.next = NULL;
send->s_send_wr.wr_id = i;
send->s_send_wr.sg_list = send->s_sge;
send->s_send_wr.num_sge = 1;
send->s_send_wr.opcode = IB_WR_SEND;
send->s_send_wr.send_flags = 0;
send->s_send_wr.ex.imm_data = 0;
sge = rds_iw_data_sge(ic, send->s_sge);
sge->lkey = 0;
@ -159,13 +159,6 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic)
printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n");
break;
}
send->s_page_list = ib_alloc_fast_reg_page_list(
ic->i_cm_id->device, fastreg_message_size);
if (IS_ERR(send->s_page_list)) {
printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed\n");
break;
}
}
}
@ -177,9 +170,7 @@ void rds_iw_send_clear_ring(struct rds_iw_connection *ic)
for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
BUG_ON(!send->s_mr);
ib_dereg_mr(send->s_mr);
BUG_ON(!send->s_page_list);
ib_free_fast_reg_page_list(send->s_page_list);
if (send->s_wr.opcode == 0xdead)
if (send->s_send_wr.opcode == 0xdead)
continue;
if (send->s_rm)
rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
@ -227,7 +218,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
continue;
}
if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == RDS_IW_FAST_REG_WR_ID) {
if (wc.opcode == IB_WC_REG_MR && wc.wr_id == RDS_IW_REG_WR_ID) {
ic->i_fastreg_posted = 1;
continue;
}
@ -247,12 +238,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
send = &ic->i_sends[oldest];
/* In the error case, wc.opcode sometimes contains garbage */
switch (send->s_wr.opcode) {
switch (send->s_send_wr.opcode) {
case IB_WR_SEND:
if (send->s_rm)
rds_iw_send_unmap_rm(ic, send, wc.status);
break;
case IB_WR_FAST_REG_MR:
case IB_WR_REG_MR:
case IB_WR_RDMA_WRITE:
case IB_WR_RDMA_READ:
case IB_WR_RDMA_READ_WITH_INV:
@ -262,12 +253,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
default:
printk_ratelimited(KERN_NOTICE
"RDS/IW: %s: unexpected opcode 0x%x in WR!\n",
__func__, send->s_wr.opcode);
__func__, send->s_send_wr.opcode);
break;
}
send->s_wr.opcode = 0xdead;
send->s_wr.num_sge = 1;
send->s_send_wr.opcode = 0xdead;
send->s_send_wr.num_sge = 1;
if (time_after(jiffies, send->s_queued + HZ/2))
rds_iw_stats_inc(s_iw_tx_stalled);
@ -455,10 +446,10 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
WARN_ON(pos != send - ic->i_sends);
send->s_wr.send_flags = send_flags;
send->s_wr.opcode = IB_WR_SEND;
send->s_wr.num_sge = 2;
send->s_wr.next = NULL;
send->s_send_wr.send_flags = send_flags;
send->s_send_wr.opcode = IB_WR_SEND;
send->s_send_wr.num_sge = 2;
send->s_send_wr.next = NULL;
send->s_queued = jiffies;
send->s_op = NULL;
@ -472,7 +463,7 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
} else {
/* We're sending a packet with no payload. There is only
* one SGE */
send->s_wr.num_sge = 1;
send->s_send_wr.num_sge = 1;
sge = &send->s_sge[0];
}
@ -672,23 +663,23 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
*/
if (ic->i_unsignaled_wrs-- == 0) {
ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
}
ic->i_unsignaled_bytes -= len;
if (ic->i_unsignaled_bytes <= 0) {
ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
}
/*
* Always signal the last one if we're stopping due to flow control.
*/
if (flow_controlled && i == (work_alloc-1))
send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
rdsdebug("send %p wr %p num_sge %u next %p\n", send,
&send->s_wr, send->s_wr.num_sge, send->s_wr.next);
&send->s_send_wr, send->s_send_wr.num_sge, send->s_send_wr.next);
sent += len;
rm->data.op_dmaoff += len;
@ -722,7 +713,7 @@ add_header:
}
if (prev)
prev->s_wr.next = &send->s_wr;
prev->s_send_wr.next = &send->s_send_wr;
prev = send;
pos = (pos + 1) % ic->i_send_ring.w_nr;
@ -736,7 +727,7 @@ add_header:
/* if we finished the message then send completion owns it */
if (scat == &rm->data.op_sg[rm->data.op_count]) {
prev->s_rm = ic->i_rm;
prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
prev->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
ic->i_rm = NULL;
}
@ -748,11 +739,11 @@ add_header:
rds_iw_send_add_credits(conn, credit_alloc - i);
/* XXX need to worry about failed_wr and partial sends. */
failed_wr = &first->s_wr;
ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
failed_wr = &first->s_send_wr;
ret = ib_post_send(ic->i_cm_id->qp, &first->s_send_wr, &failed_wr);
rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
first, &first->s_wr, ret, failed_wr);
BUG_ON(failed_wr != &first->s_wr);
first, &first->s_send_wr, ret, failed_wr);
BUG_ON(failed_wr != &first->s_send_wr);
if (ret) {
printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 "
"returned %d\n", &conn->c_faddr, ret);
@ -770,24 +761,26 @@ out:
return ret;
}
static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rds_iw_connection *ic, struct rds_iw_send_work *send, int nent, int len, u64 sg_addr)
static int rds_iw_build_send_reg(struct rds_iw_send_work *send,
struct scatterlist *sg,
int sg_nents)
{
BUG_ON(nent > send->s_page_list->max_page_list_len);
/*
* Perform a WR for the fast_reg_mr. Each individual page
* in the sg list is added to the fast reg page list and placed
* inside the fast_reg_mr WR.
*/
send->s_wr.opcode = IB_WR_FAST_REG_MR;
send->s_wr.wr.fast_reg.length = len;
send->s_wr.wr.fast_reg.rkey = send->s_mr->rkey;
send->s_wr.wr.fast_reg.page_list = send->s_page_list;
send->s_wr.wr.fast_reg.page_list_len = nent;
send->s_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE;
send->s_wr.wr.fast_reg.iova_start = sg_addr;
int n;
n = ib_map_mr_sg(send->s_mr, sg, sg_nents, PAGE_SIZE);
if (unlikely(n != sg_nents))
return n < 0 ? n : -EINVAL;
send->s_reg_wr.wr.opcode = IB_WR_REG_MR;
send->s_reg_wr.wr.wr_id = 0;
send->s_reg_wr.wr.num_sge = 0;
send->s_reg_wr.mr = send->s_mr;
send->s_reg_wr.key = send->s_mr->rkey;
send->s_reg_wr.access = IB_ACCESS_REMOTE_WRITE;
ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
return 0;
}
int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
@ -808,6 +801,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
int sent;
int ret;
int num_sge;
int sg_nents;
rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
@ -861,9 +855,10 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
scat = &op->op_sg[0];
sent = 0;
num_sge = op->op_count;
sg_nents = 0;
for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
send->s_wr.send_flags = 0;
send->s_rdma_wr.wr.send_flags = 0;
send->s_queued = jiffies;
/*
@ -872,7 +867,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
*/
if (ic->i_unsignaled_wrs-- == 0) {
ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
send->s_wr.send_flags = IB_SEND_SIGNALED;
send->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
}
/* To avoid the need to have the plumbing to invalidate the fastreg_mr used
@ -880,30 +875,31 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
* IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed.
*/
if (op->op_write)
send->s_wr.opcode = IB_WR_RDMA_WRITE;
send->s_rdma_wr.wr.opcode = IB_WR_RDMA_WRITE;
else
send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
send->s_rdma_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
send->s_wr.wr.rdma.remote_addr = remote_addr;
send->s_wr.wr.rdma.rkey = op->op_rkey;
send->s_rdma_wr.remote_addr = remote_addr;
send->s_rdma_wr.rkey = op->op_rkey;
send->s_op = op;
if (num_sge > rds_iwdev->max_sge) {
send->s_wr.num_sge = rds_iwdev->max_sge;
send->s_rdma_wr.wr.num_sge = rds_iwdev->max_sge;
num_sge -= rds_iwdev->max_sge;
} else
send->s_wr.num_sge = num_sge;
send->s_rdma_wr.wr.num_sge = num_sge;
send->s_wr.next = NULL;
send->s_rdma_wr.wr.next = NULL;
if (prev)
prev->s_wr.next = &send->s_wr;
prev->s_send_wr.next = &send->s_rdma_wr.wr;
for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
scat != &op->op_sg[op->op_count]; j++) {
len = ib_sg_dma_len(ic->i_cm_id->device, scat);
if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV)
send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat);
if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV)
sg_nents++;
else {
send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat);
send->s_sge[j].length = len;
@ -917,15 +913,17 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
scat++;
}
if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
send->s_wr.num_sge = 1;
if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
send->s_rdma_wr.wr.num_sge = 1;
send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr;
send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes;
send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey;
}
rdsdebug("send %p wr %p num_sge %u next %p\n", send,
&send->s_wr, send->s_wr.num_sge, send->s_wr.next);
&send->s_rdma_wr,
send->s_rdma_wr.wr.num_sge,
send->s_rdma_wr.wr.next);
prev = send;
if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
@ -934,7 +932,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
/* if we finished the message then send completion owns it */
if (scat == &op->op_sg[op->op_count])
first->s_wr.send_flags = IB_SEND_SIGNALED;
first->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
if (i < work_alloc) {
rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
@ -948,16 +946,20 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
* fastreg_mr (or possibly a dma_mr)
*/
if (!op->op_write) {
rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos],
op->op_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr);
ret = rds_iw_build_send_reg(&ic->i_sends[fr_pos],
&op->op_sg[0], sg_nents);
if (ret) {
printk(KERN_WARNING "RDS/IW: failed to reg send mem\n");
goto out;
}
work_alloc++;
}
failed_wr = &first->s_wr;
ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
failed_wr = &first->s_rdma_wr.wr;
ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
first, &first->s_wr, ret, failed_wr);
BUG_ON(failed_wr != &first->s_wr);
first, &first->s_rdma_wr, ret, failed_wr);
BUG_ON(failed_wr != &first->s_rdma_wr.wr);
if (ret) {
printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 "
"returned %d\n", &conn->c_faddr, ret);

View file

@ -142,8 +142,8 @@ static int rds_rdma_listen_init(void)
struct rdma_cm_id *cm_id;
int ret;
cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP,
IB_QPT_RC);
cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id)) {
ret = PTR_ERR(cm_id);
printk(KERN_ERR "RDS/RDMA: failed to setup listener, "