mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-04-12 17:34:31 +00:00
rds: zerocopy Tx support.
If the MSG_ZEROCOPY flag is specified with rds_sendmsg(), and
the SO_ZEROCOPY socket option has been set on the PF_RDS socket,
application pages sent down with rds_sendmsg() are pinned.
The pinning uses the accounting infrastructure added by
commit a91dbff551
("sock: ulimit on MSG_ZEROCOPY pages").
The payload bytes in the message must not be modified while
the message remains pinned. A multi-threaded
application using this infrastructure may thus need to be notified
about send-completion so that it can free/reuse the buffers
passed to rds_sendmsg(). Notification of send-completion will
identify each message-buffer by a cookie that the application
must specify as ancillary data to rds_sendmsg().
The ancillary data in this case has cmsg_level == SOL_RDS
and cmsg_type == RDS_CMSG_ZCOPY_COOKIE.
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
01883eda72
commit
0cebaccef3
4 changed files with 91 additions and 8 deletions
|
@ -103,6 +103,7 @@
|
||||||
#define RDS_CMSG_MASKED_ATOMIC_FADD 8
|
#define RDS_CMSG_MASKED_ATOMIC_FADD 8
|
||||||
#define RDS_CMSG_MASKED_ATOMIC_CSWP 9
|
#define RDS_CMSG_MASKED_ATOMIC_CSWP 9
|
||||||
#define RDS_CMSG_RXPATH_LATENCY 11
|
#define RDS_CMSG_RXPATH_LATENCY 11
|
||||||
|
#define RDS_CMSG_ZCOPY_COOKIE 12
|
||||||
|
|
||||||
#define RDS_INFO_FIRST 10000
|
#define RDS_INFO_FIRST 10000
|
||||||
#define RDS_INFO_COUNTERS 10000
|
#define RDS_INFO_COUNTERS 10000
|
||||||
|
|
|
@ -341,12 +341,14 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
|
||||||
return rm;
|
return rm;
|
||||||
}
|
}
|
||||||
|
|
||||||
int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
|
int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
|
||||||
|
bool zcopy)
|
||||||
{
|
{
|
||||||
unsigned long to_copy, nbytes;
|
unsigned long to_copy, nbytes;
|
||||||
unsigned long sg_off;
|
unsigned long sg_off;
|
||||||
struct scatterlist *sg;
|
struct scatterlist *sg;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
int length = iov_iter_count(from);
|
||||||
|
|
||||||
rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
|
rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from));
|
||||||
|
|
||||||
|
@ -356,6 +358,53 @@ int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from)
|
||||||
sg = rm->data.op_sg;
|
sg = rm->data.op_sg;
|
||||||
sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
|
sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
|
||||||
|
|
||||||
|
if (zcopy) {
|
||||||
|
int total_copied = 0;
|
||||||
|
struct sk_buff *skb;
|
||||||
|
|
||||||
|
skb = alloc_skb(SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(u32),
|
||||||
|
GFP_KERNEL);
|
||||||
|
if (!skb)
|
||||||
|
return -ENOMEM;
|
||||||
|
rm->data.op_mmp_znotifier = RDS_ZCOPY_SKB(skb);
|
||||||
|
if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp,
|
||||||
|
length)) {
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
while (iov_iter_count(from)) {
|
||||||
|
struct page *pages;
|
||||||
|
size_t start;
|
||||||
|
ssize_t copied;
|
||||||
|
|
||||||
|
copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
|
||||||
|
1, &start);
|
||||||
|
if (copied < 0) {
|
||||||
|
struct mmpin *mmp;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < rm->data.op_nents; i++)
|
||||||
|
put_page(sg_page(&rm->data.op_sg[i]));
|
||||||
|
mmp = &rm->data.op_mmp_znotifier->z_mmp;
|
||||||
|
mm_unaccount_pinned_pages(mmp);
|
||||||
|
ret = -EFAULT;
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
total_copied += copied;
|
||||||
|
iov_iter_advance(from, copied);
|
||||||
|
length -= copied;
|
||||||
|
sg_set_page(sg, pages, copied, start);
|
||||||
|
rm->data.op_nents++;
|
||||||
|
sg++;
|
||||||
|
}
|
||||||
|
WARN_ON_ONCE(length != 0);
|
||||||
|
return ret;
|
||||||
|
err:
|
||||||
|
consume_skb(skb);
|
||||||
|
rm->data.op_mmp_znotifier = NULL;
|
||||||
|
return ret;
|
||||||
|
} /* zcopy */
|
||||||
|
|
||||||
while (iov_iter_count(from)) {
|
while (iov_iter_count(from)) {
|
||||||
if (!sg_page(sg)) {
|
if (!sg_page(sg)) {
|
||||||
ret = rds_page_remainder_alloc(sg, iov_iter_count(from),
|
ret = rds_page_remainder_alloc(sg, iov_iter_count(from),
|
||||||
|
|
|
@ -785,7 +785,8 @@ rds_conn_connecting(struct rds_connection *conn)
|
||||||
/* message.c */
|
/* message.c */
|
||||||
struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
|
struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
|
||||||
struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
|
struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
|
||||||
int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from);
|
int rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from,
|
||||||
|
bool zcopy);
|
||||||
struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
|
struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
|
||||||
void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
|
void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
|
||||||
__be16 dport, u64 seq);
|
__be16 dport, u64 seq);
|
||||||
|
|
|
@ -875,12 +875,13 @@ out:
|
||||||
* rds_message is getting to be quite complicated, and we'd like to allocate
|
* rds_message is getting to be quite complicated, and we'd like to allocate
|
||||||
* it all in one go. This figures out how big it needs to be up front.
|
* it all in one go. This figures out how big it needs to be up front.
|
||||||
*/
|
*/
|
||||||
static int rds_rm_size(struct msghdr *msg, int data_len)
|
static int rds_rm_size(struct msghdr *msg, int num_sgs)
|
||||||
{
|
{
|
||||||
struct cmsghdr *cmsg;
|
struct cmsghdr *cmsg;
|
||||||
int size = 0;
|
int size = 0;
|
||||||
int cmsg_groups = 0;
|
int cmsg_groups = 0;
|
||||||
int retval;
|
int retval;
|
||||||
|
bool zcopy_cookie = false;
|
||||||
|
|
||||||
for_each_cmsghdr(cmsg, msg) {
|
for_each_cmsghdr(cmsg, msg) {
|
||||||
if (!CMSG_OK(msg, cmsg))
|
if (!CMSG_OK(msg, cmsg))
|
||||||
|
@ -899,6 +900,8 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case RDS_CMSG_ZCOPY_COOKIE:
|
||||||
|
zcopy_cookie = true;
|
||||||
case RDS_CMSG_RDMA_DEST:
|
case RDS_CMSG_RDMA_DEST:
|
||||||
case RDS_CMSG_RDMA_MAP:
|
case RDS_CMSG_RDMA_MAP:
|
||||||
cmsg_groups |= 2;
|
cmsg_groups |= 2;
|
||||||
|
@ -919,7 +922,10 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);
|
if ((msg->msg_flags & MSG_ZEROCOPY) && !zcopy_cookie)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
size += num_sgs * sizeof(struct scatterlist);
|
||||||
|
|
||||||
/* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */
|
/* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */
|
||||||
if (cmsg_groups == 3)
|
if (cmsg_groups == 3)
|
||||||
|
@ -928,6 +934,18 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int rds_cmsg_zcopy(struct rds_sock *rs, struct rds_message *rm,
|
||||||
|
struct cmsghdr *cmsg)
|
||||||
|
{
|
||||||
|
u32 *cookie;
|
||||||
|
|
||||||
|
if (cmsg->cmsg_len < CMSG_LEN(sizeof(*cookie)))
|
||||||
|
return -EINVAL;
|
||||||
|
cookie = CMSG_DATA(cmsg);
|
||||||
|
rm->data.op_mmp_znotifier->z_cookie = *cookie;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
|
static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
|
||||||
struct msghdr *msg, int *allocated_mr)
|
struct msghdr *msg, int *allocated_mr)
|
||||||
{
|
{
|
||||||
|
@ -970,6 +988,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
|
||||||
ret = rds_cmsg_atomic(rs, rm, cmsg);
|
ret = rds_cmsg_atomic(rs, rm, cmsg);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case RDS_CMSG_ZCOPY_COOKIE:
|
||||||
|
ret = rds_cmsg_zcopy(rs, rm, cmsg);
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
@ -1040,10 +1062,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
|
||||||
long timeo = sock_sndtimeo(sk, nonblock);
|
long timeo = sock_sndtimeo(sk, nonblock);
|
||||||
struct rds_conn_path *cpath;
|
struct rds_conn_path *cpath;
|
||||||
size_t total_payload_len = payload_len, rdma_payload_len = 0;
|
size_t total_payload_len = payload_len, rdma_payload_len = 0;
|
||||||
|
bool zcopy = ((msg->msg_flags & MSG_ZEROCOPY) &&
|
||||||
|
sock_flag(rds_rs_to_sk(rs), SOCK_ZEROCOPY));
|
||||||
|
int num_sgs = ceil(payload_len, PAGE_SIZE);
|
||||||
|
|
||||||
/* Mirror Linux UDP mirror of BSD error message compatibility */
|
/* Mirror Linux UDP mirror of BSD error message compatibility */
|
||||||
/* XXX: Perhaps MSG_MORE someday */
|
/* XXX: Perhaps MSG_MORE someday */
|
||||||
if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) {
|
if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT | MSG_ZEROCOPY)) {
|
||||||
ret = -EOPNOTSUPP;
|
ret = -EOPNOTSUPP;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
@ -1087,8 +1112,15 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (zcopy) {
|
||||||
|
if (rs->rs_transport->t_type != RDS_TRANS_TCP) {
|
||||||
|
ret = -EOPNOTSUPP;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
num_sgs = iov_iter_npages(&msg->msg_iter, INT_MAX);
|
||||||
|
}
|
||||||
/* size of rm including all sgs */
|
/* size of rm including all sgs */
|
||||||
ret = rds_rm_size(msg, payload_len);
|
ret = rds_rm_size(msg, num_sgs);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
|
@ -1100,12 +1132,12 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
|
||||||
|
|
||||||
/* Attach data to the rm */
|
/* Attach data to the rm */
|
||||||
if (payload_len) {
|
if (payload_len) {
|
||||||
rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
|
rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
|
||||||
if (!rm->data.op_sg) {
|
if (!rm->data.op_sg) {
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
ret = rds_message_copy_from_user(rm, &msg->msg_iter);
|
ret = rds_message_copy_from_user(rm, &msg->msg_iter, zcopy);
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue