mirror of
https://github.com/Fishwaldo/linux-bl808.git
synced 2025-03-21 06:24:12 +00:00
Merge branch 'XDP-transmission-for-tuntap'
Jason Wang says: ==================== XDP transmission for tuntap This series tries to implement XDP transmission (ndo_xdp_xmit) for tuntap. A pointer ring is used for queuing both XDP buffers and sk_buffs; this is done by encoding the type into the lowest bit of the pointer and storing XDP metadata in the headroom of the XDP buff. Tests get 3.05 Mpps when doing xdp_redirect_map from ixgbe to VM (testpmd + virtio-net in guest). This gives us ~20% improvement compared to using skb during redirect. Please review. Changes from V1: - silence warnings - fix typos - add skb mode number in the commit log ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
e8b18af8c3
5 changed files with 269 additions and 90 deletions
|
@ -330,7 +330,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
|
||||||
if (!q)
|
if (!q)
|
||||||
return RX_HANDLER_PASS;
|
return RX_HANDLER_PASS;
|
||||||
|
|
||||||
if (__skb_array_full(&q->skb_array))
|
if (__ptr_ring_full(&q->ring))
|
||||||
goto drop;
|
goto drop;
|
||||||
|
|
||||||
skb_push(skb, ETH_HLEN);
|
skb_push(skb, ETH_HLEN);
|
||||||
|
@ -348,7 +348,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
|
||||||
goto drop;
|
goto drop;
|
||||||
|
|
||||||
if (!segs) {
|
if (!segs) {
|
||||||
if (skb_array_produce(&q->skb_array, skb))
|
if (ptr_ring_produce(&q->ring, skb))
|
||||||
goto drop;
|
goto drop;
|
||||||
goto wake_up;
|
goto wake_up;
|
||||||
}
|
}
|
||||||
|
@ -358,7 +358,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
|
||||||
struct sk_buff *nskb = segs->next;
|
struct sk_buff *nskb = segs->next;
|
||||||
|
|
||||||
segs->next = NULL;
|
segs->next = NULL;
|
||||||
if (skb_array_produce(&q->skb_array, segs)) {
|
if (ptr_ring_produce(&q->ring, segs)) {
|
||||||
kfree_skb(segs);
|
kfree_skb(segs);
|
||||||
kfree_skb_list(nskb);
|
kfree_skb_list(nskb);
|
||||||
break;
|
break;
|
||||||
|
@ -375,7 +375,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
|
||||||
!(features & NETIF_F_CSUM_MASK) &&
|
!(features & NETIF_F_CSUM_MASK) &&
|
||||||
skb_checksum_help(skb))
|
skb_checksum_help(skb))
|
||||||
goto drop;
|
goto drop;
|
||||||
if (skb_array_produce(&q->skb_array, skb))
|
if (ptr_ring_produce(&q->ring, skb))
|
||||||
goto drop;
|
goto drop;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -497,7 +497,7 @@ static void tap_sock_destruct(struct sock *sk)
|
||||||
{
|
{
|
||||||
struct tap_queue *q = container_of(sk, struct tap_queue, sk);
|
struct tap_queue *q = container_of(sk, struct tap_queue, sk);
|
||||||
|
|
||||||
skb_array_cleanup(&q->skb_array);
|
ptr_ring_cleanup(&q->ring, __skb_array_destroy_skb);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int tap_open(struct inode *inode, struct file *file)
|
static int tap_open(struct inode *inode, struct file *file)
|
||||||
|
@ -517,7 +517,7 @@ static int tap_open(struct inode *inode, struct file *file)
|
||||||
&tap_proto, 0);
|
&tap_proto, 0);
|
||||||
if (!q)
|
if (!q)
|
||||||
goto err;
|
goto err;
|
||||||
if (skb_array_init(&q->skb_array, tap->dev->tx_queue_len, GFP_KERNEL)) {
|
if (ptr_ring_init(&q->ring, tap->dev->tx_queue_len, GFP_KERNEL)) {
|
||||||
sk_free(&q->sk);
|
sk_free(&q->sk);
|
||||||
goto err;
|
goto err;
|
||||||
}
|
}
|
||||||
|
@ -546,7 +546,7 @@ static int tap_open(struct inode *inode, struct file *file)
|
||||||
|
|
||||||
err = tap_set_queue(tap, file, q);
|
err = tap_set_queue(tap, file, q);
|
||||||
if (err) {
|
if (err) {
|
||||||
/* tap_sock_destruct() will take care of freeing skb_array */
|
/* tap_sock_destruct() will take care of freeing ptr_ring */
|
||||||
goto err_put;
|
goto err_put;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -583,7 +583,7 @@ static unsigned int tap_poll(struct file *file, poll_table *wait)
|
||||||
mask = 0;
|
mask = 0;
|
||||||
poll_wait(file, &q->wq.wait, wait);
|
poll_wait(file, &q->wq.wait, wait);
|
||||||
|
|
||||||
if (!skb_array_empty(&q->skb_array))
|
if (!ptr_ring_empty(&q->ring))
|
||||||
mask |= POLLIN | POLLRDNORM;
|
mask |= POLLIN | POLLRDNORM;
|
||||||
|
|
||||||
if (sock_writeable(&q->sk) ||
|
if (sock_writeable(&q->sk) ||
|
||||||
|
@ -844,7 +844,7 @@ static ssize_t tap_do_read(struct tap_queue *q,
|
||||||
TASK_INTERRUPTIBLE);
|
TASK_INTERRUPTIBLE);
|
||||||
|
|
||||||
/* Read frames from the queue */
|
/* Read frames from the queue */
|
||||||
skb = skb_array_consume(&q->skb_array);
|
skb = ptr_ring_consume(&q->ring);
|
||||||
if (skb)
|
if (skb)
|
||||||
break;
|
break;
|
||||||
if (noblock) {
|
if (noblock) {
|
||||||
|
@ -1176,7 +1176,7 @@ static int tap_peek_len(struct socket *sock)
|
||||||
{
|
{
|
||||||
struct tap_queue *q = container_of(sock, struct tap_queue,
|
struct tap_queue *q = container_of(sock, struct tap_queue,
|
||||||
sock);
|
sock);
|
||||||
return skb_array_peek_len(&q->skb_array);
|
return PTR_RING_PEEK_CALL(&q->ring, __skb_array_len_with_tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Ops structure to mimic raw sockets with tun */
|
/* Ops structure to mimic raw sockets with tun */
|
||||||
|
@ -1202,7 +1202,7 @@ struct socket *tap_get_socket(struct file *file)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(tap_get_socket);
|
EXPORT_SYMBOL_GPL(tap_get_socket);
|
||||||
|
|
||||||
struct skb_array *tap_get_skb_array(struct file *file)
|
struct ptr_ring *tap_get_ptr_ring(struct file *file)
|
||||||
{
|
{
|
||||||
struct tap_queue *q;
|
struct tap_queue *q;
|
||||||
|
|
||||||
|
@ -1211,29 +1211,30 @@ struct skb_array *tap_get_skb_array(struct file *file)
|
||||||
q = file->private_data;
|
q = file->private_data;
|
||||||
if (!q)
|
if (!q)
|
||||||
return ERR_PTR(-EBADFD);
|
return ERR_PTR(-EBADFD);
|
||||||
return &q->skb_array;
|
return &q->ring;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(tap_get_skb_array);
|
EXPORT_SYMBOL_GPL(tap_get_ptr_ring);
|
||||||
|
|
||||||
int tap_queue_resize(struct tap_dev *tap)
|
int tap_queue_resize(struct tap_dev *tap)
|
||||||
{
|
{
|
||||||
struct net_device *dev = tap->dev;
|
struct net_device *dev = tap->dev;
|
||||||
struct tap_queue *q;
|
struct tap_queue *q;
|
||||||
struct skb_array **arrays;
|
struct ptr_ring **rings;
|
||||||
int n = tap->numqueues;
|
int n = tap->numqueues;
|
||||||
int ret, i = 0;
|
int ret, i = 0;
|
||||||
|
|
||||||
arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL);
|
rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL);
|
||||||
if (!arrays)
|
if (!rings)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
list_for_each_entry(q, &tap->queue_list, next)
|
list_for_each_entry(q, &tap->queue_list, next)
|
||||||
arrays[i++] = &q->skb_array;
|
rings[i++] = &q->ring;
|
||||||
|
|
||||||
ret = skb_array_resize_multiple(arrays, n,
|
ret = ptr_ring_resize_multiple(rings, n,
|
||||||
dev->tx_queue_len, GFP_KERNEL);
|
dev->tx_queue_len, GFP_KERNEL,
|
||||||
|
__skb_array_destroy_skb);
|
||||||
|
|
||||||
kfree(arrays);
|
kfree(rings);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(tap_queue_resize);
|
EXPORT_SYMBOL_GPL(tap_queue_resize);
|
||||||
|
|
|
@ -179,7 +179,7 @@ struct tun_file {
|
||||||
struct mutex napi_mutex; /* Protects access to the above napi */
|
struct mutex napi_mutex; /* Protects access to the above napi */
|
||||||
struct list_head next;
|
struct list_head next;
|
||||||
struct tun_struct *detached;
|
struct tun_struct *detached;
|
||||||
struct skb_array tx_array;
|
struct ptr_ring tx_ring;
|
||||||
struct xdp_rxq_info xdp_rxq;
|
struct xdp_rxq_info xdp_rxq;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -241,6 +241,24 @@ struct tun_struct {
|
||||||
struct tun_steering_prog __rcu *steering_prog;
|
struct tun_steering_prog __rcu *steering_prog;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
bool tun_is_xdp_buff(void *ptr)
|
||||||
|
{
|
||||||
|
return (unsigned long)ptr & TUN_XDP_FLAG;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(tun_is_xdp_buff);
|
||||||
|
|
||||||
|
void *tun_xdp_to_ptr(void *ptr)
|
||||||
|
{
|
||||||
|
return (void *)((unsigned long)ptr | TUN_XDP_FLAG);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(tun_xdp_to_ptr);
|
||||||
|
|
||||||
|
void *tun_ptr_to_xdp(void *ptr)
|
||||||
|
{
|
||||||
|
return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(tun_ptr_to_xdp);
|
||||||
|
|
||||||
static int tun_napi_receive(struct napi_struct *napi, int budget)
|
static int tun_napi_receive(struct napi_struct *napi, int budget)
|
||||||
{
|
{
|
||||||
struct tun_file *tfile = container_of(napi, struct tun_file, napi);
|
struct tun_file *tfile = container_of(napi, struct tun_file, napi);
|
||||||
|
@ -631,12 +649,25 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
|
||||||
return tun;
|
return tun;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void tun_ptr_free(void *ptr)
|
||||||
|
{
|
||||||
|
if (!ptr)
|
||||||
|
return;
|
||||||
|
if (tun_is_xdp_buff(ptr)) {
|
||||||
|
struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
|
||||||
|
|
||||||
|
put_page(virt_to_head_page(xdp->data));
|
||||||
|
} else {
|
||||||
|
__skb_array_destroy_skb(ptr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void tun_queue_purge(struct tun_file *tfile)
|
static void tun_queue_purge(struct tun_file *tfile)
|
||||||
{
|
{
|
||||||
struct sk_buff *skb;
|
void *ptr;
|
||||||
|
|
||||||
while ((skb = skb_array_consume(&tfile->tx_array)) != NULL)
|
while ((ptr = ptr_ring_consume(&tfile->tx_ring)) != NULL)
|
||||||
kfree_skb(skb);
|
tun_ptr_free(ptr);
|
||||||
|
|
||||||
skb_queue_purge(&tfile->sk.sk_write_queue);
|
skb_queue_purge(&tfile->sk.sk_write_queue);
|
||||||
skb_queue_purge(&tfile->sk.sk_error_queue);
|
skb_queue_purge(&tfile->sk.sk_error_queue);
|
||||||
|
@ -689,7 +720,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
|
||||||
unregister_netdevice(tun->dev);
|
unregister_netdevice(tun->dev);
|
||||||
}
|
}
|
||||||
if (tun) {
|
if (tun) {
|
||||||
skb_array_cleanup(&tfile->tx_array);
|
ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
|
||||||
xdp_rxq_info_unreg(&tfile->xdp_rxq);
|
xdp_rxq_info_unreg(&tfile->xdp_rxq);
|
||||||
}
|
}
|
||||||
sock_put(&tfile->sk);
|
sock_put(&tfile->sk);
|
||||||
|
@ -782,7 +813,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!tfile->detached &&
|
if (!tfile->detached &&
|
||||||
skb_array_init(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) {
|
ptr_ring_init(&tfile->tx_ring, dev->tx_queue_len, GFP_KERNEL)) {
|
||||||
err = -ENOMEM;
|
err = -ENOMEM;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
@ -1048,7 +1079,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
|
||||||
|
|
||||||
nf_reset(skb);
|
nf_reset(skb);
|
||||||
|
|
||||||
if (skb_array_produce(&tfile->tx_array, skb))
|
if (ptr_ring_produce(&tfile->tx_ring, skb))
|
||||||
goto drop;
|
goto drop;
|
||||||
|
|
||||||
/* Notify and wake up reader process */
|
/* Notify and wake up reader process */
|
||||||
|
@ -1221,6 +1252,67 @@ static const struct net_device_ops tun_netdev_ops = {
|
||||||
.ndo_get_stats64 = tun_net_get_stats64,
|
.ndo_get_stats64 = tun_net_get_stats64,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
|
||||||
|
{
|
||||||
|
struct tun_struct *tun = netdev_priv(dev);
|
||||||
|
struct xdp_buff *buff = xdp->data_hard_start;
|
||||||
|
int headroom = xdp->data - xdp->data_hard_start;
|
||||||
|
struct tun_file *tfile;
|
||||||
|
u32 numqueues;
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
/* Assure headroom is available and buff is properly aligned */
|
||||||
|
if (unlikely(headroom < sizeof(*xdp) || tun_is_xdp_buff(xdp)))
|
||||||
|
return -ENOSPC;
|
||||||
|
|
||||||
|
*buff = *xdp;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
|
||||||
|
numqueues = READ_ONCE(tun->numqueues);
|
||||||
|
if (!numqueues) {
|
||||||
|
ret = -ENOSPC;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
|
||||||
|
numqueues]);
|
||||||
|
/* Encode the XDP flag into lowest bit for consumer to differ
|
||||||
|
* XDP buffer from sk_buff.
|
||||||
|
*/
|
||||||
|
if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(buff))) {
|
||||||
|
this_cpu_inc(tun->pcpu_stats->tx_dropped);
|
||||||
|
ret = -ENOSPC;
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
rcu_read_unlock();
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void tun_xdp_flush(struct net_device *dev)
|
||||||
|
{
|
||||||
|
struct tun_struct *tun = netdev_priv(dev);
|
||||||
|
struct tun_file *tfile;
|
||||||
|
u32 numqueues;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
|
||||||
|
numqueues = READ_ONCE(tun->numqueues);
|
||||||
|
if (!numqueues)
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
|
||||||
|
numqueues]);
|
||||||
|
/* Notify and wake up reader process */
|
||||||
|
if (tfile->flags & TUN_FASYNC)
|
||||||
|
kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
|
||||||
|
tfile->socket.sk->sk_data_ready(tfile->socket.sk);
|
||||||
|
|
||||||
|
out:
|
||||||
|
rcu_read_unlock();
|
||||||
|
}
|
||||||
|
|
||||||
static const struct net_device_ops tap_netdev_ops = {
|
static const struct net_device_ops tap_netdev_ops = {
|
||||||
.ndo_uninit = tun_net_uninit,
|
.ndo_uninit = tun_net_uninit,
|
||||||
.ndo_open = tun_net_open,
|
.ndo_open = tun_net_open,
|
||||||
|
@ -1238,6 +1330,8 @@ static const struct net_device_ops tap_netdev_ops = {
|
||||||
.ndo_set_rx_headroom = tun_set_headroom,
|
.ndo_set_rx_headroom = tun_set_headroom,
|
||||||
.ndo_get_stats64 = tun_net_get_stats64,
|
.ndo_get_stats64 = tun_net_get_stats64,
|
||||||
.ndo_bpf = tun_xdp,
|
.ndo_bpf = tun_xdp,
|
||||||
|
.ndo_xdp_xmit = tun_xdp_xmit,
|
||||||
|
.ndo_xdp_flush = tun_xdp_flush,
|
||||||
};
|
};
|
||||||
|
|
||||||
static void tun_flow_init(struct tun_struct *tun)
|
static void tun_flow_init(struct tun_struct *tun)
|
||||||
|
@ -1316,7 +1410,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
|
||||||
|
|
||||||
poll_wait(file, sk_sleep(sk), wait);
|
poll_wait(file, sk_sleep(sk), wait);
|
||||||
|
|
||||||
if (!skb_array_empty(&tfile->tx_array))
|
if (!ptr_ring_empty(&tfile->tx_ring))
|
||||||
mask |= POLLIN | POLLRDNORM;
|
mask |= POLLIN | POLLRDNORM;
|
||||||
|
|
||||||
if (tun->dev->flags & IFF_UP &&
|
if (tun->dev->flags & IFF_UP &&
|
||||||
|
@ -1862,6 +1956,40 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static ssize_t tun_put_user_xdp(struct tun_struct *tun,
|
||||||
|
struct tun_file *tfile,
|
||||||
|
struct xdp_buff *xdp,
|
||||||
|
struct iov_iter *iter)
|
||||||
|
{
|
||||||
|
int vnet_hdr_sz = 0;
|
||||||
|
size_t size = xdp->data_end - xdp->data;
|
||||||
|
struct tun_pcpu_stats *stats;
|
||||||
|
size_t ret;
|
||||||
|
|
||||||
|
if (tun->flags & IFF_VNET_HDR) {
|
||||||
|
struct virtio_net_hdr gso = { 0 };
|
||||||
|
|
||||||
|
vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
|
||||||
|
if (unlikely(iov_iter_count(iter) < vnet_hdr_sz))
|
||||||
|
return -EINVAL;
|
||||||
|
if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) !=
|
||||||
|
sizeof(gso)))
|
||||||
|
return -EFAULT;
|
||||||
|
iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = copy_to_iter(xdp->data, size, iter) + vnet_hdr_sz;
|
||||||
|
|
||||||
|
stats = get_cpu_ptr(tun->pcpu_stats);
|
||||||
|
u64_stats_update_begin(&stats->syncp);
|
||||||
|
stats->tx_packets++;
|
||||||
|
stats->tx_bytes += ret;
|
||||||
|
u64_stats_update_end(&stats->syncp);
|
||||||
|
put_cpu_ptr(tun->pcpu_stats);
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/* Put packet to the user space buffer */
|
/* Put packet to the user space buffer */
|
||||||
static ssize_t tun_put_user(struct tun_struct *tun,
|
static ssize_t tun_put_user(struct tun_struct *tun,
|
||||||
struct tun_file *tfile,
|
struct tun_file *tfile,
|
||||||
|
@ -1959,15 +2087,14 @@ done:
|
||||||
return total;
|
return total;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
|
static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err)
|
||||||
int *err)
|
|
||||||
{
|
{
|
||||||
DECLARE_WAITQUEUE(wait, current);
|
DECLARE_WAITQUEUE(wait, current);
|
||||||
struct sk_buff *skb = NULL;
|
void *ptr = NULL;
|
||||||
int error = 0;
|
int error = 0;
|
||||||
|
|
||||||
skb = skb_array_consume(&tfile->tx_array);
|
ptr = ptr_ring_consume(&tfile->tx_ring);
|
||||||
if (skb)
|
if (ptr)
|
||||||
goto out;
|
goto out;
|
||||||
if (noblock) {
|
if (noblock) {
|
||||||
error = -EAGAIN;
|
error = -EAGAIN;
|
||||||
|
@ -1978,8 +2105,8 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
|
||||||
current->state = TASK_INTERRUPTIBLE;
|
current->state = TASK_INTERRUPTIBLE;
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
skb = skb_array_consume(&tfile->tx_array);
|
ptr = ptr_ring_consume(&tfile->tx_ring);
|
||||||
if (skb)
|
if (ptr)
|
||||||
break;
|
break;
|
||||||
if (signal_pending(current)) {
|
if (signal_pending(current)) {
|
||||||
error = -ERESTARTSYS;
|
error = -ERESTARTSYS;
|
||||||
|
@ -1998,12 +2125,12 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
|
||||||
|
|
||||||
out:
|
out:
|
||||||
*err = error;
|
*err = error;
|
||||||
return skb;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
|
static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
|
||||||
struct iov_iter *to,
|
struct iov_iter *to,
|
||||||
int noblock, struct sk_buff *skb)
|
int noblock, void *ptr)
|
||||||
{
|
{
|
||||||
ssize_t ret;
|
ssize_t ret;
|
||||||
int err;
|
int err;
|
||||||
|
@ -2011,23 +2138,31 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
|
||||||
tun_debug(KERN_INFO, tun, "tun_do_read\n");
|
tun_debug(KERN_INFO, tun, "tun_do_read\n");
|
||||||
|
|
||||||
if (!iov_iter_count(to)) {
|
if (!iov_iter_count(to)) {
|
||||||
if (skb)
|
tun_ptr_free(ptr);
|
||||||
kfree_skb(skb);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!skb) {
|
if (!ptr) {
|
||||||
/* Read frames from ring */
|
/* Read frames from ring */
|
||||||
skb = tun_ring_recv(tfile, noblock, &err);
|
ptr = tun_ring_recv(tfile, noblock, &err);
|
||||||
if (!skb)
|
if (!ptr)
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (tun_is_xdp_buff(ptr)) {
|
||||||
|
struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
|
||||||
|
|
||||||
|
ret = tun_put_user_xdp(tun, tfile, xdp, to);
|
||||||
|
put_page(virt_to_head_page(xdp->data));
|
||||||
|
} else {
|
||||||
|
struct sk_buff *skb = ptr;
|
||||||
|
|
||||||
ret = tun_put_user(tun, tfile, skb, to);
|
ret = tun_put_user(tun, tfile, skb, to);
|
||||||
if (unlikely(ret < 0))
|
if (unlikely(ret < 0))
|
||||||
kfree_skb(skb);
|
kfree_skb(skb);
|
||||||
else
|
else
|
||||||
consume_skb(skb);
|
consume_skb(skb);
|
||||||
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -2164,12 +2299,12 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
|
||||||
{
|
{
|
||||||
struct tun_file *tfile = container_of(sock, struct tun_file, socket);
|
struct tun_file *tfile = container_of(sock, struct tun_file, socket);
|
||||||
struct tun_struct *tun = tun_get(tfile);
|
struct tun_struct *tun = tun_get(tfile);
|
||||||
struct sk_buff *skb = m->msg_control;
|
void *ptr = m->msg_control;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (!tun) {
|
if (!tun) {
|
||||||
ret = -EBADFD;
|
ret = -EBADFD;
|
||||||
goto out_free_skb;
|
goto out_free;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
|
if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
|
||||||
|
@ -2181,7 +2316,7 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
|
||||||
SOL_PACKET, TUN_TX_TIMESTAMP);
|
SOL_PACKET, TUN_TX_TIMESTAMP);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb);
|
ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, ptr);
|
||||||
if (ret > (ssize_t)total_len) {
|
if (ret > (ssize_t)total_len) {
|
||||||
m->msg_flags |= MSG_TRUNC;
|
m->msg_flags |= MSG_TRUNC;
|
||||||
ret = flags & MSG_TRUNC ? ret : total_len;
|
ret = flags & MSG_TRUNC ? ret : total_len;
|
||||||
|
@ -2192,12 +2327,25 @@ out:
|
||||||
|
|
||||||
out_put_tun:
|
out_put_tun:
|
||||||
tun_put(tun);
|
tun_put(tun);
|
||||||
out_free_skb:
|
out_free:
|
||||||
if (skb)
|
tun_ptr_free(ptr);
|
||||||
kfree_skb(skb);
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int tun_ptr_peek_len(void *ptr)
|
||||||
|
{
|
||||||
|
if (likely(ptr)) {
|
||||||
|
if (tun_is_xdp_buff(ptr)) {
|
||||||
|
struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
|
||||||
|
|
||||||
|
return xdp->data_end - xdp->data;
|
||||||
|
}
|
||||||
|
return __skb_array_len_with_tag(ptr);
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int tun_peek_len(struct socket *sock)
|
static int tun_peek_len(struct socket *sock)
|
||||||
{
|
{
|
||||||
struct tun_file *tfile = container_of(sock, struct tun_file, socket);
|
struct tun_file *tfile = container_of(sock, struct tun_file, socket);
|
||||||
|
@ -2208,7 +2356,7 @@ static int tun_peek_len(struct socket *sock)
|
||||||
if (!tun)
|
if (!tun)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
ret = skb_array_peek_len(&tfile->tx_array);
|
ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, tun_ptr_peek_len);
|
||||||
tun_put(tun);
|
tun_put(tun);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -3114,25 +3262,26 @@ static int tun_queue_resize(struct tun_struct *tun)
|
||||||
{
|
{
|
||||||
struct net_device *dev = tun->dev;
|
struct net_device *dev = tun->dev;
|
||||||
struct tun_file *tfile;
|
struct tun_file *tfile;
|
||||||
struct skb_array **arrays;
|
struct ptr_ring **rings;
|
||||||
int n = tun->numqueues + tun->numdisabled;
|
int n = tun->numqueues + tun->numdisabled;
|
||||||
int ret, i;
|
int ret, i;
|
||||||
|
|
||||||
arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL);
|
rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL);
|
||||||
if (!arrays)
|
if (!rings)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
for (i = 0; i < tun->numqueues; i++) {
|
for (i = 0; i < tun->numqueues; i++) {
|
||||||
tfile = rtnl_dereference(tun->tfiles[i]);
|
tfile = rtnl_dereference(tun->tfiles[i]);
|
||||||
arrays[i] = &tfile->tx_array;
|
rings[i] = &tfile->tx_ring;
|
||||||
}
|
}
|
||||||
list_for_each_entry(tfile, &tun->disabled, next)
|
list_for_each_entry(tfile, &tun->disabled, next)
|
||||||
arrays[i++] = &tfile->tx_array;
|
rings[i++] = &tfile->tx_ring;
|
||||||
|
|
||||||
ret = skb_array_resize_multiple(arrays, n,
|
ret = ptr_ring_resize_multiple(rings, n,
|
||||||
dev->tx_queue_len, GFP_KERNEL);
|
dev->tx_queue_len, GFP_KERNEL,
|
||||||
|
tun_ptr_free);
|
||||||
|
|
||||||
kfree(arrays);
|
kfree(rings);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3218,7 +3367,7 @@ struct socket *tun_get_socket(struct file *file)
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(tun_get_socket);
|
EXPORT_SYMBOL_GPL(tun_get_socket);
|
||||||
|
|
||||||
struct skb_array *tun_get_skb_array(struct file *file)
|
struct ptr_ring *tun_get_tx_ring(struct file *file)
|
||||||
{
|
{
|
||||||
struct tun_file *tfile;
|
struct tun_file *tfile;
|
||||||
|
|
||||||
|
@ -3227,9 +3376,9 @@ struct skb_array *tun_get_skb_array(struct file *file)
|
||||||
tfile = file->private_data;
|
tfile = file->private_data;
|
||||||
if (!tfile)
|
if (!tfile)
|
||||||
return ERR_PTR(-EBADFD);
|
return ERR_PTR(-EBADFD);
|
||||||
return &tfile->tx_array;
|
return &tfile->tx_ring;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(tun_get_skb_array);
|
EXPORT_SYMBOL_GPL(tun_get_tx_ring);
|
||||||
|
|
||||||
module_init(tun_init);
|
module_init(tun_init);
|
||||||
module_exit(tun_cleanup);
|
module_exit(tun_cleanup);
|
||||||
|
|
|
@ -89,7 +89,7 @@ struct vhost_net_ubuf_ref {
|
||||||
|
|
||||||
#define VHOST_RX_BATCH 64
|
#define VHOST_RX_BATCH 64
|
||||||
struct vhost_net_buf {
|
struct vhost_net_buf {
|
||||||
struct sk_buff **queue;
|
void **queue;
|
||||||
int tail;
|
int tail;
|
||||||
int head;
|
int head;
|
||||||
};
|
};
|
||||||
|
@ -108,7 +108,7 @@ struct vhost_net_virtqueue {
|
||||||
/* Reference counting for outstanding ubufs.
|
/* Reference counting for outstanding ubufs.
|
||||||
* Protected by vq mutex. Writers must also take device mutex. */
|
* Protected by vq mutex. Writers must also take device mutex. */
|
||||||
struct vhost_net_ubuf_ref *ubufs;
|
struct vhost_net_ubuf_ref *ubufs;
|
||||||
struct skb_array *rx_array;
|
struct ptr_ring *rx_ring;
|
||||||
struct vhost_net_buf rxq;
|
struct vhost_net_buf rxq;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -158,7 +158,7 @@ static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq)
|
||||||
struct vhost_net_buf *rxq = &nvq->rxq;
|
struct vhost_net_buf *rxq = &nvq->rxq;
|
||||||
|
|
||||||
rxq->head = 0;
|
rxq->head = 0;
|
||||||
rxq->tail = skb_array_consume_batched(nvq->rx_array, rxq->queue,
|
rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue,
|
||||||
VHOST_RX_BATCH);
|
VHOST_RX_BATCH);
|
||||||
return rxq->tail;
|
return rxq->tail;
|
||||||
}
|
}
|
||||||
|
@ -167,13 +167,25 @@ static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq)
|
||||||
{
|
{
|
||||||
struct vhost_net_buf *rxq = &nvq->rxq;
|
struct vhost_net_buf *rxq = &nvq->rxq;
|
||||||
|
|
||||||
if (nvq->rx_array && !vhost_net_buf_is_empty(rxq)) {
|
if (nvq->rx_ring && !vhost_net_buf_is_empty(rxq)) {
|
||||||
skb_array_unconsume(nvq->rx_array, rxq->queue + rxq->head,
|
ptr_ring_unconsume(nvq->rx_ring, rxq->queue + rxq->head,
|
||||||
vhost_net_buf_get_size(rxq));
|
vhost_net_buf_get_size(rxq),
|
||||||
|
__skb_array_destroy_skb);
|
||||||
rxq->head = rxq->tail = 0;
|
rxq->head = rxq->tail = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int vhost_net_buf_peek_len(void *ptr)
|
||||||
|
{
|
||||||
|
if (tun_is_xdp_buff(ptr)) {
|
||||||
|
struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
|
||||||
|
|
||||||
|
return xdp->data_end - xdp->data;
|
||||||
|
}
|
||||||
|
|
||||||
|
return __skb_array_len_with_tag(ptr);
|
||||||
|
}
|
||||||
|
|
||||||
static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
|
static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
|
||||||
{
|
{
|
||||||
struct vhost_net_buf *rxq = &nvq->rxq;
|
struct vhost_net_buf *rxq = &nvq->rxq;
|
||||||
|
@ -185,7 +197,7 @@ static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
out:
|
out:
|
||||||
return __skb_array_len_with_tag(vhost_net_buf_get_ptr(rxq));
|
return vhost_net_buf_peek_len(vhost_net_buf_get_ptr(rxq));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vhost_net_buf_init(struct vhost_net_buf *rxq)
|
static void vhost_net_buf_init(struct vhost_net_buf *rxq)
|
||||||
|
@ -583,7 +595,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
|
||||||
int len = 0;
|
int len = 0;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
if (rvq->rx_array)
|
if (rvq->rx_ring)
|
||||||
return vhost_net_buf_peek(rvq);
|
return vhost_net_buf_peek(rvq);
|
||||||
|
|
||||||
spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
|
spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
|
||||||
|
@ -790,7 +802,7 @@ static void handle_rx(struct vhost_net *net)
|
||||||
* they refilled. */
|
* they refilled. */
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
if (nvq->rx_array)
|
if (nvq->rx_ring)
|
||||||
msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
|
msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
|
||||||
/* On overrun, truncate and discard */
|
/* On overrun, truncate and discard */
|
||||||
if (unlikely(headcount > UIO_MAXIOV)) {
|
if (unlikely(headcount > UIO_MAXIOV)) {
|
||||||
|
@ -896,7 +908,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
|
||||||
struct vhost_net *n;
|
struct vhost_net *n;
|
||||||
struct vhost_dev *dev;
|
struct vhost_dev *dev;
|
||||||
struct vhost_virtqueue **vqs;
|
struct vhost_virtqueue **vqs;
|
||||||
struct sk_buff **queue;
|
void **queue;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
|
n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
|
||||||
|
@ -908,7 +920,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
}
|
}
|
||||||
|
|
||||||
queue = kmalloc_array(VHOST_RX_BATCH, sizeof(struct sk_buff *),
|
queue = kmalloc_array(VHOST_RX_BATCH, sizeof(void *),
|
||||||
GFP_KERNEL);
|
GFP_KERNEL);
|
||||||
if (!queue) {
|
if (!queue) {
|
||||||
kfree(vqs);
|
kfree(vqs);
|
||||||
|
@ -1046,23 +1058,23 @@ err:
|
||||||
return ERR_PTR(r);
|
return ERR_PTR(r);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct skb_array *get_tap_skb_array(int fd)
|
static struct ptr_ring *get_tap_ptr_ring(int fd)
|
||||||
{
|
{
|
||||||
struct skb_array *array;
|
struct ptr_ring *ring;
|
||||||
struct file *file = fget(fd);
|
struct file *file = fget(fd);
|
||||||
|
|
||||||
if (!file)
|
if (!file)
|
||||||
return NULL;
|
return NULL;
|
||||||
array = tun_get_skb_array(file);
|
ring = tun_get_tx_ring(file);
|
||||||
if (!IS_ERR(array))
|
if (!IS_ERR(ring))
|
||||||
goto out;
|
goto out;
|
||||||
array = tap_get_skb_array(file);
|
ring = tap_get_ptr_ring(file);
|
||||||
if (!IS_ERR(array))
|
if (!IS_ERR(ring))
|
||||||
goto out;
|
goto out;
|
||||||
array = NULL;
|
ring = NULL;
|
||||||
out:
|
out:
|
||||||
fput(file);
|
fput(file);
|
||||||
return array;
|
return ring;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct socket *get_tap_socket(int fd)
|
static struct socket *get_tap_socket(int fd)
|
||||||
|
@ -1143,7 +1155,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
|
||||||
vq->private_data = sock;
|
vq->private_data = sock;
|
||||||
vhost_net_buf_unproduce(nvq);
|
vhost_net_buf_unproduce(nvq);
|
||||||
if (index == VHOST_NET_VQ_RX)
|
if (index == VHOST_NET_VQ_RX)
|
||||||
nvq->rx_array = get_tap_skb_array(fd);
|
nvq->rx_ring = get_tap_ptr_ring(fd);
|
||||||
r = vhost_vq_init_access(vq);
|
r = vhost_vq_init_access(vq);
|
||||||
if (r)
|
if (r)
|
||||||
goto err_used;
|
goto err_used;
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
|
|
||||||
#if IS_ENABLED(CONFIG_TAP)
|
#if IS_ENABLED(CONFIG_TAP)
|
||||||
struct socket *tap_get_socket(struct file *);
|
struct socket *tap_get_socket(struct file *);
|
||||||
struct skb_array *tap_get_skb_array(struct file *file);
|
struct ptr_ring *tap_get_ptr_ring(struct file *file);
|
||||||
#else
|
#else
|
||||||
#include <linux/err.h>
|
#include <linux/err.h>
|
||||||
#include <linux/errno.h>
|
#include <linux/errno.h>
|
||||||
|
@ -14,7 +14,7 @@ static inline struct socket *tap_get_socket(struct file *f)
|
||||||
{
|
{
|
||||||
return ERR_PTR(-EINVAL);
|
return ERR_PTR(-EINVAL);
|
||||||
}
|
}
|
||||||
static inline struct skb_array *tap_get_skb_array(struct file *f)
|
static inline struct ptr_ring *tap_get_ptr_ring(struct file *f)
|
||||||
{
|
{
|
||||||
return ERR_PTR(-EINVAL);
|
return ERR_PTR(-EINVAL);
|
||||||
}
|
}
|
||||||
|
@ -70,7 +70,7 @@ struct tap_queue {
|
||||||
u16 queue_index;
|
u16 queue_index;
|
||||||
bool enabled;
|
bool enabled;
|
||||||
struct list_head next;
|
struct list_head next;
|
||||||
struct skb_array skb_array;
|
struct ptr_ring ring;
|
||||||
};
|
};
|
||||||
|
|
||||||
rx_handler_result_t tap_handle_frame(struct sk_buff **pskb);
|
rx_handler_result_t tap_handle_frame(struct sk_buff **pskb);
|
||||||
|
|
|
@ -17,9 +17,14 @@
|
||||||
|
|
||||||
#include <uapi/linux/if_tun.h>
|
#include <uapi/linux/if_tun.h>
|
||||||
|
|
||||||
|
#define TUN_XDP_FLAG 0x1UL
|
||||||
|
|
||||||
#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
|
#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
|
||||||
struct socket *tun_get_socket(struct file *);
|
struct socket *tun_get_socket(struct file *);
|
||||||
struct skb_array *tun_get_skb_array(struct file *file);
|
struct ptr_ring *tun_get_tx_ring(struct file *file);
|
||||||
|
bool tun_is_xdp_buff(void *ptr);
|
||||||
|
void *tun_xdp_to_ptr(void *ptr);
|
||||||
|
void *tun_ptr_to_xdp(void *ptr);
|
||||||
#else
|
#else
|
||||||
#include <linux/err.h>
|
#include <linux/err.h>
|
||||||
#include <linux/errno.h>
|
#include <linux/errno.h>
|
||||||
|
@ -29,9 +34,21 @@ static inline struct socket *tun_get_socket(struct file *f)
|
||||||
{
|
{
|
||||||
return ERR_PTR(-EINVAL);
|
return ERR_PTR(-EINVAL);
|
||||||
}
|
}
|
||||||
static inline struct skb_array *tun_get_skb_array(struct file *f)
|
static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
|
||||||
{
|
{
|
||||||
return ERR_PTR(-EINVAL);
|
return ERR_PTR(-EINVAL);
|
||||||
}
|
}
|
||||||
|
static inline bool tun_is_xdp_buff(void *ptr)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
void *tun_xdp_to_ptr(void *ptr)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
void *tun_ptr_to_xdp(void *ptr)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
#endif /* CONFIG_TUN */
|
#endif /* CONFIG_TUN */
|
||||||
#endif /* __IF_TUN_H */
|
#endif /* __IF_TUN_H */
|
||||||
|
|
Loading…
Add table
Reference in a new issue