virtio/vhost: new features for 4.8

- New vsock device support in host and guest
 - Platform IOMMU support in host and guest,
   including compatibility quirks for legacy systems.
 - Misc fixes and cleanups.
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQEcBAABAgAGBQJXofvbAAoJECgfDbjSjVRpUTIH/iEoK9h636tBayXy0PXkPby0
 6fMaRFy6H1HgEttgDhJE8Pqg/ba3qaW9Em0fHyFq7Mp2waFHAZ8hAT8phC6TAK3c
 CIBnfzyyuI8u3N9SnNOfelPVcwCBfuALuuTsXB/rwKbYQEVv+U5Rdt3Vyx9+lXkj
 P005klz7PfqxFhQrrnj4Eh7VawtHwmMuLH8YoWpCZpM71dHPo6eL+3ftKwhH2boo
 qK86uVprwba03Pewpm13vQnotemfVfUUkjXd4EJpG3dx7E0KZosuj0ZG9OV8mPGQ
 Cl2gBdUhocdJgeUnAHmf6tumYi9KFlYfy6xLy44YMmN7FL3E9nQjaKZp25UKfiM=
 =ztIm
 -----END PGP SIGNATURE-----

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio/vhost updates from Michael Tsirkin:

 - new vsock device support in host and guest

 - platform IOMMU support in host and guest, including compatibility
   quirks for legacy systems.

 - misc fixes and cleanups.

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  VSOCK: Use kvfree()
  vhost: split out vringh Kconfig
  vhost: detect 32 bit integer wrap around
  vhost: new device IOTLB API
  vhost: drop vringh dependency
  vhost: convert pre sorted vhost memory array to interval tree
  vhost: introduce vhost memory accessors
  VSOCK: Add Makefile and Kconfig
  VSOCK: Introduce vhost_vsock.ko
  VSOCK: Introduce virtio_transport.ko
  VSOCK: Introduce virtio_vsock_common.ko
  VSOCK: defer sock removal to transports
  VSOCK: transport-specific vsock_transport functions
  vhost: drop vringh dependency
  vop: pull in vhost Kconfig
  virtio: new feature to detect IOMMU device quirk
  balloon: check the number of available pages in leak balloon
  vhost: lockless enqueuing
  vhost: simplify work flushing
This commit is contained in:
Linus Torvalds 2016-08-06 09:20:13 -04:00
commit 0803e04011
28 changed files with 3765 additions and 201 deletions

View file

@ -12419,6 +12419,19 @@ S: Maintained
F: drivers/media/v4l2-core/videobuf2-* F: drivers/media/v4l2-core/videobuf2-*
F: include/media/videobuf2-* F: include/media/videobuf2-*
VIRTIO AND VHOST VSOCK DRIVER
M: Stefan Hajnoczi <stefanha@redhat.com>
L: kvm@vger.kernel.org
L: virtualization@lists.linux-foundation.org
L: netdev@vger.kernel.org
S: Maintained
F: include/linux/virtio_vsock.h
F: include/uapi/linux/virtio_vsock.h
F: net/vmw_vsock/virtio_transport_common.c
F: net/vmw_vsock/virtio_transport.c
F: drivers/vhost/vsock.c
F: drivers/vhost/vsock.h
VIRTUAL SERIO DEVICE DRIVER VIRTUAL SERIO DEVICE DRIVER
M: Stephen Chandler Paul <thatslyude@gmail.com> M: Stephen Chandler Paul <thatslyude@gmail.com>
S: Maintained S: Maintained

View file

@ -138,6 +138,7 @@ obj-$(CONFIG_OF) += of/
obj-$(CONFIG_SSB) += ssb/ obj-$(CONFIG_SSB) += ssb/
obj-$(CONFIG_BCMA) += bcma/ obj-$(CONFIG_BCMA) += bcma/
obj-$(CONFIG_VHOST_RING) += vhost/ obj-$(CONFIG_VHOST_RING) += vhost/
obj-$(CONFIG_VHOST) += vhost/
obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_VLYNQ) += vlynq/
obj-$(CONFIG_STAGING) += staging/ obj-$(CONFIG_STAGING) += staging/
obj-y += platform/ obj-y += platform/

View file

@ -146,3 +146,7 @@ config VOP
More information about the Intel MIC family as well as the Linux More information about the Intel MIC family as well as the Linux
OS and tools for MIC to use with this driver are available from OS and tools for MIC to use with this driver are available from
<http://software.intel.com/en-us/mic-developer>. <http://software.intel.com/en-us/mic-developer>.
if VOP
source "drivers/vhost/Kconfig.vringh"
endif

View file

@ -52,5 +52,5 @@ config CAIF_VIRTIO
The caif driver for CAIF over Virtio. The caif driver for CAIF over Virtio.
if CAIF_VIRTIO if CAIF_VIRTIO
source "drivers/vhost/Kconfig" source "drivers/vhost/Kconfig.vringh"
endif endif

View file

@ -2,7 +2,6 @@ config VHOST_NET
tristate "Host kernel accelerator for virtio net" tristate "Host kernel accelerator for virtio net"
depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP)
select VHOST select VHOST
select VHOST_RING
---help--- ---help---
This kernel module can be loaded in host kernel to accelerate This kernel module can be loaded in host kernel to accelerate
guest networking with virtio_net. Not to be confused with virtio_net guest networking with virtio_net. Not to be confused with virtio_net
@ -15,17 +14,24 @@ config VHOST_SCSI
tristate "VHOST_SCSI TCM fabric driver" tristate "VHOST_SCSI TCM fabric driver"
depends on TARGET_CORE && EVENTFD && m depends on TARGET_CORE && EVENTFD && m
select VHOST select VHOST
select VHOST_RING
default n default n
---help--- ---help---
Say M here to enable the vhost_scsi TCM fabric module Say M here to enable the vhost_scsi TCM fabric module
for use with virtio-scsi guests for use with virtio-scsi guests
config VHOST_RING config VHOST_VSOCK
tristate tristate "vhost virtio-vsock driver"
depends on VSOCKETS && EVENTFD
select VIRTIO_VSOCKETS_COMMON
select VHOST
default n
---help--- ---help---
This option is selected by any driver which needs to access This kernel module can be loaded in the host kernel to provide AF_VSOCK
the host side of a virtio ring. sockets for communicating with guests. The guests must have the
virtio_transport.ko driver loaded to use the virtio-vsock device.
To compile this driver as a module, choose M here: the module will be called
vhost_vsock.
config VHOST config VHOST
tristate tristate

View file

@ -0,0 +1,5 @@
config VHOST_RING
tristate
---help---
This option is selected by any driver which needs to access
the host side of a virtio ring.

View file

@ -4,5 +4,9 @@ vhost_net-y := net.o
obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o
vhost_scsi-y := scsi.o vhost_scsi-y := scsi.o
obj-$(CONFIG_VHOST_VSOCK) += vhost_vsock.o
vhost_vsock-y := vsock.o
obj-$(CONFIG_VHOST_RING) += vringh.o obj-$(CONFIG_VHOST_RING) += vringh.o
obj-$(CONFIG_VHOST) += vhost.o obj-$(CONFIG_VHOST) += vhost.o

View file

@ -61,7 +61,8 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
enum { enum {
VHOST_NET_FEATURES = VHOST_FEATURES | VHOST_NET_FEATURES = VHOST_FEATURES |
(1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
(1ULL << VIRTIO_NET_F_MRG_RXBUF) (1ULL << VIRTIO_NET_F_MRG_RXBUF) |
(1ULL << VIRTIO_F_IOMMU_PLATFORM)
}; };
enum { enum {
@ -334,7 +335,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
{ {
unsigned long uninitialized_var(endtime); unsigned long uninitialized_var(endtime);
int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
out_num, in_num, NULL, NULL); out_num, in_num, NULL, NULL);
if (r == vq->num && vq->busyloop_timeout) { if (r == vq->num && vq->busyloop_timeout) {
preempt_disable(); preempt_disable();
@ -344,7 +345,7 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
cpu_relax_lowlatency(); cpu_relax_lowlatency();
preempt_enable(); preempt_enable();
r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
out_num, in_num, NULL, NULL); out_num, in_num, NULL, NULL);
} }
return r; return r;
@ -377,6 +378,9 @@ static void handle_tx(struct vhost_net *net)
if (!sock) if (!sock)
goto out; goto out;
if (!vq_iotlb_prefetch(vq))
goto out;
vhost_disable_notify(&net->dev, vq); vhost_disable_notify(&net->dev, vq);
hdr_size = nvq->vhost_hlen; hdr_size = nvq->vhost_hlen;
@ -652,6 +656,10 @@ static void handle_rx(struct vhost_net *net)
sock = vq->private_data; sock = vq->private_data;
if (!sock) if (!sock)
goto out; goto out;
if (!vq_iotlb_prefetch(vq))
goto out;
vhost_disable_notify(&net->dev, vq); vhost_disable_notify(&net->dev, vq);
vhost_net_disable_vq(net, vq); vhost_net_disable_vq(net, vq);
@ -1052,20 +1060,20 @@ static long vhost_net_reset_owner(struct vhost_net *n)
struct socket *tx_sock = NULL; struct socket *tx_sock = NULL;
struct socket *rx_sock = NULL; struct socket *rx_sock = NULL;
long err; long err;
struct vhost_memory *memory; struct vhost_umem *umem;
mutex_lock(&n->dev.mutex); mutex_lock(&n->dev.mutex);
err = vhost_dev_check_owner(&n->dev); err = vhost_dev_check_owner(&n->dev);
if (err) if (err)
goto done; goto done;
memory = vhost_dev_reset_owner_prepare(); umem = vhost_dev_reset_owner_prepare();
if (!memory) { if (!umem) {
err = -ENOMEM; err = -ENOMEM;
goto done; goto done;
} }
vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_stop(n, &tx_sock, &rx_sock);
vhost_net_flush(n); vhost_net_flush(n);
vhost_dev_reset_owner(&n->dev, memory); vhost_dev_reset_owner(&n->dev, umem);
vhost_net_vq_reset(n); vhost_net_vq_reset(n);
done: done:
mutex_unlock(&n->dev.mutex); mutex_unlock(&n->dev.mutex);
@ -1096,10 +1104,14 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
} }
mutex_lock(&n->dev.mutex); mutex_lock(&n->dev.mutex);
if ((features & (1 << VHOST_F_LOG_ALL)) && if ((features & (1 << VHOST_F_LOG_ALL)) &&
!vhost_log_access_ok(&n->dev)) { !vhost_log_access_ok(&n->dev))
mutex_unlock(&n->dev.mutex); goto out_unlock;
return -EFAULT;
if ((features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))) {
if (vhost_init_device_iotlb(&n->dev, true))
goto out_unlock;
} }
for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
mutex_lock(&n->vqs[i].vq.mutex); mutex_lock(&n->vqs[i].vq.mutex);
n->vqs[i].vq.acked_features = features; n->vqs[i].vq.acked_features = features;
@ -1109,6 +1121,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
} }
mutex_unlock(&n->dev.mutex); mutex_unlock(&n->dev.mutex);
return 0; return 0;
out_unlock:
mutex_unlock(&n->dev.mutex);
return -EFAULT;
} }
static long vhost_net_set_owner(struct vhost_net *n) static long vhost_net_set_owner(struct vhost_net *n)
@ -1182,9 +1198,40 @@ static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl,
} }
#endif #endif
/* read_iter handler of the vhost-net character device.
 *
 * Forwards the read to the generic vhost_chr_read_iter() on the vhost_dev
 * embedded in this file's vhost_net instance, telling it whether the file
 * has O_NONBLOCK set.
 */
static ssize_t vhost_net_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *filp = iocb->ki_filp;
	struct vhost_net *net = filp->private_data;

	return vhost_chr_read_iter(&net->dev, to, filp->f_flags & O_NONBLOCK);
}
/* write_iter handler of the vhost-net character device.
 *
 * Hands the data in @from to the generic vhost_chr_write_iter() on the
 * vhost_dev embedded in this file's vhost_net instance.
 */
static ssize_t vhost_net_chr_write_iter(struct kiocb *iocb,
					struct iov_iter *from)
{
	struct vhost_net *net = iocb->ki_filp->private_data;

	return vhost_chr_write_iter(&net->dev, from);
}
/* poll handler of the vhost-net character device: delegates to the generic
 * vhost_chr_poll() on the vhost_dev embedded in this file's vhost_net.
 */
static unsigned int vhost_net_chr_poll(struct file *file, poll_table *wait)
{
	struct vhost_net *net = file->private_data;

	return vhost_chr_poll(file, &net->dev, wait);
}
static const struct file_operations vhost_net_fops = { static const struct file_operations vhost_net_fops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.release = vhost_net_release, .release = vhost_net_release,
.read_iter = vhost_net_chr_read_iter,
.write_iter = vhost_net_chr_write_iter,
.poll = vhost_net_chr_poll,
.unlocked_ioctl = vhost_net_ioctl, .unlocked_ioctl = vhost_net_ioctl,
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
.compat_ioctl = vhost_net_compat_ioctl, .compat_ioctl = vhost_net_compat_ioctl,

File diff suppressed because it is too large Load diff

View file

@ -15,13 +15,15 @@
struct vhost_work; struct vhost_work;
typedef void (*vhost_work_fn_t)(struct vhost_work *work); typedef void (*vhost_work_fn_t)(struct vhost_work *work);
#define VHOST_WORK_QUEUED 1
struct vhost_work { struct vhost_work {
struct list_head node; struct llist_node node;
vhost_work_fn_t fn; vhost_work_fn_t fn;
wait_queue_head_t done; wait_queue_head_t done;
int flushing; int flushing;
unsigned queue_seq; unsigned queue_seq;
unsigned done_seq; unsigned done_seq;
unsigned long flags;
}; };
/* Poll a file (eventfd or socket) */ /* Poll a file (eventfd or socket) */
@ -53,6 +55,27 @@ struct vhost_log {
u64 len; u64 len;
}; };
/* Shorthand accessors for the ->start and ->last members of a node. */
#define START(node) ((node)->start)
#define LAST(node) ((node)->last)
/* One entry of a struct vhost_umem.
 *
 * NOTE(review): the code that fills in and walks these nodes is not visible
 * here; the per-field notes below are inferred from names and types only and
 * should be confirmed against vhost.c.
 */
struct vhost_umem_node {
struct rb_node rb; /* presumably linkage into vhost_umem.umem_tree */
struct list_head link; /* presumably linkage into vhost_umem.umem_list */
__u64 start; /* read through START() */
__u64 last; /* read through LAST() */
__u64 size;
__u64 userspace_addr;
__u32 perm;
__u32 flags_padding;
__u64 __subtree_last; /* presumably tree-maintenance state; TODO confirm */
};
/* Container for a set of vhost_umem_node entries; struct vhost_dev and struct
 * vhost_virtqueue each hold pointers to one as ->umem and ->iotlb.
 *
 * NOTE(review): the two heads presumably index the same nodes (via
 * vhost_umem_node.rb and vhost_umem_node.link) - confirm in vhost.c.
 */
struct vhost_umem {
struct rb_root umem_tree;
struct list_head umem_list;
int numem; /* presumably the number of nodes; TODO confirm */
};
/* The virtqueue structure describes a queue attached to a device. */ /* The virtqueue structure describes a queue attached to a device. */
struct vhost_virtqueue { struct vhost_virtqueue {
struct vhost_dev *dev; struct vhost_dev *dev;
@ -98,10 +121,12 @@ struct vhost_virtqueue {
u64 log_addr; u64 log_addr;
struct iovec iov[UIO_MAXIOV]; struct iovec iov[UIO_MAXIOV];
struct iovec iotlb_iov[64];
struct iovec *indirect; struct iovec *indirect;
struct vring_used_elem *heads; struct vring_used_elem *heads;
/* Protected by virtqueue mutex. */ /* Protected by virtqueue mutex. */
struct vhost_memory *memory; struct vhost_umem *umem;
struct vhost_umem *iotlb;
void *private_data; void *private_data;
u64 acked_features; u64 acked_features;
/* Log write descriptors */ /* Log write descriptors */
@ -118,25 +143,35 @@ struct vhost_virtqueue {
u32 busyloop_timeout; u32 busyloop_timeout;
}; };
/* A struct vhost_msg together with the virtqueue it relates to and a list
 * link.  Instances are returned by vhost_new_msg() and are passed to / returned
 * from vhost_enqueue_msg() and vhost_dequeue_msg() along with a list head.
 */
struct vhost_msg_node {
struct vhost_msg msg;
struct vhost_virtqueue *vq;
struct list_head node; /* presumably the link used by the enqueue/dequeue helpers */
};
struct vhost_dev { struct vhost_dev {
struct vhost_memory *memory;
struct mm_struct *mm; struct mm_struct *mm;
struct mutex mutex; struct mutex mutex;
struct vhost_virtqueue **vqs; struct vhost_virtqueue **vqs;
int nvqs; int nvqs;
struct file *log_file; struct file *log_file;
struct eventfd_ctx *log_ctx; struct eventfd_ctx *log_ctx;
spinlock_t work_lock; struct llist_head work_list;
struct list_head work_list;
struct task_struct *worker; struct task_struct *worker;
struct vhost_umem *umem;
struct vhost_umem *iotlb;
spinlock_t iotlb_lock;
struct list_head read_list;
struct list_head pending_list;
wait_queue_head_t wait;
}; };
void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs);
long vhost_dev_set_owner(struct vhost_dev *dev); long vhost_dev_set_owner(struct vhost_dev *dev);
bool vhost_dev_has_owner(struct vhost_dev *dev); bool vhost_dev_has_owner(struct vhost_dev *dev);
long vhost_dev_check_owner(struct vhost_dev *); long vhost_dev_check_owner(struct vhost_dev *);
struct vhost_memory *vhost_dev_reset_owner_prepare(void); struct vhost_umem *vhost_dev_reset_owner_prepare(void);
void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_memory *); void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_umem *);
void vhost_dev_cleanup(struct vhost_dev *, bool locked); void vhost_dev_cleanup(struct vhost_dev *, bool locked);
void vhost_dev_stop(struct vhost_dev *); void vhost_dev_stop(struct vhost_dev *);
long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp); long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp);
@ -165,6 +200,21 @@ bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
unsigned int log_num, u64 len); unsigned int log_num, u64 len);
int vq_iotlb_prefetch(struct vhost_virtqueue *vq);
struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type);
void vhost_enqueue_msg(struct vhost_dev *dev,
struct list_head *head,
struct vhost_msg_node *node);
struct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev,
struct list_head *head);
unsigned int vhost_chr_poll(struct file *file, struct vhost_dev *dev,
poll_table *wait);
ssize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to,
int noblock);
ssize_t vhost_chr_write_iter(struct vhost_dev *dev,
struct iov_iter *from);
int vhost_init_device_iotlb(struct vhost_dev *d, bool enabled);
#define vq_err(vq, fmt, ...) do { \ #define vq_err(vq, fmt, ...) do { \
pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \

719
drivers/vhost/vsock.c Normal file
View file

@ -0,0 +1,719 @@
/*
* vhost transport for vsock
*
* Copyright (C) 2013-2015 Red Hat, Inc.
* Author: Asias He <asias@redhat.com>
* Stefan Hajnoczi <stefanha@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2.
*/
#include <linux/miscdevice.h>
#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/vmalloc.h>
#include <net/sock.h>
#include <linux/virtio_vsock.h>
#include <linux/vhost.h>
#include <net/af_vsock.h>
#include "vhost.h"
/* CID returned by vhost_transport_get_local_cid(), i.e. the host's own CID. */
#define VHOST_VSOCK_DEFAULT_HOST_CID 2
/* Feature bits reported by VHOST_GET_FEATURES; VHOST_SET_FEATURES rejects any
 * bit outside this mask.
 */
enum {
VHOST_VSOCK_FEATURES = VHOST_FEATURES,
};
/* Used to track all the vhost_vsock instances on the system. */
static DEFINE_SPINLOCK(vhost_vsock_lock);
static LIST_HEAD(vhost_vsock_list);
/* Per-open state of the vhost-vsock device; one instance is allocated in
 * vhost_vsock_dev_open() and stored in file->private_data.
 */
struct vhost_vsock {
struct vhost_dev dev;
/* Indexed by VSOCK_VQ_RX and VSOCK_VQ_TX */
struct vhost_virtqueue vqs[2];
/* Link to global vhost_vsock_list, protected by vhost_vsock_lock */
struct list_head list;
/* Runs vhost_transport_send_pkt_work() to drain send_pkt_list */
struct vhost_work send_pkt_work;
/* Protects send_pkt_list */
spinlock_t send_pkt_list_lock;
struct list_head send_pkt_list; /* host->guest pending packets */
/* Number of queued packets with pkt->reply set that have not yet been
 * delivered; compared against the TX virtqueue size to throttle TX.
 */
atomic_t queued_replies;
/* CID set through VHOST_VSOCK_SET_GUEST_CID; 0 means "no CID yet" */
u32 guest_cid;
};
/* .get_local_cid callback of vhost_transport: always reports the fixed host
 * CID, VHOST_VSOCK_DEFAULT_HOST_CID.
 */
static u32 vhost_transport_get_local_cid(void)
{
return VHOST_VSOCK_DEFAULT_HOST_CID;
}
/* Find the vhost_vsock instance whose guest CID equals @guest_cid.
 *
 * Walks the global vhost_vsock_list under vhost_vsock_lock, ignoring
 * instances that have no CID yet (guest_cid == 0).
 *
 * Returns the matching instance, or NULL if none is found.  The lock is
 * released before returning and no reference is taken on the result.
 */
static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
{
	struct vhost_vsock *match = NULL;
	struct vhost_vsock *iter;

	spin_lock_bh(&vhost_vsock_lock);
	list_for_each_entry(iter, &vhost_vsock_list, list) {
		u32 cid = iter->guest_cid;

		/* A CID of 0 marks an instance without an assigned CID */
		if (cid != 0 && cid == guest_cid) {
			match = iter;
			break;
		}
	}
	spin_unlock_bh(&vhost_vsock_lock);

	return match;
}
/* Deliver pending host->guest packets from vsock->send_pkt_list into @vq.
 *
 * Runs under vq->mutex and does nothing if the queue has no private_data
 * (i.e. it has not been started).  For each packet it takes one descriptor
 * chain from @vq, copies the packet header followed by the payload into the
 * guest's buffers, marks the chain used and frees the packet.  The guest is
 * signalled once at the end if at least one packet was delivered.
 *
 * When delivering a reply packet brings queued_replies down from exactly the
 * TX virtqueue size, TX processing is re-queued after vq->mutex is dropped;
 * this is the counterpart of the check in vhost_vsock_more_replies().
 */
static void
vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
struct vhost_virtqueue *vq)
{
struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
bool added = false;
bool restart_tx = false;
mutex_lock(&vq->mutex);
if (!vq->private_data)
goto out;
/* Avoid further vmexits, we're already processing the virtqueue */
vhost_disable_notify(&vsock->dev, vq);
for (;;) {
struct virtio_vsock_pkt *pkt;
struct iov_iter iov_iter;
unsigned out, in;
size_t nbytes;
size_t len;
int head;
/* Pop the oldest pending packet, or stop if nothing is pending */
spin_lock_bh(&vsock->send_pkt_list_lock);
if (list_empty(&vsock->send_pkt_list)) {
spin_unlock_bh(&vsock->send_pkt_list_lock);
vhost_enable_notify(&vsock->dev, vq);
break;
}
pkt = list_first_entry(&vsock->send_pkt_list,
struct virtio_vsock_pkt, list);
list_del_init(&pkt->list);
spin_unlock_bh(&vsock->send_pkt_list_lock);
head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
&out, &in, NULL, NULL);
/* Negative head: put the packet back at the front and give up */
if (head < 0) {
spin_lock_bh(&vsock->send_pkt_list_lock);
list_add(&pkt->list, &vsock->send_pkt_list);
spin_unlock_bh(&vsock->send_pkt_list_lock);
break;
}
/* head == vq->num: no descriptor was obtained; put the packet back at
 * the front so ordering is preserved.
 */
if (head == vq->num) {
spin_lock_bh(&vsock->send_pkt_list_lock);
list_add(&pkt->list, &vsock->send_pkt_list);
spin_unlock_bh(&vsock->send_pkt_list_lock);
/* We cannot finish yet if more buffers snuck in while
* re-enabling notify.
*/
if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
vhost_disable_notify(&vsock->dev, vq);
continue;
}
break;
}
/* This queue only carries data towards the guest, so the chain must
 * consist of input (device-writable) buffers only; the packet is
 * dropped otherwise.
 */
if (out) {
virtio_transport_free_pkt(pkt);
vq_err(vq, "Expected 0 output buffers, got %u\n", out);
break;
}
len = iov_length(&vq->iov[out], in);
iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);
/* Header first, then payload; a short copy drops the packet */
nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
if (nbytes != sizeof(pkt->hdr)) {
virtio_transport_free_pkt(pkt);
vq_err(vq, "Faulted on copying pkt hdr\n");
break;
}
nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter);
if (nbytes != pkt->len) {
virtio_transport_free_pkt(pkt);
vq_err(vq, "Faulted on copying pkt buf\n");
break;
}
vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
added = true;
if (pkt->reply) {
int val;
val = atomic_dec_return(&vsock->queued_replies);
/* Do we have resources to resume tx processing? */
if (val + 1 == tx_vq->num)
restart_tx = true;
}
virtio_transport_free_pkt(pkt);
}
if (added)
vhost_signal(&vsock->dev, vq);
out:
mutex_unlock(&vq->mutex);
/* Queued only after vq->mutex has been released */
if (restart_tx)
vhost_poll_queue(&tx_vq->poll);
}
/* vhost work callback for send_pkt_work: pushes pending host->guest packets
 * into the RX virtqueue of the vhost_vsock that embeds @work.
 */
static void vhost_transport_send_pkt_work(struct vhost_work *work)
{
	struct vhost_vsock *owner =
		container_of(work, struct vhost_vsock, send_pkt_work);

	vhost_transport_do_send_pkt(owner, &owner->vqs[VSOCK_VQ_RX]);
}
/* .send_pkt callback of vhost_transport: queue @pkt for delivery to a guest.
 *
 * The destination instance is looked up by the packet's dst_cid.  On success
 * the packet is appended to that instance's send_pkt_list and the send work
 * is scheduled; ownership of @pkt passes to the list.  If no instance has
 * that CID the packet is freed here.
 *
 * Returns the payload length of @pkt on success, -ENODEV if there is no
 * vhost_vsock instance for the destination CID.
 */
static int
vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
{
	struct vhost_vsock *vsock;
	/* Read now: pkt may be consumed and freed once it is on the list */
	int len = pkt->len;

	/* Find the vhost_vsock according to guest context id */
	vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid));
	if (!vsock) {
		virtio_transport_free_pkt(pkt);
		return -ENODEV;
	}

	/* Accounted before queuing; decremented when the reply is delivered */
	if (pkt->reply)
		atomic_inc(&vsock->queued_replies);

	spin_lock_bh(&vsock->send_pkt_list_lock);
	list_add_tail(&pkt->list, &vsock->send_pkt_list);
	spin_unlock_bh(&vsock->send_pkt_list_lock);

	vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
	return len;
}
static struct virtio_vsock_pkt *
vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
unsigned int out, unsigned int in)
{
struct virtio_vsock_pkt *pkt;
struct iov_iter iov_iter;
size_t nbytes;
size_t len;
if (in != 0) {
vq_err(vq, "Expected 0 input buffers, got %u\n", in);
return NULL;
}
pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
if (!pkt)
return NULL;
len = iov_length(vq->iov, out);
iov_iter_init(&iov_iter, WRITE, vq->iov, out, len);
nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
if (nbytes != sizeof(pkt->hdr)) {
vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
sizeof(pkt->hdr), nbytes);
kfree(pkt);
return NULL;
}
if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
pkt->len = le32_to_cpu(pkt->hdr.len);
/* No payload */
if (!pkt->len)
return pkt;
/* The pkt is too big */
if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
kfree(pkt);
return NULL;
}
pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
if (!pkt->buf) {
kfree(pkt);
return NULL;
}
nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
if (nbytes != pkt->len) {
vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
pkt->len, nbytes);
virtio_transport_free_pkt(pkt);
return NULL;
}
return pkt;
}
/* Is there space left for replies to rx packets?
 *
 * True while the number of queued, undelivered replies is below the size of
 * the TX virtqueue.
 */
static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
{
	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];

	smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
	return atomic_read(&vsock->queued_replies) < tx_vq->num;
}
static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
{
struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
poll.work);
struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
dev);
struct virtio_vsock_pkt *pkt;
int head;
unsigned int out, in;
bool added = false;
mutex_lock(&vq->mutex);
if (!vq->private_data)
goto out;
vhost_disable_notify(&vsock->dev, vq);
for (;;) {
if (!vhost_vsock_more_replies(vsock)) {
/* Stop tx until the device processes already
* pending replies. Leave tx virtqueue
* callbacks disabled.
*/
goto no_more_replies;
}
head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
&out, &in, NULL, NULL);
if (head < 0)
break;
if (head == vq->num) {
if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
vhost_disable_notify(&vsock->dev, vq);
continue;
}
break;
}
pkt = vhost_vsock_alloc_pkt(vq, out, in);
if (!pkt) {
vq_err(vq, "Faulted on pkt\n");
continue;
}
/* Only accept correctly addressed packets */
if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
virtio_transport_recv_pkt(pkt);
else
virtio_transport_free_pkt(pkt);
vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
added = true;
}
no_more_replies:
if (added)
vhost_signal(&vsock->dev, vq);
out:
mutex_unlock(&vq->mutex);
}
/* Kick handler for the RX virtqueue: the guest kicked the queue, so try to
 * deliver pending host->guest packets into it.
 */
static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *rx_vq;
	struct vhost_vsock *owner;

	rx_vq = container_of(work, struct vhost_virtqueue, poll.work);
	owner = container_of(rx_vq->dev, struct vhost_vsock, dev);

	vhost_transport_do_send_pkt(owner, rx_vq);
}
/* Start the device (VHOST_VSOCK_SET_RUNNING with a non-zero argument).
 *
 * Under dev.mutex, verifies the caller owns the device, then for every
 * virtqueue checks that its rings are accessible and, if the queue is not
 * running yet, attaches it to @vsock and initialises ring access.
 *
 * Returns 0 on success.  On failure returns the error from the owner check,
 * -EFAULT if a virtqueue is not accessible, or the error from
 * vhost_vq_init_access(); in the latter two cases all virtqueues are
 * detached again.
 */
static int vhost_vsock_start(struct vhost_vsock *vsock)
{
	struct vhost_virtqueue *vq;
	size_t i;
	int ret;

	mutex_lock(&vsock->dev.mutex);

	ret = vhost_dev_check_owner(&vsock->dev);
	if (ret)
		goto err;

	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
		vq = &vsock->vqs[i];

		mutex_lock(&vq->mutex);

		if (!vhost_vq_access_ok(vq)) {
			ret = -EFAULT;
			goto err_vq;
		}

		if (!vq->private_data) {
			vq->private_data = vsock;
			/* Do not report success if ring access could not be
			 * set up.
			 */
			ret = vhost_vq_init_access(vq);
			if (ret)
				goto err_vq;
		}

		mutex_unlock(&vq->mutex);
	}

	mutex_unlock(&vsock->dev.mutex);
	return 0;

err_vq:
	/* Reached with the failing queue's mutex still held */
	vq->private_data = NULL;
	mutex_unlock(&vq->mutex);

	/* Detach every queue, including those attached earlier in the loop */
	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
		vq = &vsock->vqs[i];

		mutex_lock(&vq->mutex);
		vq->private_data = NULL;
		mutex_unlock(&vq->mutex);
	}
err:
	mutex_unlock(&vsock->dev.mutex);
	return ret;
}
/* Stop the device: under dev.mutex, verify the caller owns the device and
 * then detach every virtqueue by clearing its private_data under vq->mutex.
 *
 * Returns 0 on success or the error from vhost_dev_check_owner(), in which
 * case no virtqueue is touched.
 */
static int vhost_vsock_stop(struct vhost_vsock *vsock)
{
	struct vhost_virtqueue *vq;
	size_t idx;
	int rc;

	mutex_lock(&vsock->dev.mutex);

	rc = vhost_dev_check_owner(&vsock->dev);
	if (!rc) {
		for (idx = 0; idx < ARRAY_SIZE(vsock->vqs); idx++) {
			vq = &vsock->vqs[idx];

			mutex_lock(&vq->mutex);
			vq->private_data = NULL;
			mutex_unlock(&vq->mutex);
		}
	}

	mutex_unlock(&vsock->dev.mutex);
	return rc;
}
/* Release the memory of a vhost_vsock instance.  kvfree() is used because
 * vhost_vsock_dev_open() allocates the instance with kzalloc() and falls back
 * to vmalloc() when that fails.
 */
static void vhost_vsock_free(struct vhost_vsock *vsock)
{
kvfree(vsock);
}
/* open() handler of the vhost-vsock misc device.
 *
 * Allocates and initialises a vhost_vsock instance, wires up the TX and RX
 * virtqueue kick handlers, initialises the embedded vhost_dev, stores the
 * instance in file->private_data and finally adds it to the global
 * vhost_vsock_list.
 *
 * Returns 0 on success or -ENOMEM if an allocation fails.
 */
static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
{
	struct vhost_virtqueue **vqs;
	struct vhost_vsock *vsock;
	int ret;

	/* This struct is large and allocation could fail, fall back to vmalloc
	 * if there is no other way.
	 */
	vsock = kzalloc(sizeof(*vsock), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
	if (!vsock) {
		vsock = vmalloc(sizeof(*vsock));
		if (!vsock)
			return -ENOMEM;
	}

	vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		ret = -ENOMEM;
		goto out;
	}

	/* The vmalloc() fallback does not zero memory.  vhost_vsock_get()
	 * reads guest_cid as soon as the instance is on the global list, so
	 * mark "no CID assigned yet" explicitly.
	 */
	vsock->guest_cid = 0;

	atomic_set(&vsock->queued_replies, 0);

	vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
	vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
	vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
	vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;

	vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs));

	file->private_data = vsock;
	spin_lock_init(&vsock->send_pkt_list_lock);
	INIT_LIST_HEAD(&vsock->send_pkt_list);
	vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);

	/* Publish last, once the instance is fully initialised */
	spin_lock_bh(&vhost_vsock_lock);
	list_add_tail(&vsock->list, &vhost_vsock_list);
	spin_unlock_bh(&vhost_vsock_lock);
	return 0;

out:
	vhost_vsock_free(vsock);
	return ret;
}
static void vhost_vsock_flush(struct vhost_vsock *vsock)
{
int i;
for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++)
if (vsock->vqs[i].handle_kick)
vhost_poll_flush(&vsock->vqs[i].poll);
vhost_work_flush(&vsock->dev, &vsock->send_pkt_work);
}
/* Callback for vsock_for_each_connected_socket(): reset @sk if the guest it
 * is connected to no longer has a vhost_vsock instance.
 *
 * A reset socket is flagged SOCK_DONE, marked as fully shut down by the peer,
 * moved to SS_UNCONNECTED and has ECONNRESET reported on it.
 */
static void vhost_vsock_reset_orphans(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	/* vmci_transport.c doesn't take sk_lock here either.  At least we're
	 * under vsock_table_lock so the sock cannot disappear while we're
	 * executing.
	 */

	/* The guest is the remote end of the connection, so the lookup must
	 * use the remote CID.  The local CID of a socket on this transport is
	 * the host's, which vhost_vsock_set_cid() never accepts as a guest
	 * CID; looking that up would never match and every connected socket
	 * would be reset.
	 */
	if (vhost_vsock_get(vsk->remote_addr.svm_cid))
		return;

	sock_set_flag(sk, SOCK_DONE);
	vsk->peer_shutdown = SHUTDOWN_MASK;
	sk->sk_state = SS_UNCONNECTED;
	sk->sk_err = ECONNRESET;
	sk->sk_error_report(sk);
}
/* Free every packet still queued on @vsock->send_pkt_list. */
static void vhost_vsock_purge_send_pkt_list(struct vhost_vsock *vsock)
{
	struct virtio_vsock_pkt *pkt;

	spin_lock_bh(&vsock->send_pkt_list_lock);
	while (!list_empty(&vsock->send_pkt_list)) {
		pkt = list_first_entry(&vsock->send_pkt_list,
				       struct virtio_vsock_pkt, list);
		list_del_init(&pkt->list);
		virtio_transport_free_pkt(pkt);
	}
	spin_unlock_bh(&vsock->send_pkt_list_lock);
}

/* release() handler of the vhost-vsock misc device.
 *
 * Unlinks the instance from the global list so that vhost_vsock_get() can no
 * longer find it, resets sockets that were orphaned by that, stops and
 * flushes the device, drops packets that were never delivered and finally
 * frees the virtqueue pointer array and the instance itself.
 *
 * Always returns 0.
 */
static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
{
	struct vhost_vsock *vsock = file->private_data;

	spin_lock_bh(&vhost_vsock_lock);
	list_del(&vsock->list);
	spin_unlock_bh(&vhost_vsock_lock);

	/* Iterating over all connections for all CIDs to find orphans is
	 * inefficient.  Room for improvement here. */
	vsock_for_each_connected_socket(vhost_vsock_reset_orphans);

	vhost_vsock_stop(vsock);
	vhost_vsock_flush(vsock);
	vhost_dev_stop(&vsock->dev);

	vhost_vsock_purge_send_pkt_list(vsock);

	vhost_dev_cleanup(&vsock->dev, false);
	kfree(vsock->dev.vqs);
	vhost_vsock_free(vsock);
	return 0;
}
/* Assign @guest_cid to @vsock (VHOST_VSOCK_SET_GUEST_CID).
 *
 * Returns 0 on success, -EINVAL for a reserved or out-of-range CID and
 * -EADDRINUSE if another instance already uses that CID.
 */
static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
{
	struct vhost_vsock *owner;

	/* Refuse reserved CIDs (anything up to VMADDR_CID_HOST, and U32_MAX)
	 * as well as 64-bit CIDs, which are not yet supported.
	 */
	if (guest_cid <= VMADDR_CID_HOST || guest_cid >= U32_MAX)
		return -EINVAL;

	/* Refuse if CID is already in use */
	owner = vhost_vsock_get(guest_cid);
	if (owner && owner != vsock)
		return -EADDRINUSE;

	spin_lock_bh(&vhost_vsock_lock);
	vsock->guest_cid = guest_cid;
	spin_unlock_bh(&vhost_vsock_lock);

	return 0;
}
/* Record the feature bits acknowledged by userspace (VHOST_SET_FEATURES).
 *
 * Returns 0 on success, -EOPNOTSUPP if @features contains a bit outside
 * VHOST_VSOCK_FEATURES, or -EFAULT if VHOST_F_LOG_ALL is requested but
 * vhost_log_access_ok() fails.
 */
static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
{
	size_t idx;
	int rc = 0;

	if (features & ~VHOST_VSOCK_FEATURES)
		return -EOPNOTSUPP;

	mutex_lock(&vsock->dev.mutex);

	if ((features & (1 << VHOST_F_LOG_ALL)) &&
	    !vhost_log_access_ok(&vsock->dev)) {
		rc = -EFAULT;
		goto unlock;
	}

	/* Every virtqueue gets the same acknowledged feature set */
	for (idx = 0; idx < ARRAY_SIZE(vsock->vqs); idx++) {
		struct vhost_virtqueue *vq = &vsock->vqs[idx];

		mutex_lock(&vq->mutex);
		vq->acked_features = features;
		mutex_unlock(&vq->mutex);
	}

unlock:
	mutex_unlock(&vsock->dev.mutex);
	return rc;
}
static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
unsigned long arg)
{
struct vhost_vsock *vsock = f->private_data;
void __user *argp = (void __user *)arg;
u64 guest_cid;
u64 features;
int start;
int r;
switch (ioctl) {
case VHOST_VSOCK_SET_GUEST_CID:
if (copy_from_user(&guest_cid, argp, sizeof(guest_cid)))
return -EFAULT;
return vhost_vsock_set_cid(vsock, guest_cid);
case VHOST_VSOCK_SET_RUNNING:
if (copy_from_user(&start, argp, sizeof(start)))
return -EFAULT;
if (start)
return vhost_vsock_start(vsock);
else
return vhost_vsock_stop(vsock);
case VHOST_GET_FEATURES:
features = VHOST_VSOCK_FEATURES;
if (copy_to_user(argp, &features, sizeof(features)))
return -EFAULT;
return 0;
case VHOST_SET_FEATURES:
if (copy_from_user(&features, argp, sizeof(features)))
return -EFAULT;
return vhost_vsock_set_features(vsock, features);
default:
mutex_lock(&vsock->dev.mutex);
r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
if (r == -ENOIOCTLCMD)
r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
else
vhost_vsock_flush(vsock);
mutex_unlock(&vsock->dev.mutex);
return r;
}
}
/* File operations of the vhost-vsock misc device.  Only open, release, llseek
 * and ioctl are provided; there are no read/write handlers in this table.
 */
static const struct file_operations vhost_vsock_fops = {
.owner = THIS_MODULE,
.open = vhost_vsock_dev_open,
.release = vhost_vsock_dev_release,
.llseek = noop_llseek,
.unlocked_ioctl = vhost_vsock_dev_ioctl,
};
/* The "vhost-vsock" misc device, registered in vhost_vsock_init() and
 * deregistered in vhost_vsock_exit().  The minor number is assigned
 * dynamically.
 */
static struct miscdevice vhost_vsock_misc = {
.minor = MISC_DYNAMIC_MINOR,
.name = "vhost-vsock",
.fops = &vhost_vsock_fops,
};
/* The vsock transport implemented by this module, registered with
 * vsock_core_init() in vhost_vsock_init().
 *
 * Only get_local_cid and send_pkt are implemented in this file; every other
 * callback is one of the shared virtio_transport_* functions.
 */
static struct virtio_transport vhost_transport = {
.transport = {
.get_local_cid = vhost_transport_get_local_cid,
.init = virtio_transport_do_socket_init,
.destruct = virtio_transport_destruct,
.release = virtio_transport_release,
.connect = virtio_transport_connect,
.shutdown = virtio_transport_shutdown,
.dgram_enqueue = virtio_transport_dgram_enqueue,
.dgram_dequeue = virtio_transport_dgram_dequeue,
.dgram_bind = virtio_transport_dgram_bind,
.dgram_allow = virtio_transport_dgram_allow,
.stream_enqueue = virtio_transport_stream_enqueue,
.stream_dequeue = virtio_transport_stream_dequeue,
.stream_has_data = virtio_transport_stream_has_data,
.stream_has_space = virtio_transport_stream_has_space,
.stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
.stream_is_active = virtio_transport_stream_is_active,
.stream_allow = virtio_transport_stream_allow,
.notify_poll_in = virtio_transport_notify_poll_in,
.notify_poll_out = virtio_transport_notify_poll_out,
.notify_recv_init = virtio_transport_notify_recv_init,
.notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
.notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
.notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
.notify_send_init = virtio_transport_notify_send_init,
.notify_send_pre_block = virtio_transport_notify_send_pre_block,
.notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
.set_buffer_size = virtio_transport_set_buffer_size,
.set_min_buffer_size = virtio_transport_set_min_buffer_size,
.set_max_buffer_size = virtio_transport_set_max_buffer_size,
.get_buffer_size = virtio_transport_get_buffer_size,
.get_min_buffer_size = virtio_transport_get_min_buffer_size,
.get_max_buffer_size = virtio_transport_get_max_buffer_size,
},
/* Queues a packet for the guest addressed by the packet's dst_cid */
.send_pkt = vhost_transport_send_pkt,
};
/*
 * Module init: register the vhost transport with the vsock core, then
 * publish the "vhost-vsock" misc device.
 *
 * Returns 0 on success or a negative errno.  If the misc device cannot be
 * registered, the transport registration is rolled back so that a failed
 * module load does not leave the vsock core holding a pointer to this
 * module's transport.
 */
static int __init vhost_vsock_init(void)
{
	int ret;

	ret = vsock_core_init(&vhost_transport.transport);
	if (ret < 0)
		return ret;

	ret = misc_register(&vhost_vsock_misc);
	if (ret)
		vsock_core_exit();	/* undo vsock_core_init() */

	return ret;
}
/*
 * Module exit: tear down in the reverse order of vhost_vsock_init() --
 * remove the misc device first, then unregister the transport from the
 * vsock core.
 */
static void __exit vhost_vsock_exit(void)
{
	misc_deregister(&vhost_vsock_misc);
	vsock_core_exit();
}
/* Module entry/exit hooks and metadata for the vhost vsock transport. */
module_init(vhost_vsock_init);
module_exit(vhost_vsock_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("vhost transport for vsock ");

View file

@ -207,6 +207,8 @@ static unsigned leak_balloon(struct virtio_balloon *vb, size_t num)
num = min(num, ARRAY_SIZE(vb->pfns)); num = min(num, ARRAY_SIZE(vb->pfns));
mutex_lock(&vb->balloon_lock); mutex_lock(&vb->balloon_lock);
/* We can't release more pages than taken */
num = min(num, (size_t)vb->num_pages);
for (vb->num_pfns = 0; vb->num_pfns < num; for (vb->num_pfns = 0; vb->num_pfns < num;
vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) { vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
page = balloon_page_dequeue(vb_dev_info); page = balloon_page_dequeue(vb_dev_info);

View file

@ -117,7 +117,10 @@ struct vring_virtqueue {
#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq) #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
/* /*
* The interaction between virtio and a possible IOMMU is a mess. * Modern virtio devices have feature bits to specify whether they need a
* quirk and bypass the IOMMU. If not there, just use the DMA API.
*
* If there, the interaction between virtio and DMA API is messy.
* *
* On most systems with virtio, physical addresses match bus addresses, * On most systems with virtio, physical addresses match bus addresses,
* and it doesn't particularly matter whether we use the DMA API. * and it doesn't particularly matter whether we use the DMA API.
@ -133,10 +136,18 @@ struct vring_virtqueue {
* *
* For the time being, we preserve historic behavior and bypass the DMA * For the time being, we preserve historic behavior and bypass the DMA
* API. * API.
*
* TODO: install a per-device DMA ops structure that does the right thing
* taking into account all the above quirks, and use the DMA API
* unconditionally on data path.
*/ */
static bool vring_use_dma_api(struct virtio_device *vdev) static bool vring_use_dma_api(struct virtio_device *vdev)
{ {
if (!virtio_has_iommu_quirk(vdev))
return true;
/* Otherwise, we are left to guess. */
/* /*
* In theory, it's possible to have a buggy QEMU-supposed * In theory, it's possible to have a buggy QEMU-supposed
* emulated Q35 IOMMU and Xen enabled at the same time. On * emulated Q35 IOMMU and Xen enabled at the same time. On
@ -1099,6 +1110,8 @@ void vring_transport_features(struct virtio_device *vdev)
break; break;
case VIRTIO_F_VERSION_1: case VIRTIO_F_VERSION_1:
break; break;
case VIRTIO_F_IOMMU_PLATFORM:
break;
default: default:
/* We don't understand this bit. */ /* We don't understand this bit. */
__virtio_clear_bit(vdev, i); __virtio_clear_bit(vdev, i);

View file

@ -149,6 +149,19 @@ static inline bool virtio_has_feature(const struct virtio_device *vdev,
return __virtio_test_bit(vdev, fbit); return __virtio_test_bit(vdev, fbit);
} }
/**
 * virtio_has_iommu_quirk - tell whether the device uses the IOMMU bypass quirk
 * @vdev: the device
 *
 * Returns true when the VIRTIO_F_IOMMU_PLATFORM feature bit is NOT set for
 * @vdev, false when it is set.
 */
static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev)
{
	bool iommu_platform = virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);

	/*
	 * The polarity is deliberately inverted relative to most feature
	 * bits: legacy systems never set the bit, so "bit clear" has to mean
	 * "quirk present".
	 */
	return !iommu_platform;
}
static inline static inline
struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
vq_callback_t *c, const char *n) vq_callback_t *c, const char *n)

View file

@ -0,0 +1,154 @@
#ifndef _LINUX_VIRTIO_VSOCK_H
#define _LINUX_VIRTIO_VSOCK_H
#include <uapi/linux/virtio_vsock.h>
#include <linux/socket.h>
#include <net/sock.h>
#include <net/af_vsock.h>
/*
 * Buffer sizing constants for the virtio vsock transport.
 *
 * VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE is the size in bytes of each receive
 * buffer the guest driver allocates and posts to the rx virtqueue.
 * NOTE(review): the DEFAULT_{MIN_,,MAX_}BUF_SIZE values are presumably the
 * initial values of the per-socket buf_size_min/buf_size/buf_size_max
 * fields, and the MAX_* values upper limits -- confirm against
 * virtio_transport_common.c.
 */
#define VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE 128
#define VIRTIO_VSOCK_DEFAULT_BUF_SIZE (1024 * 256)
#define VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE (1024 * 256)
#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4)
#define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL
#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64)
/*
 * Virtqueue indices.  VSOCK_VQ_MAX is the number of virtqueues and is used
 * to size the vqs[] array in the guest driver's per-device state.
 */
enum {
VSOCK_VQ_RX = 0, /* for host to guest data */
VSOCK_VQ_TX = 1, /* for guest to host data */
VSOCK_VQ_EVENT = 2,
VSOCK_VQ_MAX = 3,
};
/* Per-socket state (accessed via vsk->trans) */
struct virtio_vsock_sock {
/* Back-pointer to the owning vsock socket. */
struct vsock_sock *vsk;
/* Protected by lock_sock(sk_vsock(trans->vsk)) */
u32 buf_size;
u32 buf_size_min;
u32 buf_size_max;
/* tx_lock guards the transmit-side counters, rx_lock the receive side. */
spinlock_t tx_lock;
spinlock_t rx_lock;
/* Protected by tx_lock */
/* NOTE(review): peer_* presumably mirror the peer's last advertised
 * buf_alloc/fwd_cnt header fields -- confirm in the common transport code.
 */
u32 tx_cnt;
u32 buf_alloc;
u32 peer_fwd_cnt;
u32 peer_buf_alloc;
/* Protected by rx_lock */
u32 fwd_cnt;
u32 rx_bytes;
struct list_head rx_queue;
};
/*
 * One vsock packet: the on-the-wire header plus an optional payload buffer.
 */
struct virtio_vsock_pkt {
/* Wire header; placed in its own scatterlist entry when queued. */
struct virtio_vsock_hdr hdr;
struct work_struct work;
/* Links the packet into a queue such as the driver's send_pkt_list. */
struct list_head list;
/* Payload buffer (may be NULL for header-only packets) and its length. */
void *buf;
u32 len;
/* NOTE(review): presumably the read offset into buf -- confirm. */
u32 off;
/* Set on packets that are replies; the guest driver counts these in
 * queued_replies to throttle rx processing.
 */
bool reply;
};
/*
 * Parameters describing a packet to be built.
 * NOTE(review): field meanings below are inferred from the names and from
 * the matching virtio_vsock_hdr fields; the consumer is not in this header.
 */
struct virtio_vsock_pkt_info {
u32 remote_cid, remote_port;
struct msghdr *msg;
u32 pkt_len;
u16 type;
u16 op;
u32 flags;
bool reply;
};
/*
 * A vsock transport extended with a virtio-specific transmit hook.  The
 * generic struct vsock_transport is embedded as the first member so that a
 * pointer to it can be converted back to the enclosing virtio_transport.
 */
struct virtio_transport {
/* This must be the first field */
struct vsock_transport transport;
/* Takes ownership of the packet */
int (*send_pkt)(struct virtio_vsock_pkt *pkt);
};
ssize_t
virtio_transport_stream_dequeue(struct vsock_sock *vsk,
struct msghdr *msg,
size_t len,
int type);
int
virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
struct msghdr *msg,
size_t len, int flags);
s64 virtio_transport_stream_has_data(struct vsock_sock *vsk);
s64 virtio_transport_stream_has_space(struct vsock_sock *vsk);
int virtio_transport_do_socket_init(struct vsock_sock *vsk,
struct vsock_sock *psk);
u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk);
u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk);
u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk);
/*
 * Buffer-size setters wired into the set_*buffer_size transport callbacks.
 * The socket parameter is named vsk in all three, matching every other
 * prototype in this header.
 */
void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val);
void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val);
void virtio_transport_set_max_buffer_size(struct vsock_sock *vsk, u64 val);
int
virtio_transport_notify_poll_in(struct vsock_sock *vsk,
size_t target,
bool *data_ready_now);
int
virtio_transport_notify_poll_out(struct vsock_sock *vsk,
size_t target,
bool *space_available_now);
int virtio_transport_notify_recv_init(struct vsock_sock *vsk,
size_t target, struct vsock_transport_recv_notify_data *data);
int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk,
size_t target, struct vsock_transport_recv_notify_data *data);
int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk,
size_t target, struct vsock_transport_recv_notify_data *data);
int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk,
size_t target, ssize_t copied, bool data_read,
struct vsock_transport_recv_notify_data *data);
int virtio_transport_notify_send_init(struct vsock_sock *vsk,
struct vsock_transport_send_notify_data *data);
int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk,
struct vsock_transport_send_notify_data *data);
int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk,
struct vsock_transport_send_notify_data *data);
int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk,
ssize_t written, struct vsock_transport_send_notify_data *data);
u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk);
bool virtio_transport_stream_is_active(struct vsock_sock *vsk);
bool virtio_transport_stream_allow(u32 cid, u32 port);
int virtio_transport_dgram_bind(struct vsock_sock *vsk,
struct sockaddr_vm *addr);
bool virtio_transport_dgram_allow(u32 cid, u32 port);
int virtio_transport_connect(struct vsock_sock *vsk);
int virtio_transport_shutdown(struct vsock_sock *vsk, int mode);
void virtio_transport_release(struct vsock_sock *vsk);
ssize_t
virtio_transport_stream_enqueue(struct vsock_sock *vsk,
struct msghdr *msg,
size_t len);
int
virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
struct sockaddr_vm *remote_addr,
struct msghdr *msg,
size_t len);
void virtio_transport_destruct(struct vsock_sock *vsk);
void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt);
void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt);
void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt);
u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted);
void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit);
#endif /* _LINUX_VIRTIO_VSOCK_H */

View file

@ -63,6 +63,8 @@ struct vsock_sock {
struct list_head accept_queue; struct list_head accept_queue;
bool rejected; bool rejected;
struct delayed_work dwork; struct delayed_work dwork;
struct delayed_work close_work;
bool close_work_scheduled;
u32 peer_shutdown; u32 peer_shutdown;
bool sent_request; bool sent_request;
bool ignore_connecting_rst; bool ignore_connecting_rst;
@ -165,6 +167,9 @@ static inline int vsock_core_init(const struct vsock_transport *t)
} }
void vsock_core_exit(void); void vsock_core_exit(void);
/* The transport may downcast this to access transport-specific functions */
const struct vsock_transport *vsock_core_get_transport(void);
/**** UTILS ****/ /**** UTILS ****/
void vsock_release_pending(struct sock *pending); void vsock_release_pending(struct sock *pending);
@ -177,6 +182,7 @@ void vsock_remove_connected(struct vsock_sock *vsk);
struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr); struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr);
struct sock *vsock_find_connected_socket(struct sockaddr_vm *src, struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
struct sockaddr_vm *dst); struct sockaddr_vm *dst);
void vsock_remove_sock(struct vsock_sock *vsk);
void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
#endif /* __AF_VSOCK_H__ */ #endif /* __AF_VSOCK_H__ */

View file

@ -0,0 +1,144 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM vsock
#if !defined(_TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H) || \
defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H
#include <linux/tracepoint.h>
/*
 * show_type() turns a packet type value into its name for trace output;
 * VIRTIO_VSOCK_TYPE_STREAM is the only type listed.
 */
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_TYPE_STREAM);
#define show_type(val) \
__print_symbolic(val, { VIRTIO_VSOCK_TYPE_STREAM, "STREAM" })
/*
 * show_op() turns a packet operation value into its name for trace output.
 * Each VIRTIO_VSOCK_OP_* constant has a matching TRACE_DEFINE_ENUM() line
 * and a matching table entry below; keep the two lists in sync.
 */
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_INVALID);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_REQUEST);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RESPONSE);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RST);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_SHUTDOWN);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RW);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_CREDIT_UPDATE);
TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_CREDIT_REQUEST);
#define show_op(val) \
__print_symbolic(val, \
{ VIRTIO_VSOCK_OP_INVALID, "INVALID" }, \
{ VIRTIO_VSOCK_OP_REQUEST, "REQUEST" }, \
{ VIRTIO_VSOCK_OP_RESPONSE, "RESPONSE" }, \
{ VIRTIO_VSOCK_OP_RST, "RST" }, \
{ VIRTIO_VSOCK_OP_SHUTDOWN, "SHUTDOWN" }, \
{ VIRTIO_VSOCK_OP_RW, "RW" }, \
{ VIRTIO_VSOCK_OP_CREDIT_UPDATE, "CREDIT_UPDATE" }, \
{ VIRTIO_VSOCK_OP_CREDIT_REQUEST, "CREDIT_REQUEST" })
/*
 * Trace event virtio_transport_alloc_pkt.
 *
 * Records source and destination CID:port, payload length, type, op and
 * flags.  The printed line shows type and op by name via show_type() and
 * show_op(), and flags in hex.
 * NOTE(review): by its name this fires when a packet is allocated; the
 * call site is not in this header.
 */
TRACE_EVENT(virtio_transport_alloc_pkt,
TP_PROTO(
__u32 src_cid, __u32 src_port,
__u32 dst_cid, __u32 dst_port,
__u32 len,
__u16 type,
__u16 op,
__u32 flags
),
TP_ARGS(
src_cid, src_port,
dst_cid, dst_port,
len,
type,
op,
flags
),
TP_STRUCT__entry(
__field(__u32, src_cid)
__field(__u32, src_port)
__field(__u32, dst_cid)
__field(__u32, dst_port)
__field(__u32, len)
__field(__u16, type)
__field(__u16, op)
__field(__u32, flags)
),
TP_fast_assign(
__entry->src_cid = src_cid;
__entry->src_port = src_port;
__entry->dst_cid = dst_cid;
__entry->dst_port = dst_port;
__entry->len = len;
__entry->type = type;
__entry->op = op;
__entry->flags = flags;
),
TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x",
__entry->src_cid, __entry->src_port,
__entry->dst_cid, __entry->dst_port,
__entry->len,
show_type(__entry->type),
show_op(__entry->op),
__entry->flags)
);
/*
 * Trace event virtio_transport_recv_pkt.
 *
 * Records the same fields as virtio_transport_alloc_pkt (source and
 * destination CID:port, length, type, op, flags) plus the buf_alloc and
 * fwd_cnt values, which are printed as unsigned decimals at the end of the
 * line.
 * NOTE(review): by its name this fires when a packet is received; the
 * call site is not in this header.
 */
TRACE_EVENT(virtio_transport_recv_pkt,
TP_PROTO(
__u32 src_cid, __u32 src_port,
__u32 dst_cid, __u32 dst_port,
__u32 len,
__u16 type,
__u16 op,
__u32 flags,
__u32 buf_alloc,
__u32 fwd_cnt
),
TP_ARGS(
src_cid, src_port,
dst_cid, dst_port,
len,
type,
op,
flags,
buf_alloc,
fwd_cnt
),
TP_STRUCT__entry(
__field(__u32, src_cid)
__field(__u32, src_port)
__field(__u32, dst_cid)
__field(__u32, dst_port)
__field(__u32, len)
__field(__u16, type)
__field(__u16, op)
__field(__u32, flags)
__field(__u32, buf_alloc)
__field(__u32, fwd_cnt)
),
TP_fast_assign(
__entry->src_cid = src_cid;
__entry->src_port = src_port;
__entry->dst_cid = dst_cid;
__entry->dst_port = dst_port;
__entry->len = len;
__entry->type = type;
__entry->op = op;
__entry->flags = flags;
__entry->buf_alloc = buf_alloc;
__entry->fwd_cnt = fwd_cnt;
),
TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x "
"buf_alloc=%u fwd_cnt=%u",
__entry->src_cid, __entry->src_port,
__entry->dst_cid, __entry->dst_port,
__entry->len,
show_type(__entry->type),
show_op(__entry->op),
__entry->flags,
__entry->buf_alloc,
__entry->fwd_cnt)
);
#endif /* _TRACE_VSOCK_VIRTIO_TRANSPORT_COMMON_H */
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE vsock_virtio_transport_common
/* This part must be outside protection */
#include <trace/define_trace.h>

View file

@ -454,6 +454,7 @@ header-y += virtio_ring.h
header-y += virtio_rng.h header-y += virtio_rng.h
header-y += virtio_scsi.h header-y += virtio_scsi.h
header-y += virtio_types.h header-y += virtio_types.h
header-y += virtio_vsock.h
header-y += vm_sockets.h header-y += vm_sockets.h
header-y += vt.h header-y += vt.h
header-y += vtpm_proxy.h header-y += vtpm_proxy.h

View file

@ -47,6 +47,32 @@ struct vhost_vring_addr {
__u64 log_guest_addr; __u64 log_guest_addr;
}; };
/*
 * Device IOTLB message payload, carried in the iotlb member of
 * struct vhost_msg.
 */
/* no alignment requirement */
struct vhost_iotlb_msg {
__u64 iova;
__u64 size;
__u64 uaddr;
/* Values for perm; VHOST_ACCESS_RW == VHOST_ACCESS_RO | VHOST_ACCESS_WO. */
#define VHOST_ACCESS_RO 0x1
#define VHOST_ACCESS_WO 0x2
#define VHOST_ACCESS_RW 0x3
__u8 perm;
/* Values for type. */
#define VHOST_IOTLB_MISS 1
#define VHOST_IOTLB_UPDATE 2
#define VHOST_IOTLB_INVALIDATE 3
#define VHOST_IOTLB_ACCESS_FAIL 4
__u8 type;
};
/* Message type value for struct vhost_msg; the only one defined here. */
#define VHOST_IOTLB_MSG 0x1
/*
 * Generic vhost message: a type tag followed by a payload union.  The
 * 64-byte padding member fixes the union's size at no less than 64 bytes,
 * whatever the size of the payload structs.
 */
struct vhost_msg {
int type;
union {
struct vhost_iotlb_msg iotlb;
__u8 padding[64];
};
};
struct vhost_memory_region { struct vhost_memory_region {
__u64 guest_phys_addr; __u64 guest_phys_addr;
__u64 memory_size; /* bytes */ __u64 memory_size; /* bytes */
@ -146,6 +172,8 @@ struct vhost_memory {
#define VHOST_F_LOG_ALL 26 #define VHOST_F_LOG_ALL 26
/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ /* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */
#define VHOST_NET_F_VIRTIO_NET_HDR 27 #define VHOST_NET_F_VIRTIO_NET_HDR 27
/* vhost has device IOTLB support */
#define VHOST_F_DEVICE_IOTLB 63
/* VHOST_SCSI specific definitions */ /* VHOST_SCSI specific definitions */
@ -175,4 +203,9 @@ struct vhost_scsi_target {
#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32) #define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32) #define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
/* VHOST_VSOCK specific defines */
#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64)
#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int)
#endif #endif

View file

@ -49,7 +49,7 @@
* transport being used (eg. virtio_ring), the rest are per-device feature * transport being used (eg. virtio_ring), the rest are per-device feature
* bits. */ * bits. */
#define VIRTIO_TRANSPORT_F_START 28 #define VIRTIO_TRANSPORT_F_START 28
#define VIRTIO_TRANSPORT_F_END 33 #define VIRTIO_TRANSPORT_F_END 34
#ifndef VIRTIO_CONFIG_NO_LEGACY #ifndef VIRTIO_CONFIG_NO_LEGACY
/* Do we get callbacks when the ring is completely used, even if we've /* Do we get callbacks when the ring is completely used, even if we've
@ -63,4 +63,12 @@
/* v1.0 compliant. */ /* v1.0 compliant. */
#define VIRTIO_F_VERSION_1 32 #define VIRTIO_F_VERSION_1 32
/*
* If clear - device has the IOMMU bypass quirk feature.
* If set - use platform tools to detect the IOMMU.
*
* Note the reverse polarity (compared to most other features),
* this is for compatibility with legacy systems.
*/
#define VIRTIO_F_IOMMU_PLATFORM 33
#endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */ #endif /* _UAPI_LINUX_VIRTIO_CONFIG_H */

View file

@ -41,5 +41,6 @@
#define VIRTIO_ID_CAIF 12 /* Virtio caif */ #define VIRTIO_ID_CAIF 12 /* Virtio caif */
#define VIRTIO_ID_GPU 16 /* virtio GPU */ #define VIRTIO_ID_GPU 16 /* virtio GPU */
#define VIRTIO_ID_INPUT 18 /* virtio input */ #define VIRTIO_ID_INPUT 18 /* virtio input */
#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
#endif /* _LINUX_VIRTIO_IDS_H */ #endif /* _LINUX_VIRTIO_IDS_H */

View file

@ -0,0 +1,94 @@
/*
* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so
* anyone can use the definitions to implement compatible drivers/servers:
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of IBM nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Copyright (C) Red Hat, Inc., 2013-2015
* Copyright (C) Asias He <asias@redhat.com>, 2013
* Copyright (C) Stefan Hajnoczi <stefanha@redhat.com>, 2015
*/
#ifndef _UAPI_LINUX_VIRTIO_VSOCK_H
#define _UAPI_LINUX_VIRTIO_VSOCK_H /* must match the #ifndef above */
#include <linux/types.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
/*
 * Device configuration space layout.  guest_cid is a little-endian 64-bit
 * value; the guest driver reads it at offsetof(guest_cid).
 */
struct virtio_vsock_config {
__le64 guest_cid;
} __attribute__((packed));
/* Values carried in virtio_vsock_event.id. */
enum virtio_vsock_event_id {
VIRTIO_VSOCK_EVENT_TRANSPORT_RESET = 0,
};
/* Buffer format of the event virtqueue: a single little-endian event id. */
struct virtio_vsock_event {
__le32 id;
} __attribute__((packed));
/*
 * Packet header.  All fields are little-endian and the struct is packed.
 */
struct virtio_vsock_hdr {
/* Source and destination addressing (CID and port). */
__le64 src_cid;
__le64 dst_cid;
__le32 src_port;
__le32 dst_port;
/* Length of the payload that follows the header. */
__le32 len;
__le16 type; /* enum virtio_vsock_type */
__le16 op; /* enum virtio_vsock_op */
__le32 flags;
/* NOTE(review): presumably the sender's credit info (see the
 * VIRTIO_VSOCK_OP_CREDIT_* operations) -- confirm against the spec.
 */
__le32 buf_alloc;
__le32 fwd_cnt;
} __attribute__((packed));
/* Values for virtio_vsock_hdr.type; only stream sockets are defined. */
enum virtio_vsock_type {
VIRTIO_VSOCK_TYPE_STREAM = 1,
};
/* Values for virtio_vsock_hdr.op. */
enum virtio_vsock_op {
VIRTIO_VSOCK_OP_INVALID = 0,
/* Connect operations */
VIRTIO_VSOCK_OP_REQUEST = 1,
VIRTIO_VSOCK_OP_RESPONSE = 2,
VIRTIO_VSOCK_OP_RST = 3,
VIRTIO_VSOCK_OP_SHUTDOWN = 4,
/* To send payload */
VIRTIO_VSOCK_OP_RW = 5,
/* Tell the peer our credit info */
VIRTIO_VSOCK_OP_CREDIT_UPDATE = 6,
/* Request the peer to send the credit info to us */
VIRTIO_VSOCK_OP_CREDIT_REQUEST = 7,
};
/* VIRTIO_VSOCK_OP_SHUTDOWN flags values */
/* The two values are distinct bits (1 and 2), so they can be OR-ed. */
enum virtio_vsock_shutdown {
VIRTIO_VSOCK_SHUTDOWN_RCV = 1,
VIRTIO_VSOCK_SHUTDOWN_SEND = 2,
};
#endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */

View file

@ -26,3 +26,23 @@ config VMWARE_VMCI_VSOCKETS
To compile this driver as a module, choose M here: the module To compile this driver as a module, choose M here: the module
will be called vmw_vsock_vmci_transport. If unsure, say N. will be called vmw_vsock_vmci_transport. If unsure, say N.
config VIRTIO_VSOCKETS
tristate "virtio transport for Virtual Sockets"
depends on VSOCKETS && VIRTIO
select VIRTIO_VSOCKETS_COMMON
help
This module implements a virtio transport for Virtual Sockets.
Enable this transport if your Virtual Machine host supports Virtual
Sockets over virtio.
To compile this driver as a module, choose M here: the module will be
called vmw_vsock_virtio_transport. If unsure, say N.
config VIRTIO_VSOCKETS_COMMON
tristate
help
This option is selected by any driver which needs to access
the virtio_vsock. The module will be called
vmw_vsock_virtio_transport_common.

View file

@ -1,7 +1,13 @@
obj-$(CONFIG_VSOCKETS) += vsock.o obj-$(CONFIG_VSOCKETS) += vsock.o
obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o
vsock-y += af_vsock.o vsock_addr.o vsock-y += af_vsock.o vsock_addr.o
vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
vmci_transport_notify_qstate.o vmci_transport_notify_qstate.o
vmw_vsock_virtio_transport-y += virtio_transport.o
vmw_vsock_virtio_transport_common-y += virtio_transport_common.o

View file

@ -344,6 +344,16 @@ static bool vsock_in_connected_table(struct vsock_sock *vsk)
return ret; return ret;
} }
void vsock_remove_sock(struct vsock_sock *vsk)
{
if (vsock_in_bound_table(vsk))
vsock_remove_bound(vsk);
if (vsock_in_connected_table(vsk))
vsock_remove_connected(vsk);
}
EXPORT_SYMBOL_GPL(vsock_remove_sock);
void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)) void vsock_for_each_connected_socket(void (*fn)(struct sock *sk))
{ {
int i; int i;
@ -660,12 +670,6 @@ static void __vsock_release(struct sock *sk)
vsk = vsock_sk(sk); vsk = vsock_sk(sk);
pending = NULL; /* Compiler warning. */ pending = NULL; /* Compiler warning. */
if (vsock_in_bound_table(vsk))
vsock_remove_bound(vsk);
if (vsock_in_connected_table(vsk))
vsock_remove_connected(vsk);
transport->release(vsk); transport->release(vsk);
lock_sock(sk); lock_sock(sk);
@ -1995,6 +1999,15 @@ void vsock_core_exit(void)
} }
EXPORT_SYMBOL_GPL(vsock_core_exit); EXPORT_SYMBOL_GPL(vsock_core_exit);
/*
 * Return the currently registered transport.
 *
 * No locking: vsock_register_mutex is intentionally not taken, because the
 * only caller is the transport itself and it only calls this while it is
 * registered.
 */
const struct vsock_transport *vsock_core_get_transport(void)
{
	return transport;
}
EXPORT_SYMBOL_GPL(vsock_core_get_transport);
MODULE_AUTHOR("VMware, Inc."); MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Virtual Socket Family"); MODULE_DESCRIPTION("VMware Virtual Socket Family");
MODULE_VERSION("1.0.1.0-k"); MODULE_VERSION("1.0.1.0-k");

View file

@ -0,0 +1,624 @@
/*
* virtio transport for vsock
*
* Copyright (C) 2013-2015 Red Hat, Inc.
* Author: Asias He <asias@redhat.com>
* Stefan Hajnoczi <stefanha@redhat.com>
*
* Some of the code is taken from Gerd Hoffmann <kraxel@redhat.com>'s
* early virtio-vsock proof-of-concept bits.
*
* This work is licensed under the terms of the GNU GPL, version 2.
*/
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/atomic.h>
#include <linux/virtio.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
#include <linux/virtio_vsock.h>
#include <net/sock.h>
#include <linux/mutex.h>
#include <net/af_vsock.h>
/* Workqueue on which all of this driver's work items are queued. */
static struct workqueue_struct *virtio_vsock_workqueue;
/* The single device instance; NULL-checked by virtio_transport_send_pkt(). */
static struct virtio_vsock *the_virtio_vsock;
static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */
/*
 * Per-device state of the guest virtio vsock driver: the three virtqueues,
 * the work items that service them, and the locks that serialise access to
 * each queue.
 */
struct virtio_vsock {
struct virtio_device *vdev;
/* Indexed by VSOCK_VQ_RX / VSOCK_VQ_TX / VSOCK_VQ_EVENT. */
struct virtqueue *vqs[VSOCK_VQ_MAX];
/* Virtqueue processing is deferred to a workqueue */
struct work_struct tx_work;
struct work_struct rx_work;
struct work_struct event_work;
/* The following fields are protected by tx_lock. vqs[VSOCK_VQ_TX]
* must be accessed with tx_lock held.
*/
struct mutex tx_lock;
struct work_struct send_pkt_work;
/* send_pkt_list is modified under send_pkt_list_lock (with BHs disabled). */
spinlock_t send_pkt_list_lock;
struct list_head send_pkt_list;
/* Count of reply packets queued for sending but not yet placed on the
 * tx virtqueue; rx processing pauses while it reaches the rx ring size.
 */
atomic_t queued_replies;
/* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX]
* must be accessed with rx_lock held.
*/
struct mutex rx_lock;
/* Buffers currently posted to the rx queue, and the high-water mark. */
int rx_buf_nr;
int rx_buf_max_nr;
/* The following fields are protected by event_lock.
* vqs[VSOCK_VQ_EVENT] must be accessed with event_lock held.
*/
struct mutex event_lock;
struct virtio_vsock_event event_list[8];
/* Low 32 bits of the guest CID read from the device config space. */
u32 guest_cid;
};
/*
 * Accessor for the single device instance.  The pointer is read as-is,
 * without taking the_virtio_vsock_mutex, and may be NULL.
 */
static struct virtio_vsock *virtio_vsock_get(void)
{
	struct virtio_vsock *instance = the_virtio_vsock;

	return instance;
}
static u32 virtio_transport_get_local_cid(void)
{
struct virtio_vsock *vsock = virtio_vsock_get();
return vsock->guest_cid;
}
/*
 * Work handler that moves packets from send_pkt_list onto the tx virtqueue.
 *
 * Runs with tx_lock held for the whole loop.  Packets are taken one at a
 * time from the head of the list; a packet that cannot be added to the
 * virtqueue is put back at the head so ordering is preserved.  The device
 * is kicked once at the end if anything was added, and rx processing is
 * rescheduled if sending replies freed up reply capacity.
 */
static void
virtio_transport_send_pkt_work(struct work_struct *work)
{
struct virtio_vsock *vsock =
container_of(work, struct virtio_vsock, send_pkt_work);
struct virtqueue *vq;
bool added = false;
bool restart_rx = false;
mutex_lock(&vsock->tx_lock);
vq = vsock->vqs[VSOCK_VQ_TX];
/* Avoid unnecessary interrupts while we're processing the ring */
virtqueue_disable_cb(vq);
for (;;) {
struct virtio_vsock_pkt *pkt;
struct scatterlist hdr, buf, *sgs[2];
int ret, in_sg = 0, out_sg = 0;
bool reply;
spin_lock_bh(&vsock->send_pkt_list_lock);
if (list_empty(&vsock->send_pkt_list)) {
/* Nothing left to send: re-enable callbacks and stop. */
spin_unlock_bh(&vsock->send_pkt_list_lock);
virtqueue_enable_cb(vq);
break;
}
pkt = list_first_entry(&vsock->send_pkt_list,
struct virtio_vsock_pkt, list);
list_del_init(&pkt->list);
spin_unlock_bh(&vsock->send_pkt_list_lock);
/* Read the flag before the packet is handed to the virtqueue. */
reply = pkt->reply;
/* Header always goes out; the payload only when there is one. */
sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
sgs[out_sg++] = &hdr;
if (pkt->buf) {
sg_init_one(&buf, pkt->buf, pkt->len);
sgs[out_sg++] = &buf;
}
ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL);
if (ret < 0) {
/* Put the packet back at the head of the list. */
spin_lock_bh(&vsock->send_pkt_list_lock);
list_add(&pkt->list, &vsock->send_pkt_list);
spin_unlock_bh(&vsock->send_pkt_list_lock);
if (!virtqueue_enable_cb(vq) && ret == -ENOSPC)
continue; /* retry now that we have more space */
break;
}
if (reply) {
struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
int val;
val = atomic_dec_return(&vsock->queued_replies);
/* Do we now have resources to resume rx processing? */
/* val + 1 is the count before the decrement: it was exactly at the
 * rx ring size, the point at which rx_work stops.
 */
if (val + 1 == virtqueue_get_vring_size(rx_vq))
restart_rx = true;
}
added = true;
}
if (added)
virtqueue_kick(vq);
mutex_unlock(&vsock->tx_lock);
/* Queued after dropping tx_lock. */
if (restart_rx)
queue_work(virtio_vsock_workqueue, &vsock->rx_work);
}
static int
virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
{
struct virtio_vsock *vsock;
int len = pkt->len;
vsock = virtio_vsock_get();
if (!vsock) {
virtio_transport_free_pkt(pkt);
return -ENODEV;
}
if (pkt->reply)
atomic_inc(&vsock->queued_replies);
spin_lock_bh(&vsock->send_pkt_list_lock);
list_add_tail(&pkt->list, &vsock->send_pkt_list);
spin_unlock_bh(&vsock->send_pkt_list_lock);
queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
return len;
}
static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
{
int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
struct virtio_vsock_pkt *pkt;
struct scatterlist hdr, buf, *sgs[2];
struct virtqueue *vq;
int ret;
vq = vsock->vqs[VSOCK_VQ_RX];
do {
pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
if (!pkt)
break;
pkt->buf = kmalloc(buf_len, GFP_KERNEL);
if (!pkt->buf) {
virtio_transport_free_pkt(pkt);
break;
}
pkt->len = buf_len;
sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
sgs[0] = &hdr;
sg_init_one(&buf, pkt->buf, buf_len);
sgs[1] = &buf;
ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL);
if (ret) {
virtio_transport_free_pkt(pkt);
break;
}
vsock->rx_buf_nr++;
} while (vq->num_free);
if (vsock->rx_buf_nr > vsock->rx_buf_max_nr)
vsock->rx_buf_max_nr = vsock->rx_buf_nr;
virtqueue_kick(vq);
}
/*
 * Work handler for tx virtqueue completions: free every packet the device
 * has finished with, then reschedule the send worker if anything was
 * reclaimed (space has opened up on the queue).
 */
static void virtio_transport_tx_work(struct work_struct *work)
{
	struct virtio_vsock *vsock =
		container_of(work, struct virtio_vsock, tx_work);
	struct virtqueue *tx_vq = vsock->vqs[VSOCK_VQ_TX];
	bool reclaimed = false;

	mutex_lock(&vsock->tx_lock);
	for (;;) {
		struct virtio_vsock_pkt *done;
		unsigned int used_len;

		virtqueue_disable_cb(tx_vq);
		for (done = virtqueue_get_buf(tx_vq, &used_len);
		     done != NULL;
		     done = virtqueue_get_buf(tx_vq, &used_len)) {
			virtio_transport_free_pkt(done);
			reclaimed = true;
		}

		/* Loop again if more buffers arrived before re-enabling. */
		if (virtqueue_enable_cb(tx_vq))
			break;
	}
	mutex_unlock(&vsock->tx_lock);

	if (reclaimed)
		queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
}
/* Is there space left for replies to rx packets? */
/*
 * Returns true while the number of queued replies is below the size of the
 * rx vring.  rx_work stops pulling packets once this turns false, and
 * send_pkt_work restarts rx when the count drops back from the limit.
 */
static bool virtio_transport_more_replies(struct virtio_vsock *vsock)
{
struct virtqueue *vq = vsock->vqs[VSOCK_VQ_RX];
int val;
smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
val = atomic_read(&vsock->queued_replies);
return val < virtqueue_get_vring_size(vq);
}
/*
 * Work handler for the rx virtqueue.
 *
 * Under rx_lock, drains used buffers and passes each valid packet to
 * virtio_transport_recv_pkt().  Draining stops early, leaving callbacks
 * disabled, when too many replies are already queued.  Before returning,
 * the queue is refilled if fewer than half of the high-water number of
 * buffers remain posted.
 */
static void virtio_transport_rx_work(struct work_struct *work)
{
struct virtio_vsock *vsock =
container_of(work, struct virtio_vsock, rx_work);
struct virtqueue *vq;
vq = vsock->vqs[VSOCK_VQ_RX];
mutex_lock(&vsock->rx_lock);
do {
virtqueue_disable_cb(vq);
for (;;) {
struct virtio_vsock_pkt *pkt;
unsigned int len;
if (!virtio_transport_more_replies(vsock)) {
/* Stop rx until the device processes already
* pending replies. Leave rx virtqueue
* callbacks disabled.
*/
goto out;
}
pkt = virtqueue_get_buf(vq, &len);
if (!pkt) {
break;
}
vsock->rx_buf_nr--;
/* Drop short/long packets */
/* len counts header plus payload; pkt->len still holds the size of
 * the buffer that was posted.
 */
if (unlikely(len < sizeof(pkt->hdr) ||
len > sizeof(pkt->hdr) + pkt->len)) {
virtio_transport_free_pkt(pkt);
continue;
}
/* From here on pkt->len is the received payload length. */
pkt->len = len - sizeof(pkt->hdr);
virtio_transport_recv_pkt(pkt);
}
} while (!virtqueue_enable_cb(vq));
out:
if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2)
virtio_vsock_rx_fill(vsock);
mutex_unlock(&vsock->rx_lock);
}
/* event_lock must be held */
static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock,
struct virtio_vsock_event *event)
{
struct scatterlist sg;
struct virtqueue *vq;
vq = vsock->vqs[VSOCK_VQ_EVENT];
sg_init_one(&sg, event, sizeof(*event));
return virtqueue_add_inbuf(vq, &sg, 1, event, GFP_KERNEL);
}
/* event_lock must be held */
static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
{
size_t i;
for (i = 0; i < ARRAY_SIZE(vsock->event_list); i++) {
struct virtio_vsock_event *event = &vsock->event_list[i];
virtio_vsock_event_fill_one(vsock, event);
}
virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]);
}
/*
 * Per-socket callback used on transport reset: under the socket lock, mark
 * the socket unconnected, set its error to ECONNRESET and invoke the
 * socket's error-report callback.
 */
static void virtio_vsock_reset_sock(struct sock *sk)
{
lock_sock(sk);
sk->sk_state = SS_UNCONNECTED;
sk->sk_err = ECONNRESET;
sk->sk_error_report(sk);
release_sock(sk);
}
static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock)
{
struct virtio_device *vdev = vsock->vdev;
u64 guest_cid;
vdev->config->get(vdev, offsetof(struct virtio_vsock_config, guest_cid),
&guest_cid, sizeof(guest_cid));
vsock->guest_cid = le64_to_cpu(guest_cid);
}
/*
 * Act on one event received from the device.  Only
 * VIRTIO_VSOCK_EVENT_TRANSPORT_RESET is handled: the guest CID is re-read
 * and every connected socket is reset.  Any other event id is ignored.
 *
 * Caller must hold event_lock.
 */
static void virtio_vsock_event_handle(struct virtio_vsock *vsock,
				      struct virtio_vsock_event *event)
{
	if (le32_to_cpu(event->id) != VIRTIO_VSOCK_EVENT_TRANSPORT_RESET)
		return;

	virtio_vsock_update_guest_cid(vsock);
	vsock_for_each_connected_socket(virtio_vsock_reset_sock);
}
/*
 * Work handler for the event virtqueue.
 *
 * Under event_lock, takes each used event buffer, handles it if its length
 * is exactly sizeof(struct virtio_vsock_event), and re-posts the same
 * buffer whether or not it was handled.  The device is kicked once at the
 * end.
 */
static void virtio_transport_event_work(struct work_struct *work)
{
struct virtio_vsock *vsock =
container_of(work, struct virtio_vsock, event_work);
struct virtqueue *vq;
vq = vsock->vqs[VSOCK_VQ_EVENT];
mutex_lock(&vsock->event_lock);
do {
struct virtio_vsock_event *event;
unsigned int len;
virtqueue_disable_cb(vq);
while ((event = virtqueue_get_buf(vq, &len)) != NULL) {
/* Wrong-sized events are skipped but the buffer is still recycled. */
if (len == sizeof(*event))
virtio_vsock_event_handle(vsock, event);
virtio_vsock_event_fill_one(vsock, event);
}
} while (!virtqueue_enable_cb(vq));
virtqueue_kick(vsock->vqs[VSOCK_VQ_EVENT]);
mutex_unlock(&vsock->event_lock);
}
/*
 * Event virtqueue callback: defer processing to event_work.  Does nothing
 * when the device has no driver state attached (priv is NULL).
 */
static void virtio_vsock_event_done(struct virtqueue *vq)
{
	struct virtio_vsock *vsock = vq->vdev->priv;

	if (vsock)
		queue_work(virtio_vsock_workqueue, &vsock->event_work);
}
/*
 * Tx virtqueue callback: defer processing to tx_work.  Does nothing when
 * the device has no driver state attached (priv is NULL).
 */
static void virtio_vsock_tx_done(struct virtqueue *vq)
{
	struct virtio_vsock *vsock = vq->vdev->priv;

	if (vsock)
		queue_work(virtio_vsock_workqueue, &vsock->tx_work);
}
/*
 * Rx virtqueue callback: defer processing to rx_work.  Does nothing when
 * the device has no driver state attached (priv is NULL).
 */
static void virtio_vsock_rx_done(struct virtqueue *vq)
{
	struct virtio_vsock *vsock = vq->vdev->priv;

	if (vsock)
		queue_work(virtio_vsock_workqueue, &vsock->rx_work);
}
/* The guest-side virtio vsock transport.
 *
 * The embedded .transport table is what virtio_vsock_probe() passes to
 * vsock_core_init().  Apart from get_local_cid, every hook points at a
 * virtio_transport_* function.  .send_pkt is the transmit hook that callers
 * reach through the containing struct virtio_transport.
 */
static struct virtio_transport virtio_transport = {
.transport = {
.get_local_cid = virtio_transport_get_local_cid,
/* socket lifetime */
.init = virtio_transport_do_socket_init,
.destruct = virtio_transport_destruct,
.release = virtio_transport_release,
.connect = virtio_transport_connect,
.shutdown = virtio_transport_shutdown,
/* datagram hooks */
.dgram_bind = virtio_transport_dgram_bind,
.dgram_dequeue = virtio_transport_dgram_dequeue,
.dgram_enqueue = virtio_transport_dgram_enqueue,
.dgram_allow = virtio_transport_dgram_allow,
/* stream hooks */
.stream_dequeue = virtio_transport_stream_dequeue,
.stream_enqueue = virtio_transport_stream_enqueue,
.stream_has_data = virtio_transport_stream_has_data,
.stream_has_space = virtio_transport_stream_has_space,
.stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
.stream_is_active = virtio_transport_stream_is_active,
.stream_allow = virtio_transport_stream_allow,
/* poll / send / receive notification hooks */
.notify_poll_in = virtio_transport_notify_poll_in,
.notify_poll_out = virtio_transport_notify_poll_out,
.notify_recv_init = virtio_transport_notify_recv_init,
.notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
.notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
.notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
.notify_send_init = virtio_transport_notify_send_init,
.notify_send_pre_block = virtio_transport_notify_send_pre_block,
.notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
/* buffer size accessors */
.set_buffer_size = virtio_transport_set_buffer_size,
.set_min_buffer_size = virtio_transport_set_min_buffer_size,
.set_max_buffer_size = virtio_transport_set_max_buffer_size,
.get_buffer_size = virtio_transport_get_buffer_size,
.get_min_buffer_size = virtio_transport_get_min_buffer_size,
.get_max_buffer_size = virtio_transport_get_max_buffer_size,
},
.send_pkt = virtio_transport_send_pkt,
};
static int virtio_vsock_probe(struct virtio_device *vdev)
{
vq_callback_t *callbacks[] = {
virtio_vsock_rx_done,
virtio_vsock_tx_done,
virtio_vsock_event_done,
};
static const char * const names[] = {
"rx",
"tx",
"event",
};
struct virtio_vsock *vsock = NULL;
int ret;
ret = mutex_lock_interruptible(&the_virtio_vsock_mutex);
if (ret)
return ret;
/* Only one virtio-vsock device per guest is supported */
if (the_virtio_vsock) {
ret = -EBUSY;
goto out;
}
vsock = kzalloc(sizeof(*vsock), GFP_KERNEL);
if (!vsock) {
ret = -ENOMEM;
goto out;
}
vsock->vdev = vdev;
ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX,
vsock->vqs, callbacks, names);
if (ret < 0)
goto out;
virtio_vsock_update_guest_cid(vsock);
ret = vsock_core_init(&virtio_transport.transport);
if (ret < 0)
goto out_vqs;
vsock->rx_buf_nr = 0;
vsock->rx_buf_max_nr = 0;
atomic_set(&vsock->queued_replies, 0);
vdev->priv = vsock;
the_virtio_vsock = vsock;
mutex_init(&vsock->tx_lock);
mutex_init(&vsock->rx_lock);
mutex_init(&vsock->event_lock);
spin_lock_init(&vsock->send_pkt_list_lock);
INIT_LIST_HEAD(&vsock->send_pkt_list);
INIT_WORK(&vsock->rx_work, virtio_transport_rx_work);
INIT_WORK(&vsock->tx_work, virtio_transport_tx_work);
INIT_WORK(&vsock->event_work, virtio_transport_event_work);
INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);
mutex_lock(&vsock->rx_lock);
virtio_vsock_rx_fill(vsock);
mutex_unlock(&vsock->rx_lock);
mutex_lock(&vsock->event_lock);
virtio_vsock_event_fill(vsock);
mutex_unlock(&vsock->event_lock);
mutex_unlock(&the_virtio_vsock_mutex);
return 0;
out_vqs:
vsock->vdev->config->del_vqs(vsock->vdev);
out:
kfree(vsock);
mutex_unlock(&the_virtio_vsock_mutex);
return ret;
}
/* Unbind the driver from the device and release everything probe set up.
 *
 * The sequence is: flush the four work items, reset the device, reclaim
 * and free the buffers still posted on the RX and TX virtqueues, free the
 * packets still waiting on send_pkt_list, unpublish the device and
 * unregister the transport, then delete the virtqueues and free the
 * per-device state.
 */
static void virtio_vsock_remove(struct virtio_device *vdev)
{
struct virtio_vsock *vsock = vdev->priv;
struct virtio_vsock_pkt *pkt;
/* Wait for work that is already queued or running */
flush_work(&vsock->rx_work);
flush_work(&vsock->tx_work);
flush_work(&vsock->event_work);
flush_work(&vsock->send_pkt_work);
/* NOTE(review): the reset presumably stops the device from touching the
 * rings before the buffers are detached below - confirm.
 */
vdev->config->reset(vdev);
/* Free packets still posted on the RX queue */
mutex_lock(&vsock->rx_lock);
while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX])))
virtio_transport_free_pkt(pkt);
mutex_unlock(&vsock->rx_lock);
/* Free packets still posted on the TX queue */
mutex_lock(&vsock->tx_lock);
while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX])))
virtio_transport_free_pkt(pkt);
mutex_unlock(&vsock->tx_lock);
/* Free packets still waiting on send_pkt_list */
spin_lock_bh(&vsock->send_pkt_list_lock);
while (!list_empty(&vsock->send_pkt_list)) {
pkt = list_first_entry(&vsock->send_pkt_list,
struct virtio_vsock_pkt, list);
list_del(&pkt->list);
virtio_transport_free_pkt(pkt);
}
spin_unlock_bh(&vsock->send_pkt_list_lock);
/* Unpublish the device and unregister the transport under the global mutex */
mutex_lock(&the_virtio_vsock_mutex);
the_virtio_vsock = NULL;
vsock_core_exit();
mutex_unlock(&the_virtio_vsock_mutex);
vdev->config->del_vqs(vdev);
kfree(vsock);
}
/* Devices this driver binds to: VIRTIO_ID_VSOCK from any vendor.  The
 * zeroed entry terminates the table.
 */
static struct virtio_device_id id_table[] = {
{ VIRTIO_ID_VSOCK, VIRTIO_DEV_ANY_ID },
{ 0 },
};
/* Feature table for this driver; intentionally empty, no feature bits are listed. */
static unsigned int features[] = {
};
/* Driver description registered by virtio_vsock_init(): ties the id and
 * feature tables to the probe and remove entry points.
 */
static struct virtio_driver virtio_vsock_driver = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = virtio_vsock_probe,
.remove = virtio_vsock_remove,
};
/* Module init: create the driver workqueue, then register the virtio
 * driver.  Returns 0 on success, -ENOMEM if the workqueue cannot be
 * allocated, or the registration error; in the latter case the workqueue
 * is destroyed again.
 */
static int __init virtio_vsock_init(void)
{
	int err;

	virtio_vsock_workqueue = alloc_workqueue("virtio_vsock", 0, 0);
	if (!virtio_vsock_workqueue)
		return -ENOMEM;

	err = register_virtio_driver(&virtio_vsock_driver);
	if (!err)
		return 0;

	destroy_workqueue(virtio_vsock_workqueue);
	return err;
}
/* Module exit: undo virtio_vsock_init() in reverse order - unregister the
 * driver first, then destroy the workqueue.
 */
static void __exit virtio_vsock_exit(void)
{
	unregister_virtio_driver(&virtio_vsock_driver);
	destroy_workqueue(virtio_vsock_workqueue);
}
/* Module entry/exit points, metadata, and the device table exported for
 * module autoloading.
 */
module_init(virtio_vsock_init);
module_exit(virtio_vsock_exit);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("virtio transport for vsock");
MODULE_DEVICE_TABLE(virtio, id_table);

View file

@ -0,0 +1,992 @@
/*
* common code for virtio vsock
*
* Copyright (C) 2013-2015 Red Hat, Inc.
* Author: Asias He <asias@redhat.com>
* Stefan Hajnoczi <stefanha@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2.
*/
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/virtio.h>
#include <linux/virtio_ids.h>
#include <linux/virtio_config.h>
#include <linux/virtio_vsock.h>
#include <net/sock.h>
#include <net/af_vsock.h>
#define CREATE_TRACE_POINTS
#include <trace/events/vsock_virtio_transport_common.h>
/* How long to wait for graceful shutdown of a connection, in jiffies
 * (8 * HZ, i.e. eight seconds).  Used as the delay of the close_work
 * timeout scheduled by virtio_transport_close().
 */
#define VSOCK_CLOSE_TIMEOUT (8 * HZ)
/* Return the struct virtio_transport that embeds the transport returned by
 * vsock_core_get_transport().
 */
static const struct virtio_transport *virtio_transport_get_ops(void)
{
	const struct vsock_transport *core = vsock_core_get_transport();

	return container_of(core, struct virtio_transport, transport);
}
/* Allocate a packet and fill in its header from @info and the given
 * addresses.
 *
 * When @info carries a message and @len is non-zero, a payload buffer of
 * @len bytes is allocated and filled from the message.  Returns the new
 * packet, or NULL if an allocation or the payload copy fails; nothing is
 * leaked on failure.
 */
struct virtio_vsock_pkt *
virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
			   size_t len,
			   u32 src_cid,
			   u32 src_port,
			   u32 dst_cid,
			   u32 dst_port)
{
	struct virtio_vsock_pkt *pkt;

	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
	if (!pkt)
		return NULL;

	/* Header fields are stored little-endian */
	pkt->hdr.type = cpu_to_le16(info->type);
	pkt->hdr.op = cpu_to_le16(info->op);
	pkt->hdr.src_cid = cpu_to_le64(src_cid);
	pkt->hdr.dst_cid = cpu_to_le64(dst_cid);
	pkt->hdr.src_port = cpu_to_le32(src_port);
	pkt->hdr.dst_port = cpu_to_le32(dst_port);
	pkt->hdr.flags = cpu_to_le32(info->flags);
	pkt->len = len;
	pkt->hdr.len = cpu_to_le32(len);
	pkt->reply = info->reply;

	if (info->msg && len > 0) {
		pkt->buf = kmalloc(len, GFP_KERNEL);
		if (!pkt->buf)
			goto free_pkt;

		if (memcpy_from_msg(pkt->buf, info->msg, len))
			goto free_buf;
	}

	trace_virtio_transport_alloc_pkt(src_cid, src_port,
					 dst_cid, dst_port,
					 len,
					 info->type,
					 info->op,
					 info->flags);

	return pkt;

free_buf:
	kfree(pkt->buf);
free_pkt:
	kfree(pkt);
	return NULL;
}
EXPORT_SYMBOL_GPL(virtio_transport_alloc_pkt);
/* Build a packet described by @info for socket @vsk and hand it to the
 * transport's send_pkt hook.
 *
 * The destination is taken from @info when info->remote_cid is non-zero,
 * otherwise from the socket's remote address.  The payload length is capped
 * at VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE and then at the credit granted by
 * virtio_transport_get_credit(), so fewer than info->pkt_len bytes may be
 * sent.  Returns 0 without sending when an OP_RW packet would be empty,
 * -ENOMEM (after returning the credit) when the packet cannot be allocated,
 * otherwise whatever send_pkt returns.
 */
static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
					  struct virtio_vsock_pkt_info *info)
{
	struct virtio_vsock_sock *vvs;
	struct virtio_vsock_pkt *pkt;
	u32 src_cid, src_port, dst_cid, dst_port;
	u32 pkt_len = info->pkt_len;

	src_cid = vm_sockets_get_local_cid();
	src_port = vsk->local_addr.svm_port;

	if (info->remote_cid) {
		dst_cid = info->remote_cid;
		dst_port = info->remote_port;
	} else {
		dst_cid = vsk->remote_addr.svm_cid;
		dst_port = vsk->remote_addr.svm_port;
	}

	vvs = vsk->trans;

	/* we can send less than pkt_len bytes */
	if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE)
		pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;

	/* virtio_transport_get_credit might return less than pkt_len credit */
	pkt_len = virtio_transport_get_credit(vvs, pkt_len);

	/* Do not send zero length OP_RW pkt */
	if (info->op == VIRTIO_VSOCK_OP_RW && pkt_len == 0)
		return 0;

	pkt = virtio_transport_alloc_pkt(info, pkt_len,
					 src_cid, src_port,
					 dst_cid, dst_port);
	if (!pkt) {
		virtio_transport_put_credit(vvs, pkt_len);
		return -ENOMEM;
	}

	virtio_transport_inc_tx_pkt(vvs, pkt);

	return virtio_transport_get_ops()->send_pkt(pkt);
}
/* Account a newly queued packet: add its length to rx_bytes. */
static void virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
					struct virtio_vsock_pkt *pkt)
{
	vvs->rx_bytes += pkt->len;
}
/* Account a fully consumed packet: its length leaves rx_bytes and is added
 * to fwd_cnt.
 */
static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
					struct virtio_vsock_pkt *pkt)
{
	vvs->rx_bytes -= pkt->len;
	vvs->fwd_cnt += pkt->len;
}
/* Stamp an outgoing packet's header with this socket's current fwd_cnt and
 * buf_alloc (little-endian), read under tx_lock.
 */
void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs,
				 struct virtio_vsock_pkt *pkt)
{
	spin_lock_bh(&vvs->tx_lock);

	pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
	pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc);

	spin_unlock_bh(&vvs->tx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
/* Reserve up to @credit bytes of the peer's free buffer space.
 *
 * Under tx_lock, the available space is computed as
 * peer_buf_alloc - (tx_cnt - peer_fwd_cnt), capped at @credit and added to
 * tx_cnt.  Returns the number of bytes actually reserved, which may be
 * less than @credit.
 */
u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
	u32 granted;

	spin_lock_bh(&vvs->tx_lock);

	granted = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
	if (granted > credit)
		granted = credit;
	vvs->tx_cnt += granted;

	spin_unlock_bh(&vvs->tx_lock);

	return granted;
}
EXPORT_SYMBOL_GPL(virtio_transport_get_credit);
/* Return @credit bytes previously reserved with
 * virtio_transport_get_credit() by subtracting them from tx_cnt under
 * tx_lock.
 */
void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
{
	spin_lock_bh(&vvs->tx_lock);

	vvs->tx_cnt -= credit;

	spin_unlock_bh(&vvs->tx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_put_credit);
/* Send an OP_CREDIT_UPDATE packet of the given @type on @vsk.  @hdr is
 * accepted but not used.  Returns the result of
 * virtio_transport_send_pkt_info().
 */
static int virtio_transport_send_credit_update(struct vsock_sock *vsk,
					       int type,
					       struct virtio_vsock_hdr *hdr)
{
	struct virtio_vsock_pkt_info info = {
		.type = type,
		.op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
/* Copy up to @len bytes of queued stream data into @msg.
 *
 * Packets are consumed from the head of rx_queue.  A partially read packet
 * stays queued with pkt->off advanced; a fully read packet is accounted
 * via virtio_transport_dec_rx_pkt(), unlinked and freed.  After the loop a
 * credit update is sent to the peer.
 *
 * Returns the number of bytes copied.  If memcpy_to_msg() fails, returns
 * the bytes copied so far when that is non-zero, otherwise the copy error.
 */
static ssize_t
virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
struct msghdr *msg,
size_t len)
{
struct virtio_vsock_sock *vvs = vsk->trans;
struct virtio_vsock_pkt *pkt;
size_t bytes, total = 0;
/* The initial value is always overwritten before err is returned */
int err = -EFAULT;
spin_lock_bh(&vvs->rx_lock);
while (total < len && !list_empty(&vvs->rx_queue)) {
pkt = list_first_entry(&vvs->rx_queue,
struct virtio_vsock_pkt, list);
/* Copy the smaller of what the caller still wants and what is left
 * in this packet.
 */
bytes = len - total;
if (bytes > pkt->len - pkt->off)
bytes = pkt->len - pkt->off;
/* sk_lock is held by caller so no one else can dequeue.
* Unlock rx_lock since memcpy_to_msg() may sleep.
*/
spin_unlock_bh(&vvs->rx_lock);
err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes);
/* rx_lock is not held on this error exit */
if (err)
goto out;
spin_lock_bh(&vvs->rx_lock);
total += bytes;
pkt->off += bytes;
if (pkt->off == pkt->len) {
virtio_transport_dec_rx_pkt(vvs, pkt);
list_del(&pkt->list);
virtio_transport_free_pkt(pkt);
}
}
spin_unlock_bh(&vvs->rx_lock);
/* Send a credit pkt to peer */
virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM,
NULL);
return total;
out:
/* NOTE(review): no credit update is sent on this path even when some
 * packets were fully consumed - confirm this is intended.
 */
if (total)
err = total;
return err;
}
/* Stream receive entry point.  MSG_PEEK is rejected with -EOPNOTSUPP;
 * otherwise the result of virtio_transport_stream_do_dequeue() is returned.
 */
ssize_t
virtio_transport_stream_dequeue(struct vsock_sock *vsk,
				struct msghdr *msg,
				size_t len, int flags)
{
	if (!(flags & MSG_PEEK))
		return virtio_transport_stream_do_dequeue(vsk, msg, len);

	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue);
/* Datagram receive is not implemented here: always returns -EOPNOTSUPP. */
int
virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
			       struct msghdr *msg,
			       size_t len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
/* Return the socket's rx_bytes counter, sampled under rx_lock. */
s64 virtio_transport_stream_has_data(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	s64 pending;

	spin_lock_bh(&vvs->rx_lock);
	pending = vvs->rx_bytes;
	spin_unlock_bh(&vvs->rx_lock);

	return pending;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data);
static s64 virtio_transport_has_space(struct vsock_sock *vsk)
{
struct virtio_vsock_sock *vvs = vsk->trans;
s64 bytes;
bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
if (bytes < 0)
bytes = 0;
return bytes;
}
/* Locked wrapper: return virtio_transport_has_space() evaluated under
 * tx_lock.
 */
s64 virtio_transport_stream_has_space(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *vvs = vsk->trans;
	s64 space;

	spin_lock_bh(&vvs->tx_lock);
	space = virtio_transport_has_space(vsk);
	spin_unlock_bh(&vvs->tx_lock);

	return space;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space);
/* Allocate and attach the per-socket transport state for @vsk.
 *
 * With a parent socket @psk, the buffer size settings and peer_buf_alloc
 * are copied from it; without one the VIRTIO_VSOCK_DEFAULT_* sizes are
 * used.  buf_alloc starts out equal to buf_size.  Returns 0, or -ENOMEM if
 * the allocation fails.
 */
int virtio_transport_do_socket_init(struct vsock_sock *vsk,
				    struct vsock_sock *psk)
{
	struct virtio_vsock_sock *vvs = kzalloc(sizeof(*vvs), GFP_KERNEL);

	if (!vvs)
		return -ENOMEM;

	vsk->trans = vvs;
	vvs->vsk = vsk;

	if (!psk) {
		vvs->buf_size = VIRTIO_VSOCK_DEFAULT_BUF_SIZE;
		vvs->buf_size_min = VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE;
		vvs->buf_size_max = VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE;
	} else {
		struct virtio_vsock_sock *parent = psk->trans;

		vvs->buf_size = parent->buf_size;
		vvs->buf_size_min = parent->buf_size_min;
		vvs->buf_size_max = parent->buf_size_max;
		vvs->peer_buf_alloc = parent->peer_buf_alloc;
	}

	vvs->buf_alloc = vvs->buf_size;

	spin_lock_init(&vvs->rx_lock);
	spin_lock_init(&vvs->tx_lock);
	INIT_LIST_HEAD(&vvs->rx_queue);

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init);
/* Return the socket's current buf_size. */
u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *state = vsk->trans;

	return state->buf_size;
}
EXPORT_SYMBOL_GPL(virtio_transport_get_buffer_size);
/* Return the socket's current buf_size_min. */
u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *state = vsk->trans;

	return state->buf_size_min;
}
EXPORT_SYMBOL_GPL(virtio_transport_get_min_buffer_size);
/* Return the socket's current buf_size_max. */
u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *state = vsk->trans;

	return state->buf_size_max;
}
EXPORT_SYMBOL_GPL(virtio_transport_get_max_buffer_size);
/* Set the socket buffer size to @val, capped at VIRTIO_VSOCK_MAX_BUF_SIZE.
 *
 * buf_size_min is lowered and buf_size_max raised as needed so the new
 * size stays within them; buf_alloc is set to the same value.
 */
void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	if (val > VIRTIO_VSOCK_MAX_BUF_SIZE)
		val = VIRTIO_VSOCK_MAX_BUF_SIZE;

	/* Widen the [min, max] window so it contains the new size */
	if (val < vvs->buf_size_min)
		vvs->buf_size_min = val;
	if (val > vvs->buf_size_max)
		vvs->buf_size_max = val;

	vvs->buf_size = val;
	vvs->buf_alloc = val;
}
EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size);
/* Set the minimum buffer size to @val, capped at
 * VIRTIO_VSOCK_MAX_BUF_SIZE.  buf_size is raised to @val if it is currently
 * smaller.
 */
void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	if (val > VIRTIO_VSOCK_MAX_BUF_SIZE)
		val = VIRTIO_VSOCK_MAX_BUF_SIZE;

	if (vvs->buf_size < val)
		vvs->buf_size = val;

	vvs->buf_size_min = val;
}
EXPORT_SYMBOL_GPL(virtio_transport_set_min_buffer_size);
/* Set the maximum buffer size to @val, capped at
 * VIRTIO_VSOCK_MAX_BUF_SIZE.  buf_size is lowered to @val if it is
 * currently larger.
 */
void virtio_transport_set_max_buffer_size(struct vsock_sock *vsk, u64 val)
{
	struct virtio_vsock_sock *vvs = vsk->trans;

	if (val > VIRTIO_VSOCK_MAX_BUF_SIZE)
		val = VIRTIO_VSOCK_MAX_BUF_SIZE;

	if (vvs->buf_size > val)
		vvs->buf_size = val;

	vvs->buf_size_max = val;
}
EXPORT_SYMBOL_GPL(virtio_transport_set_max_buffer_size);
/* Poll-in hook: set *data_ready_now to whether vsock_stream_has_data()
 * reports a non-zero value.  @target is not used.  Always returns 0.
 */
int
virtio_transport_notify_poll_in(struct vsock_sock *vsk,
				size_t target,
				bool *data_ready_now)
{
	*data_ready_now = vsock_stream_has_data(vsk) ? true : false;

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in);
/* Poll-out hook: set *space_avail_now to whether vsock_stream_has_space()
 * reports free space.  @target is not used.  Always returns 0.
 *
 * The out-parameter is written on every path.  Previously a negative
 * result left it untouched, so the caller would read whatever value it
 * happened to hold; a negative result is now reported as "no space".
 */
int
virtio_transport_notify_poll_out(struct vsock_sock *vsk,
				 size_t target,
				 bool *space_avail_now)
{
	*space_avail_now = vsock_stream_has_space(vsk) > 0;

	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out);
/* Receive-init notification hook: nothing to do here, always returns 0. */
int virtio_transport_notify_recv_init(struct vsock_sock *vsk,
				      size_t target,
				      struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init);
/* Receive pre-block notification hook: nothing to do here, always returns 0. */
int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk,
					   size_t target,
					   struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block);
/* Receive pre-dequeue notification hook: nothing to do here, always
 * returns 0.
 */
int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk,
					     size_t target,
					     struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue);
/* Receive post-dequeue notification hook: nothing to do here, always
 * returns 0.
 */
int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk,
					      size_t target,
					      ssize_t copied,
					      bool data_read,
					      struct vsock_transport_recv_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue);
/* Send-init notification hook: nothing to do here, always returns 0. */
int virtio_transport_notify_send_init(struct vsock_sock *vsk,
				      struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init);
/* Send pre-block notification hook: nothing to do here, always returns 0. */
int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk,
					   struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block);
/* Send pre-enqueue notification hook: nothing to do here, always
 * returns 0.
 */
int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk,
					     struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue);
/* Send post-enqueue notification hook: nothing to do here, always
 * returns 0.
 */
int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk,
					      ssize_t written,
					      struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue);
/* Receive high-water mark hook: reports the socket's buf_size. */
u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk)
{
	struct virtio_vsock_sock *state = vsk->trans;

	return state->buf_size;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat);
/* Stream is-active hook: unconditionally true. */
bool virtio_transport_stream_is_active(struct vsock_sock *vsk)
{
	return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active);
/* Stream allow hook: every (cid, port) pair is accepted. */
bool virtio_transport_stream_allow(u32 cid, u32 port)
{
	return true;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
/* Datagram bind is not implemented here: always returns -EOPNOTSUPP. */
int virtio_transport_dgram_bind(struct vsock_sock *vsk,
				struct sockaddr_vm *addr)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
/* Datagram allow hook: every (cid, port) pair is refused. */
bool virtio_transport_dgram_allow(u32 cid, u32 port)
{
	return false;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
/* Start a connection by sending a stream OP_REQUEST packet for @vsk.
 * Returns the result of virtio_transport_send_pkt_info().
 */
int virtio_transport_connect(struct vsock_sock *vsk)
{
	struct virtio_vsock_pkt_info info = {
		.type = VIRTIO_VSOCK_TYPE_STREAM,
		.op = VIRTIO_VSOCK_OP_REQUEST,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_connect);
/* Send a stream OP_SHUTDOWN packet for @vsk.
 *
 * RCV_SHUTDOWN and SEND_SHUTDOWN in @mode are translated to the
 * VIRTIO_VSOCK_SHUTDOWN_RCV and VIRTIO_VSOCK_SHUTDOWN_SEND packet flags.
 * Returns the result of virtio_transport_send_pkt_info().
 */
int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
{
	struct virtio_vsock_pkt_info info = {
		.type = VIRTIO_VSOCK_TYPE_STREAM,
		.op = VIRTIO_VSOCK_OP_SHUTDOWN,
	};
	int flags = 0;

	if (mode & RCV_SHUTDOWN)
		flags |= VIRTIO_VSOCK_SHUTDOWN_RCV;
	if (mode & SEND_SHUTDOWN)
		flags |= VIRTIO_VSOCK_SHUTDOWN_SEND;
	info.flags = flags;

	return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_shutdown);
/* Datagram send is not implemented here: always returns -EOPNOTSUPP. */
int
virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
			       struct sockaddr_vm *remote_addr,
			       struct msghdr *msg,
			       size_t dgram_len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
/* Stream send entry point: describe an OP_RW packet carrying up to @len
 * bytes of @msg and pass it to virtio_transport_send_pkt_info(), whose
 * result is returned.
 */
ssize_t
virtio_transport_stream_enqueue(struct vsock_sock *vsk,
				struct msghdr *msg,
				size_t len)
{
	struct virtio_vsock_pkt_info info = {
		.type = VIRTIO_VSOCK_TYPE_STREAM,
		.op = VIRTIO_VSOCK_OP_RW,
		.pkt_len = len,
		.msg = msg,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue);
/* Free the per-socket transport state hanging off vsk->trans. */
void virtio_transport_destruct(struct vsock_sock *vsk)
{
	kfree(vsk->trans);
}
EXPORT_SYMBOL_GPL(virtio_transport_destruct);
static int virtio_transport_reset(struct vsock_sock *vsk,
struct virtio_vsock_pkt *pkt)
{
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_RST,
.type = VIRTIO_VSOCK_TYPE_STREAM,
.reply = !!pkt,
};
/* Send RST only if the original pkt is not a RST pkt */
if (pkt && le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
return 0;
return virtio_transport_send_pkt_info(vsk, &info);
}
/* Normally packets are associated with a socket. There may be no socket if an
 * attempt was made to connect to a socket that does not exist.
 *
 * Answer @pkt with an RST built directly from its header, with source and
 * destination swapped.  Nothing is sent (returning 0) if @pkt is itself an
 * RST.  Returns -ENOMEM if the reply cannot be allocated, otherwise the
 * result of the transport's send_pkt hook.  @pkt is not consumed.
 *
 * The CID header fields are 64-bit little-endian (they are written with
 * cpu_to_le64() in virtio_transport_alloc_pkt()), so they must be read with
 * le64_to_cpu(); le32_to_cpu() yields the wrong value on big-endian hosts.
 */
static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RST,
		.type = le16_to_cpu(pkt->hdr.type),
		.reply = true,
	};
	struct virtio_vsock_pkt *reply;

	/* Send RST only if the original pkt is not a RST pkt */
	if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
		return 0;

	reply = virtio_transport_alloc_pkt(&info, 0,
					   le64_to_cpu(pkt->hdr.dst_cid),
					   le32_to_cpu(pkt->hdr.dst_port),
					   le64_to_cpu(pkt->hdr.src_cid),
					   le32_to_cpu(pkt->hdr.src_port));
	if (!reply)
		return -ENOMEM;

	return virtio_transport_get_ops()->send_pkt(reply);
}
/* Wait, for at most @timeout, until @sk has the SOCK_DONE flag set.
 *
 * Returns immediately when @timeout is zero.  The wait also ends when a
 * signal is pending for the current task or the timeout runs out.
 */
static void virtio_transport_wait_close(struct sock *sk, long timeout)
{
if (timeout) {
DEFINE_WAIT(wait);
do {
prepare_to_wait(sk_sleep(sk), &wait,
TASK_INTERRUPTIBLE);
/* sk_wait_event() updates timeout with the time remaining */
if (sk_wait_event(sk, &timeout,
sock_flag(sk, SOCK_DONE)))
break;
} while (!signal_pending(current) && timeout);
finish_wait(sk_sleep(sk), &wait);
}
}
/* Finish closing a connection.
 *
 * Marks the socket SOCK_DONE with both directions shut down by the peer,
 * moves it to SS_DISCONNECTING when no received data is left, and calls
 * the state-change callback.
 *
 * If a close timeout was scheduled, the socket is removed and the reference
 * taken for the timeout is dropped.  With @cancel_timeout set this only
 * happens when cancel_delayed_work() reports that it cancelled the work;
 * the timeout handler passes false because it is the work itself.
 */
static void virtio_transport_do_close(struct vsock_sock *vsk,
bool cancel_timeout)
{
struct sock *sk = sk_vsock(vsk);
sock_set_flag(sk, SOCK_DONE);
vsk->peer_shutdown = SHUTDOWN_MASK;
if (vsock_stream_has_data(vsk) <= 0)
sk->sk_state = SS_DISCONNECTING;
sk->sk_state_change(sk);
if (vsk->close_work_scheduled &&
(!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
vsk->close_work_scheduled = false;
vsock_remove_sock(vsk);
/* Release refcnt obtained when we scheduled the timeout */
sock_put(sk);
}
}
/* Delayed-work handler scheduled by virtio_transport_close().
 *
 * If the socket has not reached SOCK_DONE by the time this runs, an RST is
 * sent and the close is completed with virtio_transport_do_close().
 */
static void virtio_transport_close_timeout(struct work_struct *work)
{
struct vsock_sock *vsk =
container_of(work, struct vsock_sock, close_work.work);
struct sock *sk = sk_vsock(vsk);
/* Hold a reference of our own: do_close() below drops the one taken
 * when the timeout was scheduled.
 */
sock_hold(sk);
lock_sock(sk);
if (!sock_flag(sk, SOCK_DONE)) {
(void)virtio_transport_reset(vsk, NULL);
virtio_transport_do_close(vsk, false);
}
vsk->close_work_scheduled = false;
release_sock(sk);
sock_put(sk);
}
/* User context, vsk->sk is locked
 *
 * Begin closing a stream socket.  Returns true when the caller should
 * remove the socket right away, false when removal has been deferred to
 * the close timeout scheduled here.
 */
static bool virtio_transport_close(struct vsock_sock *vsk)
{
struct sock *sk = &vsk->sk;
/* Only connected or disconnecting sockets need the shutdown handshake */
if (!(sk->sk_state == SS_CONNECTED ||
sk->sk_state == SS_DISCONNECTING))
return true;
/* Already received SHUTDOWN from peer, reply with RST */
if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) {
(void)virtio_transport_reset(vsk, NULL);
return true;
}
/* Send SHUTDOWN unless both directions are already shut down locally */
if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
(void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK);
/* With SO_LINGER set, wait up to sk_lingertime unless the task is exiting */
if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING))
virtio_transport_wait_close(sk, sk->sk_lingertime);
if (sock_flag(sk, SOCK_DONE)) {
return true;
}
/* Not done yet: take a reference for the timeout work and let it
 * finish the close after VSOCK_CLOSE_TIMEOUT.
 */
sock_hold(sk);
INIT_DELAYED_WORK(&vsk->close_work,
virtio_transport_close_timeout);
vsk->close_work_scheduled = true;
schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT);
return false;
}
/* Release hook for a socket.
 *
 * Stream sockets go through virtio_transport_close() under the socket lock,
 * which decides whether the socket can be removed now; every other socket
 * type is removed immediately.
 */
void virtio_transport_release(struct vsock_sock *vsk)
{
	struct sock *sk = &vsk->sk;
	bool remove_now = true;

	lock_sock(sk);
	if (sk->sk_type == SOCK_STREAM)
		remove_now = virtio_transport_close(vsk);
	release_sock(sk);

	if (remove_now)
		vsock_remove_sock(vsk);
}
EXPORT_SYMBOL_GPL(virtio_transport_release);
/* Handle a packet for a socket in SS_CONNECTING.
 *
 * OP_RESPONSE completes the connection; OP_INVALID is ignored.  OP_RST and
 * any unexpected op tear the attempt down: virtio_transport_reset() is
 * called, the socket returns to SS_UNCONNECTED and sk_err is set to
 * ECONNRESET or EPROTO respectively.  Returns 0, or -EINVAL for an
 * unexpected op.
 */
static int
virtio_transport_recv_connecting(struct sock *sk,
				 struct virtio_vsock_pkt *pkt)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	int sock_err;
	int ret;

	switch (le16_to_cpu(pkt->hdr.op)) {
	case VIRTIO_VSOCK_OP_RESPONSE:
		sk->sk_state = SS_CONNECTED;
		sk->sk_socket->state = SS_CONNECTED;
		vsock_insert_connected(vsk);
		sk->sk_state_change(sk);
		return 0;
	case VIRTIO_VSOCK_OP_INVALID:
		return 0;
	case VIRTIO_VSOCK_OP_RST:
		sock_err = ECONNRESET;
		ret = 0;
		break;
	default:
		sock_err = EPROTO;
		ret = -EINVAL;
		break;
	}

	/* Connection attempt failed: tear it down and report the error */
	virtio_transport_reset(vsk, pkt);
	sk->sk_state = SS_UNCONNECTED;
	sk->sk_err = sock_err;
	sk->sk_error_report(sk);

	return ret;
}
/* Handle a packet for a socket in SS_CONNECTED.
 *
 * This function owns @pkt: an OP_RW packet is queued on rx_queue for the
 * reader to consume; every other packet is freed before returning.
 * Returns 0, or -EINVAL for an unknown op.
 */
static int
virtio_transport_recv_connected(struct sock *sk,
				struct virtio_vsock_pkt *pkt)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	struct virtio_vsock_sock *vvs = vsk->trans;
	int ret = 0;

	switch (le16_to_cpu(pkt->hdr.op)) {
	case VIRTIO_VSOCK_OP_RW:
		pkt->len = le32_to_cpu(pkt->hdr.len);
		pkt->off = 0;

		spin_lock_bh(&vvs->rx_lock);
		virtio_transport_inc_rx_pkt(vvs, pkt);
		list_add_tail(&pkt->list, &vvs->rx_queue);
		spin_unlock_bh(&vvs->rx_lock);

		sk->sk_data_ready(sk);
		/* pkt now lives on rx_queue and must not be freed here */
		return ret;

	case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
		sk->sk_write_space(sk);
		break;

	case VIRTIO_VSOCK_OP_SHUTDOWN:
		if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
			vsk->peer_shutdown |= RCV_SHUTDOWN;
		if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
			vsk->peer_shutdown |= SEND_SHUTDOWN;

		/* Fully shut down by the peer and nothing left to read */
		if (vsk->peer_shutdown == SHUTDOWN_MASK &&
		    vsock_stream_has_data(vsk) <= 0)
			sk->sk_state = SS_DISCONNECTING;

		if (le32_to_cpu(pkt->hdr.flags))
			sk->sk_state_change(sk);
		break;

	case VIRTIO_VSOCK_OP_RST:
		virtio_transport_do_close(vsk, true);
		break;

	default:
		ret = -EINVAL;
		break;
	}

	virtio_transport_free_pkt(pkt);
	return ret;
}
/* Handle a packet for a socket in SS_DISCONNECTING: an OP_RST completes the
 * close (cancelling the close timeout); every other op is ignored.  @pkt is
 * not freed here.
 */
static void
virtio_transport_recv_disconnecting(struct sock *sk,
				    struct virtio_vsock_pkt *pkt)
{
	if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_RST)
		return;

	virtio_transport_do_close(vsock_sk(sk), true);
}
/* Answer the connection request @pkt with a stream OP_RESPONSE sent from
 * @vsk to the request's source address.  Returns the result of
 * virtio_transport_send_pkt_info().
 *
 * src_cid is a 64-bit little-endian header field and is therefore read
 * with le64_to_cpu(); le32_to_cpu() yields the wrong value on big-endian
 * hosts.
 */
static int
virtio_transport_send_response(struct vsock_sock *vsk,
			       struct virtio_vsock_pkt *pkt)
{
	struct virtio_vsock_pkt_info info = {
		.op = VIRTIO_VSOCK_OP_RESPONSE,
		.type = VIRTIO_VSOCK_TYPE_STREAM,
		.remote_cid = le64_to_cpu(pkt->hdr.src_cid),
		.remote_port = le32_to_cpu(pkt->hdr.src_port),
		.reply = true,
	};

	return virtio_transport_send_pkt_info(vsk, &info);
}
/* Handle server socket
 *
 * Process a packet that arrived for the listening socket @sk.  Only
 * OP_REQUEST is accepted: a child socket is created in SS_CONNECTED,
 * addressed from the packet header, inserted into the connected table,
 * queued for accept and answered with OP_RESPONSE.
 *
 * Returns 0 on success.  On failure an RST is sent and -EINVAL (not a
 * request) or -ENOMEM (accept queue full, or child allocation failed) is
 * returned.  @pkt is not freed here.
 *
 * The CID header fields are 64-bit little-endian and are read with
 * le64_to_cpu(); le32_to_cpu() yields the wrong value on big-endian hosts.
 */
static int
virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	struct vsock_sock *vchild;
	struct sock *child;

	if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) {
		virtio_transport_reset(vsk, pkt);
		return -EINVAL;
	}

	if (sk_acceptq_is_full(sk)) {
		virtio_transport_reset(vsk, pkt);
		return -ENOMEM;
	}

	child = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
			       sk->sk_type, 0);
	if (!child) {
		virtio_transport_reset(vsk, pkt);
		return -ENOMEM;
	}

	sk->sk_ack_backlog++;

	/* The listener is already locked by the caller, hence the nested lock */
	lock_sock_nested(child, SINGLE_DEPTH_NESTING);

	child->sk_state = SS_CONNECTED;

	vchild = vsock_sk(child);
	vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
			le32_to_cpu(pkt->hdr.dst_port));
	vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid),
			le32_to_cpu(pkt->hdr.src_port));

	vsock_insert_connected(vchild);
	vsock_enqueue_accept(sk, child);
	virtio_transport_send_response(vchild, pkt);

	release_sock(child);

	sk->sk_data_ready(sk);
	return 0;
}
/* Record the peer's buf_alloc and fwd_cnt from @pkt's header and report
 * whether the peer now has any free space.  Both the update and the check
 * happen under tx_lock.
 */
static bool virtio_transport_space_update(struct sock *sk,
					  struct virtio_vsock_pkt *pkt)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	struct virtio_vsock_sock *vvs = vsk->trans;
	bool has_room;

	spin_lock_bh(&vvs->tx_lock);

	/* buf_alloc and fwd_cnt is always included in the hdr */
	vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc);
	vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt);
	has_room = virtio_transport_has_space(vsk);

	spin_unlock_bh(&vvs->tx_lock);

	return has_room;
}
/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
 * lock.
 *
 * Entry point for every packet received from the peer.  Ownership of @pkt
 * passes to this function: it is either queued on the destination socket
 * (an OP_RW packet for a connected socket) or freed before returning.
 *
 * Packets that are not of stream type, or for which no connected or bound
 * socket is found, are answered with an RST and dropped.  Otherwise the
 * peer's credit information is recorded and the packet is dispatched on
 * the socket state.
 *
 * The CID header fields are 64-bit little-endian and are read with
 * le64_to_cpu(); le32_to_cpu() yields the wrong value on big-endian hosts.
 */
void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt)
{
	struct sockaddr_vm src, dst;
	struct vsock_sock *vsk;
	struct sock *sk;
	bool space_available;

	vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid),
			le32_to_cpu(pkt->hdr.src_port));
	vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid),
			le32_to_cpu(pkt->hdr.dst_port));

	trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
					dst.svm_cid, dst.svm_port,
					le32_to_cpu(pkt->hdr.len),
					le16_to_cpu(pkt->hdr.type),
					le16_to_cpu(pkt->hdr.op),
					le32_to_cpu(pkt->hdr.flags),
					le32_to_cpu(pkt->hdr.buf_alloc),
					le32_to_cpu(pkt->hdr.fwd_cnt));

	if (le16_to_cpu(pkt->hdr.type) != VIRTIO_VSOCK_TYPE_STREAM) {
		(void)virtio_transport_reset_no_sock(pkt);
		goto free_pkt;
	}

	/* The socket must be in connected or bound table
	 * otherwise send reset back
	 */
	sk = vsock_find_connected_socket(&src, &dst);
	if (!sk) {
		sk = vsock_find_bound_socket(&dst);
		if (!sk) {
			(void)virtio_transport_reset_no_sock(pkt);
			goto free_pkt;
		}
	}

	vsk = vsock_sk(sk);

	space_available = virtio_transport_space_update(sk, pkt);

	lock_sock(sk);

	/* Update CID in case it has changed after a transport reset event */
	vsk->local_addr.svm_cid = dst.svm_cid;

	if (space_available)
		sk->sk_write_space(sk);

	switch (sk->sk_state) {
	case VSOCK_SS_LISTEN:
		virtio_transport_recv_listen(sk, pkt);
		virtio_transport_free_pkt(pkt);
		break;
	case SS_CONNECTING:
		virtio_transport_recv_connecting(sk, pkt);
		virtio_transport_free_pkt(pkt);
		break;
	case SS_CONNECTED:
		/* recv_connected() queues or frees the packet itself */
		virtio_transport_recv_connected(sk, pkt);
		break;
	case SS_DISCONNECTING:
		virtio_transport_recv_disconnecting(sk, pkt);
		virtio_transport_free_pkt(pkt);
		break;
	default:
		virtio_transport_free_pkt(pkt);
		break;
	}
	release_sock(sk);

	/* Release refcnt obtained when we fetched this socket out of the
	 * bound or connected list.
	 */
	sock_put(sk);
	return;

free_pkt:
	virtio_transport_free_pkt(pkt);
}
EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
/* Free a packet: first its payload buffer (which may be NULL), then the
 * packet structure itself.  @pkt must not be NULL.
 */
void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
{
	kfree(pkt->buf);
	kfree(pkt);
}
EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);
/* Module metadata for the common virtio vsock code. */
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("common code for virtio vsock");

View file

@ -1644,6 +1644,8 @@ static void vmci_transport_destruct(struct vsock_sock *vsk)
static void vmci_transport_release(struct vsock_sock *vsk) static void vmci_transport_release(struct vsock_sock *vsk)
{ {
vsock_remove_sock(vsk);
if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) { if (!vmci_handle_is_invalid(vmci_trans(vsk)->dg_handle)) {
vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle); vmci_datagram_destroy_handle(vmci_trans(vsk)->dg_handle);
vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE; vmci_trans(vsk)->dg_handle = VMCI_INVALID_HANDLE;