mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-06-06 06:37:59 +00:00
PCI/IOV: Add sysfs MSI-X vector assignment interface
A typical cloud provider SR-IOV use case is to create many VFs for use by guest VMs. The VFs may not be assigned to a VM until a customer requests a VM of a certain size, e.g., number of CPUs. A VF may need MSI-X vectors proportional to the number of CPUs in the VM, but there is no standard way to change the number of MSI-X vectors supported by a VF. Some Mellanox ConnectX devices support dynamic assignment of MSI-X vectors to SR-IOV VFs. This can be done by the PF driver after VFs are enabled, and it can be done without affecting VFs that are already in use. The hardware supports a limited pool of MSI-X vectors that can be assigned to the PF or to individual VFs. This is device-specific behavior that requires support in the PF driver. Add a read-only "sriov_vf_total_msix" sysfs file for the PF and a writable "sriov_vf_msix_count" file for each VF. Management software may use these to learn how many MSI-X vectors are available and to dynamically assign them to VFs before the VFs are passed through to a VM. If the PF driver implements the ->sriov_get_vf_total_msix() callback, "sriov_vf_total_msix" contains the total number of MSI-X vectors available for distribution among VFs. If no driver is bound to the VF, writing "N" to "sriov_vf_msix_count" uses the PF driver ->sriov_set_msix_vec_count() callback to assign "N" MSI-X vectors to the VF. When a VF driver subsequently reads the MSI-X Message Control register, it will see the new Table Size "N". Link: https://lore.kernel.org/linux-pci/20210314124256.70253-2-leon@kernel.org Acked-by: Bjorn Helgaas <bhelgaas@google.com> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
This commit is contained in:
parent
26bf30902c
commit
c3d5c2d96d
5 changed files with 137 additions and 8 deletions
|
@ -375,3 +375,32 @@ Description:
|
||||||
The value comes from the PCI kernel device state and can be one
|
The value comes from the PCI kernel device state and can be one
|
||||||
of: "unknown", "error", "D0", "D1", "D2", "D3hot", "D3cold".
|
of: "unknown", "error", "D0", "D1", "D2", "D3hot", "D3cold".
|
||||||
The file is read only.
|
The file is read only.
|
||||||
|
|
||||||
|
What: /sys/bus/pci/devices/.../sriov_vf_total_msix
|
||||||
|
Date: January 2021
|
||||||
|
Contact: Leon Romanovsky <leonro@nvidia.com>
|
||||||
|
Description:
|
||||||
|
This file is associated with a SR-IOV physical function (PF).
|
||||||
|
It contains the total number of MSI-X vectors available for
|
||||||
|
assignment to all virtual functions (VFs) associated with the PF.
|
||||||
|
The value will be zero if the device doesn't support this
|
||||||
|
functionality. For supported devices, the value will be
|
||||||
|
constant and won't change after MSI-X vectors are assigned.
|
||||||
|
|
||||||
|
What: /sys/bus/pci/devices/.../sriov_vf_msix_count
|
||||||
|
Date: January 2021
|
||||||
|
Contact: Leon Romanovsky <leonro@nvidia.com>
|
||||||
|
Description:
|
||||||
|
This file is associated with a SR-IOV virtual function (VF).
|
||||||
|
It allows configuration of the number of MSI-X vectors for
|
||||||
|
the VF. This allows devices that have a global pool of MSI-X
|
||||||
|
vectors to optimally divide them between VFs based on VF usage.
|
||||||
|
|
||||||
|
The values accepted are:
|
||||||
|
* > 0 - this number will be reported as the Table Size in the
|
||||||
|
VF's MSI-X capability
|
||||||
|
* < 0 - not valid
|
||||||
|
* = 0 - will reset to the device default value
|
||||||
|
|
||||||
|
The file is writable if the PF is bound to a driver that
|
||||||
|
implements ->sriov_set_msix_vec_count().
|
||||||
|
|
|
@ -31,6 +31,7 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int vf_id)
|
||||||
return (dev->devfn + dev->sriov->offset +
|
return (dev->devfn + dev->sriov->offset +
|
||||||
dev->sriov->stride * vf_id) & 0xff;
|
dev->sriov->stride * vf_id) & 0xff;
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(pci_iov_virtfn_devfn);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may
|
* Per SR-IOV spec sec 3.3.10 and 3.3.11, First VF Offset and VF Stride may
|
||||||
|
@ -157,6 +158,92 @@ failed:
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_PCI_MSI
|
||||||
|
static ssize_t sriov_vf_total_msix_show(struct device *dev,
|
||||||
|
struct device_attribute *attr,
|
||||||
|
char *buf)
|
||||||
|
{
|
||||||
|
struct pci_dev *pdev = to_pci_dev(dev);
|
||||||
|
u32 vf_total_msix = 0;
|
||||||
|
|
||||||
|
device_lock(dev);
|
||||||
|
if (!pdev->driver || !pdev->driver->sriov_get_vf_total_msix)
|
||||||
|
goto unlock;
|
||||||
|
|
||||||
|
vf_total_msix = pdev->driver->sriov_get_vf_total_msix(pdev);
|
||||||
|
unlock:
|
||||||
|
device_unlock(dev);
|
||||||
|
return sysfs_emit(buf, "%u\n", vf_total_msix);
|
||||||
|
}
|
||||||
|
static DEVICE_ATTR_RO(sriov_vf_total_msix);
|
||||||
|
|
||||||
|
/*
 * sysfs "sriov_vf_msix_count" write handler for a VF.
 *
 * Parses @buf with kstrtoint() (base 0) and passes the result to the PF
 * driver's ->sriov_set_msix_vec_count() callback.
 *
 * Returns @count on success, otherwise:
 *   - the kstrtoint() error if @buf does not parse,
 *   - -EINVAL if the parsed value is negative,
 *   - -EOPNOTSUPP if the PF has no driver or the driver lacks the callback,
 *   - -EBUSY if a driver is bound to the VF,
 *   - any non-zero value returned by the callback.
 *
 * Lock order: the PF device lock is taken first, then the VF device lock.
 */
static ssize_t sriov_vf_msix_count_store(struct device *dev,
					 struct device_attribute *attr,
					 const char *buf, size_t count)
{
	struct pci_dev *vf = to_pci_dev(dev);
	struct pci_dev *pf = pci_physfn(vf);
	int nvec, rc;

	rc = kstrtoint(buf, 0, &nvec);
	if (rc)
		return rc;
	if (nvec < 0)
		return -EINVAL;

	device_lock(&pf->dev);
	if (pf->driver && pf->driver->sriov_set_msix_vec_count) {
		device_lock(&vf->dev);
		/*
		 * A driver bound to the VF has already sized itself from
		 * the current MSI-X vector count; refuse to change it
		 * underneath that driver.
		 */
		if (vf->driver)
			rc = -EBUSY;
		else
			rc = pf->driver->sriov_set_msix_vec_count(vf, nvec);
		device_unlock(&vf->dev);
	} else {
		rc = -EOPNOTSUPP;
	}
	device_unlock(&pf->dev);

	if (rc)
		return rc;
	return count;
}
|
||||||
|
static DEVICE_ATTR_WO(sriov_vf_msix_count);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * NULL-terminated list of sysfs attributes for SR-IOV virtual functions.
 * "sriov_vf_msix_count" is only present when CONFIG_PCI_MSI is enabled;
 * otherwise the list is empty.
 */
static struct attribute *sriov_vf_dev_attrs[] = {
|
||||||
|
#ifdef CONFIG_PCI_MSI
|
||||||
|
&dev_attr_sriov_vf_msix_count.attr,
|
||||||
|
#endif
|
||||||
|
NULL,
|
||||||
|
};
|
||||||
|
|
||||||
|
static umode_t sriov_vf_attrs_are_visible(struct kobject *kobj,
|
||||||
|
struct attribute *a, int n)
|
||||||
|
{
|
||||||
|
struct device *dev = kobj_to_dev(kobj);
|
||||||
|
struct pci_dev *pdev = to_pci_dev(dev);
|
||||||
|
|
||||||
|
if (!pdev->is_virtfn)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return a->mode;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Attribute group tying sriov_vf_dev_attrs to the
 * sriov_vf_attrs_are_visible() filter.
 */
const struct attribute_group sriov_vf_dev_attr_group = {
|
||||||
|
.attrs = sriov_vf_dev_attrs,
|
||||||
|
.is_visible = sriov_vf_attrs_are_visible,
|
||||||
|
};
|
||||||
|
|
||||||
int pci_iov_add_virtfn(struct pci_dev *dev, int id)
|
int pci_iov_add_virtfn(struct pci_dev *dev, int id)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
@ -400,18 +487,21 @@ static DEVICE_ATTR_RO(sriov_stride);
|
||||||
static DEVICE_ATTR_RO(sriov_vf_device);
|
static DEVICE_ATTR_RO(sriov_vf_device);
|
||||||
static DEVICE_ATTR_RW(sriov_drivers_autoprobe);
|
static DEVICE_ATTR_RW(sriov_drivers_autoprobe);
|
||||||
|
|
||||||
static struct attribute *sriov_dev_attrs[] = {
|
static struct attribute *sriov_pf_dev_attrs[] = {
|
||||||
&dev_attr_sriov_totalvfs.attr,
|
&dev_attr_sriov_totalvfs.attr,
|
||||||
&dev_attr_sriov_numvfs.attr,
|
&dev_attr_sriov_numvfs.attr,
|
||||||
&dev_attr_sriov_offset.attr,
|
&dev_attr_sriov_offset.attr,
|
||||||
&dev_attr_sriov_stride.attr,
|
&dev_attr_sriov_stride.attr,
|
||||||
&dev_attr_sriov_vf_device.attr,
|
&dev_attr_sriov_vf_device.attr,
|
||||||
&dev_attr_sriov_drivers_autoprobe.attr,
|
&dev_attr_sriov_drivers_autoprobe.attr,
|
||||||
|
#ifdef CONFIG_PCI_MSI
|
||||||
|
&dev_attr_sriov_vf_total_msix.attr,
|
||||||
|
#endif
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
static umode_t sriov_attrs_are_visible(struct kobject *kobj,
|
static umode_t sriov_pf_attrs_are_visible(struct kobject *kobj,
|
||||||
struct attribute *a, int n)
|
struct attribute *a, int n)
|
||||||
{
|
{
|
||||||
struct device *dev = kobj_to_dev(kobj);
|
struct device *dev = kobj_to_dev(kobj);
|
||||||
|
|
||||||
|
@ -421,9 +511,9 @@ static umode_t sriov_attrs_are_visible(struct kobject *kobj,
|
||||||
return a->mode;
|
return a->mode;
|
||||||
}
|
}
|
||||||
|
|
||||||
const struct attribute_group sriov_dev_attr_group = {
|
const struct attribute_group sriov_pf_dev_attr_group = {
|
||||||
.attrs = sriov_dev_attrs,
|
.attrs = sriov_pf_dev_attrs,
|
||||||
.is_visible = sriov_attrs_are_visible,
|
.is_visible = sriov_pf_attrs_are_visible,
|
||||||
};
|
};
|
||||||
|
|
||||||
int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
|
int __weak pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
|
||||||
|
|
|
@ -1567,7 +1567,8 @@ static const struct attribute_group *pci_dev_attr_groups[] = {
|
||||||
&pci_dev_attr_group,
|
&pci_dev_attr_group,
|
||||||
&pci_dev_hp_attr_group,
|
&pci_dev_hp_attr_group,
|
||||||
#ifdef CONFIG_PCI_IOV
|
#ifdef CONFIG_PCI_IOV
|
||||||
&sriov_dev_attr_group,
|
&sriov_pf_dev_attr_group,
|
||||||
|
&sriov_vf_dev_attr_group,
|
||||||
#endif
|
#endif
|
||||||
&pci_bridge_attr_group,
|
&pci_bridge_attr_group,
|
||||||
&pcie_dev_attr_group,
|
&pcie_dev_attr_group,
|
||||||
|
|
|
@ -501,7 +501,8 @@ void pci_iov_update_resource(struct pci_dev *dev, int resno);
|
||||||
resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno);
|
resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno);
|
||||||
void pci_restore_iov_state(struct pci_dev *dev);
|
void pci_restore_iov_state(struct pci_dev *dev);
|
||||||
int pci_iov_bus_range(struct pci_bus *bus);
|
int pci_iov_bus_range(struct pci_bus *bus);
|
||||||
extern const struct attribute_group sriov_dev_attr_group;
|
extern const struct attribute_group sriov_pf_dev_attr_group;
|
||||||
|
extern const struct attribute_group sriov_vf_dev_attr_group;
|
||||||
#else
|
#else
|
||||||
static inline int pci_iov_init(struct pci_dev *dev)
|
static inline int pci_iov_init(struct pci_dev *dev)
|
||||||
{
|
{
|
||||||
|
|
|
@ -856,6 +856,12 @@ struct module;
|
||||||
* e.g. drivers/net/e100.c.
|
* e.g. drivers/net/e100.c.
|
||||||
* @sriov_configure: Optional driver callback to allow configuration of
|
* @sriov_configure: Optional driver callback to allow configuration of
|
||||||
* number of VFs to enable via sysfs "sriov_numvfs" file.
|
* number of VFs to enable via sysfs "sriov_numvfs" file.
|
||||||
|
* @sriov_set_msix_vec_count: PF driver callback to change the number of MSI-X
|
||||||
|
* vectors on a VF. Triggered via sysfs "sriov_vf_msix_count".
|
||||||
|
* This will change MSI-X Table Size in the VF Message Control
|
||||||
|
* registers.
|
||||||
|
* @sriov_get_vf_total_msix: PF driver callback to get the total number of
|
||||||
|
* MSI-X vectors available for distribution to the VFs.
|
||||||
* @err_handler: See Documentation/PCI/pci-error-recovery.rst
|
* @err_handler: See Documentation/PCI/pci-error-recovery.rst
|
||||||
* @groups: Sysfs attribute groups.
|
* @groups: Sysfs attribute groups.
|
||||||
* @driver: Driver model structure.
|
* @driver: Driver model structure.
|
||||||
|
@ -871,6 +877,8 @@ struct pci_driver {
|
||||||
int (*resume)(struct pci_dev *dev); /* Device woken up */
|
int (*resume)(struct pci_dev *dev); /* Device woken up */
|
||||||
void (*shutdown)(struct pci_dev *dev);
|
void (*shutdown)(struct pci_dev *dev);
|
||||||
int (*sriov_configure)(struct pci_dev *dev, int num_vfs); /* On PF */
|
int (*sriov_configure)(struct pci_dev *dev, int num_vfs); /* On PF */
|
||||||
|
int (*sriov_set_msix_vec_count)(struct pci_dev *vf, int msix_vec_count); /* On PF */
|
||||||
|
u32 (*sriov_get_vf_total_msix)(struct pci_dev *pf);
|
||||||
const struct pci_error_handlers *err_handler;
|
const struct pci_error_handlers *err_handler;
|
||||||
const struct attribute_group **groups;
|
const struct attribute_group **groups;
|
||||||
struct device_driver driver;
|
struct device_driver driver;
|
||||||
|
|
Loading…
Add table
Reference in a new issue