i40e/i40evf: enable hardware feature head write back
The hardware supports a feature that avoids marking each completed descriptor in the ring with a DD bit; instead it writes, to a dedicated memory location, the position up to which the driver should clean. Enable this feature.

Change-ID: I5da4e0681f0b581a6401c950a81808792267fe57
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Signed-off-by: Catherine Sullivan <catherine.sullivan@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
parent 6c167f582e
commit 1943d8ba95
4 changed files with 88 additions and 12 deletions
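The hunks below apply the same change to both the i40e PF driver and the i40evf VF driver. As a rough, self-contained sketch of the completion model the commit message describes (not driver code: the struct, field, and function names are invented for illustration, and a Tx descriptor is assumed to be 16 bytes):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for a Tx ring: the hardware writes the ring "head"
 * index into a u32 placed immediately after the last descriptor, and the
 * driver cleans up to that index instead of polling a DD bit per descriptor.
 */
struct demo_tx_ring {
        void *desc;              /* descriptor ring memory; head u32 lives past it */
        uint16_t count;          /* number of descriptors in the ring */
        uint16_t next_to_clean;  /* driver's cleanup cursor */
};

#define DEMO_DESC_SIZE 16        /* assumed size of one Tx descriptor, in bytes */

/* Mirrors the idea of i40e_get_head(): read the write-back slot that sits
 * at desc + count * descriptor size. */
static uint32_t demo_get_head(const struct demo_tx_ring *ring)
{
        const uint8_t *base = ring->desc;

        return *(const volatile uint32_t *)(base + (size_t)ring->count * DEMO_DESC_SIZE);
}

/* Free completed buffers up to the hardware-reported head. */
static unsigned int demo_clean(struct demo_tx_ring *ring)
{
        uint32_t head = demo_get_head(ring);
        unsigned int cleaned = 0;

        while (ring->next_to_clean != head) {
                /* ...unmap DMA and free the skb/buffer for this slot... */
                ring->next_to_clean = (ring->next_to_clean + 1) % ring->count;
                cleaned++;
        }
        return cleaned;
}

int main(void)
{
        struct demo_tx_ring ring = { .count = 8, .next_to_clean = 2 };
        /* 8 descriptors plus the trailing head write-back u32 */
        uint8_t *mem = calloc(1, 8 * DEMO_DESC_SIZE + sizeof(uint32_t));

        ring.desc = mem;
        *(uint32_t *)(mem + 8 * DEMO_DESC_SIZE) = 5;     /* pretend HW wrote head = 5 */
        printf("cleaned %u slots\n", demo_clean(&ring)); /* slots 2,3,4 -> prints 3 */
        free(mem);
        return 0;
}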
@@ -2181,6 +2181,11 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
 	tx_ctx.fd_ena = !!(vsi->back->flags & (I40E_FLAG_FD_SB_ENABLED |
 					       I40E_FLAG_FD_ATR_ENABLED));
 	tx_ctx.timesync_ena = !!(vsi->back->flags & I40E_FLAG_PTP);
+	/* FDIR VSI tx ring can still use RS bit and writebacks */
+	if (vsi->type != I40E_VSI_FDIR)
+		tx_ctx.head_wb_ena = 1;
+	tx_ctx.head_wb_addr = ring->dma +
+			      (ring->count * sizeof(struct i40e_tx_desc));
 
 	/* As part of VSI creation/update, FW allocates certain
 	 * Tx arbitration queue sets for each TC enabled for
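The hunk above programs the write-back address into the Tx queue context as the slot immediately after the last descriptor of the ring (and leaves the flow-director VSI on the existing RS-bit scheme). A small worked example of where that address lands, using a hypothetical ring base and length and assuming a 16-byte Tx descriptor:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t ring_dma = 0x10000000ULL; /* hypothetical DMA base of the ring */
        uint32_t count = 512;              /* hypothetical ring length */
        uint32_t desc_size = 16;           /* assumed sizeof(struct i40e_tx_desc) */

        /* head_wb_addr = ring->dma + ring->count * sizeof(descriptor) */
        uint64_t head_wb_addr = ring_dma + (uint64_t)count * desc_size;

        /* 512 * 16 = 8192 bytes past the base, i.e. 0x10002000 */
        printf("head_wb_addr = 0x%llx\n", (unsigned long long)head_wb_addr);
        return 0;
}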
@@ -618,6 +618,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
 	return ret;
 }
 
+/**
+ * i40e_get_head - Retrieve head from head writeback
+ * @tx_ring: tx ring to fetch head of
+ *
+ * Returns value of Tx ring head based on value stored
+ * in head write-back location
+ **/
+static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
+{
+	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
+
+	return le32_to_cpu(*(volatile __le32 *)head);
+}
+
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @tx_ring: tx ring to clean
@@ -629,6 +643,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 {
 	u16 i = tx_ring->next_to_clean;
 	struct i40e_tx_buffer *tx_buf;
+	struct i40e_tx_desc *tx_head;
 	struct i40e_tx_desc *tx_desc;
 	unsigned int total_packets = 0;
 	unsigned int total_bytes = 0;
@@ -637,6 +652,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 	tx_desc = I40E_TX_DESC(tx_ring, i);
 	i -= tx_ring->count;
 
+	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
+
 	do {
 		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
 
@@ -647,9 +664,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 		/* prevent any other reads prior to eop_desc */
 		read_barrier_depends();
 
-		/* if the descriptor isn't done, no work yet to do */
-		if (!(eop_desc->cmd_type_offset_bsz &
-		      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
+		/* we have caught up to head, no work left to do */
+		if (tx_head == tx_desc)
 			break;
 
 		/* clear next_to_watch to prevent false hangs */
@@ -905,6 +921,10 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
 
 	/* round up to nearest 4K */
 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
+	/* add u32 for head writeback, align after this takes care of
+	 * guaranteeing this is at least one cache line in size
+	 */
+	tx_ring->size += sizeof(u32);
 	tx_ring->size = ALIGN(tx_ring->size, 4096);
 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
 					   &tx_ring->dma, GFP_KERNEL);
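The allocation hunk above grows the ring buffer by one u32 to hold the write-back slot before rounding up to 4 KiB. A worked example of the resulting size for a hypothetical 512-entry ring with 16-byte descriptors:

#include <stdint.h>
#include <stdio.h>

/* same round-up-to-multiple behaviour as the kernel's ALIGN(x, a)
 * for a power-of-two alignment */
#define DEMO_ALIGN(x, a) (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

int main(void)
{
        uint64_t size = 512 * 16;       /* descriptor area: 8192 bytes */

        size += sizeof(uint32_t);       /* head write-back slot: 8196 bytes */
        size = DEMO_ALIGN(size, 4096);  /* rounded up to 4 KiB: 12288 bytes */

        printf("allocation size = %llu bytes\n", (unsigned long long)size);
        return 0;
}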
@@ -2042,9 +2062,23 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 		tx_bi = &tx_ring->tx_bi[i];
 	}
 
-	tx_desc->cmd_type_offset_bsz =
-		build_ctob(td_cmd, td_offset, size, td_tag) |
-		cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
+	/* Place RS bit on last descriptor of any packet that spans across the
+	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
+	 */
+#define WB_STRIDE 0x3
+	if (((i & WB_STRIDE) != WB_STRIDE) &&
+	    (first <= &tx_ring->tx_bi[i]) &&
+	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
+				    I40E_TXD_QW1_CMD_SHIFT);
+	} else {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TXD_CMD <<
+				    I40E_TXD_QW1_CMD_SHIFT);
+	}
 
 	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
 						 tx_ring->queue_index),
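With head write-back enabled, the RS (report status) bit no longer has to be requested for every packet; the i40e_tx_map() hunk above sets it only when a packet ends on, or crosses, a four-descriptor stride. A simplified, index-based model of that condition (illustrative only; the driver compares tx_bi pointers and handles ring wrap, which this sketch ignores):

#include <stdbool.h>
#include <stdio.h>

#define WB_STRIDE 0x3  /* same stride as the hunk above: groups of 4 descriptors */

/* first/last are the descriptor indices of one packet (no ring wrap assumed) */
static bool demo_needs_rs(unsigned int first, unsigned int last)
{
        bool ends_on_stride = (last & WB_STRIDE) == WB_STRIDE;
        bool same_group = first >= (last & ~(unsigned int)WB_STRIDE) && first <= last;

        /* RS stays off only for a packet fully inside one group of four that
         * does not end on the group's last slot; everything else keeps RS. */
        return ends_on_stride || !same_group;
}

int main(void)
{
        printf("desc 4..5 -> RS=%d\n", demo_needs_rs(4, 5)); /* same group, no RS */
        printf("desc 5..7 -> RS=%d\n", demo_needs_rs(5, 7)); /* ends on slot 7, RS */
        printf("desc 6..9 -> RS=%d\n", demo_needs_rs(6, 9)); /* crosses a group, RS */
        return 0;
}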
@@ -230,6 +230,9 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_idx,
 	tx_ctx.qlen = info->ring_len;
 	tx_ctx.rdylist = le16_to_cpu(pf->vsi[vsi_idx]->info.qs_handle[0]);
 	tx_ctx.rdylist_act = 0;
+	tx_ctx.head_wb_ena = 1;
+	tx_ctx.head_wb_addr = info->dma_ring_addr +
+			      (info->ring_len * sizeof(struct i40e_tx_desc));
 
 	/* clear the context in the HMC */
 	ret = i40e_clear_lan_tx_queue_context(hw, pf_queue_id);
@@ -169,6 +169,20 @@ static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
 	return ret;
 }
 
+/**
+ * i40e_get_head - Retrieve head from head writeback
+ * @tx_ring: tx ring to fetch head of
+ *
+ * Returns value of Tx ring head based on value stored
+ * in head write-back location
+ **/
+static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
+{
+	void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
+
+	return le32_to_cpu(*(volatile __le32 *)head);
+}
+
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @tx_ring: tx ring to clean
@@ -180,6 +194,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 {
 	u16 i = tx_ring->next_to_clean;
 	struct i40e_tx_buffer *tx_buf;
+	struct i40e_tx_desc *tx_head;
 	struct i40e_tx_desc *tx_desc;
 	unsigned int total_packets = 0;
 	unsigned int total_bytes = 0;
@@ -188,6 +203,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 	tx_desc = I40E_TX_DESC(tx_ring, i);
 	i -= tx_ring->count;
 
+	tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
+
 	do {
 		struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
 
@@ -198,9 +215,8 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 		/* prevent any other reads prior to eop_desc */
 		read_barrier_depends();
 
-		/* if the descriptor isn't done, no work yet to do */
-		if (!(eop_desc->cmd_type_offset_bsz &
-		      cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
+		/* we have caught up to head, no work left to do */
+		if (tx_head == tx_desc)
 			break;
 
 		/* clear next_to_watch to prevent false hangs */
@@ -432,6 +448,10 @@ int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring)
 
 	/* round up to nearest 4K */
 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
+	/* add u32 for head writeback, align after this takes care of
+	 * guaranteeing this is at least one cache line in size
+	 */
+	tx_ring->size += sizeof(u32);
 	tx_ring->size = ALIGN(tx_ring->size, 4096);
 	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
 					   &tx_ring->dma, GFP_KERNEL);
@@ -1377,9 +1397,23 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 		tx_bi = &tx_ring->tx_bi[i];
 	}
 
-	tx_desc->cmd_type_offset_bsz =
-		build_ctob(td_cmd, td_offset, size, td_tag) |
-		cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
+	/* Place RS bit on last descriptor of any packet that spans across the
+	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
+	 */
+#define WB_STRIDE 0x3
+	if (((i & WB_STRIDE) != WB_STRIDE) &&
+	    (first <= &tx_ring->tx_bi[i]) &&
+	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
+				    I40E_TXD_QW1_CMD_SHIFT);
+	} else {
+		tx_desc->cmd_type_offset_bsz =
+			build_ctob(td_cmd, td_offset, size, td_tag) |
+			cpu_to_le64((u64)I40E_TXD_CMD <<
+				    I40E_TXD_QW1_CMD_SHIFT);
+	}
 
 	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
 						 tx_ring->queue_index),