mirror of
https://github.com/Fishwaldo/linux-bl808.git
synced 2025-06-17 20:25:19 +00:00
Merge branch 'TPACKET_V3-TX_RING-support'
Sowmini Varadhan says:

====================
TPACKET_V3 TX_RING support

This patch series allows an application to use a single PF_PACKET descriptor and leverage the best implementations of TX_RING and RX_RING that exist today.

Patch 1 adds the kernel/Documentation changes for TX_RING support and patch 2 adds the associated test case in selftests.

Changes since v2: additional sanity checks for setsockopt input for TX_RING/TPACKET_V3. Refactored psock_tpacket.c test code to avoid code duplication from V2.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
aa276dd7b3
3 changed files with 111 additions and 28 deletions
|
@ -565,7 +565,7 @@ TPACKET_V1 --> TPACKET_V2:
|
||||||
(void *)hdr + TPACKET_ALIGN(sizeof(struct tpacket_hdr))
|
(void *)hdr + TPACKET_ALIGN(sizeof(struct tpacket_hdr))
|
||||||
|
|
||||||
TPACKET_V2 --> TPACKET_V3:
|
TPACKET_V2 --> TPACKET_V3:
|
||||||
- Flexible buffer implementation:
|
- Flexible buffer implementation for RX_RING:
|
||||||
1. Blocks can be configured with non-static frame-size
|
1. Blocks can be configured with non-static frame-size
|
||||||
2. Read/poll is at a block-level (as opposed to packet-level)
|
2. Read/poll is at a block-level (as opposed to packet-level)
|
||||||
3. Added poll timeout to avoid indefinite user-space wait
|
3. Added poll timeout to avoid indefinite user-space wait
|
||||||
|
@ -574,7 +574,12 @@ TPACKET_V2 --> TPACKET_V3:
|
||||||
4.1 block::timeout
|
4.1 block::timeout
|
||||||
4.2 tpkt_hdr::sk_rxhash
|
4.2 tpkt_hdr::sk_rxhash
|
||||||
- RX Hash data available in user space
|
- RX Hash data available in user space
|
||||||
- Currently only RX_RING available
|
- TX_RING semantics are conceptually similar to TPACKET_V2;
|
||||||
|
use tpacket3_hdr instead of tpacket2_hdr, and TPACKET3_HDRLEN
|
||||||
|
instead of TPACKET2_HDRLEN. In the current implementation,
|
||||||
|
the tp_next_offset field in the tpacket3_hdr MUST be set to
|
||||||
|
zero, indicating that the ring does not hold variable sized frames.
|
||||||
|
Packets with non-zero values of tp_next_offset will be dropped.
|
||||||
|
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
+ AF_PACKET fanout mode
|
+ AF_PACKET fanout mode
|
||||||
|
|
|
@ -409,6 +409,9 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
|
||||||
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
|
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
|
||||||
break;
|
break;
|
||||||
case TPACKET_V3:
|
case TPACKET_V3:
|
||||||
|
h.h3->tp_status = status;
|
||||||
|
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
WARN(1, "TPACKET version not supported.\n");
|
WARN(1, "TPACKET version not supported.\n");
|
||||||
BUG();
|
BUG();
|
||||||
|
@ -432,6 +435,8 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
|
||||||
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
|
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
|
||||||
return h.h2->tp_status;
|
return h.h2->tp_status;
|
||||||
case TPACKET_V3:
|
case TPACKET_V3:
|
||||||
|
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
|
||||||
|
return h.h3->tp_status;
|
||||||
default:
|
default:
|
||||||
WARN(1, "TPACKET version not supported.\n");
|
WARN(1, "TPACKET version not supported.\n");
|
||||||
BUG();
|
BUG();
|
||||||
|
@ -2497,6 +2502,13 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
|
||||||
ph.raw = frame;
|
ph.raw = frame;
|
||||||
|
|
||||||
switch (po->tp_version) {
|
switch (po->tp_version) {
|
||||||
|
case TPACKET_V3:
|
||||||
|
if (ph.h3->tp_next_offset != 0) {
|
||||||
|
pr_warn_once("variable sized slot not supported");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
tp_len = ph.h3->tp_len;
|
||||||
|
break;
|
||||||
case TPACKET_V2:
|
case TPACKET_V2:
|
||||||
tp_len = ph.h2->tp_len;
|
tp_len = ph.h2->tp_len;
|
||||||
break;
|
break;
|
||||||
|
@ -2516,6 +2528,9 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
|
||||||
off_max = po->tx_ring.frame_size - tp_len;
|
off_max = po->tx_ring.frame_size - tp_len;
|
||||||
if (po->sk.sk_type == SOCK_DGRAM) {
|
if (po->sk.sk_type == SOCK_DGRAM) {
|
||||||
switch (po->tp_version) {
|
switch (po->tp_version) {
|
||||||
|
case TPACKET_V3:
|
||||||
|
off = ph.h3->tp_net;
|
||||||
|
break;
|
||||||
case TPACKET_V2:
|
case TPACKET_V2:
|
||||||
off = ph.h2->tp_net;
|
off = ph.h2->tp_net;
|
||||||
break;
|
break;
|
||||||
|
@ -2525,6 +2540,9 @@ static int tpacket_parse_header(struct packet_sock *po, void *frame,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
switch (po->tp_version) {
|
switch (po->tp_version) {
|
||||||
|
case TPACKET_V3:
|
||||||
|
off = ph.h3->tp_mac;
|
||||||
|
break;
|
||||||
case TPACKET_V2:
|
case TPACKET_V2:
|
||||||
off = ph.h2->tp_mac;
|
off = ph.h2->tp_mac;
|
||||||
break;
|
break;
|
||||||
|
@ -4113,11 +4131,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
|
||||||
struct tpacket_req *req = &req_u->req;
|
struct tpacket_req *req = &req_u->req;
|
||||||
|
|
||||||
lock_sock(sk);
|
lock_sock(sk);
|
||||||
/* Opening a Tx-ring is NOT supported in TPACKET_V3 */
|
|
||||||
if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
|
|
||||||
net_warn_ratelimited("Tx-ring is not supported.\n");
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
rb = tx_ring ? &po->tx_ring : &po->rx_ring;
|
rb = tx_ring ? &po->tx_ring : &po->rx_ring;
|
||||||
rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
|
rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
|
||||||
|
@ -4177,11 +4190,19 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
|
||||||
goto out;
|
goto out;
|
||||||
switch (po->tp_version) {
|
switch (po->tp_version) {
|
||||||
case TPACKET_V3:
|
case TPACKET_V3:
|
||||||
/* Transmit path is not supported. We checked
|
/* Block transmit is not supported yet */
|
||||||
* it above but just being paranoid
|
if (!tx_ring) {
|
||||||
*/
|
|
||||||
if (!tx_ring)
|
|
||||||
init_prb_bdqc(po, rb, pg_vec, req_u);
|
init_prb_bdqc(po, rb, pg_vec, req_u);
|
||||||
|
} else {
|
||||||
|
struct tpacket_req3 *req3 = &req_u->req3;
|
||||||
|
|
||||||
|
if (req3->tp_retire_blk_tov ||
|
||||||
|
req3->tp_sizeof_priv ||
|
||||||
|
req3->tp_feature_req_word) {
|
||||||
|
err = -EINVAL;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -311,20 +311,33 @@ static inline void __v2_tx_user_ready(struct tpacket2_hdr *hdr)
|
||||||
__sync_synchronize();
|
__sync_synchronize();
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int __v1_v2_tx_kernel_ready(void *base, int version)
|
static inline int __v3_tx_kernel_ready(struct tpacket3_hdr *hdr)
|
||||||
|
{
|
||||||
|
return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void __v3_tx_user_ready(struct tpacket3_hdr *hdr)
|
||||||
|
{
|
||||||
|
hdr->tp_status = TP_STATUS_SEND_REQUEST;
|
||||||
|
__sync_synchronize();
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int __tx_kernel_ready(void *base, int version)
|
||||||
{
|
{
|
||||||
switch (version) {
|
switch (version) {
|
||||||
case TPACKET_V1:
|
case TPACKET_V1:
|
||||||
return __v1_tx_kernel_ready(base);
|
return __v1_tx_kernel_ready(base);
|
||||||
case TPACKET_V2:
|
case TPACKET_V2:
|
||||||
return __v2_tx_kernel_ready(base);
|
return __v2_tx_kernel_ready(base);
|
||||||
|
case TPACKET_V3:
|
||||||
|
return __v3_tx_kernel_ready(base);
|
||||||
default:
|
default:
|
||||||
bug_on(1);
|
bug_on(1);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __v1_v2_tx_user_ready(void *base, int version)
|
static inline void __tx_user_ready(void *base, int version)
|
||||||
{
|
{
|
||||||
switch (version) {
|
switch (version) {
|
||||||
case TPACKET_V1:
|
case TPACKET_V1:
|
||||||
|
@ -333,6 +346,9 @@ static inline void __v1_v2_tx_user_ready(void *base, int version)
|
||||||
case TPACKET_V2:
|
case TPACKET_V2:
|
||||||
__v2_tx_user_ready(base);
|
__v2_tx_user_ready(base);
|
||||||
break;
|
break;
|
||||||
|
case TPACKET_V3:
|
||||||
|
__v3_tx_user_ready(base);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -348,7 +364,22 @@ static void __v1_v2_set_packet_loss_discard(int sock)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void walk_v1_v2_tx(int sock, struct ring *ring)
|
static inline void *get_next_frame(struct ring *ring, int n)
|
||||||
|
{
|
||||||
|
uint8_t *f0 = ring->rd[0].iov_base;
|
||||||
|
|
||||||
|
switch (ring->version) {
|
||||||
|
case TPACKET_V1:
|
||||||
|
case TPACKET_V2:
|
||||||
|
return ring->rd[n].iov_base;
|
||||||
|
case TPACKET_V3:
|
||||||
|
return f0 + (n * ring->req3.tp_frame_size);
|
||||||
|
default:
|
||||||
|
bug_on(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void walk_tx(int sock, struct ring *ring)
|
||||||
{
|
{
|
||||||
struct pollfd pfd;
|
struct pollfd pfd;
|
||||||
int rcv_sock, ret;
|
int rcv_sock, ret;
|
||||||
|
@ -360,9 +391,19 @@ static void walk_v1_v2_tx(int sock, struct ring *ring)
|
||||||
.sll_family = PF_PACKET,
|
.sll_family = PF_PACKET,
|
||||||
.sll_halen = ETH_ALEN,
|
.sll_halen = ETH_ALEN,
|
||||||
};
|
};
|
||||||
|
int nframes;
|
||||||
|
|
||||||
|
/* TPACKET_V{1,2} sets up the ring->rd* related variables based
|
||||||
|
* on frames (e.g., rd_num is tp_frame_nr) whereas V3 sets these
|
||||||
|
* up based on blocks (e.g, rd_num is tp_block_nr)
|
||||||
|
*/
|
||||||
|
if (ring->version <= TPACKET_V2)
|
||||||
|
nframes = ring->rd_num;
|
||||||
|
else
|
||||||
|
nframes = ring->req3.tp_frame_nr;
|
||||||
|
|
||||||
bug_on(ring->type != PACKET_TX_RING);
|
bug_on(ring->type != PACKET_TX_RING);
|
||||||
bug_on(ring->rd_num < NUM_PACKETS);
|
bug_on(nframes < NUM_PACKETS);
|
||||||
|
|
||||||
rcv_sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
|
rcv_sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
|
||||||
if (rcv_sock == -1) {
|
if (rcv_sock == -1) {
|
||||||
|
@ -388,10 +429,11 @@ static void walk_v1_v2_tx(int sock, struct ring *ring)
|
||||||
create_payload(packet, &packet_len);
|
create_payload(packet, &packet_len);
|
||||||
|
|
||||||
while (total_packets > 0) {
|
while (total_packets > 0) {
|
||||||
while (__v1_v2_tx_kernel_ready(ring->rd[frame_num].iov_base,
|
void *next = get_next_frame(ring, frame_num);
|
||||||
ring->version) &&
|
|
||||||
|
while (__tx_kernel_ready(next, ring->version) &&
|
||||||
total_packets > 0) {
|
total_packets > 0) {
|
||||||
ppd.raw = ring->rd[frame_num].iov_base;
|
ppd.raw = next;
|
||||||
|
|
||||||
switch (ring->version) {
|
switch (ring->version) {
|
||||||
case TPACKET_V1:
|
case TPACKET_V1:
|
||||||
|
@ -413,14 +455,27 @@ static void walk_v1_v2_tx(int sock, struct ring *ring)
|
||||||
packet_len);
|
packet_len);
|
||||||
total_bytes += ppd.v2->tp_h.tp_snaplen;
|
total_bytes += ppd.v2->tp_h.tp_snaplen;
|
||||||
break;
|
break;
|
||||||
|
case TPACKET_V3: {
|
||||||
|
struct tpacket3_hdr *tx = next;
|
||||||
|
|
||||||
|
tx->tp_snaplen = packet_len;
|
||||||
|
tx->tp_len = packet_len;
|
||||||
|
tx->tp_next_offset = 0;
|
||||||
|
|
||||||
|
memcpy((uint8_t *)tx + TPACKET3_HDRLEN -
|
||||||
|
sizeof(struct sockaddr_ll), packet,
|
||||||
|
packet_len);
|
||||||
|
total_bytes += tx->tp_snaplen;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
status_bar_update();
|
status_bar_update();
|
||||||
total_packets--;
|
total_packets--;
|
||||||
|
|
||||||
__v1_v2_tx_user_ready(ppd.raw, ring->version);
|
__tx_user_ready(next, ring->version);
|
||||||
|
|
||||||
frame_num = (frame_num + 1) % ring->rd_num;
|
frame_num = (frame_num + 1) % nframes;
|
||||||
}
|
}
|
||||||
|
|
||||||
poll(&pfd, 1, 1);
|
poll(&pfd, 1, 1);
|
||||||
|
@ -460,7 +515,7 @@ static void walk_v1_v2(int sock, struct ring *ring)
|
||||||
if (ring->type == PACKET_RX_RING)
|
if (ring->type == PACKET_RX_RING)
|
||||||
walk_v1_v2_rx(sock, ring);
|
walk_v1_v2_rx(sock, ring);
|
||||||
else
|
else
|
||||||
walk_v1_v2_tx(sock, ring);
|
walk_tx(sock, ring);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint64_t __v3_prev_block_seq_num = 0;
|
static uint64_t __v3_prev_block_seq_num = 0;
|
||||||
|
@ -583,7 +638,7 @@ static void walk_v3(int sock, struct ring *ring)
|
||||||
if (ring->type == PACKET_RX_RING)
|
if (ring->type == PACKET_RX_RING)
|
||||||
walk_v3_rx(sock, ring);
|
walk_v3_rx(sock, ring);
|
||||||
else
|
else
|
||||||
bug_on(1);
|
walk_tx(sock, ring);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __v1_v2_fill(struct ring *ring, unsigned int blocks)
|
static void __v1_v2_fill(struct ring *ring, unsigned int blocks)
|
||||||
|
@ -602,12 +657,13 @@ static void __v1_v2_fill(struct ring *ring, unsigned int blocks)
|
||||||
ring->flen = ring->req.tp_frame_size;
|
ring->flen = ring->req.tp_frame_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __v3_fill(struct ring *ring, unsigned int blocks)
|
static void __v3_fill(struct ring *ring, unsigned int blocks, int type)
|
||||||
{
|
{
|
||||||
ring->req3.tp_retire_blk_tov = 64;
|
if (type == PACKET_RX_RING) {
|
||||||
ring->req3.tp_sizeof_priv = 0;
|
ring->req3.tp_retire_blk_tov = 64;
|
||||||
ring->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
|
ring->req3.tp_sizeof_priv = 0;
|
||||||
|
ring->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
|
||||||
|
}
|
||||||
ring->req3.tp_block_size = getpagesize() << 2;
|
ring->req3.tp_block_size = getpagesize() << 2;
|
||||||
ring->req3.tp_frame_size = TPACKET_ALIGNMENT << 7;
|
ring->req3.tp_frame_size = TPACKET_ALIGNMENT << 7;
|
||||||
ring->req3.tp_block_nr = blocks;
|
ring->req3.tp_block_nr = blocks;
|
||||||
|
@ -641,7 +697,7 @@ static void setup_ring(int sock, struct ring *ring, int version, int type)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case TPACKET_V3:
|
case TPACKET_V3:
|
||||||
__v3_fill(ring, blocks);
|
__v3_fill(ring, blocks, type);
|
||||||
ret = setsockopt(sock, SOL_PACKET, type, &ring->req3,
|
ret = setsockopt(sock, SOL_PACKET, type, &ring->req3,
|
||||||
sizeof(ring->req3));
|
sizeof(ring->req3));
|
||||||
break;
|
break;
|
||||||
|
@ -796,6 +852,7 @@ int main(void)
|
||||||
ret |= test_tpacket(TPACKET_V2, PACKET_TX_RING);
|
ret |= test_tpacket(TPACKET_V2, PACKET_TX_RING);
|
||||||
|
|
||||||
ret |= test_tpacket(TPACKET_V3, PACKET_RX_RING);
|
ret |= test_tpacket(TPACKET_V3, PACKET_RX_RING);
|
||||||
|
ret |= test_tpacket(TPACKET_V3, PACKET_TX_RING);
|
||||||
|
|
||||||
if (ret)
|
if (ret)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue