[ICSK]: Introduce reqsk_queue_prune from code in tcp_synack_timer

With this we're very close to getting all of the current TCP
refactorings in my dccp-2.6 tree merged. The next changeset will export
some functions needed by the current DCCP code, and then dccp-2.6.git
will be born!

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Arnaldo Carvalho de Melo 2005-08-09 20:11:56 -07:00 committed by David S. Miller
parent 0a5578cf8e
commit 295f7324ff
10 changed files with 67 additions and 37 deletions

View file

@ -270,7 +270,7 @@ struct tcp_sock {
__u8 frto_counter; /* Number of new acks after RTO */ __u8 frto_counter; /* Number of new acks after RTO */
__u8 nonagle; /* Disable Nagle algorithm? */ __u8 nonagle; /* Disable Nagle algorithm? */
__u8 defer_accept; /* User waits for some data after accept() */ /* ONE BYTE HOLE, TRY TO PACK */
/* RTT measurement */ /* RTT measurement */
__u32 srtt; /* smoothed round trip time << 3 */ __u32 srtt; /* smoothed round trip time << 3 */

View file

@ -239,4 +239,6 @@ static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
reqsk_free(req); reqsk_free(req);
} }
extern void inet_csk_listen_stop(struct sock *sk);
#endif /* _INET_CONNECTION_SOCK_H */ #endif /* _INET_CONNECTION_SOCK_H */

View file

@ -97,6 +97,7 @@ struct listen_sock {
* *
* @rskq_accept_head - FIFO head of established children * @rskq_accept_head - FIFO head of established children
* @rskq_accept_tail - FIFO tail of established children * @rskq_accept_tail - FIFO tail of established children
* @rskq_defer_accept - User waits for some data after accept()
* @syn_wait_lock - serializer * @syn_wait_lock - serializer
* *
* %syn_wait_lock is necessary only to avoid proc interface having to grab the main * %syn_wait_lock is necessary only to avoid proc interface having to grab the main
@ -112,6 +113,8 @@ struct request_sock_queue {
struct request_sock *rskq_accept_head; struct request_sock *rskq_accept_head;
struct request_sock *rskq_accept_tail; struct request_sock *rskq_accept_tail;
rwlock_t syn_wait_lock; rwlock_t syn_wait_lock;
u8 rskq_defer_accept;
/* 3 bytes hole, try to pack */
struct listen_sock *listen_opt; struct listen_sock *listen_opt;
}; };
@ -255,4 +258,8 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
write_unlock(&queue->syn_wait_lock); write_unlock(&queue->syn_wait_lock);
} }
extern void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
const unsigned long interval, const unsigned long timeout,
const unsigned long max_rto, int max_retries);
#endif /* _REQUEST_SOCK_H */ #endif /* _REQUEST_SOCK_H */

View file

@ -423,7 +423,8 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
size_t len, int nonblock, size_t len, int nonblock,
int flags, int *addr_len); int flags, int *addr_len);
extern int tcp_listen_start(struct sock *sk); extern int inet_csk_listen_start(struct sock *sk,
const int nr_table_entries);
extern void tcp_parse_options(struct sk_buff *skb, extern void tcp_parse_options(struct sk_buff *skb,
struct tcp_options_received *opt_rx, struct tcp_options_received *opt_rx,

View file

@ -52,6 +52,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
rwlock_init(&queue->syn_wait_lock); rwlock_init(&queue->syn_wait_lock);
queue->rskq_accept_head = queue->rskq_accept_head = NULL; queue->rskq_accept_head = queue->rskq_accept_head = NULL;
queue->rskq_defer_accept = 0;
lopt->nr_table_entries = nr_table_entries; lopt->nr_table_entries = nr_table_entries;
write_lock_bh(&queue->syn_wait_lock); write_lock_bh(&queue->syn_wait_lock);

View file

@ -99,6 +99,7 @@
#include <net/arp.h> #include <net/arp.h>
#include <net/route.h> #include <net/route.h>
#include <net/ip_fib.h> #include <net/ip_fib.h>
#include <net/inet_connection_sock.h>
#include <net/tcp.h> #include <net/tcp.h>
#include <net/udp.h> #include <net/udp.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>

View file

@ -495,7 +495,7 @@ EXPORT_SYMBOL_GPL(inet_csk_listen_start);
* This routine closes sockets which have been at least partially * This routine closes sockets which have been at least partially
* opened, but not yet accepted. * opened, but not yet accepted.
*/ */
static void inet_csk_listen_stop(struct sock *sk) void inet_csk_listen_stop(struct sock *sk)
{ {
struct inet_connection_sock *icsk = inet_csk(sk); struct inet_connection_sock *icsk = inet_csk(sk);
struct request_sock *acc_req; struct request_sock *acc_req;
@ -1947,15 +1947,15 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
break; break;
case TCP_DEFER_ACCEPT: case TCP_DEFER_ACCEPT:
tp->defer_accept = 0; icsk->icsk_accept_queue.rskq_defer_accept = 0;
if (val > 0) { if (val > 0) {
/* Translate value in seconds to number of /* Translate value in seconds to number of
* retransmits */ * retransmits */
while (tp->defer_accept < 32 && while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
val > ((TCP_TIMEOUT_INIT / HZ) << val > ((TCP_TIMEOUT_INIT / HZ) <<
tp->defer_accept)) icsk->icsk_accept_queue.rskq_defer_accept))
tp->defer_accept++; icsk->icsk_accept_queue.rskq_defer_accept++;
tp->defer_accept++; icsk->icsk_accept_queue.rskq_defer_accept++;
} }
break; break;
@ -2058,6 +2058,7 @@ EXPORT_SYMBOL_GPL(tcp_get_info);
int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
int __user *optlen) int __user *optlen)
{ {
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
int val, len; int val, len;
@ -2095,7 +2096,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
break; break;
case TCP_SYNCNT: case TCP_SYNCNT:
val = inet_csk(sk)->icsk_syn_retries ? : sysctl_tcp_syn_retries; val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
break; break;
case TCP_LINGER2: case TCP_LINGER2:
val = tp->linger2; val = tp->linger2;
@ -2103,8 +2104,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
val = (val ? : sysctl_tcp_fin_timeout) / HZ; val = (val ? : sysctl_tcp_fin_timeout) / HZ;
break; break;
case TCP_DEFER_ACCEPT: case TCP_DEFER_ACCEPT:
val = !tp->defer_accept ? 0 : ((TCP_TIMEOUT_INIT / HZ) << val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
(tp->defer_accept - 1)); ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
break; break;
case TCP_WINDOW_CLAMP: case TCP_WINDOW_CLAMP:
val = tp->window_clamp; val = tp->window_clamp;
@ -2125,7 +2126,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
return 0; return 0;
} }
case TCP_QUICKACK: case TCP_QUICKACK:
val = !inet_csk(sk)->icsk_ack.pingpong; val = !icsk->icsk_ack.pingpong;
break; break;
case TCP_CONGESTION: case TCP_CONGESTION:

View file

@ -3831,6 +3831,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
tcp_parse_options(skb, &tp->rx_opt, 0); tcp_parse_options(skb, &tp->rx_opt, 0);
if (th->ack) { if (th->ack) {
struct inet_connection_sock *icsk;
/* rfc793: /* rfc793:
* "If the state is SYN-SENT then * "If the state is SYN-SENT then
* first check the ACK bit * first check the ACK bit
@ -3956,7 +3957,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
sk_wake_async(sk, 0, POLL_OUT); sk_wake_async(sk, 0, POLL_OUT);
} }
if (sk->sk_write_pending || tp->defer_accept || inet_csk(sk)->icsk_ack.pingpong) { icsk = inet_csk(sk);
if (sk->sk_write_pending ||
icsk->icsk_accept_queue.rskq_defer_accept ||
icsk->icsk_ack.pingpong) {
/* Save one ACK. Data will be ready after /* Save one ACK. Data will be ready after
* several ticks, if write_pending is set. * several ticks, if write_pending is set.
* *
@ -3965,8 +3970,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
* to stand against the temptation 8) --ANK * to stand against the temptation 8) --ANK
*/ */
inet_csk_schedule_ack(sk); inet_csk_schedule_ack(sk);
inet_csk(sk)->icsk_ack.lrcvtime = tcp_time_stamp; icsk->icsk_ack.lrcvtime = tcp_time_stamp;
inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; icsk->icsk_ack.ato = TCP_ATO_MIN;
tcp_incr_quickack(sk); tcp_incr_quickack(sk);
tcp_enter_quickack_mode(sk); tcp_enter_quickack_mode(sk);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,

View file

@ -787,9 +787,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
does sequence test, SYN is truncated, and thus we consider does sequence test, SYN is truncated, and thus we consider
it a bare ACK. it a bare ACK.
If tp->defer_accept, we silently drop this bare ACK. Otherwise, If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
we create an established connection. Both ends (listening sockets) bare ACK. Otherwise, we create an established connection. Both
accept the new incoming connection and try to talk to each other. 8-) ends (listening sockets) accept the new incoming connection and try
to talk to each other. 8-)
Note: This case is both harmless, and rare. Possibility is about the Note: This case is both harmless, and rare. Possibility is about the
same as us discovering intelligent life on another plant tomorrow. same as us discovering intelligent life on another plant tomorrow.
@ -856,7 +857,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
return NULL; return NULL;
/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
inet_rsk(req)->acked = 1; inet_rsk(req)->acked = 1;
return NULL; return NULL;
} }

View file

@ -424,16 +424,12 @@ out_unlock:
sock_put(sk); sock_put(sk);
} }
/* void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
* Timer for listening sockets const unsigned long interval, const unsigned long timeout,
*/ const unsigned long max_rto, int max_retries)
static void tcp_synack_timer(struct sock *sk)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(parent);
struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = queue->listen_opt;
struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
int thresh = max_retries; int thresh = max_retries;
unsigned long now = jiffies; unsigned long now = jiffies;
struct request_sock **reqp, *req; struct request_sock **reqp, *req;
@ -470,10 +466,10 @@ static void tcp_synack_timer(struct sock *sk)
} }
} }
if (tp->defer_accept) if (queue->rskq_defer_accept)
max_retries = tp->defer_accept; max_retries = queue->rskq_defer_accept;
budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL)); budget = 2 * (lopt->nr_table_entries / (timeout / interval));
i = lopt->clock_hand; i = lopt->clock_hand;
do { do {
@ -482,20 +478,19 @@ static void tcp_synack_timer(struct sock *sk)
if (time_after_eq(now, req->expires)) { if (time_after_eq(now, req->expires)) {
if ((req->retrans < thresh || if ((req->retrans < thresh ||
(inet_rsk(req)->acked && req->retrans < max_retries)) (inet_rsk(req)->acked && req->retrans < max_retries))
&& !req->rsk_ops->rtx_syn_ack(sk, req, NULL)) { && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
unsigned long timeo; unsigned long timeo;
if (req->retrans++ == 0) if (req->retrans++ == 0)
lopt->qlen_young--; lopt->qlen_young--;
timeo = min((TCP_TIMEOUT_INIT << req->retrans), timeo = min((timeout << req->retrans), max_rto);
TCP_RTO_MAX);
req->expires = now + timeo; req->expires = now + timeo;
reqp = &req->dl_next; reqp = &req->dl_next;
continue; continue;
} }
/* Drop this request */ /* Drop this request */
inet_csk_reqsk_queue_unlink(sk, req, reqp); inet_csk_reqsk_queue_unlink(parent, req, reqp);
reqsk_queue_removed(&icsk->icsk_accept_queue, req); reqsk_queue_removed(&icsk->icsk_accept_queue, req);
reqsk_free(req); reqsk_free(req);
continue; continue;
@ -503,14 +498,29 @@ static void tcp_synack_timer(struct sock *sk)
reqp = &req->dl_next; reqp = &req->dl_next;
} }
i = (i+1)&(TCP_SYNQ_HSIZE-1); i = (i + 1) & (lopt->nr_table_entries - 1);
} while (--budget > 0); } while (--budget > 0);
lopt->clock_hand = i; lopt->clock_hand = i;
if (lopt->qlen) if (lopt->qlen)
inet_csk_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); inet_csk_reset_keepalive_timer(parent, interval);
}
EXPORT_SYMBOL_GPL(reqsk_queue_prune);
/*
* Timer for listening sockets
*/
static void tcp_synack_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
TCP_TIMEOUT_INIT, TCP_RTO_MAX, max_retries);
} }
void tcp_set_keepalive(struct sock *sk, int val) void tcp_set_keepalive(struct sock *sk, int val)