patch-2.1.120 linux/net/ipv4/tcp_ipv4.c
- Lines: 556
- Date: Thu Aug 27 19:43:53 1998
- Orig file: v2.1.119/linux/net/ipv4/tcp_ipv4.c
- Orig date: Thu Aug 27 19:56:30 1998
diff -u --recursive --new-file v2.1.119/linux/net/ipv4/tcp_ipv4.c linux/net/ipv4/tcp_ipv4.c
@@ -5,7 +5,7 @@
*
* Implementation of the Transmission Control Protocol(TCP).
*
- * Version: $Id: tcp_ipv4.c,v 1.150 1998/07/28 17:45:07 freitag Exp $
+ * Version: $Id: tcp_ipv4.c,v 1.157 1998/08/28 00:27:47 davem Exp $
*
* IPv4 specific functions
*
@@ -44,6 +44,7 @@
* Andi Kleen: various fixes.
* Vitaly E. Lavrov : Transparent proxy revived after year coma.
* Andi Kleen : Fix new listen.
+ * Andi Kleen : Fix accept error reporting.
*/
#include <linux/config.h>
@@ -140,7 +141,8 @@
if(tb->port == snum) {
if(tb->owners == NULL &&
(tb->flags & TCPB_FLAG_LOCKED)) {
- tb->flags &= ~TCPB_FLAG_LOCKED;
+ tb->flags &= ~(TCPB_FLAG_LOCKED |
+ TCPB_FLAG_FASTREUSE);
tcp_inc_slow_timer(TCP_SLT_BUCKETGC);
}
break;
@@ -208,7 +210,7 @@
/* We must walk the whole port owner list in this case. -DaveM */
for(sk2 = tb->owners; sk2; sk2 = sk2->bind_next) {
- if(sk->bound_dev_if == sk2->bound_dev_if) {
+ if (sk->bound_dev_if == sk2->bound_dev_if) {
if(!sk_reuse || !sk2->reuse || sk2->state == TCP_LISTEN) {
if(!sk2->rcv_saddr ||
!sk->rcv_saddr ||
@@ -223,16 +225,33 @@
}
if(result == 0) {
if(tb == NULL) {
- if(tcp_bucket_create(snum) == NULL)
+ if((tb = tcp_bucket_create(snum)) == NULL)
result = 1;
+ else if (sk->reuse && sk->state != TCP_LISTEN)
+ tb->flags |= TCPB_FLAG_FASTREUSE;
} else {
/* It could be pending garbage collection; this
* kills the race and prevents it from disappearing
* out from under us by the time we use it. -DaveM
*/
- if(tb->owners == NULL && !(tb->flags & TCPB_FLAG_LOCKED)) {
- tb->flags = TCPB_FLAG_LOCKED;
- tcp_dec_slow_timer(TCP_SLT_BUCKETGC);
+ if(tb->owners == NULL) {
+ if (!(tb->flags & TCPB_FLAG_LOCKED)) {
+ tb->flags = (TCPB_FLAG_LOCKED |
+ ((sk->reuse &&
+ sk->state != TCP_LISTEN) ?
+ TCPB_FLAG_FASTREUSE : 0));
+ tcp_dec_slow_timer(TCP_SLT_BUCKETGC);
+ } else if (!(tb->flags & TCPB_FLAG_GOODSOCKNUM)) {
+ /* Someone is in between the bind
+ * and the actual connect or listen.
+ * See if that bind was a legitimate reuse
+ * and ours is as well; else punt.
+ */
+ if (sk->reuse == 0 ||
+ !(tb->flags & TCPB_FLAG_FASTREUSE))
+ result = 1;
+ } else
+ tb->flags &= ~TCPB_FLAG_GOODSOCKNUM;
}
}
}
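
The new TCPB_FLAG_FASTREUSE bit caches the fact that every owner of the
bucket bound with SO_REUSEADDR and is not listening, so a later bind of
the same kind can be accepted without rescanning the owner list. The
userspace situation the flag describes, as a rough sketch (helper name
and the omitted error handling are illustrative, not from the patch):

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <string.h>
    #include <sys/socket.h>

    /* Two such sockets may share one local port: both set
     * SO_REUSEADDR and neither ever calls listen(). */
    static int bound_reuse_socket(unsigned short port)
    {
            struct sockaddr_in a;
            int fd = socket(AF_INET, SOCK_STREAM, 0);
            int one = 1;

            setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
            memset(&a, 0, sizeof(a));
            a.sin_family = AF_INET;
            a.sin_port = htons(port);
            a.sin_addr.s_addr = htonl(INADDR_ANY);
            bind(fd, (struct sockaddr *)&a, sizeof(a));
            return fd;
    }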
@@ -264,8 +283,11 @@
next:
} while(--remaining > 0);
tcp_port_rover = rover;
- if((remaining <= 0) || (tcp_bucket_create(rover) == NULL))
+ tb = NULL;
+ if((remaining <= 0) || ((tb = tcp_bucket_create(rover)) == NULL))
rover = 0;
+ if (tb != NULL)
+ tb->flags |= TCPB_FLAG_GOODSOCKNUM;
SOCKHASH_UNLOCK();
return rover;
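
TCPB_FLAG_GOODSOCKNUM marks a bucket whose port number the kernel
itself just picked, so the gap between that choice and the eventual
connect or listen is not mistaken for a conflicting bind. Seen from
userspace, the path that takes this branch is plain autobind (a
sketch; the peer address is an example):

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <string.h>
    #include <sys/socket.h>

    static int autobind_connect(void)
    {
            struct sockaddr_in peer;
            int fd = socket(AF_INET, SOCK_STREAM, 0);

            memset(&peer, 0, sizeof(peer));
            peer.sin_family = AF_INET;
            peer.sin_port = htons(80);
            inet_aton("192.0.2.1", &peer.sin_addr);
            /* no bind(): tcp_v4_get_port() picks the ephemeral port */
            return connect(fd, (struct sockaddr *)&peer, sizeof(peer));
    }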
@@ -543,8 +565,8 @@
struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
struct sk_buff *buff;
struct rtable *rt;
+ u32 daddr, nexthop;
int tmp;
- int mss;
if (sk->state != TCP_CLOSE)
return(-EISCONN);
@@ -564,7 +586,14 @@
printk(KERN_DEBUG "%s forgot to set AF_INET in " __FUNCTION__ "\n", current->comm);
}
- tmp = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr,
+ nexthop = daddr = usin->sin_addr.s_addr;
+ if (sk->opt && sk->opt->srr) {
+ if (daddr == 0)
+ return -EINVAL;
+ nexthop = sk->opt->faddr;
+ }
+
+ tmp = ip_route_connect(&rt, nexthop, sk->saddr,
RT_TOS(sk->ip_tos)|sk->localroute, sk->bound_dev_if);
if (tmp < 0)
return tmp;
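
With a source route option the packet must first be routed to the
first hop (sk->opt->faddr) while the connect() address remains the
logical destination. A hedged sketch of how a socket acquires such an
option, using the classic LSRR layout of type (131), length, pointer,
then addresses; the helper and the hop address are illustrative:

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <string.h>
    #include <sys/socket.h>

    static int route_via(int fd, const char *hop)
    {
            unsigned char opt[7];
            struct in_addr a;

            inet_aton(hop, &a);
            opt[0] = 131;           /* IPOPT_LSRR, loose source route */
            opt[1] = sizeof(opt);   /* 3 header bytes + one address */
            opt[2] = 4;             /* pointer to the first address */
            memcpy(&opt[3], &a, sizeof(a));
            return setsockopt(fd, IPPROTO_IP, IP_OPTIONS,
                              opt, sizeof(opt));
    }

A subsequent connect() to the final destination then goes out via the
hop, which is exactly the case the nexthop/daddr split above handles.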
@@ -592,6 +621,8 @@
*/
sk->dport = usin->sin_port;
sk->daddr = rt->rt_dst;
+ if (sk->opt && sk->opt->srr)
+ sk->daddr = daddr;
if (!sk->saddr)
sk->saddr = rt->rt_src;
sk->rcv_saddr = sk->saddr;
@@ -601,22 +632,28 @@
return -EADDRNOTAVAIL;
}
- sk->mtu = rt->u.dst.pmtu;
- if ((sk->ip_pmtudisc == IP_PMTUDISC_DONT ||
- (sk->ip_pmtudisc == IP_PMTUDISC_WANT &&
- (rt->u.dst.mxlock&(1<<RTAX_MTU)))) &&
- rt->u.dst.pmtu > 576 && rt->rt_dst != rt->rt_gateway)
- sk->mtu = 576;
+ tp->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr,
+ sk->sport, usin->sin_port);
- if (sk->mtu < 64)
- sk->mtu = 64; /* Sanity limit */
+ tp->ext_header_len = 0;
+ if (sk->opt)
+ tp->ext_header_len = sk->opt->optlen;
- mss = sk->mtu - sizeof(struct iphdr);
+ /* Reset mss clamp */
+ tp->mss_clamp = ~0;
- tp->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr,
- sk->sport, usin->sin_port);
+ if ((sk->ip_pmtudisc == IP_PMTUDISC_DONT ||
+ (sk->ip_pmtudisc == IP_PMTUDISC_WANT &&
+ (rt->u.dst.mxlock&(1<<RTAX_MTU)))) &&
+ rt->u.dst.pmtu > 576 && rt->rt_dst != rt->rt_gateway) {
+ /* Clamp mss to the maximum of 536 and user_mss;
+ a user-set value presumably means to override the
+ tiny segment size in the gatewayed case.
+ */
+ tp->mss_clamp = max(tp->user_mss, 536);
+ }
- tcp_connect(sk, buff, mss);
+ tcp_connect(sk, buff, rt->u.dst.pmtu);
return 0;
}
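
tcp_connect() now receives the raw path MTU; the effective mss is
derived later by tcp_sync_mss() against tp->mss_clamp instead of
being precomputed here from sk->mtu. The arithmetic, reduced to a
standalone sketch (20-byte base headers hard-coded; the real helper
lives in tcp_output.c and may differ in detail):

    /* mss = pmtu - IP header - TCP header - IP options,
     * then capped by mss_clamp (e.g. max(user_mss, 536) above). */
    static unsigned int effective_mss(unsigned int pmtu,
                                      unsigned int ext_header_len,
                                      unsigned int mss_clamp)
    {
            unsigned int mss = pmtu - 20 - 20 - ext_header_len;

            if (mss > mss_clamp)
                    mss = mss_clamp;
            return mss;
    }

For the common Ethernet pmtu of 1500 with no IP options this yields
1460; with the 536 clamp from the gatewayed case, 536 wins.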
@@ -694,7 +731,6 @@
*/
static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *ip)
{
- int new_mtu;
struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
/* Not interested in TCP_LISTEN and open_requests (SYN-ACKs
@@ -711,21 +747,19 @@
* route, but I think that's acceptable.
*/
if (sk->ip_pmtudisc != IP_PMTUDISC_DONT && sk->dst_cache) {
- new_mtu = sk->dst_cache->pmtu -
- (ip->ihl<<2) - tp->tcp_header_len;
- if (new_mtu < sk->mss && new_mtu > 0) {
- sk->mss = new_mtu;
+ if (tp->pmtu_cookie > sk->dst_cache->pmtu &&
+ !atomic_read(&sk->sock_readers)) {
+ lock_sock(sk);
+ tcp_sync_mss(sk, sk->dst_cache->pmtu);
+
/* Resend the TCP packet because it's
* clear that the old packet has been
* dropped. This is the new "fast" path mtu
* discovery.
*/
- if (!atomic_read(&sk->sock_readers)) {
- lock_sock(sk);
- tcp_simple_retransmit(sk);
- release_sock(sk);
- } /* else let the usual retransmit timer handle it */
- }
+ tcp_simple_retransmit(sk);
+ release_sock(sk);
+ } /* else let the usual retransmit timer handle it */
}
}
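
An application can watch the cached path MTU shrink on a connected
socket through the Linux-specific IP_MTU getsockopt; the snippet is
an illustration of the effect, not part of the patch:

    #include <netinet/in.h>
    #include <sys/socket.h>

    static int cached_path_mtu(int fd)
    {
            int mtu = 0;
            socklen_t len = sizeof(mtu);

            if (getsockopt(fd, IPPROTO_IP, IP_MTU, &mtu, &len) < 0)
                    return -1;
            return mtu;     /* tcp_sync_mss() keeps the mss in step */
    }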
@@ -821,8 +855,15 @@
return;
}
+ /* The final ACK of the handshake should already be
+ * handled in the new socket context, not here.
+ * Strictly speaking, an ICMP error for the final
+ * ACK should set the opening flag, but that is too
+ * complicated right now.
+ */
if (!th->syn && !th->ack)
return;
+
req = tcp_v4_search_req(tp, iph, th, &prev);
if (!req)
return;
@@ -833,17 +874,33 @@
seq, req->snt_isn);
return;
}
- if (req->sk) { /* not yet accept()ed */
- sk = req->sk; /* report error in accept */
+ if (req->sk) {
+ /*
+ * Already in ESTABLISHED and a big socket has been
+ * created; set the error code there.
+ * The error will _not_ be reported in the accept(),
+ * but only with the next operation on the socket after
+ * accept.
+ */
+ sk = req->sk;
} else {
+ /*
+ * Still in SYN_RECV, just remove it silently.
+ * There is no good way to pass the error to the newly
+ * created socket, and POSIX does not want network
+ * errors returned from accept().
+ */
tp->syn_backlog--;
tcp_synq_unlink(tp, req, prev);
req->class->destructor(req);
tcp_openreq_free(req);
+ return;
}
- /* FALL THOUGH */
+ break;
case TCP_SYN_SENT:
case TCP_SYN_RECV:
+ if (!th->syn)
+ return;
opening = 1;
break;
}
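
In practical terms the two comments above say: accept() itself stays
silent, and the ICMP-derived error surfaces on the first operation on
the accepted socket. A sketch of what a server sees:

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/socket.h>

    static void serve_one(int listen_fd)
    {
            char buf[512];
            int fd = accept(listen_fd, NULL, NULL); /* no network errors here */

            if (fd < 0)
                    return;
            if (read(fd, buf, sizeof(buf)) < 0)
                    perror("read"); /* e.g. the error set via sk->err above */
            close(fd);
    }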
@@ -855,10 +912,13 @@
tcp_statistics.TcpAttemptFails++;
if (sk->state != TCP_LISTEN)
tcp_set_state(sk,TCP_CLOSE);
+ mb();
sk->error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
}
- } else /* Only an error on timeout */
+ } else { /* Only an error on timeout */
sk->err_soft = icmp_err_convert[code].errno;
+ mb();
+ }
}
/* This routine computes an IPv4 TCP checksum. */
@@ -916,7 +976,7 @@
IPPROTO_TCP,
0);
arg.n_iov = 1;
- arg.csumoffset = offsetof(struct tcphdr, check) / sizeof(u16);
+ arg.csumoffset = offsetof(struct tcphdr, check) / 2;
ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
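
csumoffset counts 16-bit words rather than bytes, which is why / 2 and
/ sizeof(u16) are interchangeable here: the checksum field sits 16
bytes into the TCP header, i.e. at word offset 8. For example, with
glibc's Linux-flavoured struct tcphdr:

    #include <stddef.h>
    #include <stdio.h>
    #include <netinet/tcp.h>

    int main(void)
    {
            /* prints 8: byte offset 16 of the check field, halved */
            printf("%lu\n",
                   (unsigned long)(offsetof(struct tcphdr, check) / 2));
            return 0;
    }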
@@ -950,6 +1010,11 @@
}
#endif
+/*
+ * Send a SYN-ACK after having received a SYN.
+ * This still operates on an open_request only, not on a big
+ * socket.
+ */
static void tcp_v4_send_synack(struct sock *sk, struct open_request *req)
{
struct rtable *rt;
@@ -974,7 +1039,7 @@
return;
}
- mss = (rt->u.dst.pmtu - sizeof(struct iphdr) - sizeof(struct tcphdr));
+ mss = rt->u.dst.pmtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
skb = tcp_make_synack(sk, &rt->u.dst, req, mss);
if (skb) {
@@ -994,6 +1059,9 @@
ip_rt_put(rt);
}
+/*
+ * IPv4 open_request destructor.
+ */
static void tcp_v4_or_free(struct open_request *req)
{
if(!req->sk && req->af.v4_req.opt)
@@ -1016,9 +1084,9 @@
* Save and compile IPv4 options into the open_request if needed.
*/
static inline struct ip_options *
-tcp_v4_save_options(struct sock *sk, struct sk_buff *skb,
- struct ip_options *opt)
+tcp_v4_save_options(struct sock *sk, struct sk_buff *skb)
{
+ struct ip_options *opt = &(IPCB(skb)->opt);
struct ip_options *dopt = NULL;
if (opt && opt->optlen) {
@@ -1052,8 +1120,7 @@
#define BACKLOG(sk) ((sk)->tp_pinfo.af_tcp.syn_backlog) /* lvalue! */
#define BACKLOGMAX(sk) sysctl_max_syn_backlog
-int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
- __u32 isn)
+int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, __u32 isn)
{
struct tcp_opt tp;
struct open_request *req;
@@ -1070,6 +1137,11 @@
if (sk->dead)
goto dead;
+ /* Never answer SYNs sent to broadcast or multicast */
+ if (((struct rtable *)skb->dst)->rt_flags &
+ (RTCF_BROADCAST|RTCF_MULTICAST))
+ goto drop;
+
/* XXX: Check against a global syn pool counter. */
if (BACKLOG(sk) > BACKLOGMAX(sk)) {
#ifdef CONFIG_SYN_COOKIES
@@ -1094,13 +1166,18 @@
req->rcv_isn = TCP_SKB_CB(skb)->seq;
tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
- tp.in_mss = 536;
+
+ tp.mss_clamp = 65535;
tcp_parse_options(NULL, th, &tp, want_cookie);
- req->mss = tp.in_mss;
- if (tp.saw_tstamp) {
- req->mss -= TCPOLEN_TSTAMP_ALIGNED;
+ if (tp.mss_clamp == 65535)
+ tp.mss_clamp = 576 - sizeof(struct iphdr) - sizeof(struct tcphdr);
+
+ if (sk->tp_pinfo.af_tcp.user_mss && sk->tp_pinfo.af_tcp.user_mss < tp.mss_clamp)
+ tp.mss_clamp = sk->tp_pinfo.af_tcp.user_mss;
+ req->mss = tp.mss_clamp;
+
+ if (tp.saw_tstamp)
req->ts_recent = tp.rcv_tsval;
- }
req->tstamp_ok = tp.tstamp_ok;
req->sack_ok = tp.sack_ok;
req->snd_wscale = tp.snd_wscale;
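
The user_mss that caps mss_clamp here is whatever userspace set with
TCP_MAXSEG before connecting or listening, for instance (the value is
only an example):

    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <sys/socket.h>

    static int cap_mss(int fd)
    {
            int mss = 512;  /* becomes tp->user_mss */

            return setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG,
                              &mss, sizeof(mss));
    }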
@@ -1120,7 +1197,7 @@
req->snt_isn = isn;
- req->af.v4_req.opt = tcp_v4_save_options(sk, skb, ptr);
+ req->af.v4_req.opt = tcp_v4_save_options(sk, skb);
req->class = &or_ipv4;
req->retrans = 0;
@@ -1139,7 +1216,6 @@
tcp_synq_queue(&sk->tp_pinfo.af_tcp, req);
}
- sk->data_ready(sk, 0);
return 0;
dead:
@@ -1160,8 +1236,7 @@
*
* This function wants to be moved to a common for IPv[46] file. --ANK
*/
-struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, struct sk_buff *skb,
- int snd_mss)
+struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, struct sk_buff *skb)
{
struct sock *newsk = sk_alloc(PF_INET, GFP_ATOMIC, 0);
@@ -1180,6 +1255,7 @@
skb_queue_head_init(&newsk->receive_queue);
atomic_set(&newsk->wmem_alloc, 0);
skb_queue_head_init(&newsk->write_queue);
+ atomic_set(&newsk->omem_alloc, 0);
newsk->done = 0;
newsk->proc = 0;
@@ -1231,7 +1307,7 @@
newtp->copied_seq = req->rcv_isn + 1;
newtp->saw_tstamp = 0;
- newtp->in_mss = 536;
+ newtp->mss_clamp = req->mss;
init_timer(&newtp->probe_timer);
newtp->probe_timer.function = &tcp_probe_timer;
@@ -1242,12 +1318,14 @@
newtp->urg_data = 0;
tcp_synq_init(newtp);
newtp->syn_backlog = 0;
+ if (skb->len >= 536)
+ newtp->last_seg_size = skb->len;
/* Back to base struct sock members. */
newsk->err = 0;
newsk->ack_backlog = 0;
newsk->max_ack_backlog = SOMAXCONN;
- newsk->priority = 1;
+ newsk->priority = 0;
/* IP layer stuff */
newsk->timeout = 0;
@@ -1276,14 +1354,6 @@
} else {
newtp->tcp_header_len = sizeof(struct tcphdr);
}
-
- snd_mss -= newtp->tcp_header_len;
-
- if (sk->user_mss)
- snd_mss = min(snd_mss, sk->user_mss);
-
- newsk->mss = min(req->mss, snd_mss);
-
}
return newsk;
}
@@ -1299,8 +1369,6 @@
struct ip_options *opt = req->af.v4_req.opt;
struct tcp_opt *newtp;
struct sock *newsk;
- int snd_mss;
- int mtu;
if (sk->ack_backlog > sk->max_ack_backlog)
goto exit; /* head drop */
@@ -1324,12 +1392,7 @@
goto exit;
#endif
- mtu = dst->pmtu;
- if (mtu < 68) /* XXX: we should turn pmtu disc off when this happens. */
- mtu = 68;
- snd_mss = mtu - sizeof(struct iphdr);
-
- newsk = tcp_create_openreq_child(sk, req, skb, snd_mss);
+ newsk = tcp_create_openreq_child(sk, req, skb);
if (!newsk)
goto exit;
@@ -1347,15 +1410,22 @@
newsk->sport = req->lcl_port;
#endif
newsk->opt = req->af.v4_req.opt;
- newsk->mtu = mtu;
-
- if (newsk->rcvbuf < (3 * newsk->mtu))
- newsk->rcvbuf = min ((3 * newsk->mtu), sysctl_rmem_max);
- if (newsk->sndbuf < (3 * newsk->mtu))
- newsk->sndbuf = min ((3 * newsk->mtu), sysctl_wmem_max);
+ newtp->ext_header_len = 0;
+ if (newsk->opt)
+ newtp->ext_header_len = newsk->opt->optlen;
+
+ tcp_sync_mss(newsk, dst->pmtu);
+ newtp->rcv_mss = newtp->mss_clamp;
+
+ /* It would be better to use newtp->mss_clamp here */
+ if (newsk->rcvbuf < (3 * newtp->pmtu_cookie))
+ newsk->rcvbuf = min ((3 * newtp->pmtu_cookie), sysctl_rmem_max);
+ if (newsk->sndbuf < (3 * newtp->pmtu_cookie))
+ newsk->sndbuf = min ((3 * newtp->pmtu_cookie), sysctl_wmem_max);
tcp_v4_hash(newsk);
add_to_prot_sklist(newsk);
+ sk->data_ready(sk, 0); /* Deliver SIGIO */
return newsk;
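
The socket buffers are now sized from pmtu_cookie: room for three
path-MTU-sized packets, clamped by the rmem/wmem sysctl caps. The rule
in isolation (the 65535 default for the cap is an assumption about
kernels of this era):

    static int sized_buf(int cur, int pmtu_cookie, int cap)
    {
            int want = 3 * pmtu_cookie;

            if (cur < want)
                    cur = want < cap ? want : cap;
            return cur;     /* pmtu 1500 -> 4500 bytes, say */
    }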
@@ -1373,8 +1443,8 @@
if (!req)
return;
/* Sequence number check required by RFC793 */
- if (before(TCP_SKB_CB(skb)->seq, req->snt_isn) ||
- after(TCP_SKB_CB(skb)->seq, req->snt_isn+1))
+ if (before(TCP_SKB_CB(skb)->seq, req->rcv_isn) ||
+ after(TCP_SKB_CB(skb)->seq, req->rcv_isn+1))
return;
tcp_synq_unlink(tp, req, prev);
(req->sk ? sk->ack_backlog : tp->syn_backlog)--;
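
The corrected check compares against req->rcv_isn because a segment
arriving for an open_request is numbered in the peer's sequence space;
req->snt_isn is our own ISN and may be arbitrarily far away. The test,
isolated and made self-contained (before()/after() re-derived as a
wrapping 32-bit compare):

    typedef unsigned int u32;

    static int seq_before(u32 a, u32 b)
    {
            return (int)(a - b) < 0;        /* like the kernel's before() */
    }

    /* valid only if seq is rcv_isn or rcv_isn + 1 */
    static int rst_seq_ok(u32 seq, u32 rcv_isn)
    {
            return !seq_before(seq, rcv_isn) &&
                   !seq_before(rcv_isn + 1, seq);
    }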
@@ -1461,7 +1531,7 @@
sk = nsk;
}
- if (tcp_rcv_state_process(sk, skb, skb->h.th, &(IPCB(skb)->opt), skb->len))
+ if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
goto reset;
release_sock(sk);
return 0;
@@ -1559,7 +1629,7 @@
do_time_wait:
if(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
- skb, th, &(IPCB(skb)->opt), skb->len))
+ skb, th, skb->len))
goto no_tcp_socket;
goto discard_it;
}
@@ -1665,6 +1735,8 @@
tcp_v4_conn_request,
tcp_v4_syn_recv_sock,
tcp_v4_get_sock,
+ sizeof(struct iphdr),
+
ip_setsockopt,
ip_getsockopt,
v4_addr2sockaddr,
@@ -1683,7 +1755,7 @@
tp->rto = TCP_TIMEOUT_INIT; /*TCP_WRITE_TIME*/
tp->mdev = TCP_TIMEOUT_INIT;
- tp->in_mss = 536;
+ tp->mss_clamp = ~0;
/* See draft-stevens-tcpca-spec-01 for discussion of the
* initialization of these values.
@@ -1691,11 +1763,11 @@
tp->snd_cwnd = (1 << TCP_CWND_SHIFT);
tp->snd_ssthresh = 0x7fffffff; /* Infinity */
- sk->priority = 1;
sk->state = TCP_CLOSE;
sk->max_ack_backlog = SOMAXCONN;
- sk->mtu = 576;
- sk->mss = 536;
+ tp->rcv_mss = 536;
+
+ sk->write_space = tcp_write_space;
/* Init SYN queue. */
tcp_synq_init(tp);