patch-2.1.28 linux/net/ipv4/udp.c
Next file: linux/net/ipv6/af_inet6.c
Previous file: linux/net/ipv4/tcp_timer.c
Back to the patch index
Back to the overall index
- Lines: 595
- Date:
Mon Mar 3 09:37:45 1997
- Orig file:
v2.1.27/linux/net/ipv4/udp.c
- Orig date:
Fri Feb 7 05:54:55 1997
diff -u --recursive --new-file v2.1.27/linux/net/ipv4/udp.c linux/net/ipv4/udp.c
@@ -49,6 +49,9 @@
* Mike Shaver : RFC1122 checks.
* Alan Cox : Nonblocking error fix.
* Willy Konynenberg : Transparent proxying support.
+ * David S. Miller : New socket lookup architecture.
+ * Last socket cache retained as it
+ * does have a high hit rate.
*
*
* This program is free software; you can redistribute it and/or
@@ -115,29 +118,295 @@
struct udp_mib udp_statistics;
-/*
- * Cached last hit socket
+struct sock *udp_hash[UDP_HTABLE_SIZE]; /* chains linked via sk->next, indexed by sk->num & (UDP_HTABLE_SIZE - 1) */
+
+/* Decide whether sk may bind to local port snum: scan that port's hash chain
+ * under SOCKHASH_LOCK; return 1 if a hashed socket conflicts, else 0.
+ */
+static int udp_v4_verify_bind(struct sock *sk, unsigned short snum)
+{
+	struct sock *sk2;
+	int retval = 0;
+
+	SOCKHASH_LOCK();
+	for(sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; sk2 != NULL; sk2 = sk2->next) {
+		if((sk2->num != snum) || (sk2 == sk))
+			continue;
+		/* Two different non-wildcard local addresses never clash. */
+		if(sk2->rcv_saddr && sk->rcv_saddr && (sk2->rcv_saddr != sk->rcv_saddr))
+			continue;
+		/* Sharing needs reuse on BOTH sockets, not sk->reuse tested twice. */
+		if(sk->reuse && sk2->reuse && (sk2->state != TCP_LISTEN))
+			continue;
+		retval = 1;
+		break;
+	}
+	SOCKHASH_UNLOCK();
+	return retval;
+}
+
+/* Return 1 if a socket on num's hash chain is bound to port num, else 0. */
+static inline int udp_lport_inuse(int num)
+{
+	struct sock *cur;
+
+	for(cur = udp_hash[num & (UDP_HTABLE_SIZE - 1)]; cur; cur = cur->next)
+		if(cur->num == num)
+			return 1;
+	return 0;
+}
+
+/* Shared by v4/v6 udp.  Return the first candidate port above PROT_SOCK whose
+ * hash chain is empty, else the candidate on the shortest chain, stepped by
+ * UDP_HTABLE_SIZE until udp_lport_inuse() reports it free. */
+unsigned short udp_good_socknum(void)
+{
+	static int start = 0;
+	unsigned short base;
+	int i, best = 0, size = 32767; /* a big num. */
+	int result;
+
+	base = PROT_SOCK + (start & 1023) + 1;
+	SOCKHASH_LOCK();
+	for(i = 0; i < UDP_HTABLE_SIZE; i++) {
+		/* Look at the chain that candidate port i + base + 1 hashes to. */
+		struct sock *sk = udp_hash[(i + base + 1) & (UDP_HTABLE_SIZE - 1)];
+		int j = 0;
+		if(!sk) {
+			start = (i + 1 + start) & 1023;
+			result = i + base + 1;
+			goto out;
+		}
+		do {
+			if(++j >= size)
+				goto next;
+		} while((sk = sk->next));
+		best = i;
+		size = j;
+	next:	; /* ISO C needs a statement after a label */
+	}
+	while(udp_lport_inuse(base + best + 1))
+		best += UDP_HTABLE_SIZE;
+	result = (best + base + 1);
+out:
+	SOCKHASH_UNLOCK();
+	return result;
+}
+
+/* Last hit UDP socket cache, this is ipv4 specific so make it static. */
+static u32 uh_cache_saddr, uh_cache_daddr; /* addresses of the last udp_v4_lookup() call */
+static u16 uh_cache_dport, uh_cache_sport; /* ports of that call, as passed in */
+static struct sock *uh_cache_sk = NULL; /* its result; NULL means the cache cannot hit */
+
+/* Push sk onto the head of the udp_hash chain selected by its port number
+ * and record the chain index in sk->hashent.
+ */
+static void udp_v4_hash(struct sock *sk)
+{
+	int slot = sk->num & (UDP_HTABLE_SIZE - 1);
+	struct sock **head = &udp_hash[slot];
+
+	SOCKHASH_LOCK();
+	sk->next = *head;
+	*head = sk;
+	sk->hashent = slot;
+	SOCKHASH_UNLOCK();
+}
+
+/* Unlink sk from the udp_hash chain for its port (nothing happens if it is
+ * not on that chain) and drop it from the last-hit cache.
+ */
+static void udp_v4_unhash(struct sock *sk)
+{
+	struct sock **link = &udp_hash[sk->num & (UDP_HTABLE_SIZE - 1)];
+
+	SOCKHASH_LOCK();
+	for(; *link != NULL; link = &(*link)->next) {
+		if(*link != sk)
+			continue;
+		*link = sk->next;
+		break;
+	}
+
+	/* Invalidate the last-hit cache if it points at sk. */
+	if(uh_cache_sk == sk)
+		uh_cache_sk = NULL;
+	SOCKHASH_UNLOCK();
+}
+
+/* Move sk from the chain recorded in sk->hashent to the chain matching its
+ * current port number, then invalidate the last-hit cache entry for it.
+ */
+static void udp_v4_rehash(struct sock *sk)
+{
+	int newslot = sk->num & (UDP_HTABLE_SIZE - 1);
+	struct sock **link = &udp_hash[sk->hashent];
+
+	SOCKHASH_LOCK();
+	/* Unlink from the old chain, if sk is found there. */
+	for(; *link != NULL; link = &(*link)->next) {
+		if(*link == sk) {
+			*link = sk->next;
+			break;
+		}
+	}
+	/* Push onto the head of the new chain. */
+	sk->next = udp_hash[newslot];
+	udp_hash[newslot] = sk;
+	sk->hashent = newslot;
+	if(uh_cache_sk == sk)
+		uh_cache_sk = NULL;
+	SOCKHASH_UNLOCK();
+}
+
+/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
+ * harder than this here plus the last hit cache. -DaveM
*/
-
-volatile unsigned long uh_cache_saddr,uh_cache_daddr;
-volatile unsigned short uh_cache_dport, uh_cache_sport;
-volatile struct sock *uh_cache_sk;
-
-void udp_cache_zap(void)
-{
- unsigned long flags;
- save_flags(flags);
- cli();
- uh_cache_saddr=0;
- uh_cache_daddr=0;
- uh_cache_dport=0;
- uh_cache_sport=0;
- uh_cache_sk=NULL;
- restore_flags(flags);
+struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport)
+{ /* Scores each socket on dport's chain against the packet; returns the best match or NULL. Takes no SOCKHASH_LOCK itself. */
+ struct sock *sk, *result = NULL;
+ unsigned short hnum = ntohs(dport); /* sk->num is compared against the host-order port */
+ int badness = -1; /* best score so far; -1 lets an all-wildcard socket (score 0) qualify */
+
+ for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) {
+ if((sk->num == hnum) && !(sk->dead && (sk->state == TCP_CLOSE))) { /* ignore sockets that are both dead and TCP_CLOSE */
+ int score = 0; /* one point per non-zero field that matches */
+ if(sk->rcv_saddr) { /* a non-zero rcv_saddr must equal the packet's daddr */
+ if(sk->rcv_saddr != daddr)
+ continue;
+ score++;
+ }
+ if(sk->daddr) { /* a non-zero sk->daddr must equal the packet's saddr */
+ if(sk->daddr != saddr)
+ continue;
+ score++;
+ }
+ if(sk->dummy_th.dest) { /* a non-zero dummy_th.dest must equal the packet's sport */
+ if(sk->dummy_th.dest != sport)
+ continue;
+ score++;
+ }
+ if(score == 3) { /* all three fields matched: cannot be beaten, stop */
+ result = sk;
+ break;
+ } else if(score > badness) { /* strict '>' keeps the earlier socket on ties */
+ result = sk;
+ badness = score;
+ }
+ }
+ }
+ return result;
}
-#define min(a,b) ((a)<(b)?(a):(b))
+/* One-entry cache in front of udp_v4_lookup_longway(): a repeat of the last
+ * 4-tuple that found a socket returns it without walking the hash chain.
+ */
+__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport)
+{
+	struct sock *hit = uh_cache_sk;
+
+	if(hit == NULL || uh_cache_saddr != saddr || uh_cache_sport != sport ||
+	   uh_cache_dport != dport || uh_cache_daddr != daddr) {
+		hit = udp_v4_lookup_longway(saddr, sport, daddr, dport);
+		uh_cache_sk = hit;
+		uh_cache_saddr = saddr;
+		uh_cache_daddr = daddr;
+		uh_cache_sport = sport;
+		uh_cache_dport = dport;
+	}
+	return hit;
+}
+
+#ifdef CONFIG_IP_TRANSPARENT_PROXY
+/* Yield sk; once sk runs out, switch a single time (fpass) to hpnum's chain. */
+#define secondlist(hpnum, sk, fpass) \
+({ struct sock *s1; if(!(sk) && (fpass)--) \
+	s1 = udp_hash[(hpnum) & (UDP_HTABLE_SIZE - 1)]; \
+   else \
+	s1 = (sk); \
+   s1; \
+})
+/* udp_hash has UDP_HTABLE_SIZE buckets, so it is indexed with that mask. */
+#define udp_v4_proxy_loop_init(hnum, hpnum, sk, fpass) \
+	secondlist((hpnum), udp_hash[(hnum)&(UDP_HTABLE_SIZE-1)],(fpass))
+
+#define udp_v4_proxy_loop_next(hnum, hpnum, sk, fpass) \
+	secondlist((hpnum),(sk)->next,(fpass))
+/* Lookup for redirected packets: scan the chain of port num, then once the
+ * chain of proxy port pnum (both network order); best-scoring socket or NULL. */
+struct sock *udp_v4_proxy_lookup(unsigned short num, unsigned long raddr,
+				 unsigned short rnum, unsigned long laddr,
+				 unsigned long paddr, unsigned short pnum)
+{
+	struct sock *s, *result = NULL;
+	unsigned short hnum = ntohs(num), hpnum = ntohs(pnum);
+	int badness = -1, firstpass = 1; /* firstpass: the hpnum chain is entered at most once */
+
+	SOCKHASH_LOCK();
+	for(s = udp_v4_proxy_loop_init(hnum, hpnum, s, firstpass);
+	    s != NULL;
+	    s = udp_v4_proxy_loop_next(hnum, hpnum, s, firstpass)) {
+		if(s->num == hnum || s->num == hpnum) {
+			int score = 0;
+			if(s->dead && (s->state == TCP_CLOSE))
+				continue;
+			if(s->rcv_saddr) { /* must be paddr on the proxy port or laddr on the real port */
+				if((s->num != hpnum || s->rcv_saddr != paddr) &&
+				   (s->num != hnum || s->rcv_saddr != laddr))
+					continue;
+				score++;
+			}
+			if(s->daddr) {
+				if(s->daddr != raddr)
+					continue;
+				score++;
+			}
+			if(s->dummy_th.dest) {
+				if(s->dummy_th.dest != rnum)
+					continue;
+				score++;
+			}
+			if(score == 3 && s->num == hnum) { /* full match on the real port ends the search */
+				result = s;
+				break;
+			} else if(score > badness && (s->num == hpnum || s->rcv_saddr)) {
+				result = s;
+				badness = score;
+			}
+		}
+	}
+	SOCKHASH_UNLOCK();
+	return result;
+}
+
+#undef secondlist
+#undef udp_v4_proxy_loop_init
+#undef udp_v4_proxy_loop_next
+
+#endif
+
+/* Starting at sk, return the first socket on the chain that matches port num
+ * (network order), remote raddr/rnum and local laddr, or NULL if none does. */
+static inline struct sock *udp_v4_mcast_next(struct sock *sk,
+		unsigned short num, unsigned long raddr,
+		unsigned short rnum, unsigned long laddr)
+{
+	unsigned short hnum = ntohs(num);
+
+	while(sk != NULL) {
+		if((sk->num == hnum) &&
+		   !(sk->dead && (sk->state == TCP_CLOSE)) &&
+		   (!sk->daddr || sk->daddr == raddr) &&
+		   (sk->dummy_th.dest == rnum || sk->dummy_th.dest == 0) &&
+		   (!sk->rcv_saddr || sk->rcv_saddr == laddr))
+			break;
+		sk = sk->next;
+	}
+	return sk;
+}
+
+#define min(a,b) ((a)<(b)?(a):(b)) /* the smaller argument is evaluated twice: pass no side effects */
/*
* This routine is called by the ICMP module when it gets some
@@ -158,8 +427,7 @@
int code = skb->h.icmph->code;
struct sock *sk;
- sk = get_sock(&udp_prot, uh->source, iph->daddr, uh->dest, iph->saddr);
-
+ sk = udp_v4_lookup(iph->saddr, uh->source, iph->daddr, uh->dest);
if (sk == NULL)
return; /* No socket for error */
@@ -234,9 +502,10 @@
};
/*
- * Copy and checksum a UDP packet from user space into a buffer. We still have to do the planning to
- * get ip_build_xmit to spot direct transfer to network card and provide an additional callback mode
- * for direct user->board I/O transfers. That one will be fun.
+ * Copy and checksum a UDP packet from user space into a buffer. We still have
+ * to do the planning to get ip_build_xmit to spot direct transfer to network
+ * card and provide an additional callback mode for direct user->board I/O
+ * transfers. That one will be fun.
*/
static int udp_getfrag(const void *p, char * to, unsigned int offset, unsigned int fraglen)
@@ -271,9 +540,9 @@
/*
* Unchecksummed UDP is sufficiently critical to stuff like ATM video conferencing
- * that we use two routines for this for speed. Probably we ought to have a CONFIG_FAST_NET
- * set for >10Mb/second boards to activate this sort of coding. Timing needed to verify if
- * this is a valid decision.
+ * that we use two routines for this for speed. Probably we ought to have a
+ * CONFIG_FAST_NET set for >10Mb/second boards to activate this sort of coding.
+ * Timing needed to verify if this is a valid decision.
*/
static int udp_getfrag_nosum(const void *p, char * to, unsigned int offset, unsigned int fraglen)
@@ -616,7 +885,8 @@
sk->rcv_saddr=INADDR_ANY;
sk->daddr=INADDR_ANY;
sk->state = TCP_CLOSE;
- udp_cache_zap();
+ if(uh_cache_sk == sk)
+ uh_cache_sk = NULL;
return 0;
}
@@ -638,7 +908,8 @@
sk->daddr = rt->rt_dst;
sk->dummy_th.dest = usin->sin_port;
sk->state = TCP_ESTABLISHED;
- udp_cache_zap();
+ if(uh_cache_sk == sk)
+ uh_cache_sk = NULL;
ip_rt_put(rt);
return(0);
}
@@ -648,8 +919,8 @@
{
lock_sock(sk);
sk->state = TCP_CLOSE;
- if(uh_cache_sk==sk)
- udp_cache_zap();
+ if(uh_cache_sk == sk)
+ uh_cache_sk = NULL;
release_sock(sk);
sk->dead = 1;
destroy_sock(sk);
@@ -682,6 +953,41 @@
udp_queue_rcv_skb(sk, skb);
}
+/* Multicasts and broadcasts go to each listener: every matching socket on
+ * the destination port's chain gets a clone, the last one the original skb.
+ * Always returns 0; the skb is freed here when no socket matches. */
+static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
+ u32 saddr, u32 daddr)
+{
+ struct sock *sk;
+ int given = 0; /* set once at least one listener was found */
+
+ SOCKHASH_LOCK();
+ sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]; /* head of the destination port's chain */
+ sk = udp_v4_mcast_next(sk, uh->dest, saddr, uh->source, daddr);
+ if(sk) {
+ struct sock *sknext = NULL;
+
+ do {
+ struct sk_buff *skb1 = skb; /* default: hand over the original */
+
+ sknext = udp_v4_mcast_next(sk->next, uh->dest, saddr,
+ uh->source, daddr);
+ if(sknext) /* more listeners follow, so this one gets a clone */
+ skb1 = skb_clone(skb, GFP_ATOMIC);
+
+ if(skb1) /* a NULL clone just skips this listener */
+ udp_deliver(sk, skb1);
+ sk = sknext;
+ } while(sknext);
+ given = 1;
+ }
+ SOCKHASH_UNLOCK();
+ if(!given) /* nobody matched: drop the datagram */
+ kfree_skb(skb, FREE_READ);
+ return 0;
+}
+
#ifdef CONFIG_IP_TRANSPARENT_PROXY
/*
* Check whether a received UDP packet might be for one of our
@@ -694,35 +1000,18 @@
struct udphdr *uh = (struct udphdr *)(skb->nh.raw + iph->ihl*4);
struct sock *sk;
- sk = get_sock(&udp_prot, uh->dest, iph->saddr, uh->source, iph->daddr);
+ sk = udp_v4_lookup(iph->saddr, uh->source, iph->daddr, uh->dest);
+ if (!sk)
+ return 0;
- if (!sk) return 0;
/* 0 means accept all LOCAL addresses here, not all the world... */
- if (sk->rcv_saddr == 0) return 0;
+ if (sk->rcv_saddr == 0)
+ return 0;
+
return 1;
}
#endif
-
-static __inline__ struct sock *
-get_udp_sock(unsigned short dport, unsigned long saddr, unsigned short sport,
- unsigned long daddr)
-{
- struct sock *sk;
-
- if (saddr==uh_cache_saddr && daddr==uh_cache_daddr &&
- dport==uh_cache_dport && sport==uh_cache_sport)
- return (struct sock *)uh_cache_sk;
- sk = get_sock(&udp_prot, dport, saddr, sport, daddr);
- uh_cache_saddr=saddr;
- uh_cache_daddr=daddr;
- uh_cache_dport=dport;
- uh_cache_sport=sport;
- uh_cache_sk=sk;
- return sk;
-}
-
-
/*
* All we need to do is get the socket, and then do a checksum.
*/
@@ -769,12 +1058,10 @@
/* FIXME list for IP, though, so I wouldn't worry about it. */
/* (That's the Right Place to do it, IMHO.) -- MS */
- if (uh->check && (
- ( (skb->ip_summed == CHECKSUM_HW) && udp_check(uh, len, saddr, daddr, skb->csum ) ) ||
- ( (skb->ip_summed == CHECKSUM_NONE) && udp_check(uh, len, saddr, daddr,csum_partial((char*)uh, len, 0)))
- /* skip if CHECKSUM_UNNECESSARY */
- )
- ) {
+ if (uh->check &&
+ (((skb->ip_summed==CHECKSUM_HW)&&udp_check(uh,len,saddr,daddr,skb->csum)) ||
+ ((skb->ip_summed==CHECKSUM_NONE) &&
+ (udp_check(uh,len,saddr,daddr, csum_partial((char*)uh, len, 0)))))) {
/* <mea@utu.fi> wants to know, who sent it, to
go and stomp on the garbage sender... */
@@ -802,38 +1089,17 @@
skb_trim(skb,len);
- if (rt->rt_flags&(RTF_BROADCAST|RTF_MULTICAST))
- {
- /*
- * Multicasts and broadcasts go to each listener.
- */
- struct sock *sknext=NULL;
- sk=get_sock_mcast(udp_prot.sock_array[ntohs(uh->dest)&(SOCK_ARRAY_SIZE-1)], uh->dest,
- saddr, uh->source, daddr);
- if(sk) {
- do {
- struct sk_buff *skb1;
-
- sknext=get_sock_mcast(sk->next, uh->dest, saddr, uh->source, daddr);
- if(sknext)
- skb1=skb_clone(skb,GFP_ATOMIC);
- else
- skb1=skb;
- if(skb1)
- udp_deliver(sk, skb1);
- sk=sknext;
- } while(sknext!=NULL);
- } else
- kfree_skb(skb, FREE_READ);
- return 0;
- }
+ if(rt->rt_flags & (RTF_BROADCAST|RTF_MULTICAST))
+ return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
#ifdef CONFIG_IP_TRANSPARENT_PROXY
if (IPCB(skb)->redirport)
- sk = get_sock_proxy(&udp_prot, uh->dest, saddr, uh->source, daddr, skb->dev->pa_addr, IPCB(skb)->redirport);
+ sk = udp_v4_proxy_lookup(uh->dest, saddr, uh->source,
+ daddr, skb->dev->pa_addr,
+ IPCB(skb)->redirport);
else
#endif
- sk = get_udp_sock(uh->dest, saddr, uh->source, daddr);
+ sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest);
if (sk == NULL) {
udp_statistics.UdpNoPorts++;
@@ -851,26 +1117,33 @@
}
struct proto udp_prot = {
- udp_close,
- udp_connect,
- NULL,
- NULL,
- NULL,
- NULL,
- datagram_poll,
- udp_ioctl,
- NULL,
- NULL,
- NULL,
- ip_setsockopt,
- ip_getsockopt,
- udp_sendmsg,
- udp_recvmsg,
- NULL, /* No special bind function */
- udp_queue_rcv_skb,
- 128,
- 0,
- "UDP",
- 0, 0,
- NULL,
+ (struct sock *)&udp_prot, /* sklist_next: starts out pointing at udp_prot itself */
+ (struct sock *)&udp_prot, /* sklist_prev: starts out pointing at udp_prot itself */
+ udp_close, /* close */
+ udp_connect, /* connect */
+ NULL, /* accept */
+ NULL, /* retransmit */
+ NULL, /* write_wakeup */
+ NULL, /* read_wakeup */
+ datagram_poll, /* poll */
+ udp_ioctl, /* ioctl */
+ NULL, /* init */
+ NULL, /* destroy */
+ NULL, /* shutdown */
+ ip_setsockopt, /* setsockopt */
+ ip_getsockopt, /* getsockopt */
+ udp_sendmsg, /* sendmsg */
+ udp_recvmsg, /* recvmsg */
+ NULL, /* bind */
+ udp_queue_rcv_skb, /* backlog_rcv */
+ udp_v4_hash, /* hash: link the socket into udp_hash */
+ udp_v4_unhash, /* unhash: unlink it and clear the last-hit cache */
+ udp_v4_rehash, /* rehash: move it to the chain for its current port */
+ udp_good_socknum, /* good_socknum: pick a local port number */
+ udp_v4_verify_bind, /* verify_bind: 1 if the requested port conflicts */
+ 128, /* max_header */
+ 0, /* retransmits */
+ "UDP", /* name */
+ 0, /* inuse */
+ 0 /* highestinuse */
};
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov