patch-2.1.124 linux/net/ipv4/ip_masq.c

Next file: linux/net/ipv4/ip_masq_app.c
Previous file: linux/net/ipv4/ip_input.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.1.123/linux/net/ipv4/ip_masq.c linux/net/ipv4/ip_masq.c
@@ -4,7 +4,7 @@
  *
  * 	Copyright (c) 1994 Pauline Middelink
  *
- * Version:	@(#)ip_masq.c  0.12      97/11/30
+ *	$Id: ip_masq.c,v 1.26 1998/09/24 03:38:58 davem Exp $
  *
  *
  *	See ip_fw.c for original log
@@ -32,12 +32,25 @@
  *	Steven Clarke		:	IP_MASQ_S_xx state design
  *	Juan Jose Ciarlante	:	IP_MASQ_S state implementation 
  *	Juan Jose Ciarlante	: 	xx_get() clears timer, _put() inserts it
+ *	Juan Jose Ciarlante	: 	create /proc/net/ip_masq/ 
+ *	Juan Jose Ciarlante	: 	reworked checksums (save payload csum if possible)
+ *	Juan Jose Ciarlante	: 	added missing ip_fw_masquerade checksum
+ *	Juan Jose Ciarlante	: 	csum savings
+ *	Juan Jose Ciarlante	: 	added user-space tunnel creation/del, etc
+ *	Juan Jose Ciarlante	: 	(last) moved to ip_masq_user runtime module
+ *	Juan Jose Ciarlante	: 	user timeout handling again
+ *	Juan Jose Ciarlante	: 	make new modules support optional
+ *	Juan Jose Ciarlante	: 	u-space context => locks reworked
+ *	Juan Jose Ciarlante	: 	fixed stupid SMP locking bug
+ *	Juan Jose Ciarlante	: 	fixed "tap"ing in demasq path by copy-on-w
  *	
- *
  */
 
 #include <linux/config.h>
 #include <linux/module.h>
+#ifdef CONFIG_KMOD
+#include <linux/kmod.h>
+#endif
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
@@ -55,17 +68,14 @@
 #include <net/udp.h>
 #include <net/checksum.h>
 #include <net/ip_masq.h>
-#include <net/ip_masq_mod.h>
-#include <linux/sysctl.h>
-#include <linux/ip_fw.h>
 
-#ifdef CONFIG_IP_MASQUERADE_IPAUTOFW
-#include <net/ip_autofw.h>
-#endif
-#ifdef CONFIG_IP_MASQUERADE_IPPORTFW
-#include <net/ip_portfw.h>
+#ifdef CONFIG_IP_MASQUERADE_MOD
+#include <net/ip_masq_mod.h>
 #endif
 
+#include <linux/sysctl.h>
+#include <linux/ip_fw.h>
+#include <linux/ip_masq.h>
 
 int sysctl_ip_masq_debug = 0;
 
@@ -77,6 +87,8 @@
 	return sysctl_ip_masq_debug;
 }
 
+struct ip_masq_hook *ip_masq_user_hook = NULL;
+
 /*
  *	Timeout table[state]
  */
@@ -98,7 +110,7 @@
 		5*60*HZ,	/*	IP_MASQ_S_UDP,	*/
 		1*60*HZ,	/*	IP_MASQ_S_ICMP,	*/
 		2*HZ,/*	IP_MASQ_S_LAST	*/
-	},
+	},	/* timeout */
 };
 
 #define MASQUERADE_EXPIRE_RETRY      masq_timeout_table.timeout[IP_MASQ_S_TIME_WAIT]
@@ -134,7 +146,7 @@
 	int next_state[IP_MASQ_S_LAST];	/* should be _LAST_TCP */
 };
 
-static const char * masq_state_name(int state)
+const char * ip_masq_state_name(int state)
 {
 	if (state >= IP_MASQ_S_LAST)
 		return "ERR!";
@@ -224,8 +236,8 @@
 				th->rst? 'R' : '.',
 				ntohl(ms->saddr), ntohs(ms->sport),
 				ntohl(ms->daddr), ntohs(ms->dport),
-				masq_state_name(ms->state),
-				masq_state_name(new_state));
+				ip_masq_state_name(ms->state),
+				ip_masq_state_name(new_state));
 	return masq_set_state_timeout(ms, new_state);
 }
 
@@ -235,20 +247,19 @@
  */
 static int masq_set_state(struct ip_masq *ms, int output, struct iphdr *iph, void *tp)
 {
-	struct tcphdr	*th = tp;
 	switch (iph->protocol) {
 		case IPPROTO_ICMP:
 			return masq_set_state_timeout(ms, IP_MASQ_S_ICMP);
 		case IPPROTO_UDP:
 			return masq_set_state_timeout(ms, IP_MASQ_S_UDP);
 		case IPPROTO_TCP:
-			return masq_tcp_state(ms, output, th);
+			return masq_tcp_state(ms, output, tp);
 	}
 	return -1;
 }
 
 /*
- *	Moves tunnel to listen state
+ *	Set LISTEN timeout. (ip_masq_put will set up the timer)
  */
 int ip_masq_listen(struct ip_masq *ms)
 {
@@ -256,8 +267,6 @@
 	return ms->timeout;
 }
 
-#define IP_MASQ_TAB_SIZE 256    /* must be power of 2 */
-
 /* 
  *	Dynamic address rewriting 
  */
@@ -266,9 +275,7 @@
 /*
  *	Lookup lock
  */
-static struct wait_queue *masq_wait;
-atomic_t __ip_masq_lock = ATOMIC_INIT(0);
-
+rwlock_t __ip_masq_lock = RW_LOCK_UNLOCKED;
 
 /*
  *	Implement IP packet masquerading
@@ -305,6 +312,9 @@
  *	Will cycle in MASQ_PORT boundaries.
  */
 static __u16 masq_port = PORT_MASQ_BEGIN;
+#ifdef __SMP__
+static spinlock_t masq_port_lock = SPIN_LOCK_UNLOCKED;
+#endif
 
 /*
  *	free ports counters (UDP & TCP)
@@ -333,20 +343,26 @@
         ATOMIC_INIT((PORT_MASQ_END-PORT_MASQ_BEGIN) * PORT_MASQ_MUL),/* ICMP */
 };
 
+/*
+ *	Counts entries that have been requested with specific mport.
+ *	Used for incoming packets to "relax" input rule (port in MASQ range).
+ */
+atomic_t mport_count = ATOMIC_INIT(0);
+
 EXPORT_SYMBOL(ip_masq_get_debug_level);
 EXPORT_SYMBOL(ip_masq_new);
 EXPORT_SYMBOL(ip_masq_listen);
-/*
-EXPORT_SYMBOL(ip_masq_set_expire);
-*/
 EXPORT_SYMBOL(ip_masq_free_ports);
-EXPORT_SYMBOL(ip_masq_expire);
 EXPORT_SYMBOL(ip_masq_out_get);
 EXPORT_SYMBOL(ip_masq_in_get);
 EXPORT_SYMBOL(ip_masq_put);
 EXPORT_SYMBOL(ip_masq_control_add);
 EXPORT_SYMBOL(ip_masq_control_del);
 EXPORT_SYMBOL(ip_masq_control_get);
+EXPORT_SYMBOL(ip_masq_user_hook);
+EXPORT_SYMBOL(ip_masq_m_tab);
+EXPORT_SYMBOL(ip_masq_state_name);
+EXPORT_SYMBOL(ip_masq_select_addr);
 EXPORT_SYMBOL(__ip_masq_lock);
 
 /*
@@ -360,13 +376,16 @@
  * timeouts
  */
 
+#if 000 /* FIXED timeout handling */
 static struct ip_fw_masq ip_masq_dummy = {
 	MASQUERADE_EXPIRE_TCP,
 	MASQUERADE_EXPIRE_TCP_FIN,
 	MASQUERADE_EXPIRE_UDP
 };
 
+EXPORT_SYMBOL(ip_masq_expire);
 struct ip_fw_masq *ip_masq_expire = &ip_masq_dummy;
+#endif
 
 
 /*
@@ -375,7 +394,7 @@
  *	Warning: it does not check/delete previous timer!
  */
 
-void __ip_masq_set_expire(struct ip_masq *ms, unsigned long tout)
+static void __ip_masq_set_expire(struct ip_masq *ms, unsigned long tout)
 {
         if (tout) {
                 ms->timer.expires = jiffies+tout;
@@ -398,7 +417,7 @@
 
 /*
  *	Hashes ip_masq by its proto,addrs,ports.
- *	should be called with masked interrupts.
+ *	should be called with locked tables.
  *	returns bool success.
  */
 
@@ -434,7 +453,7 @@
 
 /*
  *	UNhashes ip_masq from ip_masq_[ms]_tables.
- *	should be called with masked interrupts.
+ *	should be called with locked tables.
  *	returns bool success.
  */
 
@@ -488,16 +507,18 @@
  * 	phoenix and get a reply from any other interface(==dst)!
  *
  * 	[Only for UDP] - AC
+ *	
+ *	Caller must lock tables
  */
 
-struct ip_masq * __ip_masq_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
+static struct ip_masq * __ip_masq_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
 {
         unsigned hash;
         struct ip_masq *ms = NULL;
 
-	ip_masq_lock(&__ip_masq_lock, 0);
-
         hash = ip_masq_hash_key(protocol, d_addr, d_port);
+
+
         for(ms = ip_masq_m_tab[hash]; ms ; ms = ms->m_link) {
  		if (protocol==ms->protocol &&
 		    ((s_addr==ms->daddr || ms->flags & IP_MASQ_F_NO_DADDR)) &&
@@ -521,7 +542,6 @@
 	       d_port);
 
 out:
-	ip_masq_unlock(&__ip_masq_lock, 0);
         return ms;
 }
 
@@ -537,9 +557,11 @@
  *	hash is keyed on source port so if the first lookup fails then try again
  *	with a zero port, this time only looking at entries marked "no source
  *	port".
+ *	
+ *	Caller must lock tables
  */
 
-struct ip_masq * __ip_masq_out_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
+static struct ip_masq * __ip_masq_out_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
 {
         unsigned hash;
         struct ip_masq *ms = NULL;
@@ -549,8 +571,6 @@
 	 */
         hash = ip_masq_hash_key(protocol, s_addr, s_port);
 	
-	ip_masq_lock(&__ip_masq_lock, 0);
-
         for(ms = ip_masq_s_tab[hash]; ms ; ms = ms->s_link) {
 		if (protocol == ms->protocol &&
 		    s_addr == ms->saddr && s_port == ms->sport &&
@@ -596,7 +616,6 @@
 	       d_port);
 
 out:
-	ip_masq_unlock(&__ip_masq_lock, 0);
         return ms;
 }
 
@@ -604,6 +623,8 @@
 /*
  *	Returns ip_masq for given proto,m_addr,m_port.
  *      called by allocation routine to find an unused m_port.
+ *	
+ *	Caller must lock tables
  */
 
 static struct ip_masq * __ip_masq_getbym(int protocol, __u32 m_addr, __u16 m_port)
@@ -613,8 +634,6 @@
 
         hash = ip_masq_hash_key(protocol, m_addr, m_port);
 
-	ip_masq_lock(&__ip_masq_lock, 0);
-
         for(ms = ip_masq_m_tab[hash]; ms ; ms = ms->m_link) {
  		if ( protocol==ms->protocol &&
                     (m_addr==ms->maddr && m_port==ms->mport)) {
@@ -624,7 +643,6 @@
         }
 
 out:
-	ip_masq_unlock(&__ip_masq_lock, 0);
         return ms;
 }
 #endif
@@ -632,7 +650,11 @@
 struct ip_masq * ip_masq_out_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) 
 {
 	struct ip_masq *ms;
+
+	read_lock(&__ip_masq_lock);
 	ms = __ip_masq_out_get(protocol, s_addr, s_port, d_addr, d_port);
+	read_unlock(&__ip_masq_lock);
+
 	if (ms)
 		__ip_masq_set_expire(ms, 0);
 	return ms;
@@ -641,7 +663,11 @@
 struct ip_masq * ip_masq_in_get(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port)
 {
 	struct ip_masq *ms;
+
+	read_lock(&__ip_masq_lock);
 	ms =  __ip_masq_in_get(protocol, s_addr, s_port, d_addr, d_port);
+	read_unlock(&__ip_masq_lock);
+
 	if (ms)
 		__ip_masq_set_expire(ms, 0);
 	return ms;
@@ -685,8 +711,9 @@
 			masq_proto_name(ms->protocol),
 			ntohl(ms->saddr),ntohs(ms->sport));
 
-	ip_masq_lock(&__ip_masq_lock, 1);
+	write_lock(&__ip_masq_lock);
 
+#if 0000
 	/*
 	 *	Already locked, do bounce ...
 	 */
@@ -694,6 +721,7 @@
 		goto masq_expire_later;
 	}
 
+#endif
 	/*
 	 * 	do I control anybody?
 	 */
@@ -708,8 +736,11 @@
 		ip_masq_control_del(ms);
 
         if (ip_masq_unhash(ms)) {
-		if (!(ms->flags&IP_MASQ_F_MPORT))
+		if (ms->flags&IP_MASQ_F_MPORT) {
+			atomic_dec(&mport_count);
+		} else {
 			atomic_inc(ip_masq_free_ports + masq_proto_num(ms->protocol));
+		}
 		ip_masq_unbind_app(ms);
         }
 
@@ -718,28 +749,45 @@
 	 */
 	if (atomic_read(&ms->refcnt) == 1) {
 		kfree_s(ms,sizeof(*ms));
+		MOD_DEC_USE_COUNT;
 		goto masq_expire_out;
 	}
 
 masq_expire_later:
-	IP_MASQ_DEBUG(0, "masq_expire delayed: %s %08lX:%04X->%08lX:%04X nlocks-1=%d masq.refcnt-1=%d masq.n_control=%d\n",
+	IP_MASQ_DEBUG(0, "masq_expire delayed: %s %08lX:%04X->%08lX:%04X masq.refcnt-1=%d masq.n_control=%d\n",
 		masq_proto_name(ms->protocol),
 		ntohl(ms->saddr), ntohs(ms->sport),
 		ntohl(ms->daddr), ntohs(ms->dport),
-		ip_masq_nlocks(&__ip_masq_lock)-1,
 		atomic_read(&ms->refcnt)-1,
 		atomic_read(&ms->n_control));
 
 	ip_masq_put(ms);
 
 masq_expire_out:
-	ip_masq_unlock(&__ip_masq_lock, 1);
+	write_unlock(&__ip_masq_lock);
+}
+
+static __u16 get_next_mport(void)	/* returns the next candidate masq port, in network byte order */
+{
+	__u16 mport;
+	
+	spin_lock_irq(&masq_port_lock);	/* serialize readers/writers of masq_port */
+	/*
+	 *	Hand out the current masq_port and advance it; no in-use check is done here
+	 */
+	mport = htons(masq_port++);
+	if (masq_port==PORT_MASQ_END) masq_port = PORT_MASQ_BEGIN;	/* wrap: PORT_MASQ_END itself is never handed out */
+
+	spin_unlock_irq(&masq_port_lock);
+	return mport;
 }
 
 /*
  * 	Create a new masquerade list entry, also allocate an
  * 	unused mport, keeping the portnumber between the
  * 	given boundaries MASQ_BEGIN and MASQ_END.
+ *
+ * 	Be careful, it can be called from u-space
  */
 
 struct ip_masq * ip_masq_new(int proto, __u32 maddr, __u16 mport, __u32 saddr, __u16 sport, __u32 daddr, __u16 dport, unsigned mflags)
@@ -748,6 +796,7 @@
         int ports_tried;
 	atomic_t *free_ports_p = NULL;
         static int n_fails = 0;
+	int prio;
 
 
 	if (masq_proto_num(proto)!=-1 && mport == 0) {
@@ -760,13 +809,17 @@
 			return NULL;
 		}
 	}
-        ms = (struct ip_masq *) kmalloc(sizeof(struct ip_masq), GFP_ATOMIC);
+
+	prio = (mflags&IP_MASQ_F_USER) ? GFP_KERNEL : GFP_ATOMIC;
+
+        ms = (struct ip_masq *) kmalloc(sizeof(struct ip_masq), prio);
         if (ms == NULL) {
                 if (++n_fails < 5)
                         IP_MASQ_ERR("ip_masq_new(proto=%s): no memory available.\n",
                                masq_proto_name(proto));
                 return NULL;
         }
+	MOD_INC_USE_COUNT;
         memset(ms, 0, sizeof(*ms));
 	init_timer(&ms->timer);
 	ms->timer.data     = (unsigned long)ms;
@@ -805,22 +858,33 @@
 		/* 
 		 *	Check 5-upla uniqueness
 		 */
-		ip_masq_lock(&__ip_masq_lock, 1);
+		if (mflags & IP_MASQ_F_USER) 	
+			write_lock_bh(&__ip_masq_lock);
+		else 
+			write_lock(&__ip_masq_lock);
 
                 mst = __ip_masq_in_get(proto, daddr, dport, maddr, mport);
 		if (mst==NULL) {
 			ms->flags |= IP_MASQ_F_MPORT;
 
+			atomic_inc(&mport_count);
                         ip_masq_hash(ms);
-			ip_masq_unlock(&__ip_masq_lock, 1);
+
+			if (mflags & IP_MASQ_F_USER) 	
+				write_unlock_bh(&__ip_masq_lock);
+			else 
+				write_unlock(&__ip_masq_lock);
 
 			ip_masq_bind_app(ms);
 			atomic_inc(&ms->refcnt);
 			masq_set_state_timeout(ms, IP_MASQ_S_NONE);
 			return ms;
 		}
+		if (mflags & IP_MASQ_F_USER) 	
+			write_unlock_bh(&__ip_masq_lock);
+		else 
+			write_unlock(&__ip_masq_lock);
 
-		ip_masq_unlock(&__ip_masq_lock, 1);
 		__ip_masq_put(mst);
 
 		IP_MASQ_ERR( "Already used connection: %s, %d.%d.%d.%d:%d => %d.%d.%d.%d:%d, called from %p\n",
@@ -838,20 +902,15 @@
 	     (atomic_read(free_ports_p) && (ports_tried <= (PORT_MASQ_END - PORT_MASQ_BEGIN)));
 	     ports_tried++){
 
-		cli();
-		/*
-		 *	Try the next available port number
-		 */
-		mport = ms->mport = htons(masq_port++);
-		if (masq_port==PORT_MASQ_END) masq_port = PORT_MASQ_BEGIN;
-
-		sti();
-
+		mport = ms->mport = get_next_mport();
 		/*
 		 *	lookup to find out if this connection is used.
 		 */
 
-		ip_masq_lock(&__ip_masq_lock, 1);
+		if (mflags & IP_MASQ_F_USER) 
+			write_lock_bh(&__ip_masq_lock);
+		else
+			write_lock(&__ip_masq_lock);
 
 #ifdef CONFIG_IP_MASQUERADE_NREUSE
 		mst = __ip_masq_getbym(proto, maddr, mport);
@@ -861,12 +920,20 @@
 		if (mst == NULL) {
 
 			if (atomic_read(free_ports_p) == 0) {
-				ip_masq_unlock(&__ip_masq_lock, 1);
+				if (mflags & IP_MASQ_F_USER) 
+					write_unlock_bh(&__ip_masq_lock);
+				else
+					write_unlock(&__ip_masq_lock);
+
 				break;
 			}
 			atomic_dec(free_ports_p);
 			ip_masq_hash(ms);
-			ip_masq_unlock(&__ip_masq_lock, 1);
+
+			if (mflags & IP_MASQ_F_USER) 
+				write_unlock_bh(&__ip_masq_lock);
+			else
+				write_unlock(&__ip_masq_lock);
 
 			ip_masq_bind_app(ms);
 			n_fails = 0;
@@ -874,7 +941,11 @@
 			masq_set_state_timeout(ms, IP_MASQ_S_NONE);
 			return ms;
 		}
-		ip_masq_unlock(&__ip_masq_lock, 1);
+		if (mflags & IP_MASQ_F_USER) 
+			write_unlock_bh(&__ip_masq_lock);
+		else
+			write_unlock(&__ip_masq_lock);
+
 		__ip_masq_put(mst);
         }
 
@@ -884,48 +955,97 @@
 		       atomic_read(free_ports_p));
 mport_nono:
         kfree_s(ms, sizeof(*ms));
+
+	MOD_DEC_USE_COUNT;
         return NULL;
 }
 
-static void recalc_check(struct udphdr *uh, __u32 saddr,
-	__u32 daddr, int len)
+static __inline__ unsigned proto_doff(unsigned proto, char *th)	/* transport header length in bytes; th is read only for TCP */
 {
-	uh->check=0;
-	uh->check=csum_tcpudp_magic(saddr,daddr,len,
-		IPPROTO_UDP, csum_partial((char *)uh,len,0));
-	if(uh->check==0)
-		uh->check=0xFFFF;
+	switch (proto) {
+		case IPPROTO_UDP:
+			return sizeof(struct udphdr);	/* UDP header has a fixed size */
+		case IPPROTO_TCP:
+			return ((struct tcphdr*)th)->doff << 2;	/* doff is in 32-bit words: convert to bytes */
+	}
+	return 0;	/* any other protocol: report no header length */
 }
 
-int ip_fw_masquerade(struct sk_buff **skb_ptr, __u32 maddr)
+int ip_fw_masquerade(struct sk_buff **skb_p, __u32 maddr)
 {
-	struct sk_buff  *skb=*skb_ptr;
+	struct sk_buff  *skb = *skb_p;
 	struct iphdr	*iph = skb->nh.iph;
-	__u16	*portptr;
+	union ip_masq_tphdr h;
 	struct ip_masq	*ms;
 	int		size;
 
+	/* 
+	 * 	Magic "doff" csum semantics
+	 *		!0: saved payload csum IS valid, doff is correct
+	 *		0: csum not valid
+	 */
+	unsigned doff = 0;
+	int csum = 0;
+
 	/*
-	 * We can only masquerade protocols with ports...
-	 * [TODO]
-	 * We may need to consider masq-ing some ICMP related to masq-ed protocols
+	 * We can only masquerade protocols with ports... and hack some ICMPs
 	 */
 
-        if (iph->protocol==IPPROTO_ICMP) 
-            return (ip_fw_masq_icmp(skb_ptr, maddr));
+	h.raw = (char*) iph + iph->ihl * 4;
 
-	if (iph->protocol!=IPPROTO_UDP && iph->protocol!=IPPROTO_TCP)
-		return -1;
+	switch (iph->protocol) {
+	case IPPROTO_ICMP:
+		return(ip_fw_masq_icmp(skb_p, maddr));
+	case IPPROTO_UDP:
+		if (h.uh->check == 0)
+			/* No UDP checksum */
+			break;
+	case IPPROTO_TCP:
+		/* Transport-layer length: IP total length minus IP header */
+		size = ntohs(iph->tot_len) - (iph->ihl * 4);
+		IP_MASQ_DEBUG(3, "O-pkt: %s size=%d\n",
+				masq_proto_name(iph->protocol),
+				size);
+
+		/* Check that the checksum is OK */
+		switch (skb->ip_summed)
+		{
+			case CHECKSUM_NONE:
+				doff = proto_doff(iph->protocol, h.raw);
+				csum = csum_partial(h.raw + doff, size - doff, 0);
+				IP_MASQ_DEBUG(3, "O-pkt: %s I-datacsum=%d\n",
+						masq_proto_name(iph->protocol),
+						csum);
+
+				skb->csum = csum_partial(h.raw , doff, csum);
 
+			case CHECKSUM_HW:
+				if (csum_tcpudp_magic(iph->saddr, iph->daddr, 
+						size, iph->protocol, skb->csum))
+				{
+					IP_MASQ_DEBUG(0, "Outgoing failed %s checksum from %d.%d.%d.%d (size=%d)!\n",
+					       masq_proto_name(iph->protocol),
+					       NIPQUAD(iph->saddr),
+					       size);
+					return -1;
+				}
+			default:
+				/* CHECKSUM_UNNECESSARY */
+		}
+		break;
+	default:
+		return -1;
+	}
 	/*
 	 *	Now hunt the list to see if we have an old entry
 	 */
 
-	portptr = (__u16 *)&(((char *)iph)[iph->ihl*4]);
+	/* h.raw = (char*) iph + iph->ihl * 4; */
+
  	IP_MASQ_DEBUG(2, "Outgoing %s %08lX:%04X -> %08lX:%04X\n",
   		masq_proto_name(iph->protocol),
-  		ntohl(iph->saddr), ntohs(portptr[0]),
-  		ntohl(iph->daddr), ntohs(portptr[1]));
+  		ntohl(iph->saddr), ntohs(h.portp[0]),
+  		ntohl(iph->daddr), ntohs(h.portp[1]));
 
         ms = ip_masq_out_get_iph(iph);
         if (ms!=NULL) {
@@ -942,13 +1062,13 @@
                                        NIPQUAD(ms->maddr),NIPQUAD(maddr));
                         }
 
-			ip_masq_lock(&__ip_masq_lock, 1);
+			write_lock(&__ip_masq_lock);
 
                         ip_masq_unhash(ms);
                         ms->maddr = maddr;
                         ip_masq_hash(ms);
 
-			ip_masq_unlock(&__ip_masq_lock, 1);
+			write_unlock(&__ip_masq_lock);
                 }
                 
 		/*
@@ -960,13 +1080,13 @@
 		if ( ms->flags & IP_MASQ_F_NO_SPORT && ms->protocol == IPPROTO_TCP ) {
 			ms->flags &= ~IP_MASQ_F_NO_SPORT;
 
-			ip_masq_lock(&__ip_masq_lock, 1);
+			write_lock(&__ip_masq_lock);
 			
 			ip_masq_unhash(ms);
-			ms->sport = portptr[0];
+			ms->sport = h.portp[0];
 			ip_masq_hash(ms);	/* hash on new sport */
 
-			ip_masq_unlock(&__ip_masq_lock, 1);
+			write_unlock(&__ip_masq_lock);
 			
 			IP_MASQ_DEBUG(1, "ip_fw_masquerade(): filled sport=%d\n",
 			       ntohs(ms->sport));
@@ -976,68 +1096,113 @@
 		 *	Nope, not found, create a new entry for it
 		 */
 
-		if (!(ms = ip_masq_mod_out_create(iph, portptr, maddr))) 
+#ifdef CONFIG_IP_MASQUERADE_MOD
+		if (!(ms = ip_masq_mod_out_create(skb, iph, maddr))) 
+#endif
 			ms = ip_masq_new(iph->protocol,
 					maddr, 0,
-					iph->saddr, portptr[0],
-					iph->daddr, portptr[1],
+					iph->saddr, h.portp[0],
+					iph->daddr, h.portp[1],
 					0);
                 if (ms == NULL)
 			return -1;
  	}
 
-	ip_masq_mod_out_update(iph, portptr, ms);
+	/*
+ 	 * 	Call module's output update hook
+	 */
+
+#ifdef CONFIG_IP_MASQUERADE_MOD
+	ip_masq_mod_out_update(skb, iph, ms);
+#endif
 
  	/*
  	 *	Change the fragments origin
  	 */
 
- 	size = skb->len - ((unsigned char *)portptr - skb->nh.raw);
+ 	size = skb->len - (h.raw - skb->nh.raw);
+
         /*
          *	Set iph addr and port from ip_masq obj.
          */
  	iph->saddr = ms->maddr;
- 	portptr[0] = ms->mport;
+ 	h.portp[0] = ms->mport;
+
+	/*
+	 *	Invalidate csum saving if tunnel has masq helper
+	 */
+
+	if (ms->app) 
+		doff = 0;
 
  	/*
  	 *	Attempt ip_masq_app call.
          *	will fix ip_masq and iph seq stuff
  	 */
-        if (ip_masq_app_pkt_out(ms, skb_ptr, maddr) != 0)
+        if (ip_masq_app_pkt_out(ms, skb_p, maddr) != 0)
 	{
                 /*
                  *	skb has possibly changed, update pointers.
                  */
-                skb = *skb_ptr;
+                skb = *skb_p;
                 iph = skb->nh.iph;
-                portptr = (__u16 *)&(((char *)iph)[iph->ihl*4]);
-                size = skb->len - ((unsigned char *)portptr-skb->nh.raw);
+		h.raw = (char*) iph + iph->ihl *4;
+                size = skb->len - (h.raw - skb->nh.raw);
         }
 
  	/*
  	 *	Adjust packet accordingly to protocol
  	 */
 
- 	if (iph->protocol == IPPROTO_UDP)
- 	{
- 		recalc_check((struct udphdr *)portptr,iph->saddr,iph->daddr,size);
- 	} else {
- 		struct tcphdr *th = (struct tcphdr *)portptr;
+	/*
+	 *	Transport's payload partial csum
+	 */
 
+	if (!doff) {
+		doff = proto_doff(iph->protocol, h.raw);
+		csum = csum_partial(h.raw + doff, size - doff, 0);
+	}
+	skb->csum = csum;
 
-		skb->csum = csum_partial((void *)(th + 1), size - sizeof(*th), 0);
-		th->check = 0;
-		th->check = tcp_v4_check(th, size, iph->saddr, iph->daddr,
-					 csum_partial((char *)th, sizeof(*th),
-						      skb->csum));
- 	}
+	IP_MASQ_DEBUG(3, "O-pkt: %s size=%d O-datacsum=%d\n",
+			masq_proto_name(iph->protocol),
+			size,
+			csum);
+
+	/*
+	 * 	Protocol csum
+	 */
+	switch (iph->protocol) {
+		case IPPROTO_TCP:
+			h.th->check = 0;
+			h.th->check=csum_tcpudp_magic(iph->saddr, iph->daddr, 
+					size, iph->protocol, 
+					csum_partial(h.raw , doff, csum));
+			IP_MASQ_DEBUG(3, "O-pkt: %s O-csum=%d (+%d)\n",
+					masq_proto_name(iph->protocol),
+					h.th->check,
+					(char*) & (h.th->check) - (char*) h.raw);
 
+			break;
+		case IPPROTO_UDP:
+			h.uh->check = 0;
+			h.uh->check=csum_tcpudp_magic(iph->saddr, iph->daddr, 
+					size, iph->protocol, 
+					csum_partial(h.raw , doff, csum));
+			if (h.uh->check == 0) 
+				h.uh->check = 0xFFFF;
+			IP_MASQ_DEBUG(3, "O-pkt: %s O-csum=%d (+%d)\n",
+					masq_proto_name(iph->protocol),
+					h.uh->check,
+					(char*) &(h.uh->check)- (char*) h.raw);
+			break;
+	}
 	ip_send_check(iph);
 
   	IP_MASQ_DEBUG(2, "O-routed from %08lX:%04X with masq.addr %08lX\n",
 		ntohl(ms->maddr),ntohs(ms->mport),ntohl(maddr));
 
-	masq_set_state(ms, 1, iph, portptr);
+	masq_set_state(ms, 1, iph, h.portp);
 	ip_masq_put(ms);
 
 	return 0;
@@ -1106,13 +1271,13 @@
 				       NIPQUAD(ms->maddr), NIPQUAD(maddr));
 			}
 
-			ip_masq_lock(&__ip_masq_lock, 1);
+			write_lock(&__ip_masq_lock);
 			
                         ip_masq_unhash(ms);
                         ms->maddr = maddr;
                         ip_masq_hash(ms);
 
-			ip_masq_unlock(&__ip_masq_lock, 1);
+			write_unlock(&__ip_masq_lock);
                 }
                 
 		iph->saddr = ms->maddr;
@@ -1166,11 +1331,13 @@
 		       ntohs(icmp_id(cicmph)),
 		       cicmph->type);
 
+		read_lock(&__ip_masq_lock);
 		ms = __ip_masq_out_get(ciph->protocol, 
 				      ciph->daddr,
 				      icmp_id(cicmph),
 				      ciph->saddr,
 				      icmp_hv_rep(cicmph));
+		read_unlock(&__ip_masq_lock);
 
 		if (ms == NULL)
 			return 0;
@@ -1239,11 +1406,13 @@
 	/* This is pretty much what __ip_masq_in_get_iph() does */
 	ms = __ip_masq_in_get(ciph->protocol, ciph->saddr, pptr[0], ciph->daddr, pptr[1]);
 #endif
+	read_lock(&__ip_masq_lock);
 	ms = __ip_masq_out_get(ciph->protocol,
 			       ciph->daddr,
 			       pptr[1],
 			       ciph->saddr,
 			       pptr[0]);
+	read_unlock(&__ip_masq_lock);
 
 	if (ms == NULL)
 		return 0;
@@ -1274,6 +1443,30 @@
 	return 1;
 }
 
+
+/*
+ *	Own skb_cow() beast: if skb is cloned, swap in a private copy and rewrite
+ *	*skb_p, *iph_p and *t_p to point into it. Returns NULL if the copy fails.
+ */
+static struct sk_buff * masq_skb_cow(struct sk_buff **skb_p, 
+			struct iphdr **iph_p, unsigned char **t_p) {
+	struct sk_buff *skb=(*skb_p);
+	if (skb_cloned(skb)) {	/* not cloned: fall through and return the original skb as is */
+		skb = skb_copy(skb, GFP_ATOMIC);
+		if (skb) {	/* on failure skb is NULL and *skb_p, *iph_p, *t_p are left untouched */
+			/*
+			 *	skb changed, update other pointers
+			 */
+			struct iphdr *iph = skb->nh.iph;
+			kfree_skb(*skb_p);	/* release the original buffer before replacing the caller's pointer */
+			*skb_p = skb;
+			*iph_p = iph;
+			*t_p = (char*) iph + iph->ihl * 4;	/* transport header starts ihl 32-bit words after the IP header */
+		}
+	}
+	return skb;
+}
+
 /*
  *	Handle ICMP messages in reverse (demasquerade) direction.
  *	Find any that might be relevant, check against existing connections,
@@ -1323,6 +1516,11 @@
                  */
                 ms->flags &= ~IP_MASQ_F_NO_REPLY;
 
+		if ((skb=masq_skb_cow(skb_p, &iph, (unsigned char**)&icmph)) == NULL) {
+			ip_masq_put(ms);
+			return -1;
+		}
+
 		/* Reset source address */
 		iph->daddr = ms->saddr;
 		/* Redo IP header checksum */
@@ -1378,15 +1576,23 @@
 		       ntohs(icmp_id(cicmph)),
 		       cicmph->type);
 
+		read_lock(&__ip_masq_lock);
 		ms = __ip_masq_in_get(ciph->protocol, 
 				      ciph->daddr, 
 				      icmp_hv_req(cicmph),
 				      ciph->saddr, 
 				      icmp_id(cicmph));
+		read_unlock(&__ip_masq_lock);
 
 		if (ms == NULL)
 			return 0;
 
+		if ((skb=masq_skb_cow(skb_p, &iph, (unsigned char**)&icmph)) == NULL) {
+			__ip_masq_put(ms);
+			return -1;
+		}
+		ciph = (struct iphdr *) (icmph + 1);
+
 		/* Now we do real damage to this packet...! */
 		/* First change the dest IP address, and recalc checksum */
 		iph->daddr = ms->saddr;
@@ -1445,15 +1651,23 @@
 
 
 	/* This is pretty much what __ip_masq_in_get_iph() does, except params are wrong way round */
+	read_lock(&__ip_masq_lock);
 	ms = __ip_masq_in_get(ciph->protocol,
 			      ciph->daddr,
 			      pptr[1],
 			      ciph->saddr,
 			      pptr[0]);
+	read_unlock(&__ip_masq_lock);
 
 	if (ms == NULL)
 		return 0;
 
+	if ((skb=masq_skb_cow(skb_p, &iph, (unsigned char**)&icmph)) == NULL) {
+		__ip_masq_put(ms);
+		return -1;
+	}
+	ciph = (struct iphdr *) (icmph + 1);
+
 	/* Now we do real damage to this packet...! */
 	/* First change the dest IP address, and recalc checksum */
 	iph->daddr = ms->saddr;
@@ -1480,7 +1694,6 @@
 	return 1;
 }
 
-
  /*
   *	Check if it's an masqueraded port, look it up,
   *	and send it on its way...
@@ -1492,44 +1705,70 @@
 
 int ip_fw_demasquerade(struct sk_buff **skb_p)
 {
-        struct sk_buff 	*skb = *skb_p;
- 	struct iphdr	*iph = skb->nh.iph;
- 	__u16	*portptr;
- 	struct ip_masq	*ms;
-	unsigned short len;
-
+	struct sk_buff 	*skb = *skb_p;
+	struct iphdr	*iph = skb->nh.iph;
+	union ip_masq_tphdr h;
+	struct ip_masq	*ms;
+	unsigned short size;
+	unsigned doff = 0;
+	int csum = 0;
 	__u32 maddr;
 
+	/*
+	 *	Big tappo: only PACKET_HOST (neither loopback nor mcasts)
+	 *	... don't know why 1st test DOES NOT include 2nd (?)
+	 */
+
+	if (skb->pkt_type != PACKET_HOST || skb->dev == &loopback_dev) {
+		IP_MASQ_DEBUG(2, "ip_fw_demasquerade(): packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
+			skb->pkt_type,
+			iph->protocol,
+			NIPQUAD(iph->daddr));
+		return 0;
+	}
+
 	maddr = iph->daddr;
+	h.raw = (char*) iph + iph->ihl * 4;
 
 	switch (iph->protocol) {
 	case IPPROTO_ICMP:
 		return(ip_fw_demasq_icmp(skb_p));
 	case IPPROTO_TCP:
 	case IPPROTO_UDP:
-		/* Make sure packet is in the masq range */
-		portptr = (__u16 *)&(((char *)iph)[iph->ihl*4]);
-		if ((ntohs(portptr[1]) < PORT_MASQ_BEGIN
-				|| ntohs(portptr[1]) > PORT_MASQ_END)
-				&& (ip_masq_mod_in_rule(iph, portptr) != 1))
+		/* 
+		 *	Make sure packet is in the masq range 
+		 *	... or some mod-ule relaxes input range
+		 *	... or there is still some `special' mport opened
+		 */
+		if ((ntohs(h.portp[1]) < PORT_MASQ_BEGIN
+				|| ntohs(h.portp[1]) > PORT_MASQ_END)
+#ifdef CONFIG_IP_MASQUERADE_MOD
+				&& (ip_masq_mod_in_rule(skb, iph) != 1) 
+#endif
+				&& atomic_read(&mport_count) == 0 )
 			return 0;
 
 		/* Check that the checksum is OK */
-		len = ntohs(iph->tot_len) - (iph->ihl * 4);
-		if ((iph->protocol == IPPROTO_UDP) && (portptr[3] == 0))
+		size = ntohs(iph->tot_len) - (iph->ihl * 4);
+		if ((iph->protocol == IPPROTO_UDP) && (h.uh->check == 0))
 			/* No UDP checksum */
 			break;
 
 		switch (skb->ip_summed)
 		{
 			case CHECKSUM_NONE:
-				skb->csum = csum_partial((char *)portptr, len, 0);
+				doff = proto_doff(iph->protocol, h.raw);
+				csum = csum_partial(h.raw + doff, size - doff, 0);
+				skb->csum = csum_partial(h.raw , doff, csum);
+
 			case CHECKSUM_HW:
-				if (csum_tcpudp_magic(iph->saddr, iph->daddr, len,
-						      iph->protocol, skb->csum))
+				if (csum_tcpudp_magic(iph->saddr, iph->daddr, 
+						size, iph->protocol, skb->csum))
 				{
-					IP_MASQ_DEBUG(2, "failed TCP/UDP checksum from %d.%d.%d.%d!\n",
-					       NIPQUAD(iph->saddr));
+					IP_MASQ_DEBUG(0, "Incoming failed %s checksum from %d.%d.%d.%d (size=%d)!\n",
+					       masq_proto_name(iph->protocol),
+					       NIPQUAD(iph->saddr),
+					       size);
 					return -1;
 				}
 			default:
@@ -1544,8 +1783,8 @@
 
  	IP_MASQ_DEBUG(2, "Incoming %s %08lX:%04X -> %08lX:%04X\n",
  		masq_proto_name(iph->protocol),
- 		ntohl(iph->saddr), ntohs(portptr[0]),
- 		ntohl(iph->daddr), ntohs(portptr[1]));
+ 		ntohl(iph->saddr), ntohs(h.portp[0]),
+ 		ntohl(iph->daddr), ntohs(h.portp[1]));
 
  	/*
  	 * reroute to original host:port if found...
@@ -1553,10 +1792,19 @@
 
         ms = ip_masq_in_get_iph(iph);
 
+	/*
+ 	 * 	Give additional modules a chance to create an entry
+	 */
+#ifdef CONFIG_IP_MASQUERADE_MOD
 	if (!ms) 
-		ms = ip_masq_mod_in_create(iph, portptr, maddr);
+		ms = ip_masq_mod_in_create(skb, iph, maddr);
+
+	/*
+ 	 * 	Call module's input update hook
+	 */
+	ip_masq_mod_in_update(skb, iph, ms);
+#endif
 
-	ip_masq_mod_in_update(iph, portptr, ms);
 
         if (ms != NULL)
         {
@@ -1572,7 +1820,7 @@
 
                 if ( ms->flags & IP_MASQ_F_NO_DPORT ) { /*  && ms->protocol == IPPROTO_TCP ) { */
                         ms->flags &= ~IP_MASQ_F_NO_DPORT;
-                        ms->dport = portptr[0];
+                        ms->dport = h.portp[0];
 
                         IP_MASQ_DEBUG(1, "ip_fw_demasquerade(): filled dport=%d\n",
                                ntohs(ms->dport));
@@ -1582,12 +1830,23 @@
                         ms->flags &= ~IP_MASQ_F_NO_DADDR;
                         ms->daddr = iph->saddr;
 
-                        IP_MASQ_DEBUG(1, "ip_fw_demasquerade(): filled daddr=%X\n",
-                               ntohs(ms->daddr));
+                        IP_MASQ_DEBUG(1, "ip_fw_demasquerade(): filled daddr=%lX\n",
+                               ntohl(ms->daddr));
 
                 }
+		if ((skb=masq_skb_cow(skb_p, &iph, &h.raw)) == NULL) {
+			ip_masq_put(ms);
+			return -1;
+		}
                 iph->daddr = ms->saddr;
-                portptr[1] = ms->sport;
+                h.portp[1] = ms->sport;
+
+		/*
+		 *	Invalidate csum saving if tunnel has masq helper
+		 */
+
+		if (ms->app) 
+			doff = 0;
 
                 /*
                  *	Attempt ip_masq_app call.
@@ -1602,34 +1861,48 @@
 
                         skb = *skb_p;
                         iph = skb->nh.iph;
-                        portptr = (__u16 *)&(((char *)iph)[iph->ihl*4]);
-                        len = ntohs(iph->tot_len) - (iph->ihl * 4);
+			h.raw = (char*) iph + iph->ihl*4;
+                        size = ntohs(iph->tot_len) - (iph->ihl * 4);
                 }
 
                 /*
-                 * Yug! adjust UDP/TCP and IP checksums, also update
-		 * timeouts.
-		 * If a TCP RST is seen collapse the tunnel (by using short timeout)!
-                 */
-                if (iph->protocol == IPPROTO_UDP) {
-                        recalc_check((struct udphdr *)portptr,iph->saddr,iph->daddr,len);
-		} else {
-			struct tcphdr *th = (struct tcphdr *)portptr;
-                        skb->csum = csum_partial((void *)(th + 1),
-                                                 len - sizeof(struct tcphdr), 0);
-
-			th->check = 0;
-                        th->check = tcp_v4_check(th, len, iph->saddr, iph->daddr,
-						 csum_partial((char *)th,
-							      sizeof(*th),
-				     			      skb->csum));
+                 * Yug! adjust UDP/TCP checksums
+		 */
 
-                }
+		/*
+		 *	Transport's payload partial csum
+		 */
+
+		if (!doff) {
+			doff = proto_doff(iph->protocol, h.raw);
+			csum = csum_partial(h.raw + doff, size - doff, 0);
+		}
+		skb->csum = csum;
+
+		/*
+		 * 	Protocol csum
+		 */
+		switch (iph->protocol) {
+			case IPPROTO_TCP:
+				h.th->check = 0;
+				h.th->check=csum_tcpudp_magic(iph->saddr, iph->daddr, 
+						size, iph->protocol, 
+						csum_partial(h.raw , doff, csum));
+				break;
+			case IPPROTO_UDP:
+				h.uh->check = 0;
+				h.uh->check=csum_tcpudp_magic(iph->saddr, iph->daddr, 
+						size, iph->protocol, 
+						csum_partial(h.raw , doff, csum));
+				if (h.uh->check == 0) 
+					h.uh->check = 0xFFFF;
+				break;
+		}
                 ip_send_check(iph);
 
-                IP_MASQ_DEBUG(2, "I-routed to %08lX:%04X\n",ntohl(iph->daddr),ntohs(portptr[1]));
+                IP_MASQ_DEBUG(2, "I-routed to %08lX:%04X\n",ntohl(iph->daddr),ntohs(h.portp[1]));
 
-		masq_set_state (ms, 0, iph, portptr);
+		masq_set_state (ms, 0, iph, h.portp);
 		ip_masq_put(ms);
 
                 return 1;
@@ -1683,6 +1956,7 @@
 	return ms->control;
 }
 
+
 #ifdef CONFIG_PROC_FS
 /*
  *	/proc/net entries
@@ -1697,7 +1971,6 @@
         int idx = 0;
 	int len=0;
 
-	ip_masq_lockz(&__ip_masq_lock, &masq_wait, 0);
 
 	if (offset < 128)
 	{
@@ -1710,12 +1983,21 @@
 	}
 	pos = 128;
 
-        for(idx = 0; idx < IP_MASQ_TAB_SIZE; idx++)
+        for(idx = 0; idx < IP_MASQ_TAB_SIZE; idx++) 
+	{
+	/*
+	 *	The lock is actually only needed in the inner loop below;
+	 *	we are called from user space, so bottom halves must be stopped.
+	 */
+	read_lock_bh(&__ip_masq_lock);
+
         for(ms = ip_masq_m_tab[idx]; ms ; ms = ms->m_link)
 	{
 		pos += 128;
-		if (pos <= offset)
+		if (pos <= offset) {
+			len = 0;
 			continue;
+		}
 
 		/*
 		 *	We have locked the tables, no need to del/add timers
@@ -1733,12 +2015,17 @@
 			ms->timer.expires-jiffies);
 		len += sprintf(buffer+len, "%-127s\n", temp);
 
-		if(len >= length)
+		if(len >= length) {
+
+			read_unlock_bh(&__ip_masq_lock);
 			goto done;
+		}
         }
+	read_unlock_bh(&__ip_masq_lock);
+
+	}
 done:
 
-	ip_masq_unlockz(&__ip_masq_lock, &masq_wait, 0);
 
 	begin = len - (pos - offset);
 	*start = buffer + begin;
@@ -1748,82 +2035,173 @@
 	return len;
 }
 
-static int ip_masq_procinfo(char *buffer, char **start, off_t offset,
-			      int length, int unused)
+#endif
+
+/*
+ *	Timeouts set by ipfwadm/ipchains (from ip_fw.c): every non-zero field of the struct ip_fw_masq at m
+ *	replaces its masq_timeout_table entry.  Returns 0, or EINVAL (positive) if len is not the struct size.
+ */
+
+int ip_fw_masq_timeouts(void *m, int len)
 {
-	off_t pos=0, begin;
-	struct ip_masq *ms;
-	char temp[129];
-        int idx = 0;
-	int len=0;
+	struct ip_fw_masq *masq;
+	int ret = EINVAL;
 
-	ip_masq_lockz(&__ip_masq_lock, &masq_wait, 0);
+	if (len != sizeof(struct ip_fw_masq)) {
+		IP_MASQ_DEBUG(1, "ip_fw_masq_timeouts: length %d, expected %d\n",
+				len, (int) sizeof(struct ip_fw_masq));	/* %d needs an int, sizeof yields size_t */
+	} else {
+		masq = (struct ip_fw_masq *)m;
+		if (masq->tcp_timeout)
+			masq_timeout_table.timeout[IP_MASQ_S_ESTABLISHED]
+				= masq->tcp_timeout;
+
+		if (masq->tcp_fin_timeout)
+			masq_timeout_table.timeout[IP_MASQ_S_FIN_WAIT]
+				= masq->tcp_fin_timeout;
+
+		if (masq->udp_timeout)
+			masq_timeout_table.timeout[IP_MASQ_S_UDP]
+				= masq->udp_timeout;
+		ret = 0;
+	}
+	return ret;
+}
+/*
+ *	Module autoloading: request "ip_masq_user" on demand when CONFIG_KMOD is set
+ */
 
-	if (offset < 128)
-	{
-		sprintf(temp,
-			"Prot SrcIP    SPrt DstIP    DPrt MAddr    MPrt State        Ref Ctl Expires (free=%d,%d,%d)",
-			atomic_read(ip_masq_free_ports), 
-			atomic_read(ip_masq_free_ports+1), 
-			atomic_read(ip_masq_free_ports+2));
-		len = sprintf(buffer, "%-127s\n", temp);
+static int ip_masq_user_check_hook(void) {	/* non-zero iff ip_masq_user_hook is set on return */
+#ifdef CONFIG_KMOD
+	if (ip_masq_user_hook == NULL) {
+		IP_MASQ_DEBUG(1, "About to request \"ip_masq_user\" module\n");
+		request_module("ip_masq_user");	/* presumably the module sets ip_masq_user_hook when it loads -- confirm */
 	}
-	pos = 128;
+#endif /* CONFIG_KMOD */
+	return (ip_masq_user_hook != NULL);	/* re-tested: the request above may have filled it in */
+}
 
-        for(idx = 0; idx < IP_MASQ_TAB_SIZE; idx++)
-        for(ms = ip_masq_m_tab[idx]; ms ; ms = ms->m_link)
-	{
-		pos += 128;
-		if (pos <= offset)
-			continue;
+/*
+ *	user module hook- info: forward a /proc read to the user module's info(); data is passed on as an int, eof is unused
+ */
+static int ip_masq_user_info(char *buffer, char **start, off_t offset,
+			      int len, int *eof, void *data)
+{
+	/* No hook even after ip_masq_user_check_hook(): report -ENOPKG. */
+	if (!ip_masq_user_check_hook())
+		return -ENOPKG;
+
+	return ip_masq_user_hook->info(buffer, start, offset, len, (int) data);
+}
 
-		/*
-		 *	We have locked the tables, no need to del/add timers
-		 *	nor cli()  8)
-		 */
+/*
+ *	user module hook- entry mgmt: forward (optname, arg, arglen) to the user module's ctl() and return its result
+ */
+static int ip_masq_user_ctl(int optname, void *arg, int arglen)
+{
+	/* -ENOPKG when the hook is still missing after ip_masq_user_check_hook(). */
+	if (!ip_masq_user_check_hook())
+		return -ENOPKG;
+
+	return ip_masq_user_hook->ctl(optname, arg, arglen);
+}
 
-		sprintf(temp,"%-4s %08lX:%04X %08lX:%04X %08lX:%04X %-12s %3d %3d %7lu",
-			masq_proto_name(ms->protocol),
-			ntohl(ms->saddr), ntohs(ms->sport),
-			ntohl(ms->daddr), ntohs(ms->dport),
-			ntohl(ms->maddr), ntohs(ms->mport),
-			masq_state_name(ms->state),
-			atomic_read(&ms->refcnt),
-			atomic_read(&ms->n_control),
-			(ms->timer.expires-jiffies)/HZ);
-		len += sprintf(buffer+len, "%-127s\n", temp);
+/*
+ *	Control from ip_sockglue: MAIN ENTRY point from userspace (apart from /proc *info entries).
+ *	Copies optlen bytes of optval into a zeroed struct ip_masq_ctl and dispatches on its m_target.
+ *	Returns 0 or a negative errno; a handler result in 1..sizeof(masq_ctl) is copied back as that many bytes.
+ */
+int ip_masq_uctl(int optname, char * optval , int optlen)
+{
+	struct ip_masq_ctl masq_ctl = { 0 };	/* short optlen must not leave stack garbage in unread fields */
+	int ret = -EINVAL;
 
-		if(len >= length)
-			goto done;
-        }
-done:
+	if(optlen>sizeof(masq_ctl))
+		return -EINVAL;
 
-	ip_masq_unlockz(&__ip_masq_lock, &masq_wait, 0);
+	if(copy_from_user(&masq_ctl,optval,optlen))
+		return -EFAULT;
 
-	begin = len - (pos - offset);
-	*start = buffer + begin;
-	len -= begin;
-	if(len>length)
-		len = length;
-	return len;
+	IP_MASQ_DEBUG(1,"ip_masq_ctl(optname=%d, optlen=%d, target=%d, cmd=%d)\n",
+		optname, optlen, masq_ctl.m_target, masq_ctl.m_cmd);
+
+	switch (masq_ctl.m_target) {
+		case IP_MASQ_TARGET_USER:
+			ret = ip_masq_user_ctl(optname, &masq_ctl, optlen);
+			break;
+#ifdef CONFIG_IP_MASQUERADE_MOD
+		case IP_MASQ_TARGET_MOD:
+			ret = ip_masq_mod_ctl(optname, &masq_ctl, optlen);
+			break;
+#endif
+	}
+
+	/*
+	 *	A handler result of 1..sizeof(masq_ctl) is a byte count to copy back to user space
+	 */
+
+	if (ret > 0 && ret <= sizeof (masq_ctl)) {
+		if (copy_to_user(optval, &masq_ctl, ret) )
+			return -EFAULT;
+		ret = 0;
+	}
+
+	return ret;
 }
 
+#ifdef CONFIG_PROC_FS
+static struct proc_dir_entry	*proc_net_ip_masq = NULL;
+
+#ifdef MODULE
+static void ip_masq_proc_count(struct inode *inode, int fill)	/* installed as ent->fill_inode by masq_proc_init() under MODULE */
+{
+	if (fill)
+		MOD_INC_USE_COUNT;	/* fill != 0: bump the module use count */
+	else
+		MOD_DEC_USE_COUNT;	/* fill == 0: drop it again */
+}
 #endif
 
+/* Register ent below /proc/net/ip_masq; -1 when that directory entry was never created. */
+int ip_masq_proc_register(struct proc_dir_entry *ent)
+{
+	if (proc_net_ip_masq == NULL) return -1;
+	IP_MASQ_DEBUG(1, "registering \"/proc/net/ip_masq/%s\" entry\n", ent->name);
+	return proc_register(proc_net_ip_masq, ent);
+}
+/* Remove ent (identified by its low_ino) from /proc/net/ip_masq; a no-op when that directory entry is absent. */
+void ip_masq_proc_unregister(struct proc_dir_entry *ent)
+{
+	if (proc_net_ip_masq == NULL) return;
+	IP_MASQ_DEBUG(1, "unregistering \"/proc/net/ip_masq/%s\" entry\n", ent->name);
+	proc_unregister(proc_net_ip_masq, ent->low_ino);
+}
+
 /*
- *	Control from ip_sockglue
- *	From userspace
+ *	Wrapper over inet_select_addr(): dev, dst and scope are passed through and its result is returned as is
  */
-int ip_masq_ctl(int optname, void *arg, int arglen)
+u32 ip_masq_select_addr(struct device *dev, u32 dst, int scope)
 {
-	struct ip_fw_masqctl *mctl = arg;
-	int ret = EINVAL;
-
-	if (1) /*  (mctl->mctl_action == IP_MASQ_MOD_CTL)  */
-		ret = ip_masq_mod_ctl(optname, mctl, arglen);
+	return inet_select_addr(dev, dst, scope);
+}
 
-	return ret;
+/* Create the "net/ip_masq" proc directory entry once and remember it in proc_net_ip_masq. */
+__initfunc(static void masq_proc_init(void))
+{
+	struct proc_dir_entry *dir;
+	IP_MASQ_DEBUG(1,"registering /proc/net/ip_masq\n");
+	if (proc_net_ip_masq) return;
+	dir = create_proc_entry("net/ip_masq", S_IFDIR, 0);
+	if (!dir) {
+		IP_MASQ_ERR("Could not create \"/proc/net/ip_masq\" entry\n");
+		return;
+	}
+#ifdef MODULE
+	dir->fill_inode = ip_masq_proc_count;
+#endif
+	proc_net_ip_masq = dir;
 }
+#endif	/* CONFIG_PROC_FS */
 
 /*
  *	Initialize ip masquerading
@@ -1837,11 +2215,37 @@
 		0, &proc_net_inode_operations,
 		ip_msqhst_procinfo
 	});
-	proc_net_register(&(struct proc_dir_entry) {
-		0, 7, "ip_masq",
+	masq_proc_init();
+
+	ip_masq_proc_register(&(struct proc_dir_entry) {
+		0, 3, "tcp",
+		S_IFREG | S_IRUGO, 1, 0, 0,
+		0, &proc_net_inode_operations,
+		NULL,	/* get_info */
+		NULL,	/* fill_inode */
+		NULL, NULL, NULL,
+		(char *) IPPROTO_TCP,
+		ip_masq_user_info
+	});
+	ip_masq_proc_register(&(struct proc_dir_entry) {
+		0, 3, "udp",
 		S_IFREG | S_IRUGO, 1, 0, 0,
 		0, &proc_net_inode_operations,
-		ip_masq_procinfo
+		NULL,	/* get_info */
+		NULL,	/* fill_inode */
+		NULL, NULL, NULL,
+		(char *) IPPROTO_UDP,
+		ip_masq_user_info
+	});
+	ip_masq_proc_register(&(struct proc_dir_entry) {
+		0, 4, "icmp",
+		S_IFREG | S_IRUGO, 1, 0, 0,
+		0, &proc_net_inode_operations,
+		NULL,	/* get_info */
+		NULL,	/* fill_inode */
+		NULL, NULL, NULL,
+		(char *) IPPROTO_ICMP,
+		ip_masq_user_info
 	});
 #endif	
 #ifdef CONFIG_IP_MASQUERADE_IPAUTOFW
@@ -1849,6 +2253,9 @@
 #endif
 #ifdef CONFIG_IP_MASQUERADE_IPPORTFW
 	ip_portfw_init();
+#endif
+#ifdef CONFIG_IP_MASQUERADE_IPMARKFW
+	ip_markfw_init();
 #endif
         ip_masq_app_init();
 

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov