patch-1.3.31 linux/net/ipv4/ipmr.c

Next file: linux/net/ipv4/tcp.c
Previous file: linux/net/ipv4/ip_fw.c
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v1.3.30/linux/net/ipv4/ipmr.c linux/net/ipv4/ipmr.c
@@ -12,10 +12,16 @@
  *
  *	Fixes:
  *	Michael Chastain	:	Incorrect size of copying.
- *
+ *	Alan Cox		:	Added the cache manager code
  *
  *	Status:
- *		Tree building works. Cache manager to be added next.
+ *		Cache manager under test. Forwarding in vague test mode
+ *	Todo:
+ *		Flow control
+ *		Tunnels
+ *		Wipe cache on mrouted exit
+ *		Debug cache ttl handling properly
+ *		Resolve IFF_ALLMULTI for most cards
  */
 
 #include <asm/system.h>
@@ -27,10 +33,12 @@
 #include <linux/mm.h>
 #include <linux/kernel.h>
 #include <linux/fcntl.h>
+#include <linux/stat.h>
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/inet.h>
 #include <linux/netdevice.h>
+#include <linux/proc_fs.h>
 #include <linux/mroute.h>
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -47,9 +55,12 @@
  *	Multicast router conrol variables
  */
 
-static struct vif_device vif_table[MAXVIFS];
-static unsigned long vifc_map;
-int mroute_do_pim = 0;
+static struct vif_device vif_table[MAXVIFS];		/* Devices 		*/
+static unsigned long vifc_map;				/* Active device map	*/
+int mroute_do_pim = 0;					/* Set in PIM assert	*/
+static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/
+static struct mfc_cache *cache_resolve_queue;		/* Unresolved cache	*/
+int cache_resolve_queue_len = 0;			/* Size of unresolved	*/
 
 /*
  *	Delete a VIF entry
@@ -64,6 +75,342 @@
 	}
 	v->dev=NULL;
 }
+
+/*
+ *	Find a vif
+ */
+ 
+static int ipmr_vifi_find(struct device *dev)
+{
+	struct vif_device *v=&vif_table[0];
+	int ct;
+	for(ct=0;ct<MAXVIFS;ct++,v++)
+	{
+		if(v->dev==dev)
+			return ct;
+	}
+	return -1;
+}
+
+/*
+ *	Delete a multicast route cache entry
+ */
+ 
+static void ipmr_cache_delete(struct mfc_cache *cache)
+{
+	struct sk_buff *skb;
+	int line;
+	struct mfc_cache **cp;
+	
+	/*
+	 *	Find the right cache line
+	 */
+
+	if(cache->mfc_flags&MFC_QUEUED)
+	{	
+		cp=&cache_resolve_queue;
+		del_timer(&cache->mfc_timer);
+	}	
+	else
+	{
+		line=MFC_HASH(cache->mfc_mcastgrp,cache->mfc_origin);
+		cp=&(mfc_cache_array[line]);
+	}
+	
+	/*
+	 *	Unlink the buffer
+	 */
+	
+	while(*cp!=NULL)
+	{
+		if(*cp==cache)
+		{
+			*cp=cache->next;
+			break;
+		}
+		cp=&((*cp)->next);
+	}
+	
+	/*
+	 *	Free the buffer. If it is a pending resolution
+	 *	clean up the other resources.
+	 */
+
+	if(cache->mfc_flags&MFC_QUEUED)
+	{
+		cache_resolve_queue_len--;
+		while((skb=skb_dequeue(&cache->mfc_unresolved)))
+			kfree_skb(skb, FREE_WRITE);
+	}
+	kfree_s(cache,sizeof(cache));
+}
+
+/*
+ *	Cache expiry timer
+ */	
+ 
+static void ipmr_cache_timer(unsigned long data)
+{
+	struct mfc_cache *cache=(struct mfc_cache *)data;
+	ipmr_cache_delete(cache);
+}
+
+/*
+ *	Insert a multicast cache entry
+ */
+
+static void ipmr_cache_insert(struct mfc_cache *c)
+{
+	int line=MFC_HASH(c->mfc_mcastgrp,c->mfc_origin);
+	c->next=mfc_cache_array[line];
+	mfc_cache_array[line]=c;
+}
+ 
+/*
+ *	Find a multicast cache entry
+ */
+ 
+struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
+{
+	int line=MFC_HASH(mcastgrp,origin);
+	struct mfc_cache *cache;
+	cache=mfc_cache_array[line];
+	while(cache!=NULL)
+	{
+		if(cache->mfc_origin==origin && cache->mfc_mcastgrp==mcastgrp)
+			return cache;
+		cache=cache->next;
+	}
+	cache=cache_resolve_queue;
+	while(cache!=NULL)
+	{
+		if(cache->mfc_origin==origin && cache->mfc_mcastgrp==mcastgrp)
+			return cache;
+		cache=cache->next;
+	}
+	return NULL;
+}
+
+/*
+ *	Allocate a multicast cache entry
+ */
+ 
+static struct mfc_cache *ipmr_cache_alloc(int priority)
+{
+	struct mfc_cache *c=(struct mfc_cache *)kmalloc(sizeof(struct mfc_cache), priority);
+	if(c==NULL)
+		return NULL;
+	c->mfc_queuelen=0;
+	skb_queue_head_init(&c->mfc_unresolved);
+	init_timer(&c->mfc_timer);
+	c->mfc_timer.data=(long)c;
+	c->mfc_timer.function=ipmr_cache_timer;
+	return c;
+}
+ 
+/*
+ *	A cache entry has gone into a resolved state from queued
+ */
+ 
+static void ipmr_cache_resolve(struct mfc_cache *cache)
+{
+	struct mfc_cache **p;
+	struct sk_buff *skb;
+	/*
+	 *	Kill the queue entry timer.
+	 */
+	del_timer(&cache->mfc_timer);
+	cache->mfc_flags&=~MFC_QUEUED;
+	/*
+	 *	Remove from the resolve queue
+	 */
+	p=&cache_resolve_queue;
+	while((*p)!=NULL)
+	{
+		if((*p)==cache)
+		{
+			*p=cache->next;
+			break;
+		}
+		p=&((*p)->next);
+	}
+	cache_resolve_queue_len--;
+	sti();
+	/*
+	 *	Insert into the main cache
+	 */
+	ipmr_cache_insert(cache);
+	/*
+	 *	Play the pending entries through our router
+	 */
+	while((skb=skb_dequeue(&cache->mfc_unresolved)))
+		ipmr_forward(skb, skb->protocol);
+}
+
+/*
+ *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
+ *	expects the following bizarre scheme..
+ */
+ 
+static void ipmr_cache_report(struct sk_buff *pkt)
+{
+	struct sk_buff *skb=alloc_skb(128, GFP_ATOMIC);
+	int ihl=pkt->ip_hdr->ihl<<2;
+	struct igmphdr *igmp;
+	if(!skb)
+		return;
+		
+	skb->free=1;
+	
+	/*
+	 *	Copy the IP header
+	 */
+
+	skb->ip_hdr=(struct iphdr *)skb_put(skb,ihl);
+	skb->h.iph=skb->ip_hdr;
+	memcpy(skb->data,pkt->data,ihl);
+	skb->ip_hdr->protocol = 0;			/* Flag to the kernel this is a route add */
+	
+	/*
+	 *	Add our header
+	 */
+	
+	igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
+	igmp->type	=	IGMPMSG_NOCACHE;		/* non IGMP dummy message */
+	igmp->code 	=	0;
+	skb->ip_hdr->tot_len=htons(skb->len);			/* Fix the length */
+	
+	/*
+	 *	Deliver to mrouted
+	 */
+	if(sock_queue_rcv_skb(mroute_socket,skb)<0)
+	{
+		skb->sk=NULL;
+		kfree_skb(skb, FREE_READ);
+	}
+}
+		
+ 
+/*
+ *	Queue a packet for resolution
+ */
+ 
+static void ipmr_cache_unresolved(struct mfc_cache *cache, vifi_t vifi, struct sk_buff *skb, int is_frag)
+{
+	if(cache==NULL)
+	{	
+		/*
+		 *	Create a new entry if allowable
+		 */
+		if(cache_resolve_queue_len>=10 || (cache=ipmr_cache_alloc(GFP_ATOMIC))==NULL)
+		{
+			kfree_skb(skb, FREE_WRITE);
+			return;
+		}
+		/*
+		 *	Fill in the new cache entry
+		 */
+		cache->mfc_parent=vifi;
+		cache->mfc_origin=skb->ip_hdr->saddr;
+		cache->mfc_mcastgrp=skb->ip_hdr->daddr;
+		cache->mfc_flags=MFC_QUEUED;
+		/*
+		 *	Link to the unresolved list
+		 */
+		cache->next=cache_resolve_queue;
+		cache_resolve_queue=cache;
+		cache_resolve_queue_len++;
+		/*
+		 *	Fire off the expiry timer
+		 */
+		cache->mfc_timer.expires=jiffies+10*HZ;
+		add_timer(&cache->mfc_timer);
+		/*
+		 *	Reflect first query at mrouted.
+		 */
+		if(mroute_socket)
+			ipmr_cache_report(skb);
+	}
+	/*
+	 *	See if we can append the packet
+	 */
+	if(cache->mfc_queuelen>3)
+	{
+		kfree_skb(skb, FREE_WRITE);
+		return;
+	}
+	/*
+	 *	Add to our 'pending' list. Cache the is_frag data
+	 *	in skb->protocol now it is spare.
+	 */
+	cache->mfc_queuelen++;
+	skb->protocol=is_frag;
+	skb_queue_tail(&cache->mfc_unresolved,skb);
+}
+
+/*
+ *	MFC cache manipulation by user space mroute daemon
+ */
+ 
+int ipmr_mfc_modify(int action, struct mfcctl *mfc)
+{
+	struct mfc_cache *cache;
+	if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
+		return -EINVAL;
+	/*
+	 *	Find the cache line
+	 */
+	
+	cli();
+	cache=ipmr_cache_find(mfc->mfcc_origin.s_addr,mfc->mfcc_mcastgrp.s_addr);
+	
+	/*
+	 *	Delete an entry
+	 */
+	if(action==MRT_DEL_MFC)
+	{
+		if(cache)
+		{
+			ipmr_cache_delete(cache);
+			sti();
+			return 0;
+		}
+		return -ENOENT;
+	}
+	if(cache)
+	{
+		/*
+		 *	Update the cache, see if it frees a pending queue
+		 */
+
+		cache->mfc_flags|=MFC_RESOLVED;
+		memcpy(cache->mfc_ttls, mfc->mfcc_ttls,sizeof(cache->mfc_ttls));
+		 
+		/*
+		 *	Check to see if we resolved a queued list. If so we
+		 *	need to send on the frames and tidy up.
+		 */
+		 
+		if(cache->mfc_flags&MFC_QUEUED)
+			ipmr_cache_resolve(cache);	/* Unhook & send the frames */
+		sti();
+		return 0;
+	}
+	/*
+	 *	Unsolicited update - thats ok add anyway.
+	 */
+	sti();
+	cache=ipmr_cache_alloc(GFP_KERNEL);
+	if(cache==NULL)
+		return -ENOMEM;
+	cache->mfc_flags=MFC_RESOLVED;
+	cache->mfc_origin=mfc->mfcc_origin.s_addr;
+	cache->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
+	cache->mfc_parent=mfc->mfcc_parent;
+	memcpy(cache->mfc_ttls, mfc->mfcc_ttls,sizeof(cache->mfc_ttls));
+	ipmr_cache_insert(cache);
+	return 0;
+}
  
 /*
  *	Socket options and virtual interface manipulation. The whole
@@ -76,6 +423,7 @@
 {
 	int err;
 	struct vifctl vif;
+	struct mfcctl mfc;
 	
 	if(optname!=MRT_INIT)
 	{
@@ -184,7 +532,11 @@
 		 */
 		case MRT_ADD_MFC:
 		case MRT_DEL_MFC:
-			return -EOPNOTSUPP;
+			err=verify_area(VERIFY_READ, optval, sizeof(mfc));
+			if(err)
+				return err;
+			memcpy_fromfs(&mfc,optval, sizeof(mfc));
+			return ipmr_mfc_modify(optname, &mfc);
 		/*
 		 *	Control PIM assert.
 		 */
@@ -301,6 +653,15 @@
 		v++;
 	}		
 	vifc_map=0;	
+	/*
+	 *	Wipe the cache
+	 */
+	for(i=0;i<MFC_LINES;i++)
+	{
+		while(mfc_cache_array[i]!=NULL)
+			ipmr_cache_delete(mfc_cache_array[i]);
+	}	
+	/* The timer will clear any 'pending' stuff */
 }
 
 static int ipmr_device_event(unsigned long event, void *ptr)
@@ -328,12 +689,226 @@
 	NULL,
 	0
 };
+
+/*
+ *	Processing handlers for ipmr_forward
+ */
+
+static void ipmr_queue_xmit(struct sk_buff *skb, struct vif_device *vif, struct device *in_dev, int frag)
+{
+	int tunnel=0;
+	__u32 raddr=skb->raddr;
+	if(vif->flags&VIFF_TUNNEL)
+	{
+		tunnel=16;
+		raddr=vif->remote;
+	}
+	vif->pkt_out++;
+	vif->bytes_out+=skb->len;
+	skb->dev=vif->dev;
+	skb->raddr=skb->h.iph->daddr;
+	if(ip_forward(skb, in_dev, frag|8|tunnel, raddr)==-1)
+		kfree_skb(skb, FREE_WRITE);
+}
+
+/*
+ *	Multicast packets for forwarding arrive here
+ */
+
+void ipmr_forward(struct sk_buff *skb, int is_frag)
+{
+	struct mfc_cache *cache;
+	struct sk_buff *skb2;
+	int psend = -1;
+	int vif=ipmr_vifi_find(skb->dev);
+	if(vif==-1)
+	{
+		kfree_skb(skb, FREE_WRITE);
+		return;
+	}
+	
+	vif_table[vif].pkt_in++;
+	vif_table[vif].bytes_in+=skb->len;
 	
+	cache=ipmr_cache_find(skb->ip_hdr->saddr,skb->ip_hdr->daddr);
+	
+	/*
+	 *	No usable cache entry
+	 */
+	 
+	if(cache==NULL || (cache->mfc_flags&MFC_QUEUED))
+		ipmr_cache_unresolved(cache,vif,skb, is_frag);
+	else
+	{
+		/*
+		 *	Forward the frame
+		 */
+		 int ct=0;
+		 while(ct<MAXVIFS)
+		 {
+		 	/*
+		 	 *	0 means don't do it. Silly idea, 255 as don't do it would be cleaner!
+		 	 */
+		 	if(skb->ip_hdr->ttl > cache->mfc_ttls[ct] && cache->mfc_ttls[ct]>0)
+		 	{
+		 		if(psend!=-1)
+		 		{
+		 			skb2=skb_clone(skb, GFP_ATOMIC);
+		 			if(skb2)
+		 			{
+		 				skb2->free=1;
+		 				ipmr_queue_xmit(skb2, &vif_table[psend], skb->dev, is_frag);
+		 			}
+				}
+				psend=ct;
+			}
+			ct++;
+		}
+		if(psend==-1)
+			kfree_skb(skb, FREE_WRITE);
+		else
+		{
+			ipmr_queue_xmit(skb, &vif_table[psend], skb->dev, is_frag);
+		}
+		/*
+		 *	Adjust the stats
+		 */
+	}
+}
 
+/*
+ *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
+ */
+ 
+int ipmr_vif_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+	struct vif_device *vif;
+	int len=0;
+	off_t pos=0;
+	off_t begin=0;
+	int size;
+	int ct;
+
+	len += sprintf(buffer,
+		 "Interface  Bytes In  Pkts In  Bytes Out  Pkts Out  Flags Local    Remote\n");
+	pos=len;
+  
+	for (ct=0;ct<MAXVIFS;ct++) 
+	{
+		vif=&vif_table[ct];
+		if(!(vifc_map&(1<<ct)))
+			continue;
+		if(vif->dev==NULL)
+			continue;
+        	size = sprintf(buffer+len, "%-10s %8ld  %7ld  %8ld   %7ld   %05X %08lX %08lX\n",
+        		vif->dev->name,vif->bytes_in, vif->pkt_in, vif->bytes_out,vif->pkt_out,
+        		vif->flags, vif->local, vif->remote);
+		len+=size;
+		pos+=size;
+		if(pos<offset)
+		{
+			len=0;
+			begin=pos;
+		}
+		if(pos>offset+length)
+			break;
+  	}
+  	
+  	*start=buffer+(offset-begin);
+  	len-=(offset-begin);
+  	if(len>length)
+  		len=length;
+  	return len;
+}
+
+int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length, int dummy)
+{
+	struct mfc_cache *mfc;
+	int len=0;
+	off_t pos=0;
+	off_t begin=0;
+	int size;
+	int ct;
+
+	len += sprintf(buffer,
+		 "Group    Origin   SrcIface \n");
+	pos=len;
+  
+	for (ct=0;ct<MFC_LINES;ct++) 
+	{
+		cli();
+		mfc=mfc_cache_array[ct];
+		while(mfc!=NULL)
+		{
+			char *name="none";
+			char vifmap[MAXVIFS+1];
+			int n;
+			/*
+			 *	Device name
+			 */
+			if(vifc_map&(1<<mfc->mfc_parent))
+				name=vif_table[mfc->mfc_parent].dev->name;
+			/*
+			 *	Interface forwarding map
+			 */
+			for(n=0;n<MAXVIFS;n++)
+				if(vifc_map&(1<<n) && mfc->mfc_ttls[ct])
+					vifmap[n]='X';
+				else
+					vifmap[n]='-';
+			vifmap[n]=0;
+			/*
+			 *	Now print it out
+			 */
+			size = sprintf(buffer+len, "%08lX %08lX %-8s %s\n",
+				(unsigned long)mfc->mfc_mcastgrp,
+				(unsigned long)mfc->mfc_origin,
+				name,
+				vifmap);
+			len+=size;
+			pos+=size;
+			if(pos<offset)
+			{
+				len=0;
+				begin=pos;
+			}
+			if(pos>offset+length)
+			{
+				sti();
+				goto done;
+			}
+			mfc=mfc->next;
+	  	}
+	  	sti();
+  	}
+done:
+  	*start=buffer+(offset-begin);
+  	len-=(offset-begin);
+  	if(len>length)
+  		len=length;
+  	return len;
+}
+
+/*
+ *	Setup for IP multicast routing
+ */
+ 
 void ip_mr_init(void)
 {
-	printk("Linux IP multicast router 0.00pre-working 8)\n");
+	printk("Linux IP multicast router 0.02pre-working 8)\n");
 	register_netdevice_notifier(&ip_mr_notifier);
+	proc_net_register(&(struct proc_dir_entry) {
+		PROC_NET_IPMR_VIF, 9 ,"ip_mr_vif",
+		S_IFREG | S_IRUGO, 1, 0, 0,
+		0, &proc_net_inode_operations,
+		ipmr_vif_info
+	});
+	proc_net_register(&(struct proc_dir_entry) {
+		PROC_NET_IPMR_MFC, 11 ,"ip_mr_cache",
+		S_IFREG | S_IRUGO, 1, 0, 0,
+		0, &proc_net_inode_operations,
+		ipmr_mfc_info
+	});
 }
 
 #endif

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov with Sam's (original) version
of this