/* Revision-control identification string for this file; it is not
 * referenced by any code visible in this file. */
static char rcsid[] = "registry.c,v 1.118 1996/01/17 10:07:42 duane Exp";
/* 
 *  registry.c -- Registry Module, Keeps track of all objects in the Broker.
 *  William G. Camargo, Penn State Univ.
 *  Darren Hardy, U. Colorado, Boulder
 *
 *  DEBUG: section  70, level 1, 5, 9	Broker registry
 *  DEBUG: section  73, level 1, 5, 9	Broker registry hash tables
 *
 *  ----------------------------------------------------------------------
 *  Copyright (c) 1994, 1995.  All rights reserved.
 *  
 *    The Harvest software was developed by the Internet Research Task
 *    Force Research Group on Resource Discovery (IRTF-RD):
 *  
 *          Mic Bowman of Transarc Corporation.
 *          Peter Danzig of the University of Southern California.
 *          Darren R. Hardy of the University of Colorado at Boulder.
 *          Udi Manber of the University of Arizona.
 *          Michael F. Schwartz of the University of Colorado at Boulder.
 *          Duane Wessels of the University of Colorado at Boulder.
 *  
 *    This copyright notice applies to software in the Harvest
 *    ``src/'' directory only.  Users should consult the individual
 *    copyright notices in the ``components/'' subdirectories for
 *    copyright information about other software bundled with the
 *    Harvest source code distribution.
 *  
 *  TERMS OF USE
 *    
 *    The Harvest software may be used and re-distributed without
 *    charge, provided that the software origin and research team are
 *    cited in any use of the system.  Most commonly this is
 *    accomplished by including a link to the Harvest Home Page
 *    (http://harvest.cs.colorado.edu/) from the query page of any
 *    Broker you deploy, as well as in the query result pages.  These
 *    links are generated automatically by the standard Broker
 *    software distribution.
 *    
 *    The Harvest software is provided ``as is'', without express or
 *    implied warranty, and with no support nor obligation to assist
 *    in its use, correction, modification or enhancement.  We assume
 *    no liability with respect to the infringement of copyrights,
 *    trade secrets, or any patents, and are not responsible for
 *    consequential damages.  Proper use of the Harvest software is
 *    entirely the responsibility of the user.
 *  
 *  DERIVATIVE WORKS
 *  
 *    Users may make derivative works from the Harvest software, subject 
 *    to the following constraints:
 *  
 *      - You must include the above copyright notice and these 
 *        accompanying paragraphs in all forms of derivative works, 
 *        and any documentation and other materials related to such 
 *        distribution and use acknowledge that the software was 
 *        developed at the above institutions.
 *  
 *      - You must notify IRTF-RD regarding your distribution of 
 *        the derivative work.
 *  
 *      - You must clearly notify users that your are distributing 
 *        a modified version and not the original Harvest software.
 *  
 *      - Any derivative product is also subject to these copyright 
 *        and use restrictions.
 *  
 *    Note that the Harvest software is NOT in the public domain.  We
 *    retain copyright, as specified above.
 *  
 *  HISTORY OF FREE SOFTWARE STATUS
 *  
 *    Originally we required sites to license the software in cases
 *    where they were going to build commercial products/services
 *    around Harvest.  In June 1995 we changed this policy.  We now
 *    allow people to use the core Harvest software (the code found in
 *    the Harvest ``src/'' directory) for free.  We made this change
 *    in the interest of encouraging the widest possible deployment of
 *    the technology.  The Harvest software is really a reference
 *    implementation of a set of protocols and formats, some of which
 *    we intend to standardize.  We encourage commercial
 *    re-implementations of code complying to this set of standards.  
 *  
 */
#include "broker.h"
#include "log.h"

/* Global variables */
/* In-memory copy of the Registry file header; set up by RG_Init(). */
REGISTRY_HEADER *RegHdr = NULL;
/* Head of the doubly-linked (next/prev) list of in-memory entries. */
reg_t *Registry = NULL;
/* Defined elsewhere; DIRpath and reg_limit are not used by the code
 * visible in this file. */
extern char *DIRpath;
extern int reg_limit;
/* When non-zero, RG_Build_Registry() skips the SM_Exist_Obj() check. */
extern int do_fast_start;
/* Closed by the child process in RG_bulk_query() under FORK_ON_BULK. */
extern int qsock;

/* 
 *  Hash search structure for the Registry.  We use a fixed size hash table
 *  and chaining to quickly search the Registry for a FD without needing 
 *  to page in the entire registry.
 *
 *  We also build a hash table for URLs to cut down the search
 *  time needed to locate Registry objects during collections.
 *
 *  A third table, keyed on MD5, is maintained the same way.
 */
#if 0
/*  
 *  Here are some good prime number choices.  It's important not to
 *  choose a prime number that is too close to exact powers of 2.
 *  (This list is reference material only; it is compiled out.)
 */
#undef  HASH_SIZE 103		/* prime number < 128 */
#undef  HASH_SIZE 229		/* prime number < 256 */
#undef  HASH_SIZE 467		/* prime number < 512 */
#undef  HASH_SIZE 977		/* prime number < 1024 */
#undef  HASH_SIZE 1979		/* prime number < 2048 */
#undef  HASH_SIZE 4019		/* prime number < 4096 */
#undef  HASH_SIZE 6037		/* prime number < 6144 */
#undef  HASH_SIZE 7951		/* prime number < 8192 */
#undef  HASH_SIZE 12149		/* prime number < 12288 */
#undef  HASH_SIZE 16231		/* prime number < 16384 */
#undef  HASH_SIZE 33493		/* prime number < 32768 */
#undef  HASH_SIZE 65357		/* prime number < 65536 */
#endif

/* Number of buckets in each of the three hash tables. */
#define HASH_SIZE 6037		/* prime number < 6144 */

/* uhash(x): bucket index for a value that is already unsigned. */
#undef  uhash
#define uhash(x)	((x) % HASH_SIZE)	/* for unsigned */
/*
 *  hash(x): bucket index for a signed value; takes the absolute value
 *  before the modulo.  The argument is expanded more than once, so it
 *  must not have side effects.
 *  NOTE(review): negating the most negative value of a signed type
 *  overflows -- confirm FDs can never take that value.
 */
#undef  hash
#define hash(x)		(((x) < 0 ? -(x) : (x)) % HASH_SIZE)

/*
 *  One link in a hash bucket's chain.  A link only points at a Registry
 *  entry; RG_hash_destroy() and RG_hash_delete() free links, never the
 *  entries they refer to.
 */
typedef struct HASH_LINK {
	reg_t *item;			/* the Registry entry in this bucket */
	struct HASH_LINK *next;		/* next link in the chain, or NULL */
} hash_link;

/* Bucket heads; a NULL slot is an empty bucket (see RG_hash_init()). */
static hash_link *fdhtable[HASH_SIZE];	/* Hash table based on fd's */
static hash_link *urlhtable[HASH_SIZE];	/* Hash table based on url's */
static hash_link *md5htable[HASH_SIZE];	/* Hash table based on md5's */

/* Local functions (old-style declarations; definitions are below) */
static reg_t *RG_hash_search_byfd();	/* entry whose FD matches, or NULL */
static hash_link *RG_hash_url_bucket();	/* head of the bucket for a URL */
static hash_link *RG_hash_md5_bucket();	/* head of the bucket for an MD5 */
static void RG_hash_init();		/* zero all three tables */
static void RG_hash_build();		/* insert every Registry entry */
static void RG_hash_insert();		/* add one entry to the tables */
static void RG_hash_delete();		/* remove one entry's links */
static void RG_hash_destroy();		/* free every link */
static void RG_hash_print();		/* debugging dump of the tables */
static int hash_md5();			/* bucket index for an MD5 string */
static int hash_url();			/* bucket index for a URL string */
static void RG_print_reg_t();		/* debugging dump of one entry */

/***********************************************************************
 			Initialization and Tear Down Routines
 ***********************************************************************/

/* -----------------------------------------------------------------
   RG_Init() -- initialize the registry; build from files if necessary.

   Sets up the Registry file, resets the hash tables and the Gatherer
   ID management, releases any previous header and reads a new one.
   If the header read reports REGISTRY_EOF a fresh, empty header is
   created and written out; otherwise the records are loaded through
   RG_Build_Registry().

   Returns ERROR if the Registry file cannot be set up or the header
   cannot be read; otherwise the status of the sync or the build.
   ----------------------------------------------------------------- */
int RG_Init()
{
	/* Set the Registry file */
	if (init_registry_file() == ERROR)
		return ERROR;

	RG_hash_init();		/* reset the hash table */
	RG_gid_init();		/* reset the Gatherer ID mgmt */
	Registry = NULL;	/* start of the linked list */

	/* Drop the header left over from a previous initialization */
	if (RegHdr != NULL)
		xfree(RegHdr);

	/* The header must be read before any record */
	RegHdr = read_header();
	if (RegHdr == NULL)
		return ERROR;

	/* A real header came back: read the records into memory */
	if (RegHdr != (REGISTRY_HEADER *) REGISTRY_EOF)
		return (RG_Build_Registry());

	/* Nothing in the Registry (probably non-existent): start empty */
	RegHdr = (REGISTRY_HEADER *) xmalloc(sizeof(REGISTRY_HEADER));
	RegHdr->magic = REGISTRY_MAGIC;
	RegHdr->version = REGISTRY_VERSION;
	RegHdr->nrecords = 0;
	RegHdr->nrecords_deleted = 0;
	RegHdr->nrecords_valid = 0;
	return (RG_Sync_Registry());
}

int RG_Sync_Registry()
{
	Log("Syncing Registry file.\n");
	if (write_header(RegHdr) == ERROR) {
		errorlog("Could not rewrite the Registry file Header.\n");
		return ERROR;
	}
	return SUCCESS;
}

/* -----------------------------------------------------------------
   RG_Build_Registry() -- build registry from disk.
   Assume that Registry == NULL on entry.
   ----------------------------------------------------------------- */
int RG_Build_Registry()
{
	reg_t *tmp;
	int ncount = 0, vcount = 0, dcount = 0, stale = 0, status;

	Log("Building the in-memory Registry from disk %s...\n",
	    do_fast_start ? "(in fast mode)" : "");

	/* Must issue read_header first to reset the Registry file ptr */
	if (read_header() == NULL)
		return ERROR;

	tmp = (reg_t *) xmalloc(sizeof(reg_t));
	while (1) {
		/* Grab the next Registry entry from disk */
		memset(tmp, '\0', sizeof(reg_t));
		status = get_record(tmp);


		/* No more Registry entries */
		if (status == REGISTRY_EOF) {
        		RG_Free_Entry(tmp);             /* free memory */
			break;
		}

		/* Something went wrong, so stop processing */
		if (status == ERROR) {
        		RG_Free_Entry(tmp);             /* free memory */
			return ERROR;
		}

		ncount++;	/* number of objs processed */

		/* just skip deleted objects, reuse tmp buffer */
		/* deleted objects are determined by the header */
		if (status == ENTRY_DELETED) {
			dcount++;
			continue;
		}

		/*
		 *  See if the object really exists in the storage 
		 *  manager.  If it doesn't then, delete the object
		 *  from the registry file.   Remember to mark, then
		 *  restore the current place in the Registry when
		 *  deleting the bad record.
		 *  Otherwise, add it to the in-memory Registry by
		 *  placing it at the front of the Registry.
		 */
		Debug(70,9,("Read Registry record: %d\n", tmp->FD));
		if (!do_fast_start && (SM_Exist_Obj(tmp->FD) == FALSE)) {
			Log("WARNING: Missing object (FD %d), deleting from Registry file.\n", tmp->FD);
			set_registry_mark();
        		if (remove_record(tmp) == ERROR)
				errorlog("Cannot delete OBJ%d\n", tmp->FD);
			else
				dcount++;
        		RG_Free_Entry(tmp);             /* free memory */
			restore_registry_mark();
		} else {
			tmp->next = Registry;
			tmp->prev = NULL;
			if (Registry)
				Registry->prev = tmp;
			Registry = tmp;
			vcount++;
		}
                if ((ncount & 0x1F) == 0) { /* check on pending connections */
                        (void)select_loop(0, 0, 0);
                }
		tmp = (reg_t *) xmalloc(sizeof(reg_t));
	}
	RG_hash_build();	/* build the hash table for searching */

	/* do some sanity checks */
	if (RegHdr->nrecords != ncount) {
		Log("WARNING: Stale Registry header: record cnt mismatch: %d != %d\n", RegHdr->nrecords, ncount);
		RegHdr->nrecords = ncount;
		stale = 1;
	}
	if (RegHdr->nrecords_deleted != dcount) {
		Log("WARNING: Stale Registry header: delete cnt mismatch: %d != %d\n", RegHdr->nrecords_deleted, dcount);
		RegHdr->nrecords_deleted = dcount;
		stale = 1;
	}
	if (RegHdr->nrecords_valid != vcount) {
		Log("WARNING: Stale Registry header: valid cnt mismatch: %d != %d\n", RegHdr->nrecords_valid, vcount);
		RegHdr->nrecords_valid = vcount;
		stale = 1;
	}
	if (stale == 1) {
		(void) RG_Sync_Registry();
	}
	return SUCCESS;
}


/* -----------------------------------------------------------------
   RG_Registry_Shutdown() -- write the header out one last time and
   finish with the registry file.  The in-memory registry is left
   alone; call RG_Free_Registry() separately to release it.
   ----------------------------------------------------------------- */
void RG_Registry_Shutdown()
{
	/* A failed sync is logged by RG_Sync_Registry(); carry on */
	(void) RG_Sync_Registry();

	finish_registry_file();
}

/***********************************************************************
 		Registry maintenance: adding, deleting, compressing
 ***********************************************************************/

/* ----------------------------------------------------------------- *
   RG_Register() -- add an OID to the registry.
   ----------------------------------------------------------------- */
int RG_Register(new_item)
reg_t *new_item;
{
	reg_t *add = new_item;

	Debug(70,1,("RG_Register: Adding object: %s\n", new_item->url));

	/* Add to the Registry file */
	if (append_new_record(add) == ERROR)
		return ERROR;

	/* add new entry by prepending it to the Registry */
	add->prev = NULL;
	add->next = Registry;
	if (Registry != NULL) {
		Registry->prev = add;
	}
	Registry = add;

	if (debug_ok(70,9))
		RG_print_reg_t(add);

	RG_hash_insert(add);	/* add to hash table */

	RegHdr->nrecords++;	/* update Registry header */
	RegHdr->nrecords_valid++;
	return SUCCESS;
}

/* ----------------------------------------------------------------- *
   RG_Unregister() -- unregister an object.

   Removes the entry's record from the Registry file, drops its hash
   links, unlinks it from the in-memory list and frees it.  On
   SUCCESS the pointer passed in is no longer valid.

   Returns ERROR if the file record cannot be removed (the entry is
   then left fully registered, list and hash tables alike), or if
   the entry turns out not to be linked into the list.
   ----------------------------------------------------------------- */
int RG_Unregister(tmp)
reg_t *tmp;
{
	Debug(70,1,("RG_Unregister: Deleting object: %s\n", tmp->url));

	if (debug_ok(70,9))
		RG_print_reg_t(tmp);

	/*
	 *  Delete from the Registry file first.  Touching the hash
	 *  tables before this could leave an entry that is still on
	 *  the list but can no longer be found through the tables
	 *  when the file update fails.
	 */
	if (remove_record(tmp) == ERROR)
		return ERROR;

	RG_hash_delete(tmp);		/* remove hash entry */

	/* Delete it from the in-memory Registry */
	if (tmp == Registry) {
		/* The item is the first item */
		Registry = Registry->next;
		if (Registry != NULL)
			Registry->prev = NULL;
	} else if ((tmp->next != NULL) && (tmp->prev != NULL)) {
		/* The item is somewhere in the middle */
		tmp->prev->next = tmp->next;
		tmp->next->prev = tmp->prev;
	} else if ((tmp->next == NULL) && (tmp->prev != NULL)) {
		/* The item is the last item in the Registry */
		tmp->prev->next = NULL;
	} else {
		/* Not the head, yet it has no predecessor: not on the list */
		errorlog("RG_Unregister: Fatal Internal Error! %d: %s\n",
			 tmp->FD, tmp->url);
		return ERROR;
	}

	RG_Free_Entry(tmp);		/* free memory */
	RegHdr->nrecords_deleted++;	/* update Reg Header */
	return SUCCESS;
}

/* ----------------------------------------------------------------- *
   RG_Free_Registry() -- release the whole in-memory registry: the
   hash links, the Gatherer ID state, and every entry on the list.
   Always returns SUCCESS.
   ----------------------------------------------------------------- */
int RG_Free_Registry()
{
	reg_t *cur, *following;

	Debug(70,1,("Freeing the entire Registry\n"));

	RG_hash_destroy();		/* links first; they point at entries */
	RG_gid_destroy();		/* reset the Gatherer ID mgmt */

	for (cur = Registry; cur != NULL; cur = following) {
		following = cur->next;	/* read before cur is freed */
		RG_Free_Entry(cur);
	}
	Registry = NULL;
	return SUCCESS;
}

/* ----------------------------------------------------------------- 
   RG_Free_Entry() -- free one registry entry together with the url,
   md5 and desc buffers it holds.  A NULL entry is accepted and
   ignored.  Always returns SUCCESS.
   ----------------------------------------------------------------- */
int RG_Free_Entry(tmp)
reg_t *tmp;
{
	if (tmp == NULL)
		return SUCCESS;

	/* Only hand non-NULL buffers to xfree() */
	if (tmp->url != NULL)
		xfree(tmp->url);
	if (tmp->md5 != NULL)
		xfree(tmp->md5);
	if (tmp->desc != NULL)
		xfree(tmp->desc);

	xfree(tmp);
	return SUCCESS;
}

/* ----------------------------------------------------------------- *
   RG_Count_Reg() -- walk the in-memory list and return how many
   entries it holds.
   ----------------------------------------------------------------- */
int RG_Count_Reg()
{
	reg_t *cur = Registry;
	int count = 0;

	while (cur != NULL) {
		count++;
		cur = cur->next;
	}
	Debug(70,5,("RG_Count_Reg: Count is %d\n", count));
	return (count);
}

/* ----------------------------------------------------------------- 
   RG_Cleaner() -- remove expired objects from the Broker.

   An entry has expired when ttl + update_time lies before the
   current time.  Each expired entry is handed to RG_Clean_Entry();
   its result is not examined.  The header is rewritten if anything
   was expired.  Returns ERROR only when the current time cannot be
   obtained, else SUCCESS.
   ----------------------------------------------------------------- */
int RG_Cleaner()
{
	reg_t *cur, *victim;
	time_t now;
	int clnd = 0, n = 0;

	LOGCLEANER;
	if ((now = UTIL_Get_Time()) <= 0)
		return ERROR;

	Log("Starting Cleaning.\n");

	/* Walk the entire Registry */
	cur = Registry;
	while (cur != NULL) {
		/* Step forward first: an expired entry is freed below */
		victim = cur;
		cur = cur->next;

		/* This object's TTL has expired? */
		if ((victim->ttl + victim->update_time) < now) {
			Debug(70,5,("RG_Cleaner: Expiring: %d, %s\n", 
				victim->FD, victim->url));
			LOGCLEAN(victim);
			(void)RG_Clean_Entry(victim);
			clnd++;
		}

		/* Every 32 entries, check on pending connections */
		if ((n++ & 0x1F) == 0) {
			(void)select_loop(0, 0, 0);
		}
	}

	/* Update the Registry file header if it's changed */
	if (clnd > 0) 
		(void)RG_Sync_Registry();

	Log("Finished Cleaning, %d objects expired.\n", clnd);
	return SUCCESS;
}

/* ----------------------------------------------------------------- *
   RG_Clean_Entry() -- remove an object from the Broker
   ----------------------------------------------------------------- */
int RG_Clean_Entry(tmp)
reg_t *tmp;
{
	int err = SUCCESS;

	/* remove old object from the storage manager */
	if (SM_Destroy_Obj(tmp->FD) == ERROR) 
		err = ERROR;
	/* remove old object from indexer */
	if (do_IND_Destroy_Obj(tmp) == ERROR) 
		err = ERROR;
	/* unregister old object, and frees memory */
	if (RG_Unregister(tmp) == ERROR) 
		err = ERROR;

	return (err);
}
/* -----------------------------------------------------------------
   RG_Compress() -- rewrites the Registry to remove deleted records.

   Every in-memory entry not flagged as deleted is written to
   "Registry.compressing".  On success the current "Registry" is
   kept as "Registry.old" and the new file takes its place.  If the
   new file cannot be moved into place, the old Registry is moved
   back so that it is not lost.  Whatever happens after the new file
   was opened, the in-memory registry is freed and rebuilt through
   RG_Init().

   Returns ERROR if the new file cannot be opened; otherwise the
   result of RG_Init().
   ----------------------------------------------------------------- */
int RG_Compress()
{
	char *new_fn, *old_fn, *back_fn;
	reg_t *tmp;
	REGISTRY_HEADER reghdr;
	int ndel = 0, n = 0;
	int backed_up;

	LOGCOMPRESSION;

	Log("Begin Compressing Registry File...\n");

	/* We don't want to write on 'Registry' until we're done */
	back_fn = UTIL_make_admin_filename("Registry.old");
	old_fn = UTIL_make_admin_filename("Registry");
	new_fn = UTIL_make_admin_filename("Registry.compressing");
	if (set_registry_file(new_fn) != SUCCESS) {
		xfree(back_fn);
		xfree(old_fn);
		xfree(new_fn);
		return ERROR;
	}

	/* Write a clean header */
	reghdr.magic = REGISTRY_MAGIC;
	reghdr.version = REGISTRY_VERSION;
	reghdr.nrecords = 0;
	reghdr.nrecords_deleted = 0;
	reghdr.nrecords_valid = 0;
	if (write_header(&reghdr) != SUCCESS)
		goto discard_new;

	/* 
	 *  Go through the entire Registry and write the valid entries,
	 *  while ignoring the invalid ones.
	 */
	for (tmp = Registry; tmp; tmp = tmp->next) {
		if (!IS_DELETED(tmp->flag)) {
			if (write_record(tmp) != SUCCESS)
				goto discard_new;
			reghdr.nrecords++;
			reghdr.nrecords_valid++;
		} else {
			ndel++;
		}
		if ((n++ & 0x1F) == 0) { /* check on pending connections */
			(void)select_loop(0, 0, 0);
		}
	}

	/* Now write the correct Registry header, and close the file */
	if (write_header(&reghdr) != SUCCESS)
		goto discard_new;
	finish_registry_file();

	/* Backup the old Registry, and use the new one instead */
	backed_up = (rename(old_fn, back_fn) == 0);
	if (rename(new_fn, old_fn) < 0) {
		log_errno(old_fn);
		(void) unlink(new_fn);
		/*
		 *  The live file now sits under the backup name.  Move
		 *  it back, otherwise RG_Init() below would find no
		 *  Registry file.  Only do so if the backup rename
		 *  really happened, so that an older Registry.old is
		 *  never installed by mistake.
		 */
		if (backed_up && rename(back_fn, old_fn) < 0)
			log_errno(back_fn);
		goto reinstate_reg;
	}
	Log("Wrote %d valid records, deleted %d records.\n",
	     reghdr.nrecords_valid, ndel + RegHdr->nrecords_deleted);
	Log("Finished Compressing Registry File.\n");
	goto reinstate_reg;

discard_new:
	/* Writing the new file failed: close it and throw it away */
	finish_registry_file();
	(void) unlink(new_fn);

reinstate_reg:
	xfree(back_fn);
	xfree(new_fn);
	xfree(old_fn);
	/* Now rebuild the in-mem registry from scratch */
	RG_Free_Registry();
	return (RG_Init());

}

/***********************************************************************
 		Registry services: bulk xfer by timestamp
 ***********************************************************************/

/* ----------------------------------------------------------------- *
   RG_bulk_query() -- perform a bulk transfer.

   Sends every entry whose update_time is at or after ptime over
   rsock, bracketed by the bulk begin/end messages.  rsock is
   wrapped in a stdio stream which is closed before returning.

   With FORK_ON_BULK the transfer runs in a child process and the
   parent returns right away.  A failed fork() is reported to the
   peer and to the caller instead of being mistaken for the parent
   side of a successful fork.

   Returns ERROR if the stream cannot be created or the fork fails,
   else SUCCESS.
   ----------------------------------------------------------------- */
int RG_bulk_query(rsock, ptime)
int rsock;
time_t ptime;
{
	FILE *fp;
	reg_t *tmp;
#ifdef FORK_ON_BULK
	int pid;
#endif

	if ((fp = fdopen(rsock, "w")) == NULL) {
		log_errno("fdopen");
		(void)QM_send_bulk_err(rsock);
		return ERROR;
	}
/* Should be safe to fork here.  In the child process, we should      */
/* be only reading from the registry, and not altering anything.      */
#ifdef FORK_ON_BULK
	pid = (int) fork();
	if (pid < 0) {		/* no child: nobody would send the data */
		log_errno("fork");
		(void)QM_send_bulk_err(rsock);
		fclose(fp);
		return ERROR;
	}
	if (pid > 0) {		/* parent process: the child does the work */
		fclose(fp);
		return SUCCESS;
	}
	/* child process */
	(void)close(qsock);	/* don't need main socket */
#endif
	QM_send_bulk_begin(rsock);
	/* Send all new objects */
	for (tmp = Registry; tmp != NULL; tmp = tmp->next) {
		if (tmp->update_time >= ptime) {
			(void)QM_send_bulk_fd(tmp->FD, fp, tmp);
		}
	}
	/* Flush the stream before the end marker goes out on rsock */
	fflush(fp);
	(void)QM_send_bulk_end(rsock);
	fclose(fp);
#ifdef FORK_ON_BULK
	_exit(0);
#endif
	return SUCCESS;
}

/***********************************************************************
 		Registry searching: lookup by FD, URL, MD5
 ***********************************************************************/

/* ----------------------------------------------------------------- *
   RG_Get_Entry() -- return the registry entry for an fd, or NULL if
   no entry has that fd.  Uses the FD hash table.
   ----------------------------------------------------------------- */
reg_t *RG_Get_Entry(fd)
fd_t fd;
{
	reg_t *entry;

	entry = RG_hash_search_byfd(fd);
	return (entry);
}

/* ----------------------------------------------------------------- *
   RG_Get_URL() -- return the URL of the entry with the given fd, or
   NULL if there is no such entry.  The string returned is the
   entry's own url buffer, not a copy.
   ----------------------------------------------------------------- */
char *RG_Get_URL(fd)
fd_t fd;
{
	reg_t *entry = RG_Get_Entry(fd);

	if (entry == NULL)
		return (NULL);
	return (entry->url);
}

/* ----------------------------------------------------------------- 
   RG_FD_Exists() -- TRUE if some registry entry already uses this
   fd, FALSE otherwise.
   ----------------------------------------------------------------- */
int RG_FD_Exists(fd)
fd_t fd;
{
	if (RG_Get_Entry(fd) == NULL)
		return (FALSE);
	return (TRUE);
}


/*
 *  A very safe memcmp: works for same length, equivalence only.
 *  Returns 0 if the two memory buffers match byte-for-byte, and
 *  are the same length; non-zero otherwise.  A NULL buffer never
 *  matches anything.  Arguments are expanded more than once.
 */
#define safe_memmatch(a,asz,b,bsz) \
	((((asz) == (bsz)) && ((a) != NULL) && ((b) != NULL)) ? \
	(memcmp((a),(b),(asz))) : 1)

/*
 *  An object matches if:
 *      - the MD5 is the same, *OR* the URL is the same, and
 *      - the Gatherer Identifiers are the same
 *      (the Gatherer-Name is the same, and
 *       the Gatherer-Version is the same, and
 *       the Gatherer-Host is the same)
 *  Some objects in the registry might not have an MD5.
 *  All objects in the registry have a URL.
 * 
 *  We've written this out so that it's optimized for speed.
 *  Generally, the Gatherer-* stuff is the same and the MD5 is
 *  the shortest and most different piece of information.
 *  Size compares are used first before memcmp's for speed.
 *
 *  In the code the Gatherer identity is the single GID field, which
 *  is compared first; the MD5 and URL compares follow in that order.
 */
#define RG_Object_Match(a,b) \
(((a)->GID == (b)->GID) && \
 (!safe_memmatch((a)->md5, (a)->md5s, (b)->md5, (b)->md5s)  \
	|| \
  !safe_memmatch((a)->url, (a)->urls, (b)->url, (b)->urls)))

/*
 * Match on Object URLs only (the GID and the MD5 are not looked at)
 */
#define RG_Object_URL_Match(a,b) \
  (!safe_memmatch((a)->url, (a)->urls, (b)->url, (b)->urls))

/* -----------------------------------------------------------------
   RG_Object_Search_Entry() -- search for an entry in the registry.

   Returns the first registered entry that RG_Object_Match()es e
   (same GID and either the same MD5 or the same URL), or NULL if
   there is none or e is NULL.  The MD5 bucket is searched only when
   e carries an MD5 pointer and md5s > 8, the same test that
   RG_hash_insert() applies; an entry without a URL pointer cannot
   be looked up in the URL table.
   ----------------------------------------------------------------- */
reg_t *RG_Object_Search_Entry(e)
reg_t *e;
{
	hash_link *tmp;

	if (e == NULL)
		return (NULL);

	/*
	 *  We used to traverse the entire Registry to locate an object.
	 *  Now we use the URL and the MD5 hash buckets.  We must
	 *  search both hash buckets since a matching object may lie 
	 *  anywhere in either.
	 */
	if (e->md5 != NULL && e->md5s > 8) {	/* md5 may be null */
		for (tmp = RG_hash_md5_bucket(e->md5); tmp != NULL; tmp = tmp->next) {
			Debug(70,9,("RG_Object_Search_Entry: MD5 bucket: Looking at %d, %s\n", tmp->item->FD, tmp->item->url));
			if (RG_Object_Match(e, tmp->item)) 
				return(tmp->item);
		}
	}

	/* url should always be set; hashing a NULL string would crash */
	if (e->url == NULL)
		return (NULL);

	for (tmp = RG_hash_url_bucket(e->url); tmp != NULL; tmp = tmp->next) {
		Debug(70,9,("RG_Object_Search_Entry: URL bucket: Looking at %d, %s\n", tmp->item->FD, tmp->item->url));
		if (RG_Object_Match(e, tmp->item)) {
			return(tmp->item);
		}
	}

	return (NULL);
}

/* -----------------------------------------------------------------
   RG_Object_Search_ByURL() -- approx. search for an entry in the
   registry which matches the given URL.  Note that this search is
   only approximate, since many objects may exist with the same URL
   but different GIDs; the first one found in the URL bucket is
   returned.  Returns NULL for a NULL or empty URL, or if nothing
   matches.
   ----------------------------------------------------------------- */
reg_t *RG_Object_Search_ByURL(url)
char *url;
{
	hash_link *link;

	if (url == NULL || url[0] == '\0')
		return (NULL);

	link = RG_hash_url_bucket(url);
	while (link != NULL) {
		Debug(70,9,("RG_Object_Search_ByURL: URL bucket: Looking at %d, %s\n", link->item->FD, link->item->url));
		if (strcmp(url, link->item->url) == 0)
			return(link->item);
		link = link->next;
	}

	return (NULL);
}

/* -----------------------------------------------------------------
    RG_Uniqify_by_URL() - Remove duplicate URLs from the broker
   ----------------------------------------------------------------- */
int RG_Uniqify_by_URL()
{
	reg_t		*entry = NULL;
	hash_link	*tmp = NULL;
	hash_link	*next = NULL;
	int		n = 0;

	for (entry=Registry; entry; entry=entry->next) {

		for (tmp=RG_hash_url_bucket(entry->url); tmp; tmp=next) {
			next = tmp->next;
			if (tmp->item == entry) continue;
			if (!RG_Object_URL_Match(entry,tmp->item)) continue;
			if (tmp->item->update_time <= entry->update_time) {
				Debug (70,1,("RG_Uniqify: Removing duplicate: %s\n", tmp->item->url));
				RG_Unregister (tmp->item);
				n++;
			}
		}
	}

	Log("RG_Uniqify: %d objects removed.\n", n);
	RG_Compress ();
	return n;
}
/* The matching helpers are only needed by the search routines above */
#undef safe_memmatch
#undef RG_Object_Match
#undef RG_Object_URL_Match


/***********************************************************************
 		Registry hashing support
 ***********************************************************************/

/*
 *  hash_url() - Returns a URL hash bucket index in [0, HASH_SIZE).
 *  Only the last half of the string takes part: each of its
 *  characters is multiplied by 271 and XORed into an accumulator,
 *  which is finally XORed with 271 times the string length.
 *  Adapted from code written by Mic Bowman.  -Darren
 *  The original authors measured a standard deviation of 15.73.
 *
 *  s must be a NUL-terminated string; NULL is not accepted.
 */
static int hash_url(s)
char *s;
{
	unsigned int len, pos, acc;

	len = strlen(s);
	acc = 0;
	for (pos = len / 2; pos < len; pos++)
		acc ^= 271 * (unsigned)s[pos];
	acc ^= (len * 271);	/* fold the length in as well */
	return(uhash(acc));
}

/*
 *  hash_md5() - Returns an MD5 hash bucket index in [0, HASH_SIZE).
 *  Do a quickie based on the first 8 characters: they are parsed as
 *  one hexadecimal number, which is then reduced modulo HASH_SIZE.
 *  If nothing can be parsed the number stays 0.
 *  This is very fast and provides a great even distribution.
 *  The original authors measured a standard deviation of 8.70.
 *
 *  s must be a NUL-terminated string; NULL is not accepted.
 */
static int hash_md5(s)
char *s;
{
	char prefix[9];
	unsigned int value = 0;

	/* Copy at most 8 characters and terminate the copy ourselves */
	strncpy(prefix, s, sizeof(prefix) - 1);
	prefix[sizeof(prefix) - 1] = '\0';

	sscanf(prefix, "%x", &value);
	return(uhash(value));
}


/*
 *  RG_hash_init() - marks every bucket of the three hash tables as
 *  empty.  Links already in the tables are not freed here; use
 *  RG_hash_destroy() for that.
 */
static void RG_hash_init()
{
	/* sizeof yields a size_t; convert it to match the %d format */
	Debug(73,1,("RG_hash_init: HASH_SIZE %d, hash_link %d, reg_t %d.\n",
		HASH_SIZE, (int) sizeof(hash_link), (int) sizeof(reg_t)));
	memset(fdhtable, '\0', sizeof(fdhtable));
	memset(urlhtable, '\0', sizeof(urlhtable));
	memset(md5htable, '\0', sizeof(md5htable));
}

/*
 *  RG_hash_build() - Walks the in-memory Registry and inserts every
 *  entry into the hash tables.  Dumps the tables when debugging
 *  section 70 is at level 9.
 */
static void RG_hash_build()
{
	reg_t *entry = Registry;
	int cnt = 0;

	Log("Building fast search structure for the Registry...\n");
	while (entry != NULL) {
		RG_hash_insert(entry);
		cnt++;
		entry = entry->next;
	}
	Log("Search structure contains %d objects.\n", cnt);

	if (debug_ok(70,9)) 
		RG_hash_print();
}

/*
 *  RG_hash_insert() - Inserts the Registry item into the hash tables.
 *  It always goes into the FD table.  It goes into the URL table only
 *  if it has a URL with urls > 1, and into the MD5 table only if it
 *  has an MD5 with md5s > 8.  A new link is always placed at the
 *  front of its bucket.
 */
static void RG_hash_insert(item)
reg_t *item;
{
	hash_link *link;
	int slot;

	/* FD table */
	link = (hash_link *) xmalloc(sizeof(hash_link));
	link->item = item;
	slot = hash(item->FD);
	Debug(73,5,("RG_hash_insert: Inserting %p %d in FD bucket %d\n", item, item->FD, slot));
	link->next = fdhtable[slot];	/* NULL when the bucket is empty */
	fdhtable[slot] = link;

	/* URL table; skipped for a 0 length URL */
	if (item->url != NULL && item->urls > 1) {
		link = (hash_link *) xmalloc(sizeof(hash_link));
		link->item = item;
		slot = hash_url(item->url);
		Debug(73,5,("RG_hash_insert: Inserting %p %d in URL bucket %d\n", item, item->FD, slot));
		link->next = urlhtable[slot];
		urlhtable[slot] = link;
	}

	/* MD5 table; hash_md5() looks at the first 8 characters */
	if (item->md5 != NULL && item->md5s > 8) {
		link = (hash_link *) xmalloc(sizeof(hash_link));
		link->item = item;
		slot = hash_md5(item->md5);
		Debug(73,5,("RG_hash_insert: Inserting %p %d in MD5 bucket %d\n", item, item->FD, slot));
		link->next = md5htable[slot];
		md5htable[slot] = link;
	}
}

/*
 *  RG_hash_destroy() - frees every link in the FD, URL and MD5 hash
 *  tables and leaves all buckets NULL.  The Registry entries the
 *  links point at are not freed.
 */
static void RG_hash_destroy()
{
	hash_link **tables[3];
	hash_link *cur, *doomed;
	int i, t;

	/* Same order within each slot as before: FD, URL, MD5 */
	tables[0] = fdhtable;
	tables[1] = urlhtable;
	tables[2] = md5htable;

	Debug(73,1,("RG_hash_destroy: Deleting the entire hash table.\n"));
	for (i = 0; i < HASH_SIZE; i++) {
		for (t = 0; t < 3; t++) {
			cur = tables[t][i];
			while (cur != NULL) {
				doomed = cur;
				cur = cur->next;	/* read before freeing */
				xfree(doomed);
			}
			tables[t][i] = NULL;
		}
	}
}


/* 
 *  RG_hash_search_byfd - Returns the Registry item whose FD equals
 *  the one given, or NULL if the FD's bucket holds no such item.
 */
static reg_t *RG_hash_search_byfd(FD)
fd_t FD;
{
	int slot = hash(FD);
	hash_link *link;

	Debug(73,9,("RG_hash_search_byfd: Looking up %d in bucket %d\n",FD,slot));
	link = fdhtable[slot];
	while (link != NULL) {
		if (link->item->FD == FD)
			return (link->item);
		link = link->next;
	}
	return (NULL);
}

/*
 *  RG_hash_url_bucket - Returns the head of the URL hash bucket that
 *  the given URL hashes to (NULL if the bucket is empty).  The bucket
 *  may also hold items with other URLs.  url must not be NULL.
 */
static hash_link *RG_hash_url_bucket(url)
char *url;
{
	hash_link *head;
	int slot;

	slot = hash_url(url);
	Debug(73,9,("RG_hash_url_bucket: Looking up %s in bucket %d\n",url,slot));
	head = urlhtable[slot];
	return(head);
}

/*
 *  RG_hash_md5_bucket - Returns the head of the MD5 hash bucket that
 *  the given MD5 hashes to (NULL if the bucket is empty).  The bucket
 *  may also hold items with other MD5s.  md5 must not be NULL.
 */
static hash_link *RG_hash_md5_bucket(md5)
char *md5;
{
	hash_link *head;
	int slot;

	slot = hash_md5(md5);
	Debug(73,9,("RG_hash_md5_bucket: Looking up %s in bucket %d\n",md5,slot));
	head = md5htable[slot];
	return(head);
}

/*
 *  RG_hash_bucket_remove() - Unlinks and frees the first link in the
 *  chain starting at *head whose item has the same FD as item.  The
 *  chain is left alone if there is no such link.
 *
 *  Walking with a pointer to the "next" field of the previous link
 *  (or to the bucket head) gives one unlink step for every position.
 *  Comparing a trailing "prev" pointer against the head, as used to
 *  be done, is also true for the second link, and removing that one
 *  dropped the first link from the bucket.
 */
static void RG_hash_bucket_remove(head, item)
hash_link **head;
reg_t *item;
{
	hash_link **pp, *doomed;

	for (pp = head; *pp != NULL; pp = &(*pp)->next) {
		if ((*pp)->item->FD == item->FD) {
			doomed = *pp;
			*pp = doomed->next;
			xfree(doomed);
			return;
		}
	}
}

/*
 *  RG_hash_delete() - Deletes the Registry item from the Hash tables.
 *  The FD table is always searched; the URL and MD5 tables only under
 *  the conditions RG_hash_insert() uses to add to them.  In each
 *  table the link is found by comparing FDs.  Only links are freed,
 *  never the item itself.
 */
static void RG_hash_delete(item)
reg_t *item;
{
	int i;

	Debug(73,5,("RG_hash_delete: Deleting item %p, %d, %p, %p\n",
		item, item->FD, item->url, item->md5));

	i = hash(item->FD);
	Debug(73,9,("RG_hash_delete: Deleting %d: FD bucket %d\n",item->FD,i));
	RG_hash_bucket_remove(&fdhtable[i], item);

	if (item->url != NULL && item->urls > 1) {
		i = hash_url(item->url);
		Debug(73,9,("RG_hash_delete: Deleting %d: URL bucket %d\n",item->FD,i));
		RG_hash_bucket_remove(&urlhtable[i], item);
	}

	if (item->md5 != NULL && item->md5s > 8) {
		i = hash_md5(item->md5);
		Debug(73,9,("RG_hash_delete: Deleting %d: MD5 bucket %d\n",item->FD,i));
		RG_hash_bucket_remove(&md5htable[i], item);
	}
}

/*
 *  RG_hash_print - debugging output; prints the entire hash table.
 *  For each slot it lists, on stdout, the links of the FD, URL and
 *  MD5 buckets in turn, each list followed by its length.
 */
static void RG_hash_print()
{
	hash_link *link;
	int slot, total;

	for (slot = 0; slot < HASH_SIZE; slot++) {
		total = 0;
		for (link = fdhtable[slot]; link != NULL; link = link->next) {
			total++;
			printf("FD  Hash: %5d: %10p: FD %10d.\n", slot, 
				link->item, link->item->FD);
		}
		printf("FD  Hash: %5d: Total %d\n", slot, total);

		total = 0;
		for (link = urlhtable[slot]; link != NULL; link = link->next) {
			total++;
			printf("URL Hash: %5d: %10p: FD %10d.\n", slot, 
				link->item, link->item->FD);
		}
		printf("URL Hash: %5d: Total %d\n", slot, total);

		total = 0;
		for (link = md5htable[slot]; link != NULL; link = link->next) {
			total++;
			printf("MD5 Hash: %5d: %10p: FD %10d.\n", slot, 
				link->item, link->item->FD);
		}
		printf("MD5 Hash: %5d: Total %d\n", slot, total);
	}
}

/*
 *  RG_print_reg_t - For debugging only.  Prints every field of a
 *  Registry entry on stdout, one per line, then flushes stdout.
 *  NULL strings are shown as "(null)".
 */
static void RG_print_reg_t(r)
reg_t *r;
{
	char *url, *md5, *desc;

	url = r->url;
	md5 = r->md5;
	desc = r->desc;

	printf("reg_t: %p\n", r);
	printf("reg_t: url: %p, %s\n", url, (url != NULL) ? url : "(null)");
	printf("reg_t: urls: %d\n", r->urls);
	printf("reg_t: md5: %p, %s\n", md5, (md5 != NULL) ? md5 : "(null)");
	printf("reg_t: md5s: %d\n", r->md5s);
	printf("reg_t: ttl: %d\n", r->ttl);
	printf("reg_t: lmt: %d\n", r->lmt);
	printf("reg_t: refresh_rate: %d\n", r->refresh_rate);
	printf("reg_t: update_time: %d\n", r->update_time);
	printf("reg_t: FD: %d\n", r->FD);
	printf("reg_t: GID: %d\n", r->GID);
	printf("reg_t: desc: %p, %s\n", desc, (desc != NULL) ? desc : "(null)");
	printf("reg_t: descs: %d\n", r->descs);
	printf("reg_t: flag: %d\n", r->flag);
	printf("reg_t: rec_off: %ld\n", (long) r->rec_off);
	printf("reg_t: next: %p\n", r->next);
	printf("reg_t: prev: %p\n", r->prev);
	fflush(stdout);
}
