/* RCS revision identification string for this file. */
static char rcsid[] = "index.c,v 1.142 1996/01/18 07:02:15 duane Exp";
/* 
 *  index.c -- Broker indexing/search support using Glimpse
 *
 *  William G. Camargo, Chanda Dharap, Mic Bowman, Penn State Univ.
 *  Darren Hardy, Duane Wessels, U. Colorado - Boulder
 *
 *  DEBUG: section 102, level 1         Broker glimpse indexing engine
 *
 *  You can define the following values in the broker.conf file:
 *
 *      Glimpse                 'glimpse' command
 *      GlimpseIndex            'glimpseindex' command
 *      GlimpseIndex-Option     'glimpseindex' options
 *      GlimpseIndex-Flags      'glimpseindex' (extra) flags
 *      GlimpseServer           'glimpseserver' command
 *      GlimpseServer-Host      'glimpseserver' host
 *      GlimpseServer-Restart   Restart glimpseserver after every N queries
 *      Glimpse-MaxLife         Max lifetime value
 *
 *  ----------------------------------------------------------------------
 *  Copyright (c) 1994, 1995.  All rights reserved.
 *  
 *    The Harvest software was developed by the Internet Research Task
 *    Force Research Group on Resource Discovery (IRTF-RD):
 *  
 *          Mic Bowman of Transarc Corporation.
 *          Peter Danzig of the University of Southern California.
 *          Darren R. Hardy of the University of Colorado at Boulder.
 *          Udi Manber of the University of Arizona.
 *          Michael F. Schwartz of the University of Colorado at Boulder.
 *          Duane Wessels of the University of Colorado at Boulder.
 *  
 *    This copyright notice applies to software in the Harvest
 *    ``src/'' directory only.  Users should consult the individual
 *    copyright notices in the ``components/'' subdirectories for
 *    copyright information about other software bundled with the
 *    Harvest source code distribution.
 *  
 *  TERMS OF USE
 *    
 *    The Harvest software may be used and re-distributed without
 *    charge, provided that the software origin and research team are
 *    cited in any use of the system.  Most commonly this is
 *    accomplished by including a link to the Harvest Home Page
 *    (http://harvest.cs.colorado.edu/) from the query page of any
 *    Broker you deploy, as well as in the query result pages.  These
 *    links are generated automatically by the standard Broker
 *    software distribution.
 *    
 *    The Harvest software is provided ``as is'', without express or
 *    implied warranty, and with no support nor obligation to assist
 *    in its use, correction, modification or enhancement.  We assume
 *    no liability with respect to the infringement of copyrights,
 *    trade secrets, or any patents, and are not responsible for
 *    consequential damages.  Proper use of the Harvest software is
 *    entirely the responsibility of the user.
 *  
 *  DERIVATIVE WORKS
 *  
 *    Users may make derivative works from the Harvest software, subject 
 *    to the following constraints:
 *  
 *      - You must include the above copyright notice and these 
 *        accompanying paragraphs in all forms of derivative works, 
 *        and any documentation and other materials related to such 
 *        distribution and use acknowledge that the software was 
 *        developed at the above institutions.
 *  
 *      - You must notify IRTF-RD regarding your distribution of 
 *        the derivative work.
 *  
 *      - You must clearly notify users that your are distributing 
 *        a modified version and not the original Harvest software.
 *  
 *      - Any derivative product is also subject to these copyright 
 *        and use restrictions.
 *  
 *    Note that the Harvest software is NOT in the public domain.  We
 *    retain copyright, as specified above.
 *  
 *  HISTORY OF FREE SOFTWARE STATUS
 *  
 *    Originally we required sites to license the software in cases
 *    where they were going to build commercial products/services
 *    around Harvest.  In June 1995 we changed this policy.  We now
 *    allow people to use the core Harvest software (the code found in
 *    the Harvest ``src/'' directory) for free.  We made this change
 *    in the interest of encouraging the widest possible deployment of
 *    the technology.  The Harvest software is really a reference
 *    implementation of a set of protocols and formats, some of which
 *    we intend to standardize.  We encourage commercial
 *    re-implementations of code complying to this set of standards.  
 *  
 */
#include "broker.h"
#include "log.h"
#include "Glimpse/index.h"

/*
 *  GL_do_qlist() has two compile-time variants.  Defining
 *  USE_PARENS_FOR_BOOLEAN selects the one that wraps every boolean
 *  sub-expression in braces; it is forced on here unless the build
 *  has already defined it.
 */
#ifndef USE_PARENS_FOR_BOOLEAN
#define USE_PARENS_FOR_BOOLEAN
#endif

/* Capacity of the "Matched line" string array built in GL_user_query() */
#ifndef MAX_OPDATA_SIZE
#define MAX_OPDATA_SIZE 1024
#endif

/* Global variables */
extern char *DIRpath;		/* passed to the glimpse tools with -H */
extern char *brk_obj_url;	/* not referenced in this file */
extern int qsock;		/* closed in the FORK_ON_BULK child */
extern int IndexType;		/* I_FULL, I_INCR or I_PER_OBJ */
extern int QM_opaqueflag;	/* when not 1, glimpse is run with -l (or -N) */
extern int QM_gotphrase;	/* got a quoted phrase or not */
extern int IndexServer_pid;	/* >0: running server, 0: start/retry, <0: gave up */
extern int IndexServer_ForceRestart;	/* used by #restart-index-server */

extern char *SM_Get_Obj_Filename();	/* only UNIX filesystem SM */

/* Local functions */
#define LOCAL static
LOCAL int GL_Index_Object _PARAMS((reg_t *));
LOCAL char *GL_do_qlist _PARAMS((qlist_t *));
LOCAL char *GL_build_select _PARAMS((qlist_t *));
LOCAL fd_t GL_getfd _PARAMS((char *));
LOCAL void Glimpse_Start_Glimpseserver _PARAMS((void));
LOCAL void Glimpse_Kill_Glimpseserver _PARAMS((void));
LOCAL int Glimpse_Start_Indexing _PARAMS((char *));

/* Local variables */
LOCAL char *GL_Glimpse = NULL;		/* 'glimpse' command */
LOCAL char *GL_GlimpseInd = NULL;	/* 'glimpseindex' command */
LOCAL char *GL_GlimpseServer = NULL;	/* 'glimpseserver' command, or "false" to disable */
LOCAL char *GL_GlimpseSrvHost = NULL;	/* host the glimpseserver is contacted on */
LOCAL int GL_NewObj;		/* objects added since Glimpse_IND_Index_Start() */
LOCAL int GL_GlimpseSrvPort = 0;	/* -1: server disabled; low values: pick a random port */
LOCAL int GL_GlimpseSrvRestart = 0;	/* restart server after this many queries; 0 = never */
LOCAL int GL_caseflag;		/* 1: case insensitive (-i) */
LOCAL int GL_wordflag;		/* 1: match whole words (-w) */
LOCAL int GL_errflag;		/* errors allowed; set to -1 by an EXACT selection */
LOCAL int GL_regexflag;		/* set to 1 by GL_build_select() for EXACT selections */
LOCAL int GL_maxresults = 0;	/* old way */
LOCAL int GL_maxlines = 0;	/* new way */
LOCAL int GL_maxfiles = 0;	/* new way */
LOCAL char *GL_IndexOption = NULL;	/* base glimpseindex option string */
LOCAL char *GL_IndexFlags = NULL;	/* extra glimpseindex flags */
LOCAL int GL_illegal_query = 0;	/* set when an unknown query flag was seen */
LOCAL int GL_ncalled = 0;	/* number of queries against current GLsvr */
LOCAL int GL_lifetime = 15 * 60;	/* per-query time limit in seconds */
LOCAL int GL_max_lifetime = 15 * 60;	/* 15 minutes */

/* Replies sent to the client when a query cannot be handled */
#define BADQ_STR \
	"103 - ERROR: Glimpse Indexer cannot support your query.\n"
#define REPLUSWORD_ERR \
	"103 - ERROR: Glimpse Indexer cannot support regular expression matching on word boundaries!\n"

#define BIG_BUFSIZ	(8*BUFSIZ)	/* for very long lines */

/*
 *  Glimpse_Start_Glimpseserver - launch glimpseserver as a child process.
 *
 *  Does nothing but mark the port as -1 when the configured server
 *  command is "false".  Otherwise a port is chosen if none usable is
 *  set, the server is forked and, after a short grace period, probed
 *  with kill(pid, 0).  IndexServer_pid ends up as the child's pid on
 *  success, 0 if another attempt should be made later, or negative
 *  once the attempts are exhausted (or fork() itself failed).
 */
LOCAL void Glimpse_Start_Glimpseserver()
{
	static char cmdline[BUFSIZ];
	static int attempts = 0;	/* consecutive failed starts */
	int disabled;

	disabled = (strcasecmp(GL_GlimpseServer, "false") == 0);
	if (disabled) {
		GL_GlimpseSrvPort = -1;
		return;
	}
	if (!(GL_GlimpseSrvPort > IPPORT_USERRESERVED)) {
		/* no usable port configured: pick one in 16384-32767 */
#if defined(HAVE_SRAND48) && defined(HAVE_LRAND48)
		srand48(time(NULL));
		GL_GlimpseSrvPort = (lrand48() & 0x3FFF) | 0x4000;
#else
		srand(time(NULL));
		GL_GlimpseSrvPort = (rand() & 0x3FFF) | 0x4000;
#endif
	}
	Log("Starting %s on port %d.\n", GL_GlimpseServer, GL_GlimpseSrvPort);
	sprintf(cmdline, "%s -H %s -K %d", GL_GlimpseServer, DIRpath,
	    GL_GlimpseSrvPort);
	Debug(102, 1, ("\tcommand:%s:\n", cmdline));

	/* fork(), not vfork(): vfork() causes memory leaks here */
	IndexServer_pid = fork();
	if (IndexServer_pid == 0) {
		/* child: become the glimpseserver */
		char *argv[64];

		close_all_fds(3);
		memset(argv, '\0', sizeof(argv));
		parse_argv(argv, cmdline);
		execvp(argv[0], argv);
		perror(argv[0]);
		_exit(1);
	}
	/* parent from here on */
	attempts++;
	if (IndexServer_pid < 0) {
		/*
		 *  The pid stays negative on purpose so that do_query
		 *  does not try to start the server again.
		 */
		log_errno("fork");
		return;
	}
	sleep(5);		/* let glimpseserver settle */

	/* The server may have died already, e.g. if it could not bind. */
	if (kill(IndexServer_pid, 0) >= 0) {
		Log("%s (pid %d) is on-line...\n", GL_GlimpseServer,
		    IndexServer_pid);
		attempts = 0;
		return;
	}
	Log("%s failed to start.  The full command is\n    %s\n",
	    GL_GlimpseServer, cmdline);
	if (attempts > 3) {
		/* give up for good after repeated failures */
		IndexServer_pid = -1;
		GL_GlimpseSrvPort = 0;
	} else {
		IndexServer_pid = 0;	/* allow another attempt */
	}
}

/*
 *  Glimpse_Kill_Glimpseserver - shut down the running glimpseserver.
 *
 *  Sends SIGUSR1 (clean up), then SIGTERM, polls for up to about a
 *  minute for the process to disappear and finally sends SIGKILL.
 *  IndexServer_pid is reset to 0 in every case.
 */
LOCAL void Glimpse_Kill_Glimpseserver()
{
	int polls_left;

	if (IndexServer_pid <= 0) {
		IndexServer_pid = 0;
		return;
	}
	Log("Killing glimpseserver (pid %d)...\n", IndexServer_pid);
	(void) kill(IndexServer_pid, SIGUSR1);	/* ask it to clean up */
	sleep(5);
	(void) kill(IndexServer_pid, SIGTERM);	/* ask it to exit */

	/* poll once a second while the process can still be signalled */
	for (polls_left = 59; polls_left > 0; polls_left--) {
		if (kill(IndexServer_pid, 0) != 0)
			break;
		sleep(1);
	}
	(void) kill(IndexServer_pid, SIGKILL);	/* no more asking */

	/*
	 *  There is no extra wait for the port to be released any more:
	 *  it was not working on Solaris 2.4, and glimpseserver now uses
	 *  SO_REUSEADDR instead.  -DW  7/11/95
	 */
	IndexServer_pid = 0;
}

/*
 *  Glimpse_Start_Indexing - run a glimpseindex command and wait for it.
 *
 *  comm is the full command line; it is split with parse_argv() and
 *  exec'd in a child.  A running glimpseserver is killed first and,
 *  unless the server is configured as "false", started again once
 *  the indexer is done.  Returns ERROR only when fork() fails.
 */
LOCAL int Glimpse_Start_Indexing(comm)
     char *comm;
{
	int child;
	int wstatus = 0;

	/* a running glimpseserver has to go before the index changes */
	if (IndexServer_pid > 0 && GL_GlimpseSrvPort > 0)
		Glimpse_Kill_Glimpseserver();

	Debug(102, 1, ("\tcommand: %s:\n", comm));

	/* fork(), not vfork(): vfork() causes memory leaks here */
	child = fork();
	if (child < 0) {
		log_errno("fork");
		return ERROR;
	}
	if (child == 0) {
		/* child: become glimpseindex */
		char *argv[64];

		close_all_fds(3);
		memset(argv, '\0', sizeof(argv));
		parse_argv(argv, comm);
		execvp(argv[0], argv);
		perror(argv[0]);
		_exit(1);
	}
	/* parent: explicitly wait for the indexer */
	Log("Waiting for glimpseindex to finish...\n");
	for (;;) {
		if (waitpid(child, &wstatus, WNOHANG) == child)
			break;
		select_loop(15, 0, 0);	/* deny outside connections */
		if (kill(child, 0) != 0)
			break;	/* child died and sigreap collected it */
	}

	/* bring the glimpseserver back unless it is disabled */
	if (strcasecmp(GL_GlimpseServer, "false") != 0) {
		Glimpse_Start_Glimpseserver();
		GL_ncalled = 0;
	}
	return SUCCESS;
}

/* ----------------------------------------------------------------- *
 * GL_Index_Object -- using glimpse -a to index a single object
 * ----------------------------------------------------------------- */
LOCAL int GL_Index_Object(entry)
     reg_t *entry;
{
	static char comm[BUFSIZ];
	char *fn = NULL;

	fn = SM_Get_Obj_Filename(entry->FD);
	sprintf(comm, "%s %s %s -a -H %s %s", GL_GlimpseInd, GL_IndexOption,
	    GL_IndexFlags, DIRpath, fn);
	xfree(fn);
	fn = NULL;

	return (Glimpse_Start_Indexing(comm));
}


/* ----------------------------------------------------------------- *
 * GL_bulk_query - do bulk transfer of all objects that match the query
 *
 * Reads glimpse output lines from indexfp, extracts the object fd of
 * each line and sends every object whose update_time is at least
 * ptime to rsock.  Consecutive lines naming the same object are sent
 * only once.  Returns ERROR if rsock cannot be wrapped in a stream,
 * SUCCESS otherwise.  The stream wrapped around rsock is fclose()d
 * before returning.
 * ----------------------------------------------------------------- */
LOCAL int GL_bulk_query(rsock, indexfp, ptime)
     int rsock;
     FILE *indexfp;
     time_t ptime;
{
	static char ret[BIG_BUFSIZ];
	fd_t qfd;
	fd_t oldfd = -1;	/* fd named by the previous valid line */
	int cnt = 0;
	reg_t *bentry = NULL;
	FILE *fp = NULL;

	if ((fp = fdopen(rsock, "w")) == NULL) {
		log_errno("fdopen");
		QM_send_bulk_err(rsock);
		return ERROR;
	}
	QM_send_bulk_begin(rsock);
	while (fgets(ret, BIG_BUFSIZ, indexfp) != NULL) {
		qfd = GL_getfd(ret);
		if (qfd == ERROR || qfd == oldfd)
			continue;	/* not an object line, or a repeat */
		oldfd = qfd;	/* remember it so repeats are skipped */

		if ((bentry = RG_Get_Entry(qfd)) == NULL)
			continue;
		if (bentry->update_time < ptime)
			continue;	/* not modified since ptime */
		if (QM_send_bulk_fd(qfd, fp, bentry) == SUCCESS)
			cnt++;
	}
	fflush(fp);		/* critical, must flush before termination */
	QM_send_bulk_end(rsock);
	Debug(102, 1, ("GL_bulk_query: sent %d objects\n", cnt));

	fclose(fp);
	return SUCCESS;
}

/* ----------------------------------------------------------------- *
 * GL_del_query -- delete all objects that match the query. 
 * ----------------------------------------------------------------- */
LOCAL int GL_del_query(rsock, indexfp)
     int rsock;
     FILE *indexfp;
{
	static char ret[BIG_BUFSIZ];
	fd_t qfd, oldfd = -1;
	int cnt = 0;
	reg_t *rme = NULL;

	while (fgets(ret, BIG_BUFSIZ, indexfp) != NULL) {
		if (((qfd = GL_getfd(ret)) != ERROR) &&
		    (qfd != oldfd) &&
		    ((rme = RG_Get_Entry(qfd)) != NULL)) {
			COL_DEL_Obj(rme);
			cnt++;
		}
	}
	Log("Deleted %d objects based on query.\n", cnt);
	return SUCCESS;
}

/* ----------------------------------------------------------------- *
 * GL_user_query -- Read the output of the Glimpse query on indexfp, then
 * send to rsock via protocol.
 * ----------------------------------------------------------------- */
LOCAL int GL_user_query(rsock, indexfp)
     int rsock;
     FILE *indexfp;
{
	fd_t fd1;
	fd_t fd2 = (fd_t) (-1);
	char *inb = NULL;
	char *opb = NULL;
	char **opdata = NULL;
	char *tmp = NULL;
	char *s = NULL;
	int opsize = 0;
	int obcnt = 0;
	int i;

	/* If the query was illegal, give up quickly */
	if (GL_illegal_query) {
		SWRITE(rsock, BADQ_STR, strlen(BADQ_STR));
		return ERROR;
	}
	/*
	 *  Before we return the query results, we perform 2 write's on
	 *  the socket to the client to test whether or not the client
	 *  will be able to receive the query results.
	 *  We have to do two writes because the first will complete 
	 *  even though the other side is gone.
	 */
	(void) write(rsock, PIPECHK, strlen(PIPECHK));
	if (write(rsock, PIPECHK, strlen(PIPECHK)) == -1) {
		errorlog("Client is gone -- aborting user query results.\n");
		close(rsock);
		return ERROR;
	}
	opdata = (char **) xmalloc(MAX_OPDATA_SIZE * sizeof(char *));
	inb = (char *) xmalloc(BIG_BUFSIZ * sizeof(char));
	opb = (char *) xmalloc(BIG_BUFSIZ * sizeof(char));

	while (fgets(inb, BIG_BUFSIZ, indexfp) != NULL) {
		if ((fd1 = GL_getfd(inb)) == ERROR) {
			if (!strncmp(inb, "glimpse:", 8))	/* a msg */
				Log("%s", inb);
			continue;
		}
		if ((fd1 != fd2) && (fd2 != (fd_t) (-1))) {
			/* return the previous object */
			Debug(102, 1, ("GL_user_query: Calling QM_user_object(rsock=%d, fd2=%d, opsize=%d, opdata=%08x)\n", rsock, fd2, opsize, opdata));
			if (QM_user_object(rsock, fd2, opsize, opdata)
			    == SUCCESS)
				obcnt++;

			Debug(102, 1, ("GL_user_query: About to free %d opaque strings\n", opsize));
			/* free the opaque data */
			for (i = 0; i < opsize; i++) {
				Debug(102, 1, ("GL_user_query: freeing opdata[%d]\n", i));
				xfree(opdata[i]);
				opdata[i] = (char *) NULL;
			}
			opsize = 0;
		}
		/* 
		 *  If there's a : in the output, then Glimpse is 
		 *  displaying a matched line.
		 */
		if ((tmp = strchr(inb, ':')) != NULL) {
			if (s = strrchr(tmp, '\n'))
				*s = '\0';
			sprintf(opb, "Matched line: %s", ++tmp);
			Debug(102, 1, ("GL_user_query: got matched line %d\n", opsize));
			if (opsize < MAX_OPDATA_SIZE)
				opdata[opsize++] = xstrdup(opb);
		}
		fd2 = fd1;
	}

	/* Get the last object */
	if (fd2 != (fd_t) (-1)) {
		Debug(102, 1, ("GL_user_query: Calling QM_user_object(rsock=%d, fd2=%d, opsize=%d, opdata=%08x)\n", rsock, fd2, opsize, opdata));
		if (QM_user_object(rsock, fd2, opsize, opdata) == SUCCESS)
			obcnt++;
	}
	QM_user_done(rsock, obcnt);

	/* free the opaque data */
	Debug(102, 1, ("GL_user_query: About to free %d opaque strings\n", opsize));
	for (i = 0; i < opsize; i++) {
		Debug(102, 1, ("GL_user_query: freeing opdata[%d]\n", i));
		xfree(opdata[i]);
		opdata[i] = (char *) NULL;
	}
	opsize = 0;
	xfree(opdata);
	xfree(inb);
	xfree(opb);

	return SUCCESS;
}

/* ----------------------------------------------------------------- *
 * GL_do_qlist -- Recursive function to build a query from the list.
 * ----------------------------------------------------------------- */
LOCAL char *GL_do_qlist(ql)
     qlist_t *ql;
{
	char *ll = NULL;
	char *rl = NULL;
	static char *nl = NULL;
#ifdef USE_PARENS_FOR_BOOLEAN

	if (ql->type == LOGICAL) {
		if (ql->op == NOT) {
			return NULL;
		}
		if ((ll = GL_do_qlist((qlist_t *) ql->llist)) == NULL) {
			return NULL;
		}
		if ((rl = GL_do_qlist((qlist_t *) ql->rlist)) == NULL) {
			xfree(ll);
			ll = NULL;
			return NULL;
		}
		nl = (char *) xmalloc(SEL_SIZE);
		nl[0] = '{';
		nl[1] = '\0';
		strcat(nl, ll);

		switch (ql->op) {
		case AND:
			strncat(nl, ";", 1);
			break;
		case OR:
			strncat(nl, ",", 1);
			break;
		default:
			xfree(nl);
			nl = NULL;
			xfree(rl);
			rl = NULL;
			xfree(ll);
			ll = NULL;
			return NULL;
		}
		strcat(nl, rl);
		strcat(nl, "}");

		xfree(ll);
		ll = NULL;
		xfree(rl);
		rl = NULL;

		return (nl);
	}
	return (GL_build_select(ql));
#else
	if (ql->type == LOGICAL) {
		if (ql->op == NOT) {
			return NULL;
		}
		if ((ll = GL_do_qlist((qlist_t *) ql->llist)) == NULL) {
			return NULL;
		}
		if ((rl = GL_do_qlist((qlist_t *) ql->rlist)) == NULL) {
			xfree(ll);
			ll = NULL;
			return NULL;
		}
		switch (ql->op) {
		case AND:
			strncat(ll, ";", 1);
			break;
		case OR:
			strncat(ll, ",", 1);
			break;
		default:
			xfree(rl);
			rl = NULL;
			xfree(ll);
			ll = NULL;
			return NULL;
		}
		strcat(ll, rl);
		xfree(rl);
		rl = NULL;
		return (ll);
	}
	return (GL_build_select(ql));
#endif
}

/* ----------------------------------------------------------------- *
 * GL_build_select -- Build the basic Glimpse query. 
 *
 * Handles EXACT and REGEX nodes only.  The pattern is "llist=rlist"
 * when llist is set and "rlist" otherwise.  The node's llist/rlist
 * strings are consumed: they are freed and set to NULL.  An EXACT
 * node also sets GL_regexflag to 1 and GL_errflag to -1.
 *
 * Returns a string allocated with xmalloc(SEL_SIZE) that the caller
 * must free, or NULL for any other operator, for a node whose rlist
 * is NULL (e.g. one that was already consumed), or when the pattern
 * does not fit in SEL_SIZE bytes.
 * ----------------------------------------------------------------- */
LOCAL char *GL_build_select(ql)
     qlist_t *ql;
{
	char *tmp = NULL;
	int n;

	if (ql->op != EXACT && ql->op != REGEX)
		return NULL;

	if (ql->op == EXACT) {
		GL_regexflag = 1;
		GL_errflag = -1;
	}
	tmp = (char *) xmalloc(SEL_SIZE);
	tmp[0] = '\0';

	if (ql->rlist == NULL) {
		n = -1;		/* nothing to search for */
	} else if (ql->llist) {
		n = snprintf(tmp, SEL_SIZE, "%s=%s",
		    (char *) ql->llist, (char *) ql->rlist);
	} else {
		n = snprintf(tmp, SEL_SIZE, "%s", (char *) ql->rlist);
	}

	/* the operands are consumed whether or not the pattern fit */
	if (ql->rlist) {
		xfree(ql->rlist);
		ql->rlist = NULL;
	}
	if (ql->llist) {
		xfree(ql->llist);
		ql->llist = NULL;
	}
	if (n < 0 || n >= (int) SEL_SIZE) {
		/* a truncated pattern is a different query; reject it */
		xfree(tmp);
		return NULL;
	}
	return (tmp);
}

/* ----------------------------------------------------------------- *
 * GL_getfd -- Get the fd of the Glimpse return.
 *
 * Looks for the first "/OBJ" in instr and converts the digits that
 * follow it with atol().  Returns ERROR when the marker is absent.
 * ----------------------------------------------------------------- */
LOCAL fd_t GL_getfd(instr)
     char *instr;
{
	char *marker = strstr(instr, "/OBJ");

	if (marker != NULL)
		return ((fd_t) atol(marker + (sizeof("/OBJ") - 1)));
	return ERROR;
}

/*
 *  GL_query_inline_timeout - SIGALRM handler for inline queries.
 *
 *  Asks the local glimpseserver, if there is one, to clean up by
 *  sending it SIGUSR1.  The signal is only sent when IndexServer_pid
 *  is positive: kill() with pid 0 would signal the broker's own
 *  process group and pid -1 every process it may signal.
 *
 *  NOTE(review): Log() is called from signal context; confirm it is
 *  safe to use there.
 */
LOCAL void GL_query_inline_timeout(sig)
     int sig;
{
	if (IndexServer_pid > 0) {
		Log("Inline query timeout.  Sending SIGUSR1 to glimpseserver\n");
		kill(IndexServer_pid, SIGUSR1);
	} else {
		Log("Inline query timeout.  No local glimpseserver to signal\n");
	}
}


/* ----------------------------------------------------------------- *
 * GL_do_query_inline -- the broker directly contacts glimpseserver
 * 
 * has minimal error reporting because we use external glimpse
 * as a fallback.
 *
 * Only user queries (qflag == UQUERY) are handled.  The request is
 * sent as pid, argc and the NUL-terminated argv strings; the reply is
 * read with GL_user_query() under a GL_lifetime second alarm.
 *
 * Returns FAIL when the query could not be attempted, ERROR when the
 * pattern could not be built, otherwise whatever GL_user_query()
 * returned.  ptime is only used for debugging output.
 * ----------------------------------------------------------------- */
LOCAL int GL_do_query_inline(ql, rsock, qflag, ptime)
     qlist_t *ql;
     int rsock;
     int qflag;
     time_t ptime;
{
	Host *h = NULL;
	int gl_sock;
	struct sockaddr_in sa;
	char *argv[64];
	int argc;
	static char tbuf[64];
	int i;
	int pid;
	int limit;
	FILE *fp = NULL;
	int err;
	char *patstr = NULL;
	void (*alrmfunc) () = NULL;
	int alarm_armed = 0;	/* set once our SIGALRM handler is installed */

	Debug(102, 1, ("GL_do_query_inline: ql=%p, rsock=%d, qflag=%d, ptime=%ld\n", (void *) ql, rsock, qflag, (long) ptime));

	if (qflag != UQUERY) {	/* only do user queries here */
		errorlog("GL_do_query_inline: Only USER queries handled here.\n");
		return FAIL;
	}
	if (!strcasecmp(GL_GlimpseServer, "false")) {
		errorlog("GL_do_query_inline: GL_GlimpseServer is 'false'.\n");
		return FAIL;
	}
	/*      Create a stream socket to glimpseserver                 */
	if ((h = get_host(GL_GlimpseSrvHost)) == (Host *) NULL) {
		errorlog("GL_do_query_inline: Failed to lookup '%s'.\n",
		    GL_GlimpseSrvHost);
		return FAIL;
	}
	if ((gl_sock = socket(PF_INET, SOCK_STREAM, 0)) < 0) {
		log_errno2(__FILE__, __LINE__, "GL_do_query_inline: socket");
		return FAIL;
	}
	memset(&sa, '\0', sizeof(sa));	/* no stack garbage in unused fields */
	memcpy(&sa.sin_addr, h->ipaddr, h->addrlen);
	sa.sin_family = AF_INET;
	sa.sin_port = htons(GL_GlimpseSrvPort);
	Debug(102, 1, ("GL_do_query_inline: connecting to %s, port %d\n",
		inet_ntoa(sa.sin_addr), GL_GlimpseSrvPort));
	if (connect(gl_sock, (struct sockaddr *) &sa, sizeof(sa)) < 0) {
		log_errno2(__FILE__, __LINE__, "GL_do_query_inline: connect");
		close(gl_sock);
		return FAIL;
	}
	/*      Build command line args to pass to glimpseserver        */
	argc = 0;
	argv[argc++] = xstrdup("glimpse");
	argv[argc++] = xstrdup("-a");
	argv[argc++] = xstrdup("-H");
	argv[argc++] = xstrdup(DIRpath);
	argv[argc++] = xstrdup("-C");
	argv[argc++] = xstrdup("-J");
	argv[argc++] = xstrdup(GL_GlimpseSrvHost);
	argv[argc++] = xstrdup("-K");
	sprintf(tbuf, "%d", GL_GlimpseSrvPort);
	argv[argc++] = xstrdup(tbuf);
	argv[argc++] = xstrdup("-y");
	if (GL_errflag > 0) {
		sprintf(tbuf, "-%d", GL_errflag);
		argv[argc++] = xstrdup(tbuf);
	}
	/*
	 *  -L results[:files[:lines]].  The lines limit is only given
	 *  together with a files limit, as in Glimpse_IND_do_query();
	 *  on its own it would be read as the files limit.  Each
	 *  variant is formatted in one go: using tbuf as both source
	 *  and destination of sprintf() is undefined.
	 */
	argv[argc++] = xstrdup("-L");
	limit = GL_maxresults < 1 ? 1000 : GL_maxresults;
	if (GL_maxfiles > 0 && GL_maxlines > 0)
		sprintf(tbuf, "%d:%d:%d", limit, GL_maxfiles, GL_maxlines);
	else if (GL_maxfiles > 0)
		sprintf(tbuf, "%d:%d", limit, GL_maxfiles);
	else
		sprintf(tbuf, "%d", limit);
	argv[argc++] = xstrdup(tbuf);
	if (GL_caseflag == 1) {
		argv[argc++] = xstrdup("-i");
	}
	if (GL_wordflag == 1) {
		argv[argc++] = xstrdup("-w");
	}
	if (QM_opaqueflag != 1) {
#ifdef GLIMPSE_3
		if ((GL_caseflag == 1) && (GL_wordflag == 1))
			argv[argc++] = xstrdup("-N");
		else
#endif
			argv[argc++] = xstrdup("-l");
	}
	if ((patstr = GL_do_qlist(ql)) == (char *) NULL) {
		err = ERROR;
		goto query_inline_done;
	}
	argv[argc++] = patstr;	/* patstr is already malloc'd */

	/*      Write the "request" to glimpseserver                    */
	/*      format is pid,argc,argv[0],argv[1],...                  */

	pid = (int) getpid();
	tbuf[0] = (pid >> 24) & 0xFF;
	tbuf[1] = (pid >> 16) & 0xFF;
	tbuf[2] = (pid >> 8) & 0xFF;
	tbuf[3] = pid & 0xFF;
	write(gl_sock, tbuf, 4);

	tbuf[0] = (argc >> 24) & 0xFF;
	tbuf[1] = (argc >> 16) & 0xFF;
	tbuf[2] = (argc >> 8) & 0xFF;
	tbuf[3] = argc & 0xFF;
	write(gl_sock, tbuf, 4);

	for (i = 0; i < argc; i++) {
		write(gl_sock, argv[i], strlen(argv[i]) + 1);
		write(gl_sock, "\n", 1);
		Debug(102, 5, ("GL_do_query_inline: Passed argv[%d]: %s\n",
			i, argv[i]));
	}
	shutdown(gl_sock, 1);	/* no more writing to glimpseserver */

	/*      Read the query results                                  */

	alrmfunc = signal(SIGALRM, GL_query_inline_timeout);
	alarm_armed = 1;
	alarm(GL_lifetime);

	if ((fp = fdopen(gl_sock, "r")) != (FILE *) NULL) {
		err = GL_user_query(rsock, fp);
	} else {
		err = FAIL;
	}

      query_inline_done:
	/*
	 *  Only undo what was done: the early exit above gets here
	 *  before the handler is installed, and must leave the
	 *  previous SIGALRM handler and any pending alarm alone.
	 */
	if (alarm_armed) {
		alarm(0);
		signal(SIGALRM, alrmfunc);
	}
	for (i = 0; i < argc; i++) {
		xfree(argv[i]);
		argv[i] = NULL;
	}
	if (fp != NULL)
		fclose(fp);	/* closes gl_sock as well */
	else
		close(gl_sock);
	return err;
}



/* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 
 * PUBLIC FUNCTIONS
 * XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */

/* ----------------------------------------------------------------- *
 * IND_New_Object -- index a new object
 *
 * With per-object indexing the object is indexed right away;
 * otherwise it is only counted so that Glimpse_IND_Index_Flush()
 * knows there is work to do.  Returns the indexing result, or
 * SUCCESS when no indexing was attempted.
 * ----------------------------------------------------------------- */
int Glimpse_IND_New_Object(entry)
     reg_t *entry;
{
	int status;

	status = (IndexType == I_PER_OBJ) ? GL_Index_Object(entry) : SUCCESS;
	if (status != SUCCESS)
		return (status);

	GL_NewObj++;		/* one more object since Index_Start */
	return (status);
}

/* ----------------------------------------------------------------- *
 * IND_Index_Full() -- perform a complete index of all objects.
 *
 * Runs glimpseindex over <DIRpath>/objects with the configured
 * options and flags.  Always returns SUCCESS; the result of the
 * indexing run itself is not examined.
 * ----------------------------------------------------------------- */
int Glimpse_IND_Index_Full()
{
	static char cmd[BUFSIZ];

	Log("Begin Glimpse Full Indexing...\n");

	/* cleared first because PURIFY reported an uninitialized read */
	memset(cmd, '\0', sizeof(cmd));
	sprintf(cmd, "%s %s %s -H %s %s/objects", GL_GlimpseInd,
	    GL_IndexOption, GL_IndexFlags, DIRpath, DIRpath);

	Glimpse_Start_Indexing(cmd);
	Log("Finished Glimpse Full Indexing.\n");

	return SUCCESS;
}


/* ----------------------------------------------------------------- *
 * IND_Index_Incremental -- perform an incremental index
 *
 * Runs glimpseindex -f over <DIRpath>/objects.  When the configured
 * index options contain "-b" a full index is run instead and the
 * substitution is logged.  Always returns SUCCESS.
 * ----------------------------------------------------------------- */
int Glimpse_IND_Index_Incremental()
{
	static char cmd[BUFSIZ];
	int byte_level;

	Log("Begin Glimpse Incremental Indexing...\n");

	byte_level = (strstr(GL_IndexOption, "-b") != NULL);
	memset(cmd, '\0', sizeof(cmd));

	if (!byte_level) {
		sprintf(cmd, "%s %s %s -f -H %s %s/objects", GL_GlimpseInd,
		    GL_IndexOption, GL_IndexFlags, DIRpath, DIRpath);
	} else {
		/* no -f with byte-level indexes: fall back to a full run */
		sprintf(cmd, "%s %s %s -H %s %s/objects", GL_GlimpseInd,
		    GL_IndexOption, GL_IndexFlags, DIRpath, DIRpath);
		Log("Sorry, but Glimpse incremental indexing is not supported with byte-level indexes (-b).\n");
		Log("Using full indexing instead: %s\n", cmd);
	}

	Glimpse_Start_Indexing(cmd);
	Log("Finished Glimpse Incremental Indexing.\n");

	return SUCCESS;
}

/* ----------------------------------------------------------------- *
 * IND_Index_Start -- prepare for indexing a stream of objects.
 *
 * Resets the count of new objects that Glimpse_IND_New_Object()
 * increments and Glimpse_IND_Index_Flush() checks.
 * ----------------------------------------------------------------- */
int Glimpse_IND_Index_Start()
{
	int status = SUCCESS;

	GL_NewObj = 0;		/* nothing pending yet */
	return status;
}

/* ----------------------------------------------------------------- *
 * IND_Index_Flush -- finish indexing a stream of objects.
 *
 * Does nothing when no object arrived since Glimpse_IND_Index_Start().
 * Otherwise runs the indexing operation selected by IndexType;
 * per-object indexing has nothing left to do at this point.
 * ----------------------------------------------------------------- */
int Glimpse_IND_Index_Flush()
{
	if (GL_NewObj <= 0)
		return SUCCESS;

#ifdef USE_INCREMENTAL_OPTIMIZATION
	/* 
	 *  Optimization: if the new objects amount to less than roughly
	 *  a tenth of the current registry size, an incremental run is
	 *  used regardless of IndexType.
	 */
	if ((int) (RG_Count_Reg() / 10) > (int) GL_NewObj) {
		Log("Performing an incremental indexing, since less than 10%% of the objects has changed.\n");
		return (Glimpse_IND_Index_Incremental());
	}
#endif
	/* the configured default operation */
	if (IndexType == I_FULL)
		return (Glimpse_IND_Index_Full());
	if (IndexType == I_INCR)
		return (Glimpse_IND_Index_Incremental());
	if (IndexType != I_PER_OBJ)
		fatal("Glimpse_IND_Index_Flush: Internal error.\n");

	return SUCCESS;
}

/* ----------------------------------------------------------------- *
 * IND_Destroy_Obj -- remove an object from the indexer.
 *
 * Nothing is done for Glimpse; entry is ignored and SUCCESS is
 * always returned.
 * ----------------------------------------------------------------- */
int Glimpse_IND_Destroy_Obj(entry)
     reg_t *entry;
{
	(void) entry;		/* intentionally unused */
	return SUCCESS;
}

/* ----------------------------------------------------------------- *
 * IND_initialize -- initialize interface to indexer
 *
 * Installs the defaults: full indexing with the "-b -s -T -B -M 10"
 * options, the stock command names, this host as the glimpseserver
 * host and no fixed port or restart interval.
 * ----------------------------------------------------------------- */
/*
 * ** PURIFY: reports the strdup()'s below as memory leaks.
 */
int Glimpse_IND_initialize()
{
	/* commands */
	GL_Glimpse = xstrdup("glimpse");
	GL_GlimpseInd = xstrdup("glimpseindex");
	GL_GlimpseServer = xstrdup("glimpseserver");

	/* indexing */
	IndexType = I_FULL;
	GL_IndexOption = xstrdup("-b -s -T -B -M 10");
	GL_IndexFlags = xstrdup("");

	/* glimpseserver */
	GL_GlimpseSrvHost = xstrdup(getfullhostname());
	GL_GlimpseSrvPort = 0;
	GL_GlimpseSrvRestart = 0;
	GL_ncalled = 0;

	return SUCCESS;
}

/* ----------------------------------------------------------------- *
 * IND_config -- configure indexer specific variables 
 * ----------------------------------------------------------------- */
int Glimpse_IND_config(value, tag)
     char *value;
     char *tag;
{
	if (tag == NULL || value == NULL)
		return ERROR;
	Debug(102, 1, ("Glimpse Configuration: %s %s\n", value, tag));

	if (strcasecmp(tag, S_GLIMPSE) == 0) {
		GL_Glimpse = xstrdup(value);
		if (verify_exe(GL_Glimpse) == ERROR)
			return ERROR;
	} else if (strcasecmp(tag, S_GLIMPSEIND) == 0) {
		GL_GlimpseInd = xstrdup(value);
		if (verify_exe(GL_GlimpseInd) == ERROR)
			return ERROR;
	} else if (strcasecmp(tag, S_GLIMPSESERVER) == 0) {
		GL_GlimpseServer = xstrdup(value);
		if (strcasecmp(GL_GlimpseServer, "false") &&
		    verify_exe(GL_GlimpseServer) == ERROR)
			return ERROR;
	} else if (strcasecmp(tag, S_GLIMPSESRVHOST) == 0) {
		GL_GlimpseSrvHost = xstrdup(value);
	} else if (strcasecmp(tag, S_GLIMPSESRVPORT) == 0) {
		sscanf(value, "%d", &GL_GlimpseSrvPort);
	} else if (strcasecmp(tag, S_GLIMPSEMAXLIFE) == 0) {
		if (sscanf(value, "%d", &GL_max_lifetime) != 1)
			fatal("sscanf GL_max_lifetime failed");
	} else if (strcasecmp(tag, S_GLIMPSESRVRESTART) == 0) {
		if (sscanf(value, "%d", &GL_GlimpseSrvRestart) != 1)
			fatal("sscanf GL_GlimpseSrvRestart failed");
	} else if (strcasecmp(tag, S_GLIMPSEINDOPT) == 0) {
		if (strcasecmp(value, "Fast-Search") == 0) {
			GL_IndexOption = xstrdup("-b -s -T -B -M 10");
		} else if (strcasecmp(value, "Medium") == 0) {
			GL_IndexOption = xstrdup("-o -s -T -B -M 10");
		} else if (strcasecmp(value, "Small-Index") == 0) {
			GL_IndexOption = xstrdup("-s -T -B -M 10");
		}
		return SUCCESS;
	} else if (strcasecmp(tag, S_GLIMPSEINDEXTOPT) == 0) {
		GL_IndexFlags = xstrdup(value);
	}
	return SUCCESS;
}

/* ----------------------------------------------------------------- *
 * IND_do_query -- process a query string 
 * ----------------------------------------------------------------- */
int Glimpse_IND_do_query(ql, rsock, qflag, ptime)
     qlist_t *ql;
     int rsock, qflag;
     time_t ptime;
{
	FILE *indexfp = NULL;
	static char commandstr[BUFSIZ];
	static char xbuf[BUFSIZ];
	char *patstr = NULL;
	char *tfn = NULL;
	int err = SUCCESS;

	GL_ncalled++;

	/* Run the glimpseserver if it hasn't been started yet. */
	if (GL_ncalled == 1 && GL_GlimpseSrvPort > -1 && IndexServer_pid == 0) {
		Glimpse_Start_Glimpseserver();
	}
	if (GL_do_query_inline(ql, rsock, qflag, ptime) == SUCCESS) {
		err = SUCCESS;
		Debug(102, 1, ("Inline query successful!\n"));
		goto do_query_done;
	}
	strcpy(commandstr, GL_Glimpse);		/* glimpse command */

	strcat(commandstr, " -a");	/* print SOIF attrributes */

	/*  Use glimpseserver if the port is defined */
	if (GL_GlimpseSrvPort > 0 && IndexServer_pid > 0) {
		sprintf(xbuf, " -C -J %s -K %d",
		    GL_GlimpseSrvHost,
		    GL_GlimpseSrvPort);
		strcat(commandstr, xbuf);
	}
	/* Always give the directory.  If client/server fails, glimpse
	 * will search the filesystem itself */
	sprintf(xbuf, " -H %s", DIRpath);
	strcat(commandstr, xbuf);

	strcat(commandstr, " -y");	/* force the search */

	if (GL_errflag < 1) {
		/* usually 0 or -1 */
		sprintf(xbuf, " -%d", GL_errflag);
		strcat(commandstr, xbuf);
	}
	/* Glimpse 2.0 can limit the result set; always use -L 1000 if undef */
	sprintf(xbuf, " -L %d", GL_maxresults < 1 ? 1000 : GL_maxresults);
	strcat(commandstr, xbuf);
	if (GL_maxfiles > 0) {
		sprintf(xbuf, ":%d", GL_maxfiles);
		strcat(commandstr, xbuf);
	}
	if (GL_maxfiles > 0 && GL_maxlines > 0) {
		sprintf(xbuf, ":%d", GL_maxlines);
		strcat(commandstr, xbuf);
	}
	/* Add Glimpse flags */
	if (GL_caseflag == 1) {
		strcat(commandstr, " -i");
	}
	if (GL_wordflag == 1) {
		strcat(commandstr, " -w");
	}
	if (QM_opaqueflag != 1) {
#ifdef GLIMPSE_3
		if ((GL_caseflag == 1) && (GL_wordflag == 1))
			strcat(commandstr, " -N");
		else
#endif
			strcat(commandstr, " -l");
	}
	/* Generate Glimpse pattern to search */
	patstr = GL_do_qlist(ql);

	/* Check to see if the user did a regular expression + word match */
	if (qflag == UQUERY && QM_gotphrase == 1 && GL_wordflag == 1 &&
	    patstr != NULL &&
	    (strchr(patstr, '*') ||	/* Glimpse REGEX characters */
		strchr(patstr, '.') ||
		strchr(patstr, '|') ||
		strchr(patstr, '('))) {
		(void) write(rsock, REPLUSWORD_ERR, strlen(REPLUSWORD_ERR));
		close(rsock);
		return ERROR;
	}
	if (patstr != NULL) {
		sprintf(xbuf, " \'%s\'", patstr);
		strcat(commandstr, xbuf);
		xfree(patstr);
		patstr = NULL;

		/* Need a tmpfile for glimpse output */
		if ((tfn = tempnam(NULL, "query")) != NULL) {
			strcat(commandstr, " > ");
			strcat(commandstr, tfn);
		} else {
			SWRITE(rsock, IND_FAIL, IND_FAIL_S);
			return ERROR;	/* shouldn't really happen */
		}

		Debug(102, 1, ("Glimpse search command: %s\n", commandstr));

		/* Run the user query, give only GL_lifetime seconds */
		do_system_lifetime(commandstr, GL_lifetime);

		/* Send USR1 to glimpseserver to tell it to clean up properly */
		if (GL_GlimpseSrvPort > 0 && IndexServer_pid > 0)
			(void) kill(IndexServer_pid, SIGUSR1);

		/* Now process the tempfile that contains the results */
		if ((indexfp = fopen(tfn, "r")) == NULL) {
			log_errno(tfn);
			(void) unlink(tfn);
			xfree(tfn);
			tfn = NULL;
			if (qflag == UQUERY) {
				SWRITE(rsock, IND_FAIL, IND_FAIL_S);
			} else {
				QM_send_bulk_err(rsock);
			}
			(void) close(rsock);
			return ERROR;
		}
		/* Process the glimpse results based on this query type */
		switch (qflag) {
		case QBULK:
#ifdef FORK_ON_BULK
			if (fork() == 0) {	/* child */
				close(qsock);
				(void) GL_bulk_query(rsock, indexfp, ptime);
				(void) fclose(indexfp);
				(void) unlink(tfn);
				(void) close(rsock);
				_exit(0);
			}
			err = SUCCESS;
#else
			err = GL_bulk_query(rsock, indexfp, ptime);
#endif
			break;
		case UQUERY:
			err = GL_user_query(rsock, indexfp);
			break;
		case QDELETE:
			err = GL_del_query(rsock, indexfp);
			break;
		default:
			break;
		}

		/* Clean up */
		(void) fclose(indexfp);
		(void) unlink(tfn);
		xfree(tfn);
		tfn = NULL;
	} else if (qflag == QBULK) {
		QM_send_bulk_err(rsock);
		err = ERROR;
	} else {
		(void) write(rsock, ERR_MSG, strlen(ERR_MSG));
		Log(ERR_MSG);
		err = ERROR;
	}
	(void) close(rsock);	/* close so that results are sent */

      do_query_done:

	/* Support for restarting Glimpseserver after N queries */
	if ((GL_GlimpseSrvRestart > 0 && GL_ncalled >= GL_GlimpseSrvRestart)
	    || IndexServer_ForceRestart) {
		Log("Restarting glimpseserver after %d queries...\n",
		    GL_ncalled);
		Glimpse_Kill_Glimpseserver();
		Glimpse_Start_Glimpseserver();
		GL_ncalled = 0;
		IndexServer_ForceRestart = 0;
	}
	return err;
}

/* ----------------------------------------------------------------- *
 * IND_Init_Flags -- initialize query parser flags 
 *
 * Puts every per-query setting back to its default before the flags
 * of a new query are applied with Glimpse_IND_Set_Flags().
 * ----------------------------------------------------------------- */
void Glimpse_IND_Init_Flags()
{
	/* matching behaviour */
	GL_caseflag = 1;	/* case insensitive unless told otherwise */
	GL_wordflag = 0;	/* no whole-word matching */
	GL_errflag = 0;		/* no errors allowed */
	GL_regexflag = 0;	/* not a regular expression */

	/* result limits: 0 means not set */
	GL_maxresults = 0;	/* hits in the result set */
	GL_maxfiles = 0;	/* objects in the result set */
	GL_maxlines = 0;	/* lines per object */

	/* bookkeeping */
	GL_illegal_query = 0;	/* no unsupported flag seen yet */
	GL_lifetime = GL_max_lifetime;	/* full time allowance again */
}

/* ----------------------------------------------------------------- *
 * IND_Set_Flags -- set query parser flag
 *
 * flag names the setting (compared case-insensitively) and val is
 * its optional value.  Recognized flags: error, timeout, matchword,
 * case, maxresult, maxlines, maxfiles.  Anything else -- including a
 * max* flag without a value -- marks the query as illegal.  A NULL
 * flag is ignored.
 * ----------------------------------------------------------------- */
void Glimpse_IND_Set_Flags(flag, val)
     char *flag;
     char *val;
{
	int n;

	if (flag == NULL)
		return;

	if (!strcasecmp(flag, "error")) {
		/* allowed errors, clamped to 0..3; 0 without a value */
		n = 0;
		if (val != NULL) {
			n = atoi(val);
			if (n < 0)
				n = 0;
			if (n > 3)
				n = 3;
		}
		GL_errflag = n;
		return;
	}
	if (!strcasecmp(flag, "timeout")) {
		/* seconds, kept within 10 .. GL_max_lifetime */
		if (val != NULL)
			GL_lifetime = atoi(val);
		if (GL_lifetime < 10)
			GL_lifetime = 10;
		if (GL_lifetime > GL_max_lifetime)
			GL_lifetime = GL_max_lifetime;
		return;
	}
	if (!strcasecmp(flag, "matchword")) {
		GL_wordflag = 1;
		return;
	}
	if (!strcasecmp(flag, "case")) {
		/* unknown or missing values leave the setting alone */
		if (val != NULL && !strcasecmp(val, "insensitive"))
			GL_caseflag = 1;
		else if (val != NULL && !strcasecmp(val, "sensitive"))
			GL_caseflag = 0;
		return;
	}
	if (val != NULL) {
		/* the limits need a value; below 1 means "not set" */
		if (!strcasecmp(flag, "maxresult")) {
			n = atoi(val);
			GL_maxresults = (n < 1) ? 0 : n;
			return;
		}
		if (!strcasecmp(flag, "maxlines")) {
			n = atoi(val);
			GL_maxlines = (n < 1) ? 0 : n;
			return;
		}
		if (!strcasecmp(flag, "maxfiles")) {
			n = atoi(val);
			GL_maxfiles = (n < 1) ? 0 : n;
			return;
		}
	}
	/* nothing matched: Glimpse cannot honour this query */
	GL_illegal_query = 1;
}
