patch-2.2.18 linux/fs/nfsd/vfs.c

Next file: linux/fs/nls/Config.in
Previous file: linux/fs/nfsd/stats.c
Back to the patch index
Back to the overall index

diff -u --new-file --recursive --exclude-from /usr/src/exclude v2.2.17/fs/nfsd/vfs.c linux/fs/nfsd/vfs.c
@@ -31,6 +31,10 @@
 
 #include <linux/sunrpc/svc.h>
 #include <linux/nfsd/nfsd.h>
+#ifdef CONFIG_NFSD_V3
+#include <linux/nfs3.h>
+#include <linux/nfsd/xdr3.h>
+#endif /* CONFIG_NFSD_V3 */
 #include <linux/nfsd/nfsfh.h>
 #include <linux/quotaops.h>
 
@@ -41,16 +45,15 @@
 #define NFSDDBG_FACILITY		NFSDDBG_FILEOP
 #define NFSD_PARANOIA
 
-/* Open mode for nfsd_open */
-#define OPEN_READ	0
-#define OPEN_WRITE	1
-
-/* Hack until we have a macro check for mandatory locks. */
-#ifndef IS_ISMNDLK
-#define IS_ISMNDLK(i)	(((i)->i_mode & (S_ISGID|S_IXGRP|S_IFMT)) \
-			 == (S_ISGID|S_IFREG))
-#endif
 
+/* We must ignore files (but only files) which might have mandatory
+ * locks on them because there is no way to know if the accesser has
+ * the lock.
+ */
+/* MANDATORY_LOCK taken from 2.3 */
+#define MANDATORY_LOCK(inode) \
+	(IS_MANDLOCK(inode) && ((inode)->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+#define IS_ISMNDLK(i)	(S_ISREG((i)->i_mode) && MANDATORY_LOCK(i))
 /* Time difference margin in seconds for comparison. It is a
    dynamically-tunable parameter via /proc/fs/nfs/time-diff-margin.
  */
@@ -83,65 +86,58 @@
 static struct raparms *		raparml = NULL;
 static struct raparms *		raparm_cache = NULL;
 
+
+/*
+ * We need to do a check-parent every time
+ * after we have locked the parent - to verify
+ * that the parent is still our parent and
+ * that we are still hashed onto it..
+ *
+ * This is required in case two processes race
+ * on removing (or moving) the same entry: the
+ * parent lock will serialize them, but the
+ * other process will be too late..
+ *
+ * Note that this nfsd_check_parent is identical
+ * the check_parent in linux/fs/namei.c.
+ */
+#define nfsd_check_parent(dir, dentry) \
+	((dir) == (dentry)->d_parent && !list_empty(&dentry->d_hash))
+
 /*
  * Lock a parent directory following the VFS locking protocol.
  */
 int
 fh_lock_parent(struct svc_fh *parent_fh, struct dentry *dchild)
 {
-	int	nfserr = 0;
-
 	fh_lock(parent_fh);
 	/*
 	 * Make sure the parent->child relationship still holds,
 	 * and that the child is still hashed.
 	 */
-	if (dchild->d_parent != parent_fh->fh_dentry) 
-		goto out_not_parent;
-	if (list_empty(&dchild->d_hash))
-		goto out_not_hashed; 
-out:
-	return nfserr;
+	if (nfsd_check_parent(parent_fh->fh_dentry, dchild))
+	    return 0;
 
-out_not_parent:
 	printk(KERN_WARNING
-		"fh_lock_parent: %s/%s parent changed\n",
+		"fh_lock_parent: %s/%s parent changed or child unhashed\n",
 		dchild->d_parent->d_name.name, dchild->d_name.name);
-	goto out_unlock;
-out_not_hashed:
-	printk(KERN_WARNING
-		"fh_lock_parent: %s/%s unhashed\n",
-		dchild->d_parent->d_name.name, dchild->d_name.name);
-out_unlock:
-	nfserr = nfserr_noent;
-	fh_unlock(parent_fh);
-	goto out;
-}
 
-/*
- * Deny access to certain file systems
- */
-static inline int
-fs_off_limits(struct super_block *sb)
-{
-	return !sb || sb->s_magic == NFS_SUPER_MAGIC
-	           || sb->s_magic == PROC_SUPER_MAGIC;
+	fh_unlock(parent_fh);
+	return nfserr_noent;
 }
 
-/*
- * Check whether directory is a mount point, but it is all right if
- * this is precisely the local mount point being exported.
- */
-static inline int
-nfsd_iscovered(struct dentry *dentry, struct svc_export *exp)
-{
-	return (dentry != dentry->d_covers &&
-		dentry != exp->ex_dentry);
-}
 
 /*
  * Look up one component of a pathname.
  * N.B. After this call _both_ fhp and resfh need an fh_put
+ *
+ * If the lookup would cross a mountpoint, and the mounted filesystem
+ * is exported to the client with NFSEXP_CROSSMNT, then the lookup is
+ * accepted as it stands and the mounted directory is
+ * returned. Otherwise the covered directory is returned.
+ * NOTE: this mountpoint crossing is not supported properly by all
+ *   clients and is explicitly disallowed for NFSv3
+ *      NeilBrown <neilb@cse.unsw.edu.au>
  */
 int
 nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
@@ -166,33 +162,62 @@
 	if (err)
 		goto out;
 #endif
-	err = nfserr_noent;
-	if (fs_off_limits(dparent->d_sb))
-		goto out;
 	err = nfserr_acces;
-	if (nfsd_iscovered(dparent, exp))
-		goto out;
 
 	/* Lookup the name, but don't follow links */
-	dchild = lookup_dentry(name, dget(dparent), 0);
-	if (IS_ERR(dchild))
-		goto out_nfserr;
-	/*
-	 * check if we have crossed a mount point ...
-	 */
-	if (dchild->d_sb != dparent->d_sb) {
-		struct dentry *tdentry;
-		tdentry = dchild->d_covers;
-		if (tdentry == dchild)
-			goto out_dput;
-	        dput(dchild);
-		dchild = dget(tdentry);
-	        if (dchild->d_sb != dparent->d_sb) {
-printk("nfsd_lookup: %s/%s crossed mount point!\n", dparent->d_name.name, dchild->d_name.name);
-			goto out_dput;
+	if (strcmp(name, "..")==0) {
+		/* checking mountpoint crossing is very different when stepping up */
+		if (dparent == exp->ex_dentry) {
+			if (!EX_CROSSMNT(exp))
+				dchild = dget(dparent); /* .. == . just like at / */
+			else
+			{
+				struct svc_export *exp2 = NULL;
+				struct dentry *dp;
+				dchild = dparent->d_covers->d_parent;
+				for (dp=dchild;
+				     exp2 == NULL && dp->d_covers->d_parent != dp;
+				     dp=dp->d_covers->d_parent)
+					exp2 = exp_get(exp->ex_client, dp->d_inode->i_dev, dp->d_inode->i_ino);
+				if (exp2==NULL || dchild->d_sb != exp2->ex_dentry->d_sb) {
+					dchild = dget(dparent);
+				} else {
+					dget(dchild);
+					exp = exp2;
+				}
+			}
+		} else
+			dchild = dget(dparent->d_parent);
+	} else {
+		dchild = lookup_dentry(name, dget(dparent), 0);
+		if (IS_ERR(dchild))
+			goto out_nfserr;
+		/*
+		 * check if we have crossed a mount point ...
+		 */
+		if (dchild->d_sb != dparent->d_sb) {
+			struct svc_export *exp2 = NULL;
+			exp2 = exp_get(rqstp->rq_client,
+				       dchild->d_inode->i_dev,
+				       dchild->d_inode->i_ino);
+			if (exp2 && EX_CROSSMNT(exp2))
+				/* successfully crossed mount point */
+				exp = exp2;
+			else if (dchild->d_covers->d_sb == dparent->d_sb) {
+				/* stay in the original filesystem */
+				struct dentry *tdentry = dget(dchild->d_covers);
+				dput(dchild);
+				dchild = tdentry;
+			} else {
+				/* This cannot possibly happen */
+				printk("nfsd_lookup: %s/%s impossible mount point!\n", dparent->d_name.name, dchild->d_name.name);
+				dput(dchild);
+				err = nfserr_acces;
+				goto out;
+
+			}
 		}
 	}
-
 	/*
 	 * Note: we compose the file handle now, but as the
 	 * dentry may be negative, it may need to be updated.
@@ -207,10 +232,6 @@
 out_nfserr:
 	err = nfserrno(-PTR_ERR(dchild));
 	goto out;
-out_dput:
-	dput(dchild);
-	err = nfserr_acces;
-	goto out;
 }
 
 /*
@@ -226,6 +247,8 @@
 	int		ftype = 0;
 	int		imode;
 	int		err;
+	kernel_cap_t	saved_cap = 0;
+	int		size_change = 0;
 
 	if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
 		accmode |= MAY_WRITE;
@@ -234,39 +257,43 @@
 
 	/* Get inode */
 	err = fh_verify(rqstp, fhp, ftype, accmode);
-	if (err)
+	if (err || !iap->ia_valid)
 		goto out;
 
 	dentry = fhp->fh_dentry;
 	inode = dentry->d_inode;
 
 	err = inode_change_ok(inode, iap);
-	if (err) {
-		/* It is very tricky. When you are not the file owner,
-		   but have the write permission, you should be allowed
-		   to set atime and mtime to the current time on the
-		   server. However, the NFS V2 protocol doesn't support
-		   it. It has been fixed in V3. Here we do this: if the
-		   current server time and atime/mtime are close enough,
-		   we use the current server time. */
-#define CURRENT_TIME_SET	(ATTR_ATIME_SET | ATTR_MTIME_SET)
-		if (iap->ia_mtime == iap->ia_atime
-		    && ((iap->ia_valid & (CURRENT_TIME_SET))
-			== CURRENT_TIME_SET)) {
-			time_t now = CURRENT_TIME;
-			time_t delta = iap->ia_atime - now;
-			if (delta < 0) delta = -delta;
-			if (delta <= nfsd_time_diff_margin) {
-				iap->ia_valid &= ~CURRENT_TIME_SET;
-				goto current_time_ok;
-			}
+	/* could be a "touch" (utimes) request where the user is not the owner but does
+	 * have write permission. In this case the user should be allowed to set
+	 * both times to the current time.  We could just assume any such SETATTR
+	 * is intended to set the times to "now", but we do a couple of simple tests
+	 * to increase our confidence.
+	 */
+#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
+#define	MAX_TOUCH_TIME_ERROR (30*60)
+	if (err
+	    && (iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET
+	    && iap->ia_mtime == iap->ia_ctime
+	    ) {
+	    /* looks good.  now just make sure time is in the right ballpark.
+	     * solaris, at least, doesn't seem to care what the time request is
+	     */
+	    time_t delta = iap->ia_atime - CURRENT_TIME;
+	    if (delta<0) delta = -delta;
+	    if (delta < MAX_TOUCH_TIME_ERROR) {
+		/* turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME
+		 * this will cause notify_change to setthese times to "now"
+		 */
+		iap->ia_valid &= ~BOTH_TIME_SET;
+		err = inode_change_ok(inode, iap);
 		}
-		goto out_nfserr;
 	}
 
-current_time_ok:
+	if (err)
+		goto out_nfserr;
 
-	/* The size case is special... */
+	/* The size case is special. It changes the file as well as the attributes.  */
 	if (iap->ia_valid & ATTR_SIZE) {
 if (!S_ISREG(inode->i_mode))
 printk("nfsd_setattr: size change??\n");
@@ -275,22 +302,12 @@
 			if (err)
 				goto out;
 		}
-		DQUOT_INIT(inode);
 		err = get_write_access(inode);
-		if (err) {
-			DQUOT_DROP(inode);
+		if (err)
 			goto out_nfserr;
-		}
-		/* N.B. Should we update the inode cache here? */
-		inode->i_size = iap->ia_size;
-		if (inode->i_op && inode->i_op->truncate)
-			inode->i_op->truncate(inode);
-		mark_inode_dirty(inode);
-		put_write_access(inode);
-		DQUOT_DROP(inode);
-		iap->ia_valid &= ~ATTR_SIZE;
-		iap->ia_valid |= ATTR_MTIME;
-		iap->ia_mtime = CURRENT_TIME;
+		size_change = 1;
+
+		DQUOT_INIT(inode);
 	}
 
 	imode = inode->i_mode;
@@ -312,24 +329,53 @@
 	}
 
 	/* Change the attributes. */
-	if (iap->ia_valid) {
-		kernel_cap_t	saved_cap = 0;
 
-		iap->ia_valid |= ATTR_CTIME;
-		iap->ia_ctime = CURRENT_TIME;
-		if (current->fsuid != 0) {
-			saved_cap = current->cap_effective;
-			cap_clear(current->cap_effective);
-		}
-		err = notify_change(dentry, iap);
-		if (current->fsuid != 0)
-			current->cap_effective = saved_cap;
-		if (err)
-			goto out_nfserr;
-		if (EX_ISSYNC(fhp->fh_export))
-			write_inode_now(inode);
+
+	iap->ia_valid |= ATTR_CTIME;
+	if (current->fsuid != 0) {
+		saved_cap = current->cap_effective;
+		cap_clear(current->cap_effective);
+	}
+#ifdef CONFIG_QUOTA
+	/* DQUOT_TRANSFER needs both ia_uid and ia_gid defined */
+	if (iap->ia_valid & (ATTR_UID|ATTR_GID)) {
+		if (! (iap->ia_valid & ATTR_UID))
+			iap->ia_uid = inode->i_uid;
+		if (! (iap->ia_valid & ATTR_GID))
+			iap->ia_gid = inode->i_gid;
+		iap->ia_valid |= ATTR_UID|ATTR_GID;
 	}
+#endif /* CONFIG_QUOTA */
+
+	fh_lock(fhp);
+#ifdef CONFIG_QUOTA
+	if (iap->ia_valid & (ATTR_UID|ATTR_GID)) 
+		err = DQUOT_TRANSFER(dentry, iap);
+	else
+#endif
+		err = notify_change(dentry, iap);
+
+	if (size_change) {
+		if (!err) {
+			vmtruncate(inode,iap->ia_size);		
+			if (inode->i_op && inode->i_op->truncate)
+				inode->i_op->truncate(inode);
+		}
+		fh_unlock(fhp);
+		put_write_access(inode);
+	} else
+		fh_unlock(fhp);
+
+	if (current->fsuid != 0)
+		current->cap_effective = saved_cap;
+	if (err)
+		goto out_nfserr;
+	if (EX_ISSYNC(fhp->fh_export))
+		write_inode_now(inode);
 	err = 0;
+
+	/* Don't unlock inode; the nfssvc_release functions are supposed
+	 * to do this. */
 out:
 	return err;
 
@@ -338,20 +384,107 @@
 	goto out;
 }
 
+#ifdef CONFIG_NFSD_V3
+/*
+ * Check server access rights to a file system object
+ */
+struct accessmap {
+	u32		access;
+	int		how;
+};
+static struct accessmap	nfs3_regaccess[] = {
+    {	NFS3_ACCESS_READ,	MAY_READ			},
+    {	NFS3_ACCESS_EXECUTE,	MAY_EXEC			},
+    {	NFS3_ACCESS_MODIFY,	MAY_WRITE|MAY_TRUNC		},
+    {	NFS3_ACCESS_EXTEND,	MAY_WRITE			},
+
+    {	0,			0				}
+};
+
+static struct accessmap	nfs3_diraccess[] = {
+    {	NFS3_ACCESS_READ,	MAY_READ			},
+    {	NFS3_ACCESS_LOOKUP,	MAY_EXEC			},
+    {	NFS3_ACCESS_MODIFY,	MAY_EXEC|MAY_WRITE|MAY_TRUNC	},
+    {	NFS3_ACCESS_EXTEND,	MAY_EXEC|MAY_WRITE		},
+    {	NFS3_ACCESS_DELETE,	MAY_REMOVE			},
+
+    {	0,			0				}
+};
+
+static struct accessmap	nfs3_anyaccess[] = {
+    /* XXX: should we try to cover read/write here for clients that
+     * rely on us to do their access checking for special files? */
+
+    {	0,			0				}
+};
+
+int
+nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access)
+{
+	struct accessmap	*map;
+	struct svc_export	*export;
+	struct dentry		*dentry;
+	u32			query, result = 0;
+	int			error;
+
+	error = fh_verify(rqstp, fhp, 0, MAY_NOP);
+	if (error)
+		goto out;
+
+	export = fhp->fh_export;
+	dentry = fhp->fh_dentry;
+
+	if (S_ISREG(dentry->d_inode->i_mode)) {
+		map = nfs3_regaccess;
+	} else if (S_ISDIR(dentry->d_inode->i_mode)) {
+		map = nfs3_diraccess;
+	} else {
+		map = nfs3_anyaccess;
+	}
+
+	query = *access;
+	while (map->access) {
+		if (map->access & query) {
+			error = nfsd_permission(export, dentry, (map->how | NO_OWNER_OVERRIDE));
+			if (error == 0)
+				result |= map->access;
+			else if ((error == nfserr_perm) || (error == nfserr_acces)) {
+				/*
+				 *  This access type is denyed; but the 
+				 *  access query itself succeeds.
+				 */
+				error = 0;
+			} else {
+				/*
+				 *  Some fatal error.  Fail the query.
+				 */
+				goto out;
+			}
+		}
+		map++;
+	}
+	*access = result;
+
+out:
+	return error;
+}
+#endif
+
+
+
 /*
  * Open an existing file or directory.
- * The wflag argument indicates write access.
+ * The access argument indicates the type of open (read/write/lock)
  * N.B. After this call fhp needs an fh_put
  */
 int
 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
-			int wflag, struct file *filp)
+			int access, struct file *filp)
 {
 	struct dentry	*dentry;
 	struct inode	*inode;
-	int		access, err;
+	int		err;
 
-	access = wflag? MAY_WRITE : MAY_READ;
 	err = fh_verify(rqstp, fhp, type, access);
 	if (err)
 		goto out;
@@ -368,24 +501,27 @@
 	if (!inode->i_op || !inode->i_op->default_file_ops)
 		goto out;
 
-	if (wflag && (err = get_write_access(inode)) != 0)
+	if ((access & MAY_WRITE) && (err = get_write_access(inode)) != 0)
 		goto out_nfserr;
 
 	memset(filp, 0, sizeof(*filp));
 	filp->f_op    = inode->i_op->default_file_ops;
 	filp->f_count = 1;
-	filp->f_flags = wflag? O_WRONLY : O_RDONLY;
-	filp->f_mode  = wflag? FMODE_WRITE : FMODE_READ;
 	filp->f_dentry = dentry;
-
-	if (wflag)
+	if (access & MAY_WRITE) {
+		filp->f_flags = O_WRONLY;
+		filp->f_mode  = FMODE_WRITE;
 		DQUOT_INIT(inode);
+	} else {
+		filp->f_flags = O_RDONLY;
+		filp->f_mode  = FMODE_READ;
+	}
 
 	err = 0;
 	if (filp->f_op && filp->f_op->open) {
 		err = filp->f_op->open(inode, filp);
 		if (err) {
-			if (wflag)
+			if (access & MAY_WRITE)
 				put_write_access(inode);
 
 			/* I nearly added put_filp() call here, but this filp
@@ -419,17 +555,33 @@
 		filp->f_op->release(inode, filp);
 	if (filp->f_mode & FMODE_WRITE) {
 		put_write_access(inode);
-		DQUOT_DROP(inode);
 	}
 }
 
 /*
  * Sync a file
+ * As this calls fsync (not fdatasync) there is no need for a write_inode
+ * after it.
  */
 void
-nfsd_sync(struct inode *inode, struct file *filp)
+nfsd_sync(struct file *filp)
 {
+	dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name);
+	down(&filp->f_dentry->d_inode->i_sem);
 	filp->f_op->fsync(filp, filp->f_dentry);
+	up(&filp->f_dentry->d_inode->i_sem);
+}
+
+void
+nfsd_sync_dir(struct dentry *dp)
+{
+	struct inode *inode = dp->d_inode;
+	int (*fsync) (struct file *, struct dentry *);
+	
+	if (inode->i_op->default_file_ops
+	    && (fsync = inode->i_op->default_file_ops->fsync)) {
+		fsync(NULL, dp);
+	}
 }
 
 /*
@@ -478,7 +630,7 @@
 	int		err;
 	struct file	file;
 
-	err = nfsd_open(rqstp, fhp, S_IFREG, OPEN_READ, &file);
+	err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file);
 	if (err)
 		goto out;
 	err = nfserr_perm;
@@ -543,11 +695,11 @@
 	uid_t			saved_euid;
 #endif
 
-	if (!cnt)
-		goto out;
-	err = nfsd_open(rqstp, fhp, S_IFREG, OPEN_WRITE, &file);
+	err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file);
 	if (err)
 		goto out;
+	if (!cnt)
+		goto out_close;
 	err = nfserr_perm;
 	if (!file.f_op->write)
 		goto out_close;
@@ -560,11 +712,21 @@
 	 * Request sync writes if
 	 *  -	the sync export option has been set, or
 	 *  -	the client requested O_SYNC behavior (NFSv3 feature).
+	 *  -   The file system doesn't support fsync().
 	 * When gathered writes have been configured for this volume,
 	 * flushing the data to disk is handled separately below.
 	 */
+#ifdef CONFIG_NFSD_V3
+	if (rqstp->rq_vers == 2)
+		stable = EX_ISSYNC(exp);
+	else if (file.f_op->fsync == 0)
+	       stable = 1;
+	if (stable && !EX_WGATHER(exp))
+		file.f_flags |= O_SYNC;
+#else
 	if ((stable || (stable = EX_ISSYNC(exp))) && !EX_WGATHER(exp))
 		file.f_flags |= O_SYNC;
+#endif /* CONFIG_NFSD_V3 */
 
 	fh_lock(fhp);			/* lock inode */
 	file.f_pos = offset;		/* set write offset */
@@ -618,26 +780,25 @@
 		 */
 		if (EX_WGATHER(exp) && (inode->i_writecount > 1
 		 || (last_ino == inode->i_ino && last_dev == inode->i_dev))) {
-#if 0
-			interruptible_sleep_on_timeout(&inode->i_wait, 10 * HZ / 1000);
-#else
 			dprintk("nfsd: write defer %d\n", current->pid);
+			current->state = TASK_UNINTERRUPTIBLE;
 			schedule_timeout((HZ+99)/100);
+			current->state = TASK_RUNNING;
 			dprintk("nfsd: write resume %d\n", current->pid);
-#endif
 		}
 
 		if (inode->i_state & I_DIRTY) {
 			dprintk("nfsd: write sync %d\n", current->pid);
-			nfsd_sync(inode, &file);
-			write_inode_now(inode);
+			nfsd_sync(&file);
 		}
+#if 0
 		wake_up(&inode->i_wait);
+#endif
 		last_ino = inode->i_ino;
 		last_dev = inode->i_dev;
 	}
 
-	dprintk("nfsd: write complete\n");
+	dprintk("nfsd: write complete err=%d\n", err);
 	if (err >= 0)
 		err = 0;
 	else 
@@ -648,6 +809,38 @@
 	return err;
 }
 
+
+#ifdef CONFIG_NFSD_V3
+/*
+ * Commit all pendig writes to stable storage.
+ * Strictly speaking, we could sync just indicated the file region here,
+ * but there's currently no way we can ask the VFS to do so.
+ *
+ * We lock the file to make sure we return full WCC data to the client.
+ */
+int
+nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
+               off_t offset, unsigned long count)
+{
+	struct file	file;
+	int		err;
+
+	if ((err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file)) != 0)
+		return err;
+
+	fh_lock(fhp);
+	if (file.f_op && file.f_op->fsync) {
+		file.f_op->fsync(&file, file.f_dentry);
+	} else {
+		err = nfserr_notsupp;
+	}
+	fh_unlock(fhp);
+
+	nfsd_close(&file);
+	return err;
+}
+#endif /* CONFIG_NFSD_V3 */
+
 /*
  * Create a file (regular, directory, device, fifo); UNIX sockets 
  * not yet implemented.
@@ -669,6 +862,7 @@
 	err = nfserr_perm;
 	if (!flen)
 		goto out;
+
 	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
 	if (err)
 		goto out;
@@ -679,40 +873,42 @@
 	err = nfserr_notdir;
 	if(!dirp->i_op || !dirp->i_op->lookup)
 		goto out;
+
+	err = nfserr_exist;
+	if (isdotent(fname, flen))
+		goto out;
 	/*
 	 * Check whether the response file handle has been verified yet.
 	 * If it has, the parent directory should already be locked.
 	 */
-	if (!resfhp->fh_dverified) {
+	if (!resfhp->fh_dentry) {
 		dchild = lookup_dentry(fname, dget(dentry), 0);
 		err = PTR_ERR(dchild);
 		if (IS_ERR(dchild))
 			goto out_nfserr;
 		fh_compose(resfhp, fhp->fh_export, dchild);
+		/* Lock the parent and check for errors ... */
+		err = fh_lock_parent(fhp, dchild);
+		if (err)
+			goto out;
 	} else {
 		dchild = resfhp->fh_dentry;
-		if (!fhp->fh_locked)
+		if (!fhp->fh_locked) {
+			/* not actually possible */
 			printk(KERN_ERR
 				"nfsd_create: parent %s/%s not locked!\n",
 				dentry->d_parent->d_name.name,
 				dentry->d_name.name);
-	}
-	err = nfserr_exist;
-	if (dchild->d_inode)
-		goto out;
-	if (!fhp->fh_locked) {
-		/* Lock the parent and check for errors ... */
-		err = fh_lock_parent(fhp, dchild);
-		if (err)
+			err = -EIO;
 			goto out;
 	}
+	}
 	/*
 	 * Make sure the child dentry is still negative ...
 	 */
 	err = nfserr_exist;
 	if (dchild->d_inode) {
-		printk(KERN_WARNING
-			"nfsd_create: dentry %s/%s not negative!\n",
+		dprintk("nfsd_create: dentry %s/%s not negative!\n",
 			dentry->d_name.name, dchild->d_name.name);
 		goto out; 
 	}
@@ -739,24 +935,29 @@
 	case S_IFSOCK:
 		opfunc = dirp->i_op->mknod;
 		break;
+	default:
+	        printk("nfsd: bad file type %o in nfsd_create\n", type);
+		err = nfserr_inval;
 	}
 	if (!opfunc)
 		goto out;
 
 	if (!(iap->ia_valid & ATTR_MODE))
 		iap->ia_mode = 0;
+	iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;
 
 	/*
 	 * Call the dir op function to create the object.
 	 */
 	DQUOT_INIT(dirp);
 	err = opfunc(dirp, dchild, iap->ia_mode, rdev);
-	DQUOT_DROP(dirp);
 	if (err < 0)
 		goto out_nfserr;
 
-	if (EX_ISSYNC(fhp->fh_export))
-		write_inode_now(dirp);
+	if (EX_ISSYNC(fhp->fh_export)) {
+		nfsd_sync_dir(dentry);
+		write_inode_now(dchild->d_inode);
+	}
 
 	/*
 	 * Update the file handle to get the new inode info.
@@ -779,6 +980,129 @@
 	goto out;
 }
 
+#ifdef CONFIG_NFSD_V3
+/*
+ * NFSv3 version of nfsd_create
+ */
+int
+nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		char *fname, int flen, struct iattr *iap,
+		struct svc_fh *resfhp, int createmode, u32 *verifier)
+{
+	struct dentry	*dentry, *dchild;
+	struct inode	*dirp;
+	int		err;
+
+	err = nfserr_perm;
+	if (!flen)
+		goto out;
+	if (!(iap->ia_valid & ATTR_MODE))
+		iap->ia_mode = 0;
+	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
+	if (err)
+		goto out;
+
+	dentry = fhp->fh_dentry;
+	dirp = dentry->d_inode;
+
+	/* Get all the sanity checks out of the way before
+	 * we lock the parent. */
+	err = nfserr_notdir;
+	if(!dirp->i_op || !dirp->i_op->lookup)
+		goto out;
+	err = nfserr_perm;
+	if(!dirp->i_op->create)
+		goto out;
+
+	err = nfserr_exist;
+	if (isdotent(fname, flen))
+		goto out;
+	/*
+	 * Compose the response file handle.
+	 */
+	dchild = lookup_dentry(fname, dget(dentry), 0);
+	err = PTR_ERR(dchild);
+	if(IS_ERR(dchild))
+		goto out_nfserr;
+	fh_compose(resfhp, fhp->fh_export, dchild);
+
+	/*
+	 * We must lock the directory before we check for the inode.
+	 */
+	err = fh_lock_parent(fhp, dchild);
+	if (err)
+	    goto out;
+
+	if (dchild->d_inode) {
+		err = 0;
+
+		if (resfhp->fh_handle.fh_ino == 0)
+		     /* inode might have been instantiated while we slept */
+		    fh_update(resfhp);
+		
+		switch (createmode) {
+		case NFS3_CREATE_UNCHECKED:
+			if (! S_ISREG(dchild->d_inode->i_mode))
+				err = nfserr_exist;
+			else {
+				iap->ia_valid &= ATTR_SIZE;
+				goto set_attr;
+			}
+			break;
+		case NFS3_CREATE_EXCLUSIVE:
+			if (   dchild->d_inode->i_mtime == verifier[0]
+			    && dchild->d_inode->i_atime == verifier[1]
+			    && dchild->d_inode->i_mode == S_IFREG
+			    && dchild->d_inode->i_size == 0 )
+				break;
+			 /* fallthru */
+		case NFS3_CREATE_GUARDED:
+			err = nfserr_exist;
+		}
+		goto out;
+	}
+
+	err = dirp->i_op->create(dirp, dchild, iap->ia_mode);
+	if (err < 0)
+		goto out_nfserr;
+
+	if (EX_ISSYNC(fhp->fh_export)) {
+		nfsd_sync_dir(dentry);
+		/* setattr will sync the child (or not) */
+	}
+
+	/*
+	 * Update the filehandle to get the new inode info.
+	 */
+	fh_update(resfhp);
+	err = 0;
+
+	if (createmode == NFS3_CREATE_EXCLUSIVE) {
+		/* Cram the verifier into atime/mtime */
+		iap->ia_valid = ATTR_MTIME|ATTR_ATIME|ATTR_MTIME_SET|ATTR_ATIME_SET;
+		iap->ia_mtime = verifier[0];
+		iap->ia_atime = verifier[1];
+	}
+
+	/* Set file attributes. Mode has already been set and
+	 * setting uid/gid works only for root. Irix appears to
+	 * send along the gid when it tries to implement setgid
+	 * directories via NFS. Clear out all that cruft.
+	 */
+ set_attr:
+	if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0)
+ 		err = nfsd_setattr(rqstp, resfhp, iap);
+
+ out:
+	fh_unlock(fhp);
+ 	return err;
+ 
+ out_nfserr:
+	err = nfserrno(-err);
+	goto out;
+}
+#endif /* CONFIG_NFSD_V3 */
+
 /*
  * Truncate a file.
  * The calling routines must make sure to update the ctime
@@ -817,15 +1141,16 @@
 		cap_clear(current->cap_effective);
 	}
 	err = notify_change(dentry, &newattrs);
-	if (current->fsuid != 0)
-		current->cap_effective = saved_cap;
 	if (!err) {
 		vmtruncate(inode, size);
 		if (inode->i_op && inode->i_op->truncate)
 			inode->i_op->truncate(inode);
 	}
+	if (current->fsuid != 0)
+		current->cap_effective = saved_cap;
 	put_write_access(inode);
-	DQUOT_DROP(inode);
+	if (EX_ISSYNC(fhp->fh_export))
+		nfsd_sync_dir(dentry);
 	fh_unlock(fhp);
 out_nfserr:
 	if (err)
@@ -859,7 +1184,10 @@
 		goto out;
 
 	UPDATE_ATIME(inode);
-	/* N.B. Why does this call need a get_fs()?? */
+	/* N.B. Why does this call need a get_fs()??
+	 * Remove the set_fs and watch the fireworks:-) --okir
+	 */
+
 	oldfs = get_fs(); set_fs(KERNEL_DS);
 	err = inode->i_op->readlink(dentry, buf, *lenp);
 	set_fs(oldfs);
@@ -884,7 +1212,8 @@
 nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
 				char *fname, int flen,
 				char *path,  int plen,
-				struct svc_fh *resfhp)
+				struct svc_fh *resfhp,
+				struct iattr *iap)
 {
 	struct dentry	*dentry, *dnew;
 	struct inode	*dirp;
@@ -899,9 +1228,11 @@
 		goto out;
 	dentry = fhp->fh_dentry;
 
-	err = nfserr_perm;
-	if (nfsd_iscovered(dentry, fhp->fh_export))
+	err = nfserr_exist;
+	if (isdotent(fname, flen))
 		goto out;
+
+	err = nfserr_perm;
 	dirp = dentry->d_inode;
 	if (!dirp->i_op	|| !dirp->i_op->symlink)
 		goto out;
@@ -922,10 +1253,20 @@
 	if (!dnew->d_inode) {
 		DQUOT_INIT(dirp);
 		err = dirp->i_op->symlink(dirp, dnew, path);
-		DQUOT_DROP(dirp);
 		if (!err) {
 			if (EX_ISSYNC(fhp->fh_export))
-				write_inode_now(dirp);
+				nfsd_sync_dir(dentry);
+			if (iap) {
+				iap->ia_valid &= ATTR_MODE /* ~(ATTR_MODE|ATTR_UID|ATTR_GID)*/;
+				if (iap->ia_valid) {
+					iap->ia_valid |= ATTR_CTIME;
+					iap->ia_mode = (iap->ia_mode&S_IALLUGO)
+						| S_IFLNK;
+					err = notify_change(dnew, iap);
+					if (!err && EX_ISSYNC(fhp->fh_export))
+						write_inode_now(dentry->d_inode);
+			       }
+			}
 		} else
 			err = nfserrno(-err);
 	}
@@ -949,7 +1290,7 @@
  */
 int
 nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
-				char *fname, int len, struct svc_fh *tfhp)
+				char *fname, int flen, struct svc_fh *tfhp)
 {
 	struct dentry	*ddir, *dnew, *dold;
 	struct inode	*dirp, *dest;
@@ -958,12 +1299,16 @@
 	err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE);
 	if (err)
 		goto out;
-	err = fh_verify(rqstp, tfhp, S_IFREG, MAY_NOP);
+	err = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP);
 	if (err)
 		goto out;
 
 	err = nfserr_perm;
-	if (!len)
+	if (!flen)
+		goto out;
+
+	err = nfserr_exist;
+	if (isdotent(fname, flen))
 		goto out;
 
 	ddir = ffhp->fh_dentry;
@@ -987,10 +1332,7 @@
 	dold = tfhp->fh_dentry;
 	dest = dold->d_inode;
 
-	err = nfserr_acces;
-	if (nfsd_iscovered(ddir, ffhp->fh_export))
-		goto out_unlock;
-	/* FIXME: nxdev for NFSv3 */
+	err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
 	if (dirp->i_dev != dest->i_dev)
 		goto out_unlock;
 
@@ -1002,10 +1344,9 @@
 
 	DQUOT_INIT(dirp);
 	err = dirp->i_op->link(dold, dirp, dnew);
-	DQUOT_DROP(dirp);
 	if (!err) {
 		if (EX_ISSYNC(ffhp->fh_export)) {
-			write_inode_now(dirp);
+			nfsd_sync_dir(ddir);
 			write_inode_now(dest);
 		}
 	} else
@@ -1024,26 +1365,12 @@
 }
 
 /*
- * We need to do a check-parent every time
- * after we have locked the parent - to verify
- * that the parent is still our parent and
- * that we are still hashed onto it..
- *
- * This is requied in case two processes race
- * on removing (or moving) the same entry: the
- * parent lock will serialize them, but the
- * other process will be too late..
- */
-#define check_parent(dir, dentry) \
-	((dir) == (dentry)->d_parent->d_inode && !list_empty(&dentry->d_hash))
-
-/*
  * This follows the model of double_lock() in the VFS.
  */
 static inline void nfsd_double_down(struct semaphore *s1, struct semaphore *s2)
 {
 	if (s1 != s2) {
-		if ((unsigned long) s1 > (unsigned long) s2) {
+		if ((unsigned long) s1 < (unsigned long) s2) {
 			struct semaphore *tmp = s1;
 			s1 = s2;
 			s2 = tmp;
@@ -1085,12 +1412,12 @@
 	tdentry = tfhp->fh_dentry;
 	tdir = tdentry->d_inode;
 
-	/* N.B. We shouldn't need this ... dentry layer handles it */
+	err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
+	if (fdir->i_dev != tdir->i_dev)
+		goto out;
+
 	err = nfserr_perm;
-	if (!flen || (fname[0] == '.' && 
-	    (flen == 1 || (flen == 2 && fname[1] == '.'))) ||
-	    !tlen || (tname[0] == '.' && 
-	    (tlen == 1 || (tlen == 2 && tname[1] == '.'))))
+	if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
 		goto out;
 
 	odentry = lookup_dentry(fname, dget(fdentry), 0);
@@ -1111,31 +1438,36 @@
 	 * Lock the parent directories.
 	 */
 	nfsd_double_down(&tdir->i_sem, &fdir->i_sem);
+
+#ifdef CONFIG_NFSD_V3
+	/* Fill in the pre-op attr for the wcc data for both 
+	 * tdir and fdir
+	 */ 
+	fill_pre_wcc(ffhp);
+	fill_pre_wcc(tfhp);
+#endif /* CONFIG_NFSD_V3 */
+
 	err = -ENOENT;
 	/* GAM3 check for parent changes after locking. */
-	if (check_parent(fdir, odentry) &&
-	    check_parent(tdir, ndentry)) {
+	if (nfsd_check_parent(fdentry, odentry) &&
+	    nfsd_check_parent(tdentry, ndentry)) {
 
 		err = vfs_rename(fdir, odentry, tdir, ndentry);
 		if (!err && EX_ISSYNC(tfhp->fh_export)) {
-			write_inode_now(fdir);
-			write_inode_now(tdir);
+			nfsd_sync_dir(tdentry);
+			nfsd_sync_dir(fdentry);
 		}
 	} else
 		dprintk("nfsd: Caught race in nfsd_rename");
-	DQUOT_DROP(fdir);
-	DQUOT_DROP(tdir);
 
+#ifdef CONFIG_NFSD_V3
+        /* Fill in the post-op attr for the wcc data for both 
+         * tdir and fdir
+         */
+	fill_post_wcc(ffhp);
+	fill_post_wcc(tfhp);
+#endif /* CONFIG_NFSD_V3 */
 	nfsd_double_up(&tdir->i_sem, &fdir->i_sem);
-
-	if (!err && odentry->d_inode) {
-		add_to_rename_cache(tdir->i_ino,
-				    odentry->d_inode->i_dev,
-				    fdir->i_ino,
-				    odentry->d_inode->i_ino);
-	} else {
-		printk(": no inode in rename or err: %d.\n", err);
-	}
 	dput(ndentry);
 
 out_dput_old:
@@ -1162,13 +1494,12 @@
 	struct inode	*dirp;
 	int		err;
 
-	/* N.B. We shouldn't need this test ... handled by dentry layer */
-	err = nfserr_acces;
-	if (!flen || isdotent(fname, flen))
-		goto out;
 	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE);
 	if (err)
 		goto out;
+	err = nfserr_acces;
+	if (!flen || isdotent(fname, flen))
+		goto out;
 
 	dentry = fhp->fh_dentry;
 	dirp = dentry->d_inode;
@@ -1177,13 +1508,13 @@
 	err = PTR_ERR(rdentry);
 	if (IS_ERR(rdentry))
 		goto out_nfserr;
+
 	if (!rdentry->d_inode) {
 		dput(rdentry);
 		err = nfserr_noent;
 		goto out;
 	}
 
-	expire_by_dentry(rdentry);
 
 	if (type != S_IFDIR) {
 		/* It's UNLINK */
@@ -1194,30 +1525,33 @@
 
 		err = vfs_unlink(dirp, rdentry);
 
-		DQUOT_DROP(dirp);
 		fh_unlock(fhp);
 
 		dput(rdentry);
-		expire_by_dentry(rdentry);
+
 	} else {
 		/* It's RMDIR */
 		/* See comments in fs/namei.c:do_rmdir */
 
 		rdentry->d_count++;
 		nfsd_double_down(&dirp->i_sem, &rdentry->d_inode->i_sem);
-		if (!fhp->fh_pre_mtime)
-			fhp->fh_pre_mtime = dirp->i_mtime;
+
+#ifdef CONFIG_NFSD_V3
+		fill_pre_wcc(fhp);
+#else
 		fhp->fh_locked = 1;
+#endif /* CONFIG_NFSD_V3 */
 
 		err = -ENOENT;
-		if (check_parent(dirp, rdentry))
+		if (nfsd_check_parent(dentry, rdentry))
 			err = vfs_rmdir(dirp, rdentry);
 
 		rdentry->d_count--;
-		DQUOT_DROP(dirp);
-		if (!fhp->fh_post_version)
-			fhp->fh_post_version = dirp->i_version;
+#ifdef CONFIG_NFSD_V3
+		fill_post_wcc(fhp);
+#else
 		fhp->fh_locked = 0;
+#endif /* CONFIG_NFSD_V3 */
 		nfsd_double_up(&dirp->i_sem, &rdentry->d_inode->i_sem);
 
 		dput(rdentry);
@@ -1225,9 +1559,11 @@
 
 	if (err)
 		goto out_nfserr;
-	if (EX_ISSYNC(fhp->fh_export))
-		write_inode_now(dirp);
-
+	if (EX_ISSYNC(fhp->fh_export)) {
+		down(&dentry->d_inode->i_sem);
+		nfsd_sync_dir(dentry);
+		up(&dentry->d_inode->i_sem);
+	}
 out:
 	return err;
 
@@ -1238,10 +1574,11 @@
 
 /*
  * Read entries from a directory.
+ * The verifier is an NFSv3 thing we ignore for now.
  */
 int
 nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, 
-			encode_dent_fn func, u32 *buffer, int *countp)
+             encode_dent_fn func, u32 *buffer, int *countp, u32 *verf)
 {
 	struct inode	*inode;
 	u32		*p;
@@ -1249,13 +1586,11 @@
 	struct file	file;
 	struct readdir_cd cd;
 
-	err = 0;
-	if (offset > ~(u32) 0)
-		goto out;
-
-	err = nfsd_open(rqstp, fhp, S_IFDIR, OPEN_READ, &file);
+	err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file);
 	if (err)
 		goto out;
+	if (offset > ~(u32) 0)
+		goto out_close;
 
 	err = nfserr_notdir;
 	if (!file.f_op->readdir)
@@ -1267,6 +1602,7 @@
 	cd.rqstp  = rqstp;
 	cd.buffer = buffer;
 	cd.buflen = *countp; /* count of words */
+	cd.dirfh  = fhp;
 
 	/*
 	 * Read the directory entries. This silly loop is necessary because
@@ -1296,8 +1632,14 @@
 	/* If we didn't fill the buffer completely, we're at EOF */
 	eof = !cd.eob;
 
-	if (cd.offset)
-		*cd.offset = htonl(file.f_pos);
+	if (cd.offset) {
+#ifdef CONFIG_NFSD_V3
+		if (rqstp->rq_vers == 3)
+			(void)enc64(cd.offset, file.f_pos);
+		else
+#endif /* CONFIG_NFSD_V3 */
+			*cd.offset = htonl(file.f_pos);
+	}
 
 	p = cd.buffer;
 	*p++ = 0;			/* no more entries */
@@ -1360,17 +1702,33 @@
 	struct inode	*inode = dentry->d_inode;
 	int		err;
 	kernel_cap_t	saved_cap = 0;
+	int		owneraccess;
+
+	/*
+	 *  Check if we are to use "owner may always access" semantics,
+	 *  then clean out the flag bit which controls this. It might be
+	 *  clearer to reverse the logic of this flag, but I didn't
+	 *  want to change a lot of code in a stable kernel - dhiggen.
+	 */
+
+	if (acc & NO_OWNER_OVERRIDE) {
+		owneraccess = 0;
+		acc &= ~NO_OWNER_OVERRIDE;
+	} else {
+		owneraccess = 1;
+	}
 
 	if (acc == MAY_NOP)
 		return 0;
 #if 0
-	dprintk("nfsd: permission 0x%x%s%s%s%s%s mode 0%o%s%s%s\n",
+	dprintk("nfsd: permission 0x%x%s%s%s%s%s%s mode 0%o%s%s%s\n",
 		acc,
 		(acc & MAY_READ)?	" read"  : "",
 		(acc & MAY_WRITE)?	" write" : "",
 		(acc & MAY_EXEC)?	" exec"  : "",
 		(acc & MAY_SATTR)?	" sattr" : "",
 		(acc & MAY_TRUNC)?	" trunc" : "",
+		(acc & MAY_LOCK)?	" lock"  : "",
 		inode->i_mode,
 		IS_IMMUTABLE(inode)?	" immut" : "",
 		IS_APPEND(inode)?	" append" : "",
@@ -1378,33 +1736,40 @@
 	dprintk("      owner %d/%d user %d/%d\n",
 		inode->i_uid, inode->i_gid, current->fsuid, current->fsgid);
 #endif
-#ifndef CONFIG_NFSD_SUN
-	if (dentry->d_mounts != dentry) {
-		return nfserr_perm;
-	}
-#endif
 
 	if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) {
 		if (EX_RDONLY(exp) || IS_RDONLY(inode))
 			return nfserr_rofs;
-		if (S_ISDIR(inode->i_mode) && nfsd_iscovered(dentry, exp))
-			return nfserr_perm;
 		if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
 			return nfserr_perm;
 	}
 	if ((acc & MAY_TRUNC) && IS_APPEND(inode))
 		return nfserr_perm;
 
+	if (acc & MAY_LOCK) {
+		/* If we cannot rely on authentication in NLM requests,
+		 * just allow locks, others require read permission
+		 */
+		if (exp->ex_flags & NFSEXP_NOAUTHNLM)
+			return 0;
+		else
+			acc = MAY_READ;
+	}
 	/*
-	 * The file owner always gets access permission. This is to make
-	 * file access work even when the client has done a fchmod(fd, 0).
+	 * The file owner always gets access permission (except in the 
+	 * special case of a V3 ACCESS call, which is used for checking at 
+	 * open() time). This is to make file access work even when the 
+	 * client has done a fchmod(fd, 0).
 	 *
 	 * However, `cp foo bar' should fail nevertheless when bar is
 	 * readonly. A sensible way to do this might be to reject all
 	 * attempts to truncate a read-only file, because a creat() call
 	 * always implies file truncation.
+	 *  dhXXX we are not currently setting MAY_TRUNC from a SETATTR which
+	 *  changes the size, so this check is not enforced.  It probably
+	 *  should be?
 	 */
-	if (inode->i_uid == current->fsuid /* && !(acc & MAY_TRUNC) */)
+	if (owneraccess && inode->i_uid == current->fsuid /* && !(acc & MAY_TRUNC) */)
 		return 0;
 
 	if (current->fsuid != 0) {

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)