patch-2.2.0-pre8 linux/include/asm-i386/semaphore.h

Next file: linux/include/asm-i386/softirq.h
Previous file: linux/include/asm-i386/processor.h
Back to the patch index
Back to the overall index

diff -u --recursive --new-file v2.2.0-pre7/linux/include/asm-i386/semaphore.h linux/include/asm-i386/semaphore.h
@@ -23,14 +23,49 @@
 #include <asm/atomic.h>
 #include <asm/spinlock.h>
 
+/*
+ * Semaphores are recursive: we allow the holder process
+ * to recursively do down() operations on a semaphore that
+ * the process already owns. In order to do that, we need
+ * to keep a semaphore-local copy of the owner and the
+ * "depth of ownership".
+ *
+ * NOTE! Nasty memory ordering rules:
+ *  - "owner" and "owner_depth" may only be modified once you hold the
+ *    lock.
+ *  - "owner_depth" must be written _after_ modifying owner, and
+ *    must be read _before_ reading owner. There must be appropriate
+ *    write and read barriers to enforce this.
+ *
+ * On an x86, writes are always ordered, so the only enforcement
+ * necessary is to make sure that the owner_depth is written after
+ * the owner value in program order.
+ *
+ * For read ordering guarantees, the semaphore wake_lock spinlock
+ * is already giving us ordering guarantees.
+ *
+ * Other (saner) architectures would use "wmb()" and "rmb()" to
+ * do this in a more obvious manner.
+ */
 struct semaphore {
 	atomic_t count;
+	unsigned long owner, owner_depth;
 	int waking;
 	struct wait_queue * wait;
 };
 
-#define MUTEX ((struct semaphore) { ATOMIC_INIT(1), 0, NULL })
-#define MUTEX_LOCKED ((struct semaphore) { ATOMIC_INIT(0), 0, NULL })
+/*
+ * Because we want the non-contention case to be
+ * fast, we save the stack pointer into the "owner"
+ * field, and to get the true task pointer we have
+ * to do the bit masking. That moves the masking
+ * operation into the slow path.
+ */
+#define semaphore_owner(sem) \
+	((struct task_struct *)((2*PAGE_MASK) & (sem)->owner))
+
+#define MUTEX ((struct semaphore) { ATOMIC_INIT(1), 0, 0, 0, NULL })
+#define MUTEX_LOCKED ((struct semaphore) { ATOMIC_INIT(0), 0, 1, 0, NULL })
 
 asmlinkage void __down_failed(void /* special register calling convention */);
 asmlinkage int  __down_failed_interruptible(void  /* params in registers */);
@@ -59,13 +94,53 @@
 	spin_unlock_irqrestore(&semaphore_wake_lock, flags);
 }
 
-static inline int waking_non_zero(struct semaphore *sem)
+/*
+ * NOTE NOTE NOTE!
+ *
+ * We read owner_depth _before_ getting the semaphore. This
+ * is important, because the semaphore also acts as a memory
+ * ordering point between reading owner_depth and reading
+ * the owner.
+ *
+ * Why is this necessary? The "owner_depth" essentially protects
+ * us from using stale owner information - in the case that this
+ * process was the previous owner but somebody else is racing to
+ * acquire the semaphore, the only way we can see ourselves as an
+ * owner is with "owner_depth" of zero (so that we know to avoid
+ * the stale value).
+ *
+ * In the non-race case (where we really _are_ the owner), there
+ * is not going to be any question about what owner_depth is.
+ *
+ * In the race case, the race winner will not even get here, because
+ * it will have successfully gotten the semaphore with the locked
+ * decrement operation.
+ *
+ * Basically, we have two values, and we cannot guarantee that either
+ * is really up-to-date until we have acquired the semaphore. But we
+ * _can_ depend on an ordering between the two values, so we can use
+ * one of them to determine whether we can trust the other:
+ *
+ * Cases:
+ *  - owner_depth == zero: ignore the semaphore owner, because it
+ *    cannot possibly be us. Somebody else may be in the process
+ *    of modifying it and the zero may be "stale", but it sure isn't
+ *    going to say that "we" are the owner anyway, so who cares?
+ *  - owner_depth is non-zero. That means that even if somebody
+ *    else wrote the non-zero count value, the write ordering requirement
+ *    means that they will have written themselves as the owner, so
+ *    if we now see ourselves as an owner we can trust it to be true.
+ */
+static inline int waking_non_zero(struct semaphore *sem, struct task_struct *tsk)
 {
 	unsigned long flags;
+	unsigned long owner_depth = sem->owner_depth;
 	int ret = 0;
 
 	spin_lock_irqsave(&semaphore_wake_lock, flags);
-	if (sem->waking > 0) {
+	if (sem->waking > 0 || (owner_depth && semaphore_owner(sem) == tsk)) {
+		sem->owner = (unsigned long) tsk;
+		sem->owner_depth++;	/* Don't use the possibly stale value */
 		sem->waking--;
 		ret = 1;
 	}
@@ -86,7 +161,9 @@
 		"lock ; "
 #endif
 		"decl 0(%0)\n\t"
-		"js 2f\n"
+		"js 2f\n\t"
+		"movl %%esp,4(%0)\n"
+		"movl $1,8(%0)\n\t"
 		"1:\n"
 		".section .text.lock,\"ax\"\n"
 		"2:\tpushl $1b\n\t"
@@ -108,6 +185,8 @@
 #endif
 		"decl 0(%1)\n\t"
 		"js 2f\n\t"
+		"movl %%esp,4(%1)\n\t"
+		"movl $1,8(%1)\n\t"
 		"xorl %0,%0\n"
 		"1:\n"
 		".section .text.lock,\"ax\"\n"
@@ -131,6 +210,7 @@
 {
 	__asm__ __volatile__(
 		"# atomic up operation\n\t"
+		"decl 8(%0)\n\t"
 #ifdef __SMP__
 		"lock ; "
 #endif

FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov