patch-2.3.16 linux/arch/ppc/kernel/hashtable.S
- Lines: 477
- Date: Tue Aug 31 11:36:43 1999
- Orig file: v2.3.15/linux/arch/ppc/kernel/hashtable.S
- Orig date: Wed Dec 31 16:00:00 1969
diff -u --recursive --new-file v2.3.15/linux/arch/ppc/kernel/hashtable.S linux/arch/ppc/kernel/hashtable.S
@@ -0,0 +1,476 @@
+/*
+ * arch/ppc/kernel/hashtable.S
+ *
+ * $Id: hashtable.S,v 1.2 1999/08/23 02:53:17 paulus Exp $
+ *
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ * Adapted for Power Macintosh by Paul Mackerras.
+ * Low-level exception handlers and MMU support
+ * rewritten by Paul Mackerras.
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ * This file contains low-level assembler routines for managing
+ * the PowerPC MMU hash table. (PPC 8xx processors don't use a
+ * hash table, so this file is not used on them.)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include "ppc_asm.h"
+#include <asm/processor.h>
+#include <asm/page.h>
+#include <linux/config.h>
+
+/*
+ * Load a PTE into the hash table, if possible.
+ * The address is in r3, and r4 contains access flags:
+ * _PAGE_USER (2) if a user-mode access, ORed with
+ * _PAGE_RW (4) if a write.  r20 contains DSISR or SRR1,
+ * so bit 1 (0x40000000) is set if the exception was due
+ * to no matching PTE being found in the hash table.
+ * r5 contains the physical address of the current task's thread.
+ *
+ * Returns to the caller if the access is illegal or there is no
+ * mapping for the address. Otherwise it places an appropriate PTE
+ * in the hash table and returns from the exception.
+ * Uses r0, r2 - r6, ctr, lr.
+ *
+ * For speed, 4 of the instructions get patched once the size and
+ * physical address of the hash table are known. These definitions
+ * of Hash_base and Hash_bits below are just an example.
+ */
+Hash_base = 0x180000
+Hash_bits = 12 /* e.g. 256kB hash table */
+Hash_msk = (((1 << Hash_bits) - 1) * 64)
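
As a point of reference for the patched instructions (hash_page_patch_A/B/C
below), the PTEG address computation can be modelled in C roughly as
follows.  This is a sketch only: pteg_addr is an illustrative name, and
HASH_BASE/HASH_BITS stand in for the values patched in at boot.

	#include <stdint.h>

	#define HASH_BASE 0x180000u	/* example values, as above */
	#define HASH_BITS 12

	/* Each PTEG is 64 bytes: 8 PTEs of 8 bytes each.  The secondary
	 * hash is the one's complement of the primary hash within the
	 * mask, which the code gets by XORing the primary PTEG address
	 * with Hash_msk. */
	static uint32_t pteg_addr(uint32_t vsid, uint32_t ea, int secondary)
	{
		uint32_t mask = (1u << HASH_BITS) - 1;	/* Hash_msk >> 6 */
		uint32_t hash = (vsid ^ (ea >> 12)) & mask;

		if (secondary)
			hash = ~hash & mask;
		return HASH_BASE + (hash << 6);
	}
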
+
+ .globl hash_page
+hash_page:
+#ifdef __SMP__
+	eieio
+	lis	r2,hash_table_lock@h
+	ori	r2,r2,hash_table_lock@l
+	tophys(r2,r2)			/* phys addr of the lock word */
+	lis	r6,100000000@h		/* ~1e8 spins: deadlock timeout */
+	mtctr	r6
+	lwz	r0,PROCESSOR-THREAD(r5)	/* our CPU number, tagged with */
+	or	r0,r0,r6		/* high bits, as the lock token */
+10:	lwarx	r6,0,r2			/* reserve the lock word */
+	cmpi	0,r6,0			/* free? */
+	bne-	12f
+	stwcx.	r0,0,r2			/* try to store our token */
+	beq+	11f			/* got it */
+12:	cmpw	r6,r0			/* already held by us? */
+	bdnzf	2,10b			/* spin unless so or timed out */
+	tw	31,31,31		/* deadlock: trap */
+11:	eieio
+#endif
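
The block above is a spinlock with a deadlock timeout: the CPU's processor
number, ORed with high marker bits so the token is never zero, is the lock
token; the CTR gives a timeout of roughly 10^8 spins; and the trap
instruction fires if the lock never comes free or this CPU already holds
it.  In C the logic is roughly the following (a sketch; lock_hash_table is
an illustrative name and the GCC __sync builtin stands in for the
lwarx/stwcx. pair):

	#include <stdint.h>

	static void lock_hash_table(volatile uint32_t *lock, uint32_t token)
	{
		long tries = 100000000;		/* deadlock timeout */

		while (tries-- > 0) {
			uint32_t old = *lock;
			if (old == token)	/* we hold it already */
				break;
			if (old == 0 &&
			    __sync_bool_compare_and_swap(lock, 0, token))
				return;		/* lock acquired */
		}
		__builtin_trap();		/* equivalent of tw 31,31,31 */
	}
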
+ /* Get PTE (linux-style) and check access */
+ mfspr r2,SPRG3 /* current task's THREAD (phys) */
+ lwz r5,PGDIR(r2) /* virt page-table root */
+ tophys(r5,r5) /* convert to phys addr */
+ rlwimi r5,r3,12,20,29 /* insert top 10 bits of address */
+ lwz r5,0(r5) /* get pmd entry */
+ rlwinm. r5,r5,0,0,19 /* extract address of pte page */
+#ifdef __SMP__
+ beq- hash_page_out /* return if no mapping */
+#else
+	/* XXX it seems that the 601 will give a machine fault on the
+	   rfi if its alignment is wrong (bottom 4 bits of the address
+	   are 8 or 0xc) and there has been a not-taken conditional
+	   branch to the address following the rfi. */
+ beqlr-
+#endif
+ tophys(r2,r5)
+ rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */
+ lwz r6,0(r2) /* get linux-style pte */
+ ori r4,r4,1 /* set _PAGE_PRESENT bit in access */
+ andc. r0,r4,r6 /* check access & ~permission */
+#ifdef __SMP__
+ bne- hash_page_out /* return if access not permitted */
+#else
+ bnelr-
+#endif
+
+ ori r6,r6,0x100 /* set _PAGE_ACCESSED in pte */
+ rlwinm r5,r4,5,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */
+ rlwimi r5,r4,7,22,22 /* _PAGE_RW -> _PAGE_HWWRITE */
+ or r6,r6,r5
+ stw r6,0(r2) /* update PTE (accessed/dirty bits) */
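
In C, the two-level walk and permission check above amount to roughly the
following.  It is a sketch: it ignores the phys/virt conversions done by
tophys, the names are illustrative, and the flag values are the ones this
file's bit manipulations imply.

	#include <stdint.h>

	#define _PAGE_PRESENT	0x001
	#define _PAGE_USER	0x002
	#define _PAGE_RW	0x004
	#define _PAGE_DIRTY	0x080
	#define _PAGE_ACCESSED	0x100
	#define _PAGE_HWWRITE	0x200

	/* Returns the updated Linux PTE, or 0 if there is no mapping or
	 * the access is not permitted (the asm returns to its caller in
	 * those cases). */
	static uint32_t check_and_update(uint32_t *pgdir, uint32_t ea,
					 uint32_t access)
	{
		uint32_t pmd = pgdir[ea >> 22];		/* top 10 bits */
		uint32_t *ptep, pte;

		if (!(pmd & 0xfffff000))
			return 0;			/* no pte page */
		ptep = (uint32_t *)(uintptr_t)(pmd & 0xfffff000)
			+ ((ea >> 12) & 0x3ff);		/* next 10 bits */
		pte = *ptep;
		access |= _PAGE_PRESENT;
		if (access & ~pte)
			return 0;			/* not permitted */
		pte |= _PAGE_ACCESSED;
		if (access & _PAGE_RW)
			pte |= _PAGE_DIRTY | _PAGE_HWWRITE;
		*ptep = pte;				/* R/C bits updated */
		return pte;
	}
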
+
+ /* Convert linux-style PTE to low word of PPC-style PTE */
+#ifdef CONFIG_PPC64
+ /* clear the high 32 bits just in case */
+ clrldi r6,r6,32
+ clrldi r4,r4,32
+#endif /* CONFIG_PPC64 */
+ rlwinm r4,r6,32-9,31,31 /* _PAGE_HWWRITE -> PP lsb */
+ rlwimi r6,r6,32-1,31,31 /* _PAGE_USER -> PP (both bits now) */
+ ori r4,r4,0xe04 /* clear out reserved bits */
+ andc r6,r6,r4 /* PP=2 or 0, when _PAGE_HWWRITE */
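
The Linux PTE layout is chosen so that the RPN, the R/C bits and the WIMG
bits already sit where the hardware PTE wants them; only the PP field and
the reserved bits need fixing, which is what the four instructions above
do.  In C (a sketch, with the same assumed flag values as the previous
example):

	/* Low word of the hash PTE from a Linux PTE.  PP becomes 0
	 * (kernel) or 2 (user) when the page is hardware-writable, and
	 * 3 for a user page that is not. */
	static uint32_t ppc_pte_lo(uint32_t pte)
	{
		uint32_t clear = (pte >> 9) & 1;  /* _PAGE_HWWRITE -> PP lsb */

		pte = (pte & ~1u) | ((pte >> 1) & 1);	/* _PAGE_USER -> PP */
		clear |= 0xe04;			/* reserved PTE bits */
		return pte & ~clear;
	}
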
+
+ /* Construct the high word of the PPC-style PTE */
+ mfsrin r5,r3 /* get segment reg for segment */
+#ifdef CONFIG_PPC64
+ sldi r5,r5,12
+#else /* CONFIG_PPC64 */
+ rlwinm r5,r5,7,1,24 /* put VSID in 0x7fffff80 bits */
+#endif /* CONFIG_PPC64 */
+
+#ifndef __SMP__ /* do this later for SMP */
+#ifdef CONFIG_PPC64
+ ori r5,r5,1 /* set V (valid) bit */
+#else /* CONFIG_PPC64 */
+ oris r5,r5,0x8000 /* set V (valid) bit */
+#endif /* CONFIG_PPC64 */
+#endif
+
+#ifdef CONFIG_PPC64
+/* XXX: does this insert the api correctly? -- Cort */
+ rlwimi r5,r3,17,21,25 /* put in API (abbrev page index) */
+#else /* CONFIG_PPC64 */
+ rlwimi r5,r3,10,26,31 /* put in API (abbrev page index) */
+#endif /* CONFIG_PPC64 */
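
For the 32-bit case, the high word assembled in r5 therefore looks like
this (a sketch; ppc_pte_hi is an illustrative name, and on SMP the V bit
is only set when the entry is published):

	/* High word of the hash PTE: V | VSID | H | API, where the API
	 * is the top 6 bits of the 16-bit page index. */
	static uint32_t ppc_pte_hi(uint32_t vsid, uint32_t ea, int secondary)
	{
		return 0x80000000u		/* V (valid) */
		     | ((vsid & 0xffffff) << 7)
		     | (secondary ? 0x40 : 0)	/* H */
		     | ((ea >> 22) & 0x3f);	/* API */
	}
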
+ /* Get the address of the primary PTE group in the hash table */
+ .globl hash_page_patch_A
+hash_page_patch_A:
+ lis r4,Hash_base@h /* base address of hash table */
+#ifdef CONFIG_PPC64
+ /* just in case */
+ clrldi r4,r4,32
+#endif
+ rlwimi r4,r5,32-1,26-Hash_bits,25 /* (VSID & hash_mask) << 6 */
+ rlwinm r0,r3,32-6,26-Hash_bits,25 /* (PI & hash_mask) << 6 */
+ xor r4,r4,r0 /* make primary hash */
+
+ /* See whether it was a PTE not found exception or a
+ protection violation. */
+ andis. r0,r20,0x4000
+ li r2,8 /* PTEs/group */
+ bne 10f /* no PTE: go look for an empty slot */
+ tlbie r3 /* invalidate TLB entry */
+
+ /* Search the primary PTEG for a PTE whose 1st word matches r5 */
+ mtctr r2
+ addi r3,r4,-8
+1: lwzu r0,8(r3) /* get next PTE */
+ cmp 0,r0,r5
+ bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
+ beq+ found_slot
+
+ /* Search the secondary PTEG for a matching PTE */
+ ori r5,r5,0x40 /* set H (secondary hash) bit */
+ .globl hash_page_patch_B
+hash_page_patch_B:
+ xoris r3,r4,Hash_msk>>16 /* compute secondary hash */
+ xori r3,r3,0xffc0
+ addi r3,r3,-8
+ mtctr r2
+2: lwzu r0,8(r3)
+ cmp 0,r0,r5
+ bdnzf 2,2b
+ beq+ found_slot
+ xori r5,r5,0x40 /* clear H bit again */
+
+ /* Search the primary PTEG for an empty slot */
+10: mtctr r2
+ addi r3,r4,-8 /* search primary PTEG */
+1: lwzu r0,8(r3) /* get next PTE */
+ srwi. r0,r0,31 /* only want to check valid bit */
+ bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
+ beq+ found_empty
+
+ /* Search the secondary PTEG for an empty slot */
+ ori r5,r5,0x40 /* set H (secondary hash) bit */
+ .globl hash_page_patch_C
+hash_page_patch_C:
+ xoris r3,r4,Hash_msk>>16 /* compute secondary hash */
+ xori r3,r3,0xffc0
+ addi r3,r3,-8
+ mtctr r2
+2: lwzu r0,8(r3)
+ srwi. r0,r0,31 /* only want to check valid bit */
+ bdnzf 2,2b
+ beq+ found_empty
+
+	/*
+	 * Choose an arbitrary slot in the primary PTEG to overwrite.
+	 * Both the primary and secondary PTEGs are full, and we have
+	 * no reason to believe that the PTEs in the primary PTEG are
+	 * more important or useful than those in the secondary PTEG.
+	 * Since there is a definite (although small) speed advantage
+	 * to putting the PTE in the primary PTEG, we always do that.
+	 */
+ xori r5,r5,0x40 /* clear H bit again */
+ lwz r2,next_slot@l(0)
+ addi r2,r2,8
+ andi. r2,r2,0x38
+ stw r2,next_slot@l(0)
+ add r3,r4,r2
+11:
+ /* update counter of evicted pages */
+ lis r2,htab_evicts@h
+ ori r2,r2,htab_evicts@l
+ tophys(r2,r2)
+ lwz r4,0(r2)
+ addi r4,r4,1
+ stw r4,0(r2)
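
The victim choice above simply rotates through the eight slots of the
primary PTEG and bumps the eviction counter.  In C (a sketch; pick_victim
is an illustrative name):

	static uint32_t next_slot;		/* cycles 8,16,...,56,0 */
	static unsigned long htab_evicts;	/* eviction statistic */

	/* Pick the next slot of the primary PTEG to overwrite. */
	static uint32_t *pick_victim(uint32_t *pteg)
	{
		next_slot = (next_slot + 8) & 0x38;	/* byte offset */
		htab_evicts++;
		return pteg + next_slot / 4;
	}
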
+
+#ifndef __SMP__
+ /* Store PTE in PTEG */
+found_empty:
+ stw r5,0(r3)
+found_slot:
+ stw r6,4(r3)
+ sync
+
+#else /* __SMP__ */
+/*
+ * Between the tlbie above and updating the hash table entry below,
+ * another CPU could read the hash table entry and put it in its TLB.
+ * There are 3 cases:
+ * 1. using an empty slot
+ * 2. updating an earlier entry to change permissions (i.e. enable write)
+ * 3. taking over the PTE for an unrelated address
+ *
+ * In each case it doesn't really matter if the other CPUs have the old
+ * PTE in their TLB. So we don't need to bother with another tlbie here,
+ * which is convenient as we've overwritten the register that had the
+ * address. :-) The tlbie above is mainly to make sure that this CPU comes
+ * and gets the new PTE from the hash table.
+ *
+ * We do however have to make sure that the PTE is never in an invalid
+ * state with the V bit set.
+ */
+found_empty:
+found_slot:
+ stw r5,0(r3) /* clear V (valid) bit in PTE */
+ sync
+ tlbsync
+ sync
+ stw r6,4(r3) /* put in correct RPN, WIMG, PP bits */
+ sync
+ oris r5,r5,0x8000
+ stw r5,0(r3) /* finally set V bit in PTE */
+#endif /* __SMP__ */
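
Concretely, the ordering used above never lets another CPU see V=1 while
the PTE body is stale (a PowerPC-specific sketch; pte_update is an
illustrative name and the macros wrap the real barrier instructions):

	#include <stdint.h>

	#define SYNC()		asm volatile("sync" : : : "memory")
	#define TLBSYNC()	asm volatile("tlbsync" : : : "memory")
	#define PTE_V		0x80000000u

	static void pte_update(volatile uint32_t *slot,
			       uint32_t hi, uint32_t lo)
	{
		slot[0] = hi & ~PTE_V;	/* invalid while we edit */
		SYNC();
		TLBSYNC();		/* drain other CPUs' TLB loads */
		SYNC();
		slot[1] = lo;		/* RPN, WIMG, PP bits */
		SYNC();
		slot[0] = hi | PTE_V;	/* publish the entry */
	}
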
+
+/*
+ * Update the hash table miss (reload) count.  We only want misses
+ * here that are for valid addresses and have a PTE; otherwise we
+ * don't count them as reloads.  do_page_fault() takes care of bad
+ * addresses and of entries that need Linux-style PTEs created.
+ *
+ * It is safe to use r2 here since we're not using it as current yet.
+ * -- Cort
+ */
+ lis r2,htab_reloads@h
+ ori r2,r2,htab_reloads@l
+ tophys(r2,r2)
+ lwz r3,0(r2)
+ addi r3,r3,1
+ stw r3,0(r2)
+
+#ifdef __SMP__
+ lis r2,hash_table_lock@ha
+ tophys(r2,r2)
+ li r0,0
+ stw r0,hash_table_lock@l(r2)
+ eieio
+#endif
+
+ /* Return from the exception */
+ lwz r3,_CCR(r21)
+ lwz r4,_LINK(r21)
+ lwz r5,_CTR(r21)
+ mtcrf 0xff,r3
+ mtlr r4
+ mtctr r5
+ lwz r0,GPR0(r21)
+ lwz r1,GPR1(r21)
+ lwz r2,GPR2(r21)
+ lwz r3,GPR3(r21)
+ lwz r4,GPR4(r21)
+ lwz r5,GPR5(r21)
+ lwz r6,GPR6(r21)
+ /* we haven't used xer */
+ mtspr SRR1,r23
+ mtspr SRR0,r22
+ lwz r20,GPR20(r21)
+ lwz r22,GPR22(r21)
+ lwz r23,GPR23(r21)
+ lwz r21,GPR21(r21)
+ rfi
+
+#ifdef __SMP__
+hash_page_out:
+ lis r2,hash_table_lock@ha
+ tophys(r2,r2)
+ li r0,0
+ stw r0,hash_table_lock@l(r2)
+ eieio
+ blr
+
+ .globl hash_table_lock
+hash_table_lock:
+ .long 0
+#endif
+
+next_slot:
+ .long 0
+
+/*
+ * Flush entries from the hash table with VSIDs in the range
+ * given.
+ */
+_GLOBAL(flush_hash_segments)
+ lis r5,Hash@ha
+ lwz r5,Hash@l(r5) /* base of hash table */
+ cmpwi 0,r5,0
+ bne+ 99f
+ tlbia
+ sync
+#ifdef __SMP__
+ tlbsync
+ sync
+#endif
+ blr
+99:
+#ifdef __SMP__
+ /* Note - we had better not do anything which could generate
+ a hash table miss while we have the hash table locked,
+ or we'll get a deadlock. -paulus */
+ mfmsr r10
+ sync
+ rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
+ mtmsr r0
+ SYNC
+ lis r9,hash_table_lock@h
+ ori r9,r9,hash_table_lock@l
+ lwz r8,PROCESSOR(r2)
+ oris r8,r8,8
+10: lwarx r6,0,r9
+ cmpi 0,r6,0
+ bne- 10b
+ stwcx. r8,0,r9
+ bne- 10b
+ eieio
+#endif
+ rlwinm r3,r3,7,1,24 /* put VSID lower limit in position */
+ oris r3,r3,0x8000 /* set V bit */
+ rlwinm r4,r4,7,1,24 /* put VSID upper limit in position */
+ oris r4,r4,0x8000
+ ori r4,r4,0x7f
+ lis r6,Hash_size@ha
+ lwz r6,Hash_size@l(r6) /* size in bytes */
+ srwi r6,r6,3 /* # PTEs */
+ mtctr r6
+ addi r5,r5,-8
+ li r0,0
+1: lwzu r6,8(r5) /* get next tag word */
+ cmplw 0,r6,r3
+ cmplw 1,r6,r4
+ cror 0,0,5 /* set cr0.lt if out of range */
+ blt 2f /* branch if out of range */
+ stw r0,0(r5) /* invalidate entry */
+2: bdnz 1b /* continue with loop */
+ sync
+ tlbia
+ sync
+#ifdef __SMP__
+ tlbsync
+ sync
+ lis r3,hash_table_lock@ha
+ stw r0,hash_table_lock@l(r3)
+ mtmsr r10
+ SYNC
+#endif
+ blr
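
The two rlwinm/oris pairs turn the VSID range into a single pair of
unsigned comparisons against each PTE tag word; invalid entries (V=0)
always compare below the lower bound and are skipped for free.  In C
(a sketch; should_flush is an illustrative name):

	#include <stdint.h>

	/* A PTE is zapped iff it is valid and its VSID lies in
	 * [vsid_lo, vsid_hi]; the trailing 0x7f makes the upper bound
	 * cover any H and API bits. */
	static int should_flush(uint32_t tag, uint32_t vsid_lo,
				uint32_t vsid_hi)
	{
		uint32_t lo = 0x80000000u | (vsid_lo << 7);
		uint32_t hi = 0x80000000u | (vsid_hi << 7) | 0x7f;

		return tag >= lo && tag <= hi;
	}
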
+
+/*
+ * Flush the entry for a particular page from the hash table.
+ *
+ * flush_hash_page(unsigned context, unsigned long va)
+ */
+_GLOBAL(flush_hash_page)
+ lis r6,Hash@ha
+ lwz r6,Hash@l(r6) /* hash table base */
+ cmpwi 0,r6,0 /* hash table in use? */
+ bne+ 99f
+ tlbie r4 /* in hw tlb too */
+ sync
+#ifdef __SMP__
+ tlbsync
+ sync
+#endif
+ blr
+99:
+#ifdef __SMP__
+ /* Note - we had better not do anything which could generate
+ a hash table miss while we have the hash table locked,
+ or we'll get a deadlock. -paulus */
+ mfmsr r10
+ sync
+ rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
+ mtmsr r0
+ SYNC
+ lis r9,hash_table_lock@h
+ ori r9,r9,hash_table_lock@l
+ lwz r8,PROCESSOR(r2)
+ oris r8,r8,9
+10: lwarx r7,0,r9
+ cmpi 0,r7,0
+ bne- 10b
+ stwcx. r8,0,r9
+ bne- 10b
+ eieio
+#endif
+ rlwinm r3,r3,11,1,20 /* put context into vsid */
+ rlwimi r3,r4,11,21,24 /* put top 4 bits of va into vsid */
+ oris r3,r3,0x8000 /* set V (valid) bit */
+ rlwimi r3,r4,10,26,31 /* put in API (abbrev page index) */
+ rlwinm r7,r4,32-6,10,25 /* get page index << 6 */
+ rlwinm r5,r3,32-1,7,25 /* vsid << 6 */
+ xor r7,r7,r5 /* primary hash << 6 */
+ lis r5,Hash_mask@ha
+ lwz r5,Hash_mask@l(r5) /* hash mask */
+ slwi r5,r5,6 /* << 6 */
+ and r7,r7,r5
+ add r6,r6,r7 /* address of primary PTEG */
+ li r8,8
+ mtctr r8
+ addi r7,r6,-8
+1: lwzu r0,8(r7) /* get next PTE */
+ cmpw 0,r0,r3 /* see if tag matches */
+ bdnzf 2,1b /* while --ctr != 0 && !cr0.eq */
+ beq 3f /* if we found it */
+ ori r3,r3,0x40 /* set H (alt. hash) bit */
+ xor r6,r6,r5 /* address of secondary PTEG */
+ mtctr r8
+ addi r7,r6,-8
+2: lwzu r0,8(r7) /* get next PTE */
+ cmpw 0,r0,r3 /* see if tag matches */
+ bdnzf 2,2b /* while --ctr != 0 && !cr0.eq */
+ bne 4f /* if we didn't find it */
+3: li r0,0
+ stw r0,0(r7) /* invalidate entry */
+4: sync
+ tlbie r4 /* in hw tlb too */
+ sync
+#ifdef __SMP__
+ tlbsync
+ sync
+ lis r3,hash_table_lock@h
+ li r0,0
+ stw r0,hash_table_lock@l(r3)
+ mtmsr r10
+ SYNC
+#endif
+ blr
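
End to end, the search done by flush_hash_page can be modelled in C like
this (a sketch under the same assumptions as the earlier examples;
flush_one is an illustrative name and the tlbie/sync tail is omitted):

	#include <stdint.h>

	#define HASH_BASE 0x180000u	/* stand-ins for the boot-time   */
	#define HASH_BITS 12		/* patched Hash/Hash_mask values */

	static int flush_one(uint32_t context, uint32_t va)
	{
		uint32_t vsid = ((context << 4) | (va >> 28)) & 0xffffff;
		uint32_t tag  = 0x80000000u | (vsid << 7) | ((va >> 22) & 0x3f);
		uint32_t mask = (1u << HASH_BITS) - 1;
		uint32_t hash = (vsid ^ (va >> 12)) & mask;
		int h, i;

		for (h = 0; h < 2; h++, hash = ~hash & mask) {
			uint32_t *pteg = (uint32_t *)(uintptr_t)
				(HASH_BASE + (hash << 6));
			for (i = 0; i < 8; i++)
				if (pteg[2 * i] == (tag | (h ? 0x40 : 0))) {
					pteg[2 * i] = 0;  /* clear V */
					return 1;  /* caller does tlbie */
				}
		}
		return 0;
	}
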