Date:      Sat, 15 Sep 2012 14:27:23 -0500
From:      Alan Cox <alc@rice.edu>
To:        "arm@freebsd.org" <arm@freebsd.org>
Cc:        Alan Cox <alc@rice.edu>
Subject:   Re: arm pmap locking
Message-ID:  <5054D69B.40209@rice.edu>
In-Reply-To: <20120915045040.GZ58312@funkthat.com>
References:  <502FD67A.7030003@rice.edu> <1345315508.27688.260.camel@revolution.hippie.lan> <503D12AE.1050705@rice.edu> <1346350374.1140.525.camel@revolution.hippie.lan> <5045351F.6060201@rice.edu> <1346723041.1140.602.camel@revolution.hippie.lan> <504B85BE.3030101@rice.edu> <1347316458.1137.41.camel@revolution.hippie.lan> <504F8BAC.4040902@rice.edu> <20120915045040.GZ58312@funkthat.com>

On 09/14/2012 23:50, John-Mark Gurney wrote:
> Alan Cox wrote this message on Tue, Sep 11, 2012 at 14:06 -0500:
>> On 09/10/2012 17:34, Ian Lepore wrote:
>>> On Sat, 2012-09-08 at 12:51 -0500, Alan Cox wrote:
>>>> Here is another patch.  This simplifies the kernel pmap locking in
>>>> pmap_enter_pv() and corrects some comments.
>>>>
>>>> Thanks in advance,
>>>> Alan
>>>>
>>> I'm afraid I'm not going to be able to do this any time soon.  I bricked
>>> my DreamPlug last Friday (the bright side: the nandfs_newfs command in
>>> -current apparently works just fine when applied to real hardware rather
>>> than the simulator device).  I haven't had any success in getting
>>> openocd to transfer a new uboot image to it.  So I'm probably going to
>>> be without arm hardware that can run anything newer than 8.2 for a few
>>> weeks (until my new atmel eval board arrives).
>>>
>> Thanks for letting me know.
>>
>> Could someone else here please test the attached patch?
> I figure since you've been looking at arm's pmap, that I should report
> to you a LOR that I observed recently on my armv6 board:
> lock order reversal:
>   1st 0xc1cf70b0 pmap (pmap) @ /usr/src.HEAD/sys/arm/arm/pmap-v6.c:673
>   2nd 0xc091e608 PV ENTRY (UMA zone) @ /usr/src.HEAD/sys/vm/uma_core.c:2084
> KDB: stack backtrace:
> db_trace_self() at db_trace_self+0xc
> scp=0xc05a294c rlv=0xc025b298 (X_db_sym_numargs+0x1bc)
>          rsp=0xc9d1a8fc rfp=0xc9d1aa18
> X_db_sym_numargs() at X_db_sym_numargs+0x194
> scp=0xc025b270 rlv=0xc0398340 (kdb_backtrace+0x3c)
>          rsp=0xc9d1aa1c rfp=0xc9d1aa2c
>          r4=0xc06bda44
> kdb_backtrace() at kdb_backtrace+0xc
> scp=0xc0398310 rlv=0xc03ad3f8 (witness_display_spinlock+0x80)
>          rsp=0xc9d1aa30 rfp=0xc9d1aa44
>          r4=0x00000001
> witness_display_spinlock() at witness_display_spinlock+0x5c
> scp=0xc03ad3d4 rlv=0xc03ae6d8 (witness_checkorder+0x884)
>          rsp=0xc9d1aa48 rfp=0xc9d1aa98
>          r5=0xc1cf70b0 r4=0xc06263d4
> witness_checkorder() at witness_checkorder+0xc
> scp=0xc03ade60 rlv=0xc0355b9c (_mtx_lock_flags+0xcc)
>          rsp=0xc9d1aa9c rfp=0xc9d1aabc
>          r10=0xc1cf70b0 r9=0x00000000
>          r8=0xc091d6e0 r7=0xc0620730 r6=0x00000824 r5=0x00000000
>          r4=0xc091e608
> _mtx_lock_flags() at _mtx_lock_flags+0xc
> scp=0xc0355adc rlv=0xc057d290 (uma_zalloc_arg+0x1a8)
>          rsp=0xc9d1aac0 rfp=0xc9d1aafc
>          r7=0xc091d748 r6=0x00000000
>          r5=0xc08fc0b8 r4=0xc0620730
> uma_zalloc_arg() at uma_zalloc_arg+0xc
> scp=0xc057d0f4 rlv=0xc05abb04 (pmap_growkernel+0xf20)
>          rsp=0xc9d1ab00 rfp=0xc9d1ab70
>          r10=0xc1cf70b0 r9=0x00000000
>          r8=0x00000000 r7=0x8122e032 r6=0x00000000 r5=0xc08fc0b8
>          r4=0x00000001
> pmap_growkernel() at pmap_growkernel+0x3fc
> scp=0xc05aafe0 rlv=0xc05ac14c (pmap_enter+0x70)
>          rsp=0xc9d1ab74 rfp=0xc9d1aba0
>          r10=0x00000007 r9=0x00000000
>          r8=0xc09885a8 r7=0xbffff000 r6=0xc08278c0 r5=0xc1cf70b0
>          r4=0xc06261e0
> pmap_enter() at pmap_enter+0xc
> scp=0xc05ac0e8 rlv=0xc057ff44 (vm_fault_hold+0x1638)
>          rsp=0xc9d1aba4 rfp=0xc9d1ad14
>          r10=0xc9d1ade4 r8=0x00000000
>          r7=0x00000002 r6=0x00000000 r5=0xc1cf7000 r4=0xc09885a8
> vm_fault_hold() at vm_fault_hold+0xc
> scp=0xc057e918 rlv=0xc058054c (vm_fault+0x8c)
>          rsp=0xc9d1ad18 rfp=0xc9d1ad3c
>          r10=0xc9d1ade4 r9=0x00000002
>          r8=0x00000000 r7=0x00000002 r6=0xbffff000 r5=0xc1cf7000
>          r4=0xc1cf5000
> vm_fault() at vm_fault+0xc
> scp=0xc05804cc rlv=0xc05b0ac8 (data_abort_handler+0x35c)
>          rsp=0xc9d1ad40 rfp=0xc9d1ade0
>          r8=0xbffff000 r7=0xc1cf5000
>          r6=0x00000000 r5=0xc0626844 r4=0xc1cf3088
> data_abort_handler() at data_abort_handler+0xc
> scp=0xc05b0778 rlv=0xc05a4150 (address_exception_entry+0x50)
>          rsp=0xc9d1ade4 rfp=0xc9d1ae84
>          r10=0xc0682bde r9=0xc1cf3000
>          r8=0xc9d1aeac r7=0xc0682bde r6=0xc0682bd4 r5=0xc05f1648
>          r4=0xbfffffff
> exec_shell_imgact() at exec_shell_imgact+0x75c
> scp=0xc031d348 rlv=0xc033b68c (fork_exit+0x94)
>          rsp=0xc9d1ae88 rfp=0xc9d1aea8
>          r10=0x00000000 r9=0x00000000
>          r8=0xc9d1aeac r7=0x00000000 r6=0xc031d33c r5=0xc1cf3000
>          r4=0xc1cf5000
> fork_exit() at fork_exit+0xc
> scp=0xc033b604 rlv=0xc05afe44 (fork_trampoline+0x14)
>          rsp=0xc9d1aeac rfp=0x00000000
>          r8=0x00000000 r7=0x8ffbf8ef
>          r6=0xf5f7f4a9 r5=0x00000000 r4=0xc031d33c
>
> FreeBSD beaglebone 10.0-CURRENT FreeBSD 10.0-CURRENT #1 r240480: Thu Nov  3 14:57:01 PDT 2011     jmg@pcbsd-779:/usr/obj/arm.armv6/usr/src.HEAD/sys/helium  arm
>
> Do you need any more information?
>

I'm not sure what to make of this stack trace.  In particular,
pmap_enter() does not directly call pmap_growkernel().  In fact, the
only caller of pmap_growkernel() in the entire kernel is
vm_map_insert(), which doesn't appear in this stack trace.  Moreover,
this appears to be a legitimate page fault within the kernel address
space.  (The image activator touches pageable kernel memory.)  However,
such page faults should never need to grow the kernel page table: the
necessary page table pages should have been allocated during
initialization, when the various kernel map submaps were created.  The
bottom line is that I don't have an immediate answer.  I'm going to need
to think about this.  In general, I'm also a bit uncomfortable with the
way that l2zone and l2table_zone are created.
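
For reference, the only call site sits in vm_map_insert() and is guarded
so that only insertions into the kernel map can grow the kernel page
table.  From memory, the check in sys/vm/vm_map.c looks roughly like
this (paraphrased, not a verbatim quote):

	/*
	 * vm_map_insert() grows the kernel page table only when an
	 * insertion into the kernel map extends past its current end;
	 * nothing in the fault path calls it.
	 */
	if (map == kernel_map && end > kernel_vm_end)
		pmap_growkernel(end);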

In the meantime, my next patch is attached.  This applies to both the 
original pmap and the new v6 pmap.  Can some folks here please test 
this?  Here is the description:

Since UMA_ZONE_NOFREE is specified when l2zone and l2table_zone are created,
there is no need to release and reacquire the pmap and pvh global locks
around calls to uma_zfree().  Recursion into the pmap simply won't occur.
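
To spell out why recursion won't occur: UMA only returns slab pages to
the VM (and hence back into the pmap) from its drain path, and
keg_drain() bails out early for _NOFREE kegs.  From memory, the check
in sys/vm/uma_core.c looks roughly like this (a sketch, not a verbatim
quote):

	static void
	keg_drain(uma_keg_t keg)
	{
		/*
		 * A keg created with UMA_ZONE_NOFREE never gives its
		 * pages back to the VM, so uma_zfree() on such a zone
		 * can never lead to a slab page being unmapped and
		 * freed, and thus can never reenter the pmap.
		 */
		if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
			return;
		...
	}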

Eliminate the use of M_USE_RESERVE.  It is deprecated and, in fact, counter-
productive, meaning that it actually makes the memory allocation request
more likely to fail.
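
The counter-productive part deserves a word of explanation.  The kmem
allocator derives the page allocation priority from the malloc flags
roughly as follows (again paraphrased from memory, so treat it as a
sketch rather than a verbatim quote):

	if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT)
		pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED;
	else
		pflags = VM_ALLOC_SYSTEM | VM_ALLOC_WIRED;

M_NOWAIT by itself maps to VM_ALLOC_INTERRUPT, which may dig into the
reserved page pool, while adding M_USE_RESERVE demotes the request to
VM_ALLOC_SYSTEM.  In other words, asking for the reserve gives the
request less access to reserve pages, not more.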

Eliminate the macros pmap_{alloc,free}_l2_dtable().  They are of limited
utility, and pmap_free_l2_dtable() was inconsistently used.

Tidy up pmap_init().  In particular, change the initialization of the PV
zone so that it is no longer interleaved with the creation of the l2 and
l2table zones.



Alan


[Attachment: arm_pmap15.patch]

Index: arm/arm/pmap-v6.c
===================================================================
--- arm/arm/pmap-v6.c	(revision 240321)
+++ arm/arm/pmap-v6.c	(working copy)
@@ -357,14 +357,6 @@ struct l2_dtable {
 #define	L2_NEXT_BUCKET(va)	(((va) & L1_S_FRAME) + L1_S_SIZE)
 
 /*
- * L2 allocation.
- */
-#define	pmap_alloc_l2_dtable()		\
-		(void*)uma_zalloc(l2table_zone, M_NOWAIT|M_USE_RESERVE)
-#define	pmap_free_l2_dtable(l2)		\
-		uma_zfree(l2table_zone, l2)
-
-/*
  * We try to map the page tables write-through, if possible.  However, not
  * all CPUs have a write-through cache mode, so on those we have to sync
  * the cache when we frob page tables.
@@ -621,10 +613,9 @@ pmap_alloc_l2_bucket(pmap_t pm, vm_offset_t va)
 		 * no entry in the L1 table.
 		 * Need to allocate a new l2_dtable.
 		 */
-again_l2table:
 		PMAP_UNLOCK(pm);
 		rw_wunlock(&pvh_global_lock);
-		if ((l2 = pmap_alloc_l2_dtable()) == NULL) {
+		if ((l2 = uma_zalloc(l2table_zone, M_NOWAIT)) == NULL) {
 			rw_wlock(&pvh_global_lock);
 			PMAP_LOCK(pm);
 			return (NULL);
@@ -632,18 +623,12 @@ pmap_alloc_l2_bucket(pmap_t pm, vm_offset_t va)
 		rw_wlock(&pvh_global_lock);
 		PMAP_LOCK(pm);
 		if (pm->pm_l2[L2_IDX(l1idx)] != NULL) {
-			PMAP_UNLOCK(pm);
-			rw_wunlock(&pvh_global_lock);
-			uma_zfree(l2table_zone, l2);
-			rw_wlock(&pvh_global_lock);
-			PMAP_LOCK(pm);
-			l2 = pm->pm_l2[L2_IDX(l1idx)];
-			if (l2 == NULL)
-				goto again_l2table;
 			/*
 			 * Someone already allocated the l2_dtable while
 			 * we were doing the same.
 			 */
+			uma_zfree(l2table_zone, l2);
+			l2 = pm->pm_l2[L2_IDX(l1idx)];
 		} else {
 			bzero(l2, sizeof(*l2));
 			/*
@@ -665,21 +650,14 @@ pmap_alloc_l2_bucket(pmap_t pm, vm_offset_t va)
 		 * No L2 page table has been allocated. Chances are, this
 		 * is because we just allocated the l2_dtable, above.
 		 */
-again_ptep:
 		PMAP_UNLOCK(pm);
 		rw_wunlock(&pvh_global_lock);
-		ptep = (void*)uma_zalloc(l2zone, M_NOWAIT|M_USE_RESERVE);
+		ptep = uma_zalloc(l2zone, M_NOWAIT);
 		rw_wlock(&pvh_global_lock);
 		PMAP_LOCK(pm);
 		if (l2b->l2b_kva != 0) {
 			/* We lost the race. */
-			PMAP_UNLOCK(pm);
-			rw_wunlock(&pvh_global_lock);
 			uma_zfree(l2zone, ptep);
-			rw_wlock(&pvh_global_lock);
-			PMAP_LOCK(pm);
-			if (l2b->l2b_kva == 0)
-				goto again_ptep;
 			return (l2b);
 		}
 		l2b->l2b_phys = vtophys(ptep);
@@ -691,7 +669,7 @@ pmap_alloc_l2_bucket(pmap_t pm, vm_offset_t va)
 			 */
 			if (l2->l2_occupancy == 0) {
 				pm->pm_l2[L2_IDX(l1idx)] = NULL;
-				pmap_free_l2_dtable(l2);
+				uma_zfree(l2table_zone, l2);
 			}
 			return (NULL);
 		}
@@ -789,7 +767,7 @@ pmap_free_l2_bucket(pmap_t pm, struct l2_bucket *l
 	 * the pointer in the parent pmap and free the l2_dtable.
 	 */
 	pm->pm_l2[L2_IDX(l1idx)] = NULL;
-	pmap_free_l2_dtable(l2);
+	uma_zfree(l2table_zone, l2);
 }
 
 /*
@@ -1175,28 +1153,25 @@ pmap_init(void)
 
 	PDEBUG(1, printf("pmap_init: phys_start = %08x\n", PHYSADDR));
 
+	l2zone = uma_zcreate("L2 Table", L2_TABLE_SIZE_REAL, pmap_l2ptp_ctor,
+	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
+	l2table_zone = uma_zcreate("L2 Table", sizeof(struct l2_dtable), NULL,
+	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
+
 	/*
-	 * init the pv free list
+	 * Initialize the PV entry allocator.
 	 */
 	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
+	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
+	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
+	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
+	pv_entry_high_water = 9 * (pv_entry_max / 10);
+
 	/*
 	 * Now it is safe to enable pv_table recording.
 	 */
 	PDEBUG(1, printf("pmap_init: done!\n"));
-
-	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
-
-	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
-	pv_entry_high_water = 9 * (pv_entry_max / 10);
-	l2zone = uma_zcreate("L2 Table", L2_TABLE_SIZE_REAL, pmap_l2ptp_ctor,
-	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
-	l2table_zone = uma_zcreate("L2 Table", sizeof(struct l2_dtable),
-	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
-	    UMA_ZONE_VM | UMA_ZONE_NOFREE);
-
-	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
-
 }
 
 int
@@ -2544,7 +2519,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t
 }
 
 /*
- *	The page queues and pmap must be locked.
+ *	The pvh global and pmap locks must be held.
  */
 static void
 pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
Index: arm/arm/pmap.c
===================================================================
--- arm/arm/pmap.c	(revision 240442)
+++ arm/arm/pmap.c	(working copy)
@@ -366,14 +366,6 @@ struct l2_dtable {
 #define	L2_NEXT_BUCKET(va)	(((va) & L1_S_FRAME) + L1_S_SIZE)
 
 /*
- * L2 allocation.
- */
-#define	pmap_alloc_l2_dtable()		\
-		(void*)uma_zalloc(l2table_zone, M_NOWAIT|M_USE_RESERVE)
-#define	pmap_free_l2_dtable(l2)		\
-		uma_zfree(l2table_zone, l2)
-
-/*
  * We try to map the page tables write-through, if possible.  However, not
  * all CPUs have a write-through cache mode, so on those we have to sync
  * the cache when we frob page tables.
@@ -875,10 +867,9 @@ pmap_alloc_l2_bucket(pmap_t pm, vm_offset_t va)
 		 * no entry in the L1 table.
 		 * Need to allocate a new l2_dtable.
 		 */
-again_l2table:
 		PMAP_UNLOCK(pm);
 		rw_wunlock(&pvh_global_lock);
-		if ((l2 = pmap_alloc_l2_dtable()) == NULL) {
+		if ((l2 = uma_zalloc(l2table_zone, M_NOWAIT)) == NULL) {
 			rw_wlock(&pvh_global_lock);
 			PMAP_LOCK(pm);
 			return (NULL);
@@ -886,18 +877,12 @@ pmap_alloc_l2_bucket(pmap_t pm, vm_offset_t va)
 		rw_wlock(&pvh_global_lock);
 		PMAP_LOCK(pm);
 		if (pm->pm_l2[L2_IDX(l1idx)] != NULL) {
-			PMAP_UNLOCK(pm);
-			rw_wunlock(&pvh_global_lock);
-			uma_zfree(l2table_zone, l2);
-			rw_wlock(&pvh_global_lock);
-			PMAP_LOCK(pm);
-			l2 = pm->pm_l2[L2_IDX(l1idx)];
-			if (l2 == NULL)
-				goto again_l2table;
 			/*
 			 * Someone already allocated the l2_dtable while
 			 * we were doing the same.
 			 */
+			uma_zfree(l2table_zone, l2);
+			l2 = pm->pm_l2[L2_IDX(l1idx)];
 		} else {
 			bzero(l2, sizeof(*l2));
 			/*
@@ -919,21 +904,14 @@ pmap_alloc_l2_bucket(pmap_t pm, vm_offset_t va)
 		 * No L2 page table has been allocated. Chances are, this
 		 * is because we just allocated the l2_dtable, above.
 		 */
-again_ptep:
 		PMAP_UNLOCK(pm);
 		rw_wunlock(&pvh_global_lock);
-		ptep = (void*)uma_zalloc(l2zone, M_NOWAIT|M_USE_RESERVE);
+		ptep = uma_zalloc(l2zone, M_NOWAIT);
 		rw_wlock(&pvh_global_lock);
 		PMAP_LOCK(pm);
 		if (l2b->l2b_kva != 0) {
 			/* We lost the race. */
-			PMAP_UNLOCK(pm);
-			rw_wunlock(&pvh_global_lock);
 			uma_zfree(l2zone, ptep);
-			rw_wlock(&pvh_global_lock);
-			PMAP_LOCK(pm);
-			if (l2b->l2b_kva == 0)
-				goto again_ptep;
 			return (l2b);
 		}
 		l2b->l2b_phys = vtophys(ptep);
@@ -945,7 +923,7 @@ pmap_alloc_l2_bucket(pmap_t pm, vm_offset_t va)
 			 */
 			if (l2->l2_occupancy == 0) {
 				pm->pm_l2[L2_IDX(l1idx)] = NULL;
-				pmap_free_l2_dtable(l2);
+				uma_zfree(l2table_zone, l2);
 			}
 			return (NULL);
 		}
@@ -1066,7 +1044,7 @@ pmap_free_l2_bucket(pmap_t pm, struct l2_bucket *l
 	 * the pointer in the parent pmap and free the l2_dtable.
 	 */
 	pm->pm_l2[L2_IDX(l1idx)] = NULL;
-	pmap_free_l2_dtable(l2);
+	uma_zfree(l2table_zone, l2);
 }
 
 /*
@@ -1834,28 +1812,25 @@ pmap_init(void)
 
 	PDEBUG(1, printf("pmap_init: phys_start = %08x\n", PHYSADDR));
 
+	l2zone = uma_zcreate("L2 Table", L2_TABLE_SIZE_REAL, pmap_l2ptp_ctor,
+	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
+	l2table_zone = uma_zcreate("L2 Table", sizeof(struct l2_dtable), NULL,
+	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
+
 	/*
-	 * init the pv free list
+	 * Initialize the PV entry allocator.
 	 */
 	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
+	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
+	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
+	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
+	pv_entry_high_water = 9 * (pv_entry_max / 10);
+
 	/*
 	 * Now it is safe to enable pv_table recording.
 	 */
 	PDEBUG(1, printf("pmap_init: done!\n"));
-
-	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
-	
-	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
-	pv_entry_high_water = 9 * (pv_entry_max / 10);
-	l2zone = uma_zcreate("L2 Table", L2_TABLE_SIZE_REAL, pmap_l2ptp_ctor,
-	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
-	l2table_zone = uma_zcreate("L2 Table", sizeof(struct l2_dtable),
-	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
-	    UMA_ZONE_VM | UMA_ZONE_NOFREE);
-
-	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
-
 }
 
 int
@@ -3302,7 +3277,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t
 }
 
 /*
- *	The page queues and pmap must be locked.
+ *	The pvh global and pmap locks must be held.
  */
 static void
 pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
