Date:      Sun, 14 Jun 2009 19:51:43 +0000 (UTC)
From:      Alan Cox <alc@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r194209 - in head/sys: amd64/amd64 i386/i386 vm
Message-ID:  <200906141951.n5EJphEV058615@svn.freebsd.org>

Author: alc
Date: Sun Jun 14 19:51:43 2009
New Revision: 194209
URL: http://svn.freebsd.org/changeset/base/194209

Log:
  Long, long ago in r27464 special case code for mapping device-backed
  memory with 4MB pages was added to pmap_object_init_pt().  This code
  assumes that the pages of an OBJT_DEVICE object are always physically
  contiguous.  Unfortunately, this is not always the case.  For example,
  jhb@ informs me that the recently introduced /dev/ksyms driver creates
  an OBJT_DEVICE object that violates this assumption.  Thus, this
  revision modifies pmap_object_init_pt() to abort the mapping if the
  OBJT_DEVICE object's pages are not physically contiguous.  This
  revision also changes some inconsistent, if not buggy, behavior.  For
  example, the i386 version aborts if the first 4MB virtual page that
  would be mapped is already valid.  However, it incorrectly replaces
  any subsequent 4MB virtual page mappings that it encounters,
  potentially leaking a page table page.  The amd64 version has a bug of
  my own creation.  It potentially busies the wrong page, and it always
  busies an insufficient number of pages, if it blocks allocating a page
  table page.
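
  (Editorial note: a minimal sketch of the contiguity test described above,
  condensed from the loops added in the diffs below.  The helper name
  pages_are_contiguous() is hypothetical; the object is assumed to be
  locked and fully populated.)

	/*
	 * Sketch only: return TRUE if the object's pages backing the range
	 * [pindex, pindex + atop(size)) are physically contiguous and the
	 * first page is aligned on a large page (NBPDR) boundary.
	 */
	static boolean_t
	pages_are_contiguous(vm_object_t object, vm_pindex_t pindex,
	    vm_size_t size)
	{
		vm_page_t p;
		vm_paddr_t pa, ptepa;

		p = vm_page_lookup(object, pindex);
		ptepa = VM_PAGE_TO_PHYS(p);
		if (ptepa & (NBPDR - 1))
			return (FALSE);
		/* Each resident page must immediately follow the previous one. */
		p = TAILQ_NEXT(p, listq);
		for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; pa += PAGE_SIZE) {
			if (pa != VM_PAGE_TO_PHYS(p))
				return (FALSE);
			p = TAILQ_NEXT(p, listq);
		}
		return (TRUE);
	}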
  
  To my knowledge, there have been no reports of these bugs; hence,
  their persistence.  I suspect that the existing restrictions that
  pmap_object_init_pt() placed on the OBJT_DEVICE objects that it would
  choose to map, for example, that the first page must be aligned on a 2
  or 4MB physical boundary and that the size of the mapping must be a
  multiple of the large page size, were enough to avoid triggering the
  bug for drivers like ksyms.  However, one side effect of testing the
  OBJT_DEVICE object's pages for physical contiguity is that a dubious
  difference between pmap_object_init_pt() and the standard path for
  mapping device pages, i.e., vm_fault(), has been eliminated.
  Previously, pmap_object_init_pt() would only instantiate the first
  PG_FICTITIOUS page being mapped because it never examined the rest.
  Now, however, pmap_object_init_pt() uses the new function
  vm_object_populate() to instantiate them all (in order to support
  testing their physical contiguity).  These pages need to be
  instantiated for the mechanism that I have prototyped for
  automatically maintaining the consistency of the PAT settings across
  multiple mappings, particularly amd64's direct mapping, to work.
  (Translation: This change is also being made to support jhb@'s work on
  the Nvidia feature requests.)
  
  Discussed with:	jhb@
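
  (Editorial note: for reference, the caller side of the new
  vm_object_populate() interface is small.  A sketch of the sequence that
  both pmap_object_init_pt() implementations now follow, condensed from
  the amd64 and i386 diffs below, with the contiguity check and the actual
  large page mapping elided:)

	if ((addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) {
		/*
		 * Instantiate and validate every page backing the range
		 * before committing to a large page mapping.
		 */
		if (!vm_object_populate(object, pindex, pindex + atop(size)))
			return;
		/* ... verify physical contiguity, then map with 2/4MB pages ... */
	}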

Modified:
  head/sys/amd64/amd64/pmap.c
  head/sys/i386/i386/pmap.c
  head/sys/vm/vm_object.c
  head/sys/vm/vm_object.h

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Sun Jun 14 19:51:05 2009	(r194208)
+++ head/sys/amd64/amd64/pmap.c	Sun Jun 14 19:51:43 2009	(r194209)
@@ -3322,78 +3322,74 @@ void
 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
     vm_pindex_t pindex, vm_size_t size)
 {
-	vm_offset_t va;
+	pd_entry_t *pde;
+	vm_paddr_t pa, ptepa;
 	vm_page_t p, pdpg;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	KASSERT(object->type == OBJT_DEVICE,
 	    ("pmap_object_init_pt: non-device object"));
-	if (((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {
-		vm_page_t m[1];
-		pd_entry_t ptepa, *pde;
-
-		PMAP_LOCK(pmap);
-		pde = pmap_pde(pmap, addr);
-		if (pde != 0 && (*pde & PG_V) != 0)
-			goto out;
-		PMAP_UNLOCK(pmap);
-retry:
+	if ((addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) {
+		if (!vm_object_populate(object, pindex, pindex + atop(size)))
+			return;
 		p = vm_page_lookup(object, pindex);
-		if (p != NULL) {
-			if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
-				goto retry;
-		} else {
-			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
-			if (p == NULL)
-				return;
-			m[0] = p;
-
-			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
-				vm_page_lock_queues();
-				vm_page_free(p);
-				vm_page_unlock_queues();
-				return;
-			}
-
-			p = vm_page_lookup(object, pindex);
-			vm_page_wakeup(p);
-		}
+		KASSERT(p->valid == VM_PAGE_BITS_ALL,
+		    ("pmap_object_init_pt: invalid page %p", p));
 
+		/*
+		 * Abort the mapping if the first page is not physically
+		 * aligned to a 2MB page boundary.
+		 */
 		ptepa = VM_PAGE_TO_PHYS(p);
 		if (ptepa & (NBPDR - 1))
 			return;
 
-		p->valid = VM_PAGE_BITS_ALL;
+		/*
+		 * Skip the first page.  Abort the mapping if the rest of
+		 * the pages are not physically contiguous.
+		 */
+		p = TAILQ_NEXT(p, listq);
+		for (pa = ptepa + PAGE_SIZE; pa < ptepa + size;
+		    pa += PAGE_SIZE) {
+			KASSERT(p->valid == VM_PAGE_BITS_ALL,
+			    ("pmap_object_init_pt: invalid page %p", p));
+			if (pa != VM_PAGE_TO_PHYS(p))
+				return;
+			p = TAILQ_NEXT(p, listq);
+		}
 
+		/* Map using 2MB pages. */
 		PMAP_LOCK(pmap);
-		for (va = addr; va < addr + size; va += NBPDR) {
-			while ((pdpg =
-			    pmap_allocpde(pmap, va, M_NOWAIT)) == NULL) {
-				PMAP_UNLOCK(pmap);
-				vm_page_busy(p);
-				VM_OBJECT_UNLOCK(object);
-				VM_WAIT;
-				VM_OBJECT_LOCK(object);
-				vm_page_wakeup(p);
-				PMAP_LOCK(pmap);
+		for (pa = ptepa; pa < ptepa + size; pa += NBPDR) {
+			pdpg = pmap_allocpde(pmap, addr, M_NOWAIT);
+			if (pdpg == NULL) {
+				/*
+				 * The creation of mappings below is only an
+				 * optimization.  If a page directory page
+				 * cannot be allocated without blocking,
+				 * continue on to the next mapping rather than
+				 * blocking.
+				 */
+				addr += NBPDR;
+				continue;
 			}
 			pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg));
-			pde = &pde[pmap_pde_index(va)];
+			pde = &pde[pmap_pde_index(addr)];
 			if ((*pde & PG_V) == 0) {
-				pde_store(pde, ptepa | PG_PS | PG_M | PG_A |
+				pde_store(pde, pa | PG_PS | PG_M | PG_A |
 				    PG_U | PG_RW | PG_V);
-				pmap->pm_stats.resident_count +=
-				    NBPDR / PAGE_SIZE;
+				pmap->pm_stats.resident_count += NBPDR /
+				    PAGE_SIZE;
+				pmap_pde_mappings++;
 			} else {
+				/* Continue on if the PDE is already valid. */
 				pdpg->wire_count--;
 				KASSERT(pdpg->wire_count > 0,
 				    ("pmap_object_init_pt: missing reference "
-				     "to page directory page, va: 0x%lx", va));
+				    "to page directory page, va: 0x%lx", addr));
 			}
-			ptepa += NBPDR;
+			addr += NBPDR;
 		}
-		pmap_invalidate_all(pmap);
-out:
 		PMAP_UNLOCK(pmap);
 	}
 }

Modified: head/sys/i386/i386/pmap.c
==============================================================================
--- head/sys/i386/i386/pmap.c	Sun Jun 14 19:51:05 2009	(r194208)
+++ head/sys/i386/i386/pmap.c	Sun Jun 14 19:51:43 2009	(r194209)
@@ -3442,62 +3442,57 @@ void
 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
     vm_pindex_t pindex, vm_size_t size)
 {
+	pd_entry_t *pde;
+	vm_paddr_t pa, ptepa;
 	vm_page_t p;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	KASSERT(object->type == OBJT_DEVICE,
 	    ("pmap_object_init_pt: non-device object"));
 	if (pseflag && 
-	    ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {
-		int i;
-		vm_page_t m[1];
-		unsigned int ptepindex;
-		int npdes;
-		pd_entry_t ptepa;
-
-		PMAP_LOCK(pmap);
-		if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
-			goto out;
-		PMAP_UNLOCK(pmap);
-retry:
+	    (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) {
+		if (!vm_object_populate(object, pindex, pindex + atop(size)))
+			return;
 		p = vm_page_lookup(object, pindex);
-		if (p != NULL) {
-			if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
-				goto retry;
-		} else {
-			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
-			if (p == NULL)
-				return;
-			m[0] = p;
-
-			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
-				vm_page_lock_queues();
-				vm_page_free(p);
-				vm_page_unlock_queues();
-				return;
-			}
-
-			p = vm_page_lookup(object, pindex);
-			vm_page_wakeup(p);
-		}
+		KASSERT(p->valid == VM_PAGE_BITS_ALL,
+		    ("pmap_object_init_pt: invalid page %p", p));
 
+		/*
+		 * Abort the mapping if the first page is not physically
+		 * aligned to a 2/4MB page boundary.
+		 */
 		ptepa = VM_PAGE_TO_PHYS(p);
 		if (ptepa & (NBPDR - 1))
 			return;
 
-		p->valid = VM_PAGE_BITS_ALL;
+		/*
+		 * Skip the first page.  Abort the mapping if the rest of
+		 * the pages are not physically contiguous.
+		 */
+		p = TAILQ_NEXT(p, listq);
+		for (pa = ptepa + PAGE_SIZE; pa < ptepa + size;
+		    pa += PAGE_SIZE) {
+			KASSERT(p->valid == VM_PAGE_BITS_ALL,
+			    ("pmap_object_init_pt: invalid page %p", p));
+			if (pa != VM_PAGE_TO_PHYS(p))
+				return;
+			p = TAILQ_NEXT(p, listq);
+		}
 
+		/* Map using 2/4MB pages. */
 		PMAP_LOCK(pmap);
-		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
-		npdes = size >> PDRSHIFT;
-		for(i = 0; i < npdes; i++) {
-			pde_store(&pmap->pm_pdir[ptepindex],
-			    ptepa | PG_U | PG_RW | PG_V | PG_PS);
-			ptepa += NBPDR;
-			ptepindex += 1;
+		for (pa = ptepa; pa < ptepa + size; pa += NBPDR) {
+			pde = pmap_pde(pmap, addr);
+			if (*pde == 0) {
+				pde_store(pde, pa | PG_PS | PG_M | PG_A |
+				    PG_U | PG_RW | PG_V);
+				pmap->pm_stats.resident_count += NBPDR /
+				    PAGE_SIZE;
+				pmap_pde_mappings++;
+			}
+			/* Else continue on if the PDE is already valid. */
+			addr += NBPDR;
 		}
-		pmap_invalidate_all(pmap);
-out:
 		PMAP_UNLOCK(pmap);
 	}
 }

Modified: head/sys/vm/vm_object.c
==============================================================================
--- head/sys/vm/vm_object.c	Sun Jun 14 19:51:05 2009	(r194208)
+++ head/sys/vm/vm_object.c	Sun Jun 14 19:51:43 2009	(r194209)
@@ -1931,6 +1931,55 @@ skipmemq:
 }
 
 /*
+ *	Populate the specified range of the object with valid pages.  Returns
+ *	TRUE if the range is successfully populated and FALSE otherwise.
+ *
+ *	Note: This function should be optimized to pass a larger array of
+ *	pages to vm_pager_get_pages() before it is applied to a non-
+ *	OBJT_DEVICE object.
+ *
+ *	The object must be locked.
+ */
+boolean_t
+vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
+{
+	vm_page_t m, ma[1];
+	vm_pindex_t pindex;
+	int rv;
+
+	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
+	for (pindex = start; pindex < end; pindex++) {
+		m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL |
+		    VM_ALLOC_RETRY);
+		if (m->valid != VM_PAGE_BITS_ALL) {
+			ma[0] = m;
+			rv = vm_pager_get_pages(object, ma, 1, 0);
+			m = vm_page_lookup(object, pindex);
+			if (m == NULL)
+				break;
+			if (rv != VM_PAGER_OK) {
+				vm_page_lock_queues();
+				vm_page_free(m);
+				vm_page_unlock_queues();
+				break;
+			}
+		}
+		/*
+		 * Keep "m" busy because a subsequent iteration may unlock
+		 * the object.
+		 */
+	}
+	if (pindex > start) {
+		m = vm_page_lookup(object, start);
+		while (m != NULL && m->pindex < pindex) {
+			vm_page_wakeup(m);
+			m = TAILQ_NEXT(m, listq);
+		}
+	}
+	return (pindex == end);
+}
+
+/*
  *	Routine:	vm_object_coalesce
  *	Function:	Coalesces two objects backing up adjoining
  *			regions of memory into a single object.

Modified: head/sys/vm/vm_object.h
==============================================================================
--- head/sys/vm/vm_object.h	Sun Jun 14 19:51:05 2009	(r194208)
+++ head/sys/vm/vm_object.h	Sun Jun 14 19:51:43 2009	(r194209)
@@ -207,6 +207,7 @@ void vm_object_set_writeable_dirty (vm_o
 void vm_object_init (void);
 void vm_object_page_clean (vm_object_t, vm_pindex_t, vm_pindex_t, boolean_t);
 void vm_object_page_remove (vm_object_t, vm_pindex_t, vm_pindex_t, boolean_t);
+boolean_t vm_object_populate(vm_object_t, vm_pindex_t, vm_pindex_t);
 void vm_object_reference (vm_object_t);
 void vm_object_reference_locked(vm_object_t);
 void vm_object_shadow (vm_object_t *, vm_ooffset_t *, vm_size_t);


