Date:      Mon, 5 Aug 2013 08:55:35 +0000 (UTC)
From:      Attilio Rao <attilio@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r253953 - in head/sys: cddl/contrib/opensolaris/uts/common/fs/zfs fs/tmpfs kern vm
Message-ID:  <201308050855.r758tZLZ011247@svn.freebsd.org>

Author: attilio
Date: Mon Aug  5 08:55:35 2013
New Revision: 253953
URL: http://svnweb.freebsd.org/changeset/base/253953

Log:
  Revert r253939:
  We cannot busy a page before doing page faults.
  In fact, doing so can deadlock against the vnode lock, as the fault
  path tries to vget() the vnode.  Other functions currently use the
  opposite lock ordering; vm_object_sync(), for example, acquires the
  vnode lock first and then sleeps on the busy mechanism.
  
  Before this patch is reinstated we need to break this ordering.
  
  Sponsored by:	EMC / Isilon storage division
  Reported by:	kib
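
  To make the lock-order problem concrete, here is a rough sketch of the
  inversion (illustrative only: the interleaving below is simplified and
  is not the exact kernel call path):

	/*
	 * Hypothetical interleaving, not real kernel code.
	 *
	 * Thread A -- the reverted r253939 fault path:
	 *   vm_page_io_start(m);    marks the page busy
	 *   ...                     the fault needs the backing vnode
	 *   vget(vp, ...);          blocks: B holds the vnode lock
	 *
	 * Thread B -- vm_object_sync():
	 *   vn_lock(vp, ...);       takes the vnode lock first
	 *   vm_page_sleep(m, ...);  blocks: A holds the page busy
	 *
	 * A waits on the vnode lock while holding the busy state; B waits
	 * on the busy state while holding the vnode lock: an AB/BA
	 * deadlock.  Reverting to vm_page_hold() avoids taking the busy
	 * state before the fault, so the ordering conflict disappears.
	 */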

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
  head/sys/fs/tmpfs/tmpfs_vnops.c
  head/sys/kern/imgact_elf.c
  head/sys/kern/kern_exec.c
  head/sys/kern/sys_process.c
  head/sys/vm/vm_extern.h
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_glue.c
  head/sys/vm/vm_map.h

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Mon Aug  5 08:55:35 2013	(r253953)
@@ -324,8 +324,7 @@ zfs_ioctl(vnode_t *vp, u_long com, intpt
 }
 
 static vm_page_t
-page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes,
-    boolean_t alloc)
+page_busy(vnode_t *vp, int64_t start, int64_t off, int64_t nbytes)
 {
 	vm_object_t obj;
 	vm_page_t pp;
@@ -347,8 +346,6 @@ page_busy(vnode_t *vp, int64_t start, in
 				continue;
 			}
 		} else if (pp == NULL) {
-			if (!alloc)
-				break;
 			pp = vm_page_alloc(obj, OFF_TO_IDX(start),
 			    VM_ALLOC_SYSTEM | VM_ALLOC_IFCACHED |
 			    VM_ALLOC_NOBUSY);
@@ -359,10 +356,8 @@ page_busy(vnode_t *vp, int64_t start, in
 
 		if (pp != NULL) {
 			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
-			vm_page_io_start(pp);
-			if (!alloc)
-				break;
 			vm_object_pip_add(obj, 1);
+			vm_page_io_start(pp);
 			pmap_remove_write(pp);
 			vm_page_clear_dirty(pp, off, nbytes);
 		}
@@ -372,12 +367,55 @@ page_busy(vnode_t *vp, int64_t start, in
 }
 
 static void
-page_unbusy(vm_page_t pp, boolean_t unalloc)
+page_unbusy(vm_page_t pp)
 {
 
 	vm_page_io_finish(pp);
-	if (unalloc)
-		vm_object_pip_subtract(pp->object, 1);
+	vm_object_pip_subtract(pp->object, 1);
+}
+
+static vm_page_t
+page_hold(vnode_t *vp, int64_t start)
+{
+	vm_object_t obj;
+	vm_page_t pp;
+
+	obj = vp->v_object;
+	zfs_vmobject_assert_wlocked(obj);
+
+	for (;;) {
+		if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
+		    pp->valid) {
+			if ((pp->oflags & VPO_BUSY) != 0) {
+				/*
+				 * Reference the page before unlocking and
+				 * sleeping so that the page daemon is less
+				 * likely to reclaim it.
+				 */
+				vm_page_reference(pp);
+				vm_page_sleep(pp, "zfsmwb");
+				continue;
+			}
+
+			ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
+			vm_page_lock(pp);
+			vm_page_hold(pp);
+			vm_page_unlock(pp);
+
+		} else
+			pp = NULL;
+		break;
+	}
+	return (pp);
+}
+
+static void
+page_unhold(vm_page_t pp)
+{
+
+	vm_page_lock(pp);
+	vm_page_unhold(pp);
+	vm_page_unlock(pp);
 }
 
 static caddr_t
@@ -441,8 +479,7 @@ update_pages(vnode_t *vp, int64_t start,
 
 			zfs_vmobject_wlock(obj);
 			vm_page_undirty(pp);
-		} else if ((pp = page_busy(vp, start, off, nbytes,
-		    TRUE)) != NULL) {
+		} else if ((pp = page_busy(vp, start, off, nbytes)) != NULL) {
 			zfs_vmobject_wunlock(obj);
 
 			va = zfs_map_page(pp, &sf);
@@ -451,7 +488,7 @@ update_pages(vnode_t *vp, int64_t start,
 			zfs_unmap_page(sf);
 
 			zfs_vmobject_wlock(obj);
-			page_unbusy(pp, TRUE);
+			page_unbusy(pp);
 		}
 		len -= nbytes;
 		off = 0;
@@ -561,7 +598,7 @@ mappedread(vnode_t *vp, int nbytes, uio_
 		vm_page_t pp;
 		uint64_t bytes = MIN(PAGESIZE - off, len);
 
-		if (pp = page_busy(vp, start, 0, 0, FALSE)) {
+		if (pp = page_hold(vp, start)) {
 			struct sf_buf *sf;
 			caddr_t va;
 
@@ -570,7 +607,7 @@ mappedread(vnode_t *vp, int nbytes, uio_
 			error = uiomove(va + off, bytes, UIO_READ, uio);
 			zfs_unmap_page(sf);
 			zfs_vmobject_wlock(obj);
-			page_unbusy(pp, FALSE);
+			page_unhold(pp);
 		} else {
 			zfs_vmobject_wunlock(obj);
 			error = dmu_read_uio(os, zp->z_id, uio, bytes);

Modified: head/sys/fs/tmpfs/tmpfs_vnops.c
==============================================================================
--- head/sys/fs/tmpfs/tmpfs_vnops.c	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/fs/tmpfs/tmpfs_vnops.c	Mon Aug  5 08:55:35 2013	(r253953)
@@ -485,13 +485,13 @@ tmpfs_nocacheread(vm_object_t tobj, vm_p
 			vm_page_zero_invalid(m, TRUE);
 		vm_page_wakeup(m);
 	}
-	vm_page_io_start(m);
+	vm_page_lock(m);
+	vm_page_hold(m);
+	vm_page_unlock(m);
 	VM_OBJECT_WUNLOCK(tobj);
 	error = uiomove_fromphys(&m, offset, tlen, uio);
-	VM_OBJECT_WLOCK(tobj);
-	vm_page_io_finish(m);
-	VM_OBJECT_WUNLOCK(tobj);
 	vm_page_lock(m);
+	vm_page_unhold(m);
 	if (m->queue == PQ_NONE) {
 		vm_page_deactivate(m);
 	} else {
@@ -602,14 +602,16 @@ tmpfs_mappedwrite(vm_object_t tobj, size
 			vm_page_zero_invalid(tpg, TRUE);
 		vm_page_wakeup(tpg);
 	}
-	vm_page_io_start(tpg);
+	vm_page_lock(tpg);
+	vm_page_hold(tpg);
+	vm_page_unlock(tpg);
 	VM_OBJECT_WUNLOCK(tobj);
 	error = uiomove_fromphys(&tpg, offset, tlen, uio);
 	VM_OBJECT_WLOCK(tobj);
-	vm_page_io_finish(tpg);
 	if (error == 0)
 		vm_page_dirty(tpg);
 	vm_page_lock(tpg);
+	vm_page_unhold(tpg);
 	if (tpg->queue == PQ_NONE) {
 		vm_page_deactivate(tpg);
 	} else {

Modified: head/sys/kern/imgact_elf.c
==============================================================================
--- head/sys/kern/imgact_elf.c	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/kern/imgact_elf.c	Mon Aug  5 08:55:35 2013	(r253953)
@@ -378,7 +378,7 @@ __elfN(map_partial)(vm_map_t map, vm_obj
 		off = offset - trunc_page(offset);
 		error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
 		    end - start);
-		vm_imgact_unmap_page(object, sf);
+		vm_imgact_unmap_page(sf);
 		if (error) {
 			return (KERN_FAILURE);
 		}
@@ -433,7 +433,7 @@ __elfN(map_insert)(vm_map_t map, vm_obje
 					sz = PAGE_SIZE - off;
 				error = copyout((caddr_t)sf_buf_kva(sf) + off,
 				    (caddr_t)start, sz);
-				vm_imgact_unmap_page(object, sf);
+				vm_imgact_unmap_page(sf);
 				if (error) {
 					return (KERN_FAILURE);
 				}
@@ -553,7 +553,7 @@ __elfN(load_section)(struct image_params
 		    trunc_page(offset + filsz);
 		error = copyout((caddr_t)sf_buf_kva(sf) + off,
 		    (caddr_t)map_addr, copy_len);
-		vm_imgact_unmap_page(object, sf);
+		vm_imgact_unmap_page(sf);
 		if (error) {
 			return (error);
 		}

Modified: head/sys/kern/kern_exec.c
==============================================================================
--- head/sys/kern/kern_exec.c	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/kern/kern_exec.c	Mon Aug  5 08:55:35 2013	(r253953)
@@ -973,7 +973,7 @@ exec_map_first_page(imgp)
 		vm_page_wakeup(ma[0]);
 	}
 	vm_page_lock(ma[0]);
-	vm_page_wire(ma[0]);
+	vm_page_hold(ma[0]);
 	vm_page_unlock(ma[0]);
 	VM_OBJECT_WUNLOCK(object);
 
@@ -994,7 +994,7 @@ exec_unmap_first_page(imgp)
 		sf_buf_free(imgp->firstpage);
 		imgp->firstpage = NULL;
 		vm_page_lock(m);
-		vm_page_unwire(m, 0);
+		vm_page_unhold(m);
 		vm_page_unlock(m);
 	}
 }

Modified: head/sys/kern/sys_process.c
==============================================================================
--- head/sys/kern/sys_process.c	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/kern/sys_process.c	Mon Aug  5 08:55:35 2013	(r253953)
@@ -263,7 +263,6 @@ proc_rwmem(struct proc *p, struct uio *u
 	writing = uio->uio_rw == UIO_WRITE;
 	reqprot = writing ? VM_PROT_COPY | VM_PROT_READ : VM_PROT_READ;
 	fault_flags = writing ? VM_FAULT_DIRTY : VM_FAULT_NORMAL;
-	fault_flags |= VM_FAULT_IOBUSY;
 
 	/*
 	 * Only map in one page at a time.  We don't have to, but it
@@ -288,9 +287,9 @@ proc_rwmem(struct proc *p, struct uio *u
 		len = min(PAGE_SIZE - page_offset, uio->uio_resid);
 
 		/*
-		 * Fault and busy the page on behalf of the process.
+		 * Fault and hold the page on behalf of the process.
 		 */
-		error = vm_fault_handle(map, pageno, reqprot, fault_flags, &m);
+		error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m);
 		if (error != KERN_SUCCESS) {
 			if (error == KERN_RESOURCE_SHORTAGE)
 				error = ENOMEM;
@@ -316,9 +315,9 @@ proc_rwmem(struct proc *p, struct uio *u
 		/*
 		 * Release the page.
 		 */
-		VM_OBJECT_WLOCK(m->object);
-		vm_page_io_finish(m);
-		VM_OBJECT_WUNLOCK(m->object);
+		vm_page_lock(m);
+		vm_page_unhold(m);
+		vm_page_unlock(m);
 
 	} while (error == 0 && uio->uio_resid > 0);
 

Modified: head/sys/vm/vm_extern.h
==============================================================================
--- head/sys/vm/vm_extern.h	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/vm/vm_extern.h	Mon Aug  5 08:55:35 2013	(r253953)
@@ -63,7 +63,7 @@ void vm_fault_copy_entry(vm_map_t, vm_ma
     vm_ooffset_t *);
 int vm_fault_disable_pagefaults(void);
 void vm_fault_enable_pagefaults(int save);
-int vm_fault_handle(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold);
 int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
     vm_prot_t prot, vm_page_t *ma, int max_count);
@@ -87,7 +87,7 @@ void vnode_pager_setsize(struct vnode *,
 int vslock(void *, size_t);
 void vsunlock(void *, size_t);
 struct sf_buf *vm_imgact_map_page(vm_object_t object, vm_ooffset_t offset);
-void vm_imgact_unmap_page(vm_object_t, struct sf_buf *sf);
+void vm_imgact_unmap_page(struct sf_buf *sf);
 void vm_thread_dispose(struct thread *td);
 int vm_thread_new(struct thread *td, int pages);
 int vm_mlock(struct proc *, struct ucred *, const void *, size_t);

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/vm/vm_fault.c	Mon Aug  5 08:55:35 2013	(r253953)
@@ -221,8 +221,8 @@ vm_fault(vm_map_t map, vm_offset_t vaddr
 	if (map != kernel_map && KTRPOINT(td, KTR_FAULT))
 		ktrfault(vaddr, fault_type);
 #endif
-	result = vm_fault_handle(map, trunc_page(vaddr), fault_type,
-	    fault_flags, NULL);
+	result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags,
+	    NULL);
 #ifdef KTRACE
 	if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND))
 		ktrfaultend(result);
@@ -231,7 +231,7 @@ vm_fault(vm_map_t map, vm_offset_t vaddr
 }
 
 int
-vm_fault_handle(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
     int fault_flags, vm_page_t *m_hold)
 {
 	vm_prot_t prot;
@@ -943,10 +943,7 @@ vnode_locked:
 		vm_page_activate(fs.m);
 	if (m_hold != NULL) {
 		*m_hold = fs.m;
-		if (fault_flags & VM_FAULT_IOBUSY)
-			vm_page_io_start(fs.m);
-		else
-			vm_page_hold(fs.m);
+		vm_page_hold(fs.m);
 	}
 	vm_page_unlock(fs.m);
 	vm_page_wakeup(fs.m);
@@ -1148,7 +1145,7 @@ vm_fault_quick_hold_pages(vm_map_t map, 
 		 * and hold these pages.
 		 */
 		for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE)
-			if (*mp == NULL && vm_fault_handle(map, va, prot,
+			if (*mp == NULL && vm_fault_hold(map, va, prot,
 			    VM_FAULT_NORMAL, mp) != KERN_SUCCESS)
 				goto error;
 	}

Modified: head/sys/vm/vm_glue.c
==============================================================================
--- head/sys/vm/vm_glue.c	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/vm/vm_glue.c	Mon Aug  5 08:55:35 2013	(r253953)
@@ -223,7 +223,7 @@ vsunlock(void *addr, size_t len)
  * Return the pinned page if successful; otherwise, return NULL.
  */
 static vm_page_t
-vm_imgact_page_iostart(vm_object_t object, vm_ooffset_t offset)
+vm_imgact_hold_page(vm_object_t object, vm_ooffset_t offset)
 {
 	vm_page_t m, ma[1];
 	vm_pindex_t pindex;
@@ -249,7 +249,9 @@ vm_imgact_page_iostart(vm_object_t objec
 		}
 		vm_page_wakeup(m);
 	}
-	vm_page_io_start(m);
+	vm_page_lock(m);
+	vm_page_hold(m);
+	vm_page_unlock(m);
 out:
 	VM_OBJECT_WUNLOCK(object);
 	return (m);
@@ -264,7 +266,7 @@ vm_imgact_map_page(vm_object_t object, v
 {
 	vm_page_t m;
 
-	m = vm_imgact_page_iostart(object, offset);
+	m = vm_imgact_hold_page(object, offset);
 	if (m == NULL)
 		return (NULL);
 	sched_pin();
@@ -275,16 +277,16 @@ vm_imgact_map_page(vm_object_t object, v
  * Destroy the given CPU private mapping and unpin the page that it mapped.
  */
 void
-vm_imgact_unmap_page(vm_object_t object, struct sf_buf *sf)
+vm_imgact_unmap_page(struct sf_buf *sf)
 {
 	vm_page_t m;
 
 	m = sf_buf_page(sf);
 	sf_buf_free(sf);
 	sched_unpin();
-	VM_OBJECT_WLOCK(object);
-	vm_page_io_finish(m);
-	VM_OBJECT_WUNLOCK(object);
+	vm_page_lock(m);
+	vm_page_unhold(m);
+	vm_page_unlock(m);
 }
 
 void

Modified: head/sys/vm/vm_map.h
==============================================================================
--- head/sys/vm/vm_map.h	Mon Aug  5 08:27:35 2013	(r253952)
+++ head/sys/vm/vm_map.h	Mon Aug  5 08:55:35 2013	(r253953)
@@ -329,7 +329,6 @@ long vmspace_resident_count(struct vmspa
 #define VM_FAULT_NORMAL 0		/* Nothing special */
 #define VM_FAULT_CHANGE_WIRING 1	/* Change the wiring as appropriate */
 #define	VM_FAULT_DIRTY 2		/* Dirty the page; use w/VM_PROT_COPY */
-#define	VM_FAULT_IOBUSY 4		/* Busy the faulted page */
 
 /*
  * Initially, mappings are slightly sequential.  The maximum window size must


