Date:      Fri, 5 Sep 2014 15:38:26 +0300
From:      Konstantin Belousov <kostikbel@gmail.com>
To:        Pieter de Goeje <pieter@degoeje.nl>
Cc:        alc@freebsd.org, hackers@freebsd.org
Subject:   Re: mmap MAP_NOSYNC regression in 10.x
Message-ID:  <20140905123826.GP2737@kib.kiev.ua>
In-Reply-To: <5409A4F8.6020204@degoeje.nl>
References:  <540903FF.6010602@degoeje.nl> <CAJUyCcNiLwLuL9crpQBjSdg4ED5kR53fPjyJG3HNmP5Roor8RQ@mail.gmail.com> <20140905080633.GM2737@kib.kiev.ua> <5409A4F8.6020204@degoeje.nl>

On Fri, Sep 05, 2014 at 01:56:40PM +0200, Pieter de Goeje wrote:
> Thanks, works for me!

I realized that the patch contains yet another bug. The update of the
page's oflags must be protected by the exclusive vm object lock, but the
fast path only holds the object lock in shared mode. Below is the fixed
patch, where I take the page lock around setting VPO_NOSYNC (exclusive
lock owners cannot race with the fast path, since the fast path owns the
shared object lock, and two parallel fast path executions are serialized
by the page lock).
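
For reference, here is a minimal userspace sketch (not part of the patch;
the file name is made up) of the kind of MAP_NOSYNC mapping whose write
faults go through the fast path discussed above:

#include <sys/mman.h>
#include <err.h>
#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
	char *p;
	int fd;

	fd = open("/tmp/nosync.dat", O_RDWR | O_CREAT, 0644);
	if (fd == -1)
		err(1, "open");
	if (ftruncate(fd, 4096) == -1)
		err(1, "ftruncate");
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
	    MAP_SHARED | MAP_NOSYNC, fd, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	p[0] = 1;		/* write fault; the page may be marked VPO_NOSYNC */
	if (munmap(p, 4096) == -1)
		err(1, "munmap");
	close(fd);
	return (0);
}

With MAP_NOSYNC, dirty pages of such a mapping are not flushed by the
periodic syncer; they are written back only when the pager has to write
them or on an explicit msync(2).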

diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 30b0456..f327ab0 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -174,6 +174,54 @@ unlock_and_deallocate(struct faultstate *fs)
 	}
 }
 
+static void
+vm_fault_dirty(vm_map_entry_t entry, vm_page_t m, vm_prot_t prot,
+    vm_prot_t fault_type, int fault_flags, boolean_t set_wd)
+{
+
+	if (((prot & VM_PROT_WRITE) != 0 ||
+	    (fault_flags & VM_FAULT_DIRTY) != 0) &&
+	    (m->oflags & VPO_UNMANAGED) == 0) {
+		if (set_wd)
+			vm_object_set_writeable_dirty(m->object);
+
+		/*
+		 * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC
+		 * if the page is already dirty to prevent data written with
+		 * the expectation of being synced from not being synced.
+		 * Likewise if this entry does not request NOSYNC then make
+		 * sure the page isn't marked NOSYNC.  Applications sharing
+		 * data should use the same flags to avoid ping ponging.
+		 */
+		if (entry->eflags & MAP_ENTRY_NOSYNC) {
+			if (m->dirty == 0) {
+				if (!set_wd)
+					vm_page_lock(m);
+				m->oflags |= VPO_NOSYNC;
+				if (!set_wd)
+					vm_page_unlock(m);
+			}
+		} else {
+			m->oflags &= ~VPO_NOSYNC;
+		}
+
+		/*
+		 * If the fault is a write, we know that this page is being
+		 * written NOW so dirty it explicitly to save on
+		 * pmap_is_modified() calls later.
+		 *
+		 * Also tell the backing pager, if any, that it should remove
+		 * any swap backing since the page is now dirty.
+		 */
+		if (((fault_type & VM_PROT_WRITE) != 0 &&
+		    (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) ||
+		    (fault_flags & VM_FAULT_DIRTY) != 0) {
+			vm_page_dirty(m);
+			vm_pager_page_unswapped(m);
+		}
+	}
+}
+
 /*
  * TRYPAGER - used by vm_fault to calculate whether the pager for the
  *	      current object *might* contain the page.
@@ -321,11 +369,8 @@ RetryFault:;
 			vm_page_hold(m);
 			vm_page_unlock(m);
 		}
-		if ((fault_type & VM_PROT_WRITE) != 0 &&
-		    (m->oflags & VPO_UNMANAGED) == 0) {
-			vm_page_dirty(m);
-			vm_pager_page_unswapped(m);
-		}
+		vm_fault_dirty(fs.entry, m, prot, fault_type, fault_flags,
+		    FALSE);
 		VM_OBJECT_RUNLOCK(fs.first_object);
 		if (!wired)
 			vm_fault_prefault(&fs, vaddr, 0, 0);
@@ -898,42 +943,7 @@ vnode_locked:
 	if (hardfault)
 		fs.entry->next_read = fs.pindex + faultcount - reqpage;
 
-	if (((prot & VM_PROT_WRITE) != 0 ||
-	    (fault_flags & VM_FAULT_DIRTY) != 0) &&
-	    (fs.m->oflags & VPO_UNMANAGED) == 0) {
-		vm_object_set_writeable_dirty(fs.object);
-
-		/*
-		 * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC
-		 * if the page is already dirty to prevent data written with
-		 * the expectation of being synced from not being synced.
-		 * Likewise if this entry does not request NOSYNC then make
-		 * sure the page isn't marked NOSYNC.  Applications sharing
-		 * data should use the same flags to avoid ping ponging.
-		 */
-		if (fs.entry->eflags & MAP_ENTRY_NOSYNC) {
-			if (fs.m->dirty == 0)
-				fs.m->oflags |= VPO_NOSYNC;
-		} else {
-			fs.m->oflags &= ~VPO_NOSYNC;
-		}
-
-		/*
-		 * If the fault is a write, we know that this page is being
-		 * written NOW so dirty it explicitly to save on
-		 * pmap_is_modified() calls later.
-		 *
-		 * Also tell the backing pager, if any, that it should remove
-		 * any swap backing since the page is now dirty.
-		 */
-		if (((fault_type & VM_PROT_WRITE) != 0 &&
-		    (fault_flags & VM_FAULT_CHANGE_WIRING) == 0) ||
-		    (fault_flags & VM_FAULT_DIRTY) != 0) {
-			vm_page_dirty(fs.m);
-			vm_pager_page_unswapped(fs.m);
-		}
-	}
-
+	vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags, TRUE);
 	vm_page_assert_xbusied(fs.m);
 
 	/*
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index f12b76c..a45648d 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -147,7 +147,7 @@ struct vm_page {
 	uint16_t hold_count;		/* page hold count (P) */
 	uint16_t flags;			/* page PG_* flags (P) */
 	uint8_t aflags;			/* access is atomic */
-	uint8_t oflags;			/* page VPO_* flags (O) */
+	uint8_t oflags;			/* page VPO_* flags (OM) */
 	uint8_t	queue;			/* page queue index (P,Q) */
 	int8_t psind;			/* pagesizes[] index (O) */
 	int8_t segind;
@@ -163,8 +163,9 @@ struct vm_page {
 /*
  * Page flags stored in oflags:
  *
- * Access to these page flags is synchronized by the lock on the object
- * containing the page (O).
+ * Access to these page flags is synchronized by the exclusive lock on
+ * the object containing the page, or combination of shared object
+ * lock and the page lock (OM).
  *
  * Note: VPO_UNMANAGED (used by OBJT_DEVICE, OBJT_PHYS and OBJT_SG)
  * 	 indicates that the page is not under PV management but

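To spell out the new (OM) rule in the vm_page.h comment above, here is a
small illustrative sketch (not part of the patch; the function name and
the reduced include list are mine) of the two locking modes under which
m->oflags may now be updated:

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

static void
oflags_update_sketch(vm_object_t object, vm_page_t m)
{

	/* Either under the exclusive object lock alone ... */
	VM_OBJECT_WLOCK(object);
	m->oflags |= VPO_NOSYNC;
	VM_OBJECT_WUNLOCK(object);

	/* ... or under the shared object lock plus the page lock. */
	VM_OBJECT_RLOCK(object);
	vm_page_lock(m);
	m->oflags |= VPO_NOSYNC;
	vm_page_unlock(m);
	VM_OBJECT_RUNLOCK(object);
}

The fast path in vm_fault() corresponds to the second variant, which is
why vm_fault_dirty() takes the page lock only when set_wd is FALSE.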