Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 5 Sep 2014 11:06:33 +0300
From:      Konstantin Belousov <kostikbel@gmail.com>
To:        Pieter de Goeje <pieter@degoeje.nl>
Cc:        alc@freebsd.org, hackers@freebsd.org
Subject:   Re: mmap MAP_NOSYNC regression in 10.x
Message-ID:  <20140905080633.GM2737@kib.kiev.ua>
In-Reply-To: <CAJUyCcNiLwLuL9crpQBjSdg4ED5kR53fPjyJG3HNmP5Roor8RQ@mail.gmail.com>
References:  <540903FF.6010602@degoeje.nl> <CAJUyCcNiLwLuL9crpQBjSdg4ED5kR53fPjyJG3HNmP5Roor8RQ@mail.gmail.com>

next in thread | previous in thread | raw e-mail | index | archive | help

--izE3dcXa7Fnj97wq
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On Fri, Sep 05, 2014 at 02:02:51AM -0500, Alan Cox wrote:
> On Thu, Sep 4, 2014 at 7:29 PM, Pieter de Goeje <pieter@degoeje.nl> wrote:
>
> > After upgrading my month-old 10-stable installation today (to r271093),
> > I've noticed that the kernel no longer honors the MAP_NOSYNC flag.
> > Attached is a demonstration program that highlights the issue (also
> > available here: http://pastebin.com/y0kvdn0r ).
> >
> > The program creates and mmap()s a 200MiB file and repeatedly writes zeros
> > to it. The expected behavior is that under normal circumstances (no memory
> > pressure), the dirtied pages are not flushed to disk. Observed is however
> > that every ~30 seconds the syncer kicks in and basically halts the program
> > while it does its job. The program prints a line every time the throughput
> > drops below 500MBps, well below memory bandwidth.
> >
> > mmap() is called like this:
> >
> >   void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
> >      MAP_SHARED | MAP_NOSYNC | MAP_ALIGNED_SUPER, fd, 0);
> >
> > Sample output:
> >
> > write...
> > zeroing: 209.6 MB
> >  ...write: 5.839s
> > mmap...
> >  ...mmap: 0.000s
> > 20.1s: memset #259: 34.7MBps - stalled
> > 55.7s: memset #810: 34.7MBps - stalled
> > 91.3s: memset #1359: 34.6MBps - stalled
> > 100.0s: memset #1522: 3938.5MBps
> > overall bandwidth: 3190.6MBps
> > munmap...
> >  ...munmap: 5.796s
> > done
> >
> > (this is a rather old system)
> >
> > If necessary I'm willing to find out the exact commit that caused the
> > problem.
> >
> >
>
> That's not necessary.  This is a bug in the page fault handler's new fast
> path.

The following patch fixed the issue for me.

diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 30b0456..803bf59 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -174,6 +174,49 @@ unlock_and_deallocate(struct faultstate *fs)
 	}
 }
=20
+static void
+vm_fault_dirty(vm_map_entry_t entry, vm_page_t m, vm_prot_t prot,
+    vm_prot_t fault_type, int fault_flags, boolean_t set_wd)
+{
+
+	if (((prot & VM_PROT_WRITE) !=3D 0 ||
+	    (fault_flags & VM_FAULT_DIRTY) !=3D 0) &&
+	    (m->oflags & VPO_UNMANAGED) =3D=3D 0) {
+		if (set_wd)
+			vm_object_set_writeable_dirty(m->object);
+
+		/*
+		 * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC
+		 * if the page is already dirty to prevent data written with
+		 * the expectation of being synced from not being synced.
+		 * Likewise if this entry does not request NOSYNC then make
+		 * sure the page isn't marked NOSYNC.  Applications sharing
+		 * data should use the same flags to avoid ping ponging.
+		 */
+		if (entry->eflags & MAP_ENTRY_NOSYNC) {
+			if (m->dirty =3D=3D 0)
+				m->oflags |=3D VPO_NOSYNC;
+		} else {
+			m->oflags &=3D ~VPO_NOSYNC;
+		}
+
+		/*
+		 * If the fault is a write, we know that this page is being
+		 * written NOW so dirty it explicitly to save on=20
+		 * pmap_is_modified() calls later.
+		 *
+		 * Also tell the backing pager, if any, that it should remove
+		 * any swap backing since the page is now dirty.
+		 */
+		if (((fault_type & VM_PROT_WRITE) !=3D 0 &&
+		    (fault_flags & VM_FAULT_CHANGE_WIRING) =3D=3D 0) ||
+		    (fault_flags & VM_FAULT_DIRTY) !=3D 0) {
+			vm_page_dirty(m);
+			vm_pager_page_unswapped(m);
+		}
+	}
+}
+
 /*
  * TRYPAGER - used by vm_fault to calculate whether the pager for the
  *	      current object *might* contain the page.
@@ -321,11 +364,8 @@ RetryFault:;
 			vm_page_hold(m);
 			vm_page_unlock(m);
 		}
-		if ((fault_type & VM_PROT_WRITE) !=3D 0 &&
-		    (m->oflags & VPO_UNMANAGED) =3D=3D 0) {
-			vm_page_dirty(m);
-			vm_pager_page_unswapped(m);
-		}
+		vm_fault_dirty(fs.entry, m, prot, fault_type, fault_flags,
+		    FALSE);
 		VM_OBJECT_RUNLOCK(fs.first_object);
 		if (!wired)
 			vm_fault_prefault(&fs, vaddr, 0, 0);
@@ -898,42 +938,7 @@ vnode_locked:
 	if (hardfault)
 		fs.entry->next_read =3D fs.pindex + faultcount - reqpage;
=20
-	if (((prot & VM_PROT_WRITE) !=3D 0 ||
-	    (fault_flags & VM_FAULT_DIRTY) !=3D 0) &&
-	    (fs.m->oflags & VPO_UNMANAGED) =3D=3D 0) {
-		vm_object_set_writeable_dirty(fs.object);
-
-		/*
-		 * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC
-		 * if the page is already dirty to prevent data written with
-		 * the expectation of being synced from not being synced.
-		 * Likewise if this entry does not request NOSYNC then make
-		 * sure the page isn't marked NOSYNC.  Applications sharing
-		 * data should use the same flags to avoid ping ponging.
-		 */
-		if (fs.entry->eflags & MAP_ENTRY_NOSYNC) {
-			if (fs.m->dirty =3D=3D 0)
-				fs.m->oflags |=3D VPO_NOSYNC;
-		} else {
-			fs.m->oflags &=3D ~VPO_NOSYNC;
-		}
-
-		/*
-		 * If the fault is a write, we know that this page is being
-		 * written NOW so dirty it explicitly to save on=20
-		 * pmap_is_modified() calls later.
-		 *
-		 * Also tell the backing pager, if any, that it should remove
-		 * any swap backing since the page is now dirty.
-		 */
-		if (((fault_type & VM_PROT_WRITE) !=3D 0 &&
-		    (fault_flags & VM_FAULT_CHANGE_WIRING) =3D=3D 0) ||
-		    (fault_flags & VM_FAULT_DIRTY) !=3D 0) {
-			vm_page_dirty(fs.m);
-			vm_pager_page_unswapped(fs.m);
-		}
-	}
-
+	vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags, TRUE);
 	vm_page_assert_xbusied(fs.m);
=20
 	/*

--izE3dcXa7Fnj97wq
Content-Type: application/pgp-signature

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2

iQIcBAEBAgAGBQJUCW8IAAoJEJDCuSvBvK1BrvQP/1Ih/f1BTIOdqJbVuRPXCjTh
DyKJfbWtnPrcwY7PBIEO74ILYYt5wWIhaI7W5pw6aLvY/r2z5LMR/+IKC4nqEXEm
heu/N2mUPAzBInrUoTm/tk4JYSBU0OwyGxnKgIAnjtKDRlDs9pYeMqnTXVO10QJv
LrpJnhdQt8oLS7FHasuuKzSEop61z8bGXJFklwQ9dSGLimqNHqkTY4QUdVMeHbvY
nvjzbptpXKzQ83j7ZCGddvw1AT3QW243zybrhjKZzt+T0j+7WM7WZoaC3kxO21q5
/BmSwIXICyVmmKdahLTLwFTKZ4+mThFFvIEK4g3ruWnQfzXao9RoX4Pmnhau097p
d3iqQkfea691VioTmFiBChnzWkPIcftF6nuX4SxVIwi+ksdPzq7f+8d90nvwMgbP
VnD9gnuX/f9k7zSJJsfQOCO7orJaT2IlKLtkFvuP4fn/KfxyO8Q4JjuukDpvQwoR
F5e4m3r0ReaL2RzxO9H7hBe+1jIg7Wbny0zToxDLn6jP6Wrn8FgtQc9px5GMluhl
/vZoX0WcGAX0tw5SCxmXLEVCbiN0i1EEfRuLikuC43DUqVfqIXFsysc2WPY0COT4
mNA5adnAY3rAQ9kyQs2LxTX9Ybg+0x31bldJznF2rJNuXTne+1NCFyGUfDAewc2+
64WiqA1qaZ7kLkUactjO
=oeR7
-----END PGP SIGNATURE-----

--izE3dcXa7Fnj97wq--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20140905080633.GM2737>