Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 7 Aug 2007 13:39:11 +0300
From:      Kostik Belousov <kostikbel@gmail.com>
To:        Tijl Coosemans <tijl@ulyssis.org>
Cc:        wine-freebsd@hub.org, freebsd-current@freebsd.org
Subject:   Re: mmap(2) MAP_FIXED isn't thread-safe (+testcase)
Message-ID:  <20070807103910.GI2738@deviant.kiev.zoral.com.ua>
In-Reply-To: <200708062032.10514.tijl@ulyssis.org>
References:  <200708051656.50168.tijl@ulyssis.org> <20070806113821.GB2738@deviant.kiev.zoral.com.ua> <200708062032.10514.tijl@ulyssis.org>

next in thread | previous in thread | raw e-mail | index | archive | help

--SDQiEKxzCZ35UpgS
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On Mon, Aug 06, 2007 at 08:32:08PM +0200, Tijl Coosemans wrote:
> On Monday 06 August 2007 13:38:21 Kostik Belousov wrote:
> > On Sun, Aug 05, 2007 at 04:56:46PM +0200, Tijl Coosemans wrote:
> >> While investigating ports/115092 and other reports of seemingly
> >> random page faults when running Wine, I think I've found the cause
> >> to be mmap not being thread-safe when MAP_FIXED is used. It causes
> >> mmap(MAP_FIXED) to return -1(ENOMEM) sometimes when it shouldn't,
> >> but also to return an address with wrong protections, hence the
> >> protection faults occuring.
> >>=20
> >> Attached is a test program that shows this. It runs two threads. The
> >> first mmap()'s a region, starts a second thread and then goes in a
> >> loop calling mmap(PROT_WRITE,MAP_FIXED) on that region, essentially
> >> replacing that mapping. This is basically what rtld does to map an
> >> ELF object for instance when dlopen(3) is called. The second thread
> >> tries to steal the mapping from the first by calling mmap(PROT_NONE)
> >> in a loop. After a while the program segfaults when the first thread
> >> tries to write to the mapped region.
> >>=20
> >> Some lines are commented out. If you remove the commenting, I hit on
> >> the case where mmap(MAP_FIXED) returns -1.
> >>=20
> >> The problem is in sys/vm/vm_mmap.c:vm_mmap(). In case of MAP_FIXED
> >> first vm_map_remove() is called and then later vm_map_find(). This
> >> would need some locking, but I don't know which lock or how to
> >> approach this, so can somebody have a look at this?
> >>=20
> >> #include <sys/mman.h>
> >> #include <errno.h>
> >> #include <pthread.h>
> >> #include <stdio.h>
> >>=20
> >> #define MAPSIZE		( 16 * 4096 )
> >>=20
> >> void *second_thr( void *addr ) {
> >> 	int i;
> >> 	void *addr2;
> >> 	for( i =3D 0; ; i++ ) {
> >> 		addr2 =3D mmap( NULL, MAPSIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1=
, 0 );
> >> /*
> >> 		if( addr2 !=3D ( addr + MAPSIZE ))
> >> 			break;
> >> */
> >> 		munmap( addr2, MAPSIZE );
> >> 	}
> >> 	printf( "thread2: addr(%p), addr2(%p), i(%d)\n", addr, addr2, i );
> >> 	return NULL;
> >> }
> >>=20
> >> int main( int argc, char **argv ) {
> >> 	int i;
> >> 	void *addr;
> >> 	volatile char *addr_fix;
> >> 	pthread_t thr;
> >>=20
> >> 	addr =3D mmap( NULL, MAPSIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, =
0 );
> >> 	pthread_create( &thr, NULL, &second_thr, addr );
> >>=20
> >> 	for( i =3D 0; ; i++ ) {
> >> 		addr_fix =3D mmap( addr, MAPSIZE, PROT_WRITE, MAP_ANON | MAP_PRIVATE=
 | MAP_FIXED, -1, 0 );
> >> 		if( addr_fix =3D=3D addr )
> >> 			*addr_fix =3D i; /* should be writable */
> >> /*
> >> 		else
> >> 			break;
> >> */
> >> 	}
> >> 	printf( "thread1: addr(%p), addr_fix(%p), errno(%d), i(%d)\n", addr, =
addr_fix, errno, i );
> >>=20
> >> 	pthread_join( thr, NULL );
> >> 	return 0;
> >> }
> >=20
[Old patch skipped].

> I don't think you can do this. Now, when vm_map_find() is called with
> find_space=3DFALSE, it will delete any existing mapping instead of
> failing if the address is not available. Have you checked all locations
> where this function is called to make sure this isn't harmful?

Yes, I looked over this when writing the patch. I think this is what
actually supposed to happen in that case.

Anyway, to not diverge from the old behaviour when fixing the issue,
please test patch below. I do not like introducing recursive locks where
it is quite easy to avoid them.


diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 8799cc5..177e0db 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -155,6 +155,22 @@ static void vmspace_zdtor(void *mem, int size, void *a=
rg);
 #define PROC_VMSPACE_LOCK(p) do { } while (0)
 #define PROC_VMSPACE_UNLOCK(p) do { } while (0)
=20
+/*
+ *	VM_MAP_RANGE_CHECK:	[ internal use only ]
+ *
+ *	Asserts that the starting and ending region
+ *	addresses fall within the valid range of the map.
+ */
+#define	VM_MAP_RANGE_CHECK(map, start, end)		\
+		{					\
+		if (start < vm_map_min(map))		\
+			start =3D vm_map_min(map);	\
+		if (end > vm_map_max(map))		\
+			end =3D vm_map_max(map);		\
+		if (start > end)			\
+			start =3D end;			\
+		}
+
 void
 vm_map_startup(void)
 {
@@ -1178,6 +1194,26 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_oof=
fset_t offset,
 	return (result);
 }
=20
+int
+vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
+	    vm_offset_t *addr,	/* IN/OUT */
+	    vm_size_t length, vm_prot_t prot,
+	    vm_prot_t max, int cow)
+{
+	vm_offset_t start, end;
+	int result;
+
+	start =3D *addr;
+	vm_map_lock(map);
+	end =3D start + length;
+	VM_MAP_RANGE_CHECK(map, start, end);
+	(void) vm_map_delete(map, start, end);
+	result =3D vm_map_insert(map, object, offset,
+		start, end, prot, max, cow);
+	vm_map_unlock(map);
+	return (result);
+}
+
 /*
  *	vm_map_simplify_entry:
  *
@@ -1355,22 +1391,6 @@ _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry,=
 vm_offset_t end)
 }
=20
 /*
- *	VM_MAP_RANGE_CHECK:	[ internal use only ]
- *
- *	Asserts that the starting and ending region
- *	addresses fall within the valid range of the map.
- */
-#define	VM_MAP_RANGE_CHECK(map, start, end)		\
-		{					\
-		if (start < vm_map_min(map))		\
-			start =3D vm_map_min(map);	\
-		if (end > vm_map_max(map))		\
-			end =3D vm_map_max(map);		\
-		if (start > end)			\
-			start =3D end;			\
-		}
-
-/*
  *	vm_map_submap:		[ kernel use only ]
  *
  *	Mark the given range as handled by a subordinate map.
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index 93aad51..1805a79 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -333,6 +333,7 @@ boolean_t vm_map_check_protection (vm_map_t, vm_offset_=
t, vm_offset_t, vm_prot_t
 vm_map_t vm_map_create(pmap_t, vm_offset_t, vm_offset_t);
 int vm_map_delete (vm_map_t, vm_offset_t, vm_offset_t);
 int vm_map_find (vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *, vm_si=
ze_t, boolean_t, vm_prot_t, vm_prot_t, int);
+int vm_map_fixed (vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *, vm_s=
ize_t, vm_prot_t, vm_prot_t, int);
 int vm_map_findspace (vm_map_t, vm_offset_t, vm_size_t, vm_offset_t *);
 int vm_map_inherit (vm_map_t, vm_offset_t, vm_offset_t, vm_inherit_t);
 void vm_map_init (struct vm_map *, vm_offset_t, vm_offset_t);
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 9522fec..4f44817 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -1341,7 +1341,6 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t si=
ze, vm_prot_t prot,
 		if (*addr !=3D trunc_page(*addr))
 			return (EINVAL);
 		fitit =3D FALSE;
-		(void) vm_map_remove(map, *addr, *addr + size);
 	}
 	/*
 	 * Lookup/allocate object.
@@ -1400,8 +1399,11 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t s=
ize, vm_prot_t prot,
 	if (flags & MAP_STACK)
 		rv =3D vm_map_stack(map, *addr, size, prot, maxprot,
 		    docow | MAP_STACK_GROWS_DOWN);
+	else if (fitit)
+		rv =3D vm_map_find(map, object, foff, addr, size, TRUE,
+				 prot, maxprot, docow);
 	else
-		rv =3D vm_map_find(map, object, foff, addr, size, fitit,
+		rv =3D vm_map_fixed(map, object, foff, addr, size,
 				 prot, maxprot, docow);
=20
 	if (rv !=3D KERN_SUCCESS) {

--SDQiEKxzCZ35UpgS
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.7 (FreeBSD)

iD8DBQFGuEvOC3+MBN1Mb4gRAlk4AJ9wDdefcdTmQil2GCMGURMWYBsyMQCeIP4W
f+Vtf3xEVZETqTrM9AhXeAg=
=HFT6
-----END PGP SIGNATURE-----

--SDQiEKxzCZ35UpgS--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20070807103910.GI2738>