From owner-freebsd-hackers Thu Nov 15 9: 0:33 2001 Delivered-To: freebsd-hackers@freebsd.org Received: from green.bikeshed.org (freefall.FreeBSD.org [216.136.204.21]) by hub.freebsd.org (Postfix) with ESMTP id 8557537B418 for ; Thu, 15 Nov 2001 09:00:05 -0800 (PST) Received: from localhost (green@localhost) by green.bikeshed.org (8.11.4/8.11.1) with ESMTP id fAFH03l69797 for ; Thu, 15 Nov 2001 12:00:04 -0500 (EST) (envelope-from green@green.bikeshed.org) Message-Id: <200111151700.fAFH03l69797@green.bikeshed.org> X-Mailer: exmh version 2.5 07/13/2001 with nmh-1.0.4 To: hackers@FreeBSD.org Subject: removing lockmgr from sys/vm From: Brian Fundakowski Feldman Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Date: Thu, 15 Nov 2001 12:00:03 -0500 Sender: owner-freebsd-hackers@FreeBSD.ORG Precedence: bulk List-ID: List-Archive: (Web Archive) List-Help: (List Instructions) List-Subscribe: List-Unsubscribe: X-Loop: FreeBSD.ORG I've taken a few shots at getting lockmgr removed from sys/vm to replace it with much simpler sx lock semantics, and now I'm at a point that I really feel I've done as much as I can to be sure that I'm going about this the right way, and would like feedback from others on what could be wrong. The major obstacle is the very vague set of assurances that vm gives you as to what locks are held when, and what may be okay or not okay to hold. According to WITNESS, however, I've been able to avoid now all warnings except for one that occurs in zalloc() because it locks kernel_map, and another vm_map may already be locked by the process. A second issue is that of deadlocks; I believe I may have removed a deadlock in vm_fault1() which would occur because a shared lock is never actually dropped when trying to upgrade for an exclusive lock on the map. In practice, this may be hard to reproduce unintentionally, but I think that two RFMEM processes could probably do it. Here are my diffs; I'd appreciate any insight into what I may have gotten wrong. Index: vm_fault.c =================================================================== RCS file: /home/freebsd/ncvs/src/sys/vm/vm_fault.c,v retrieving revision 1.126 diff -u -r1.126 vm_fault.c --- vm_fault.c 2001/10/26 00:08:05 1.126 +++ vm_fault.c 2001/11/15 16:42:58 @@ -128,6 +128,8 @@ unlock_map(struct faultstate *fs) { if (fs->lookup_still_valid) { + if (fs->lookup_still_valid == 2) + vm_map_lock_downgrade(fs->map); vm_map_lookup_done(fs->map, fs->entry); fs->lookup_still_valid = FALSE; } @@ -280,7 +282,7 @@ fs.first_pindex, fs.first_pindex + 1); } - fs.lookup_still_valid = TRUE; + fs.lookup_still_valid = 1; if (wired) fault_type = prot; @@ -666,10 +668,10 @@ * grab the lock if we need to */ (fs.lookup_still_valid || - lockmgr(&fs.map->lock, LK_EXCLUSIVE|LK_NOWAIT, (void *)0, curthread) == 0) + vm_map_try_lock(fs.map) == 0) ) { - - fs.lookup_still_valid = 1; + if (fs.lookup_still_valid == 0) + fs.lookup_still_valid = 2; /* * get rid of the unnecessary page */ @@ -778,7 +780,7 @@ unlock_and_deallocate(&fs); return (result); } - fs.lookup_still_valid = TRUE; + fs.lookup_still_valid = 1; if ((retry_object != fs.first_object) || (retry_pindex != fs.first_pindex)) { Index: vm_glue.c =================================================================== RCS file: /home/freebsd/ncvs/src/sys/vm/vm_glue.c,v retrieving revision 1.120 diff -u -r1.120 vm_glue.c --- vm_glue.c 2001/10/10 23:06:54 1.120 +++ vm_glue.c 2001/11/15 16:42:58 @@ -576,9 +576,7 @@ * data structures there is a * possible deadlock. */ - if (lockmgr(&vm->vm_map.lock, - LK_EXCLUSIVE | LK_NOWAIT, - NULL, curthread)) { + if (vm_map_try_lock(&vm->vm_map)) { vmspace_free(vm); PROC_UNLOCK(p); goto nextproc; Index: vm_map.c =================================================================== RCS file: /home/freebsd/ncvs/src/sys/vm/vm_map.c,v retrieving revision 1.211 diff -u -r1.211 vm_map.c --- vm_map.c 2001/10/31 03:06:32 1.211 +++ vm_map.c 2001/11/15 16:42:58 @@ -264,75 +264,63 @@ } void -vm_map_lock(vm_map_t map) +_vm_map_lock(vm_map_t map, const char *file, int line) { vm_map_printf("locking map LK_EXCLUSIVE: %p\n", map); - if (lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread) != 0) - panic("vm_map_lock: failed to get lock"); + _sx_xlock(&map->lock, file, line); map->timestamp++; } +int +_vm_map_try_lock(vm_map_t map, const char *file, int line) +{ + vm_map_printf("trying to lock map LK_EXCLUSIVE: %p\n", map); + if (_sx_try_xlock(&map->lock, file, line)) { + map->timestamp++; + return (0); + } + return (EWOULDBLOCK); +} + void -vm_map_unlock(vm_map_t map) +_vm_map_unlock(vm_map_t map, const char *file, int line) { vm_map_printf("locking map LK_RELEASE: %p\n", map); - lockmgr(&(map)->lock, LK_RELEASE, NULL, curthread); + _sx_xunlock(&map->lock, file, line); } void -vm_map_lock_read(vm_map_t map) +_vm_map_lock_read(vm_map_t map, const char *file, int line) { vm_map_printf("locking map LK_SHARED: %p\n", map); - lockmgr(&(map)->lock, LK_SHARED, NULL, curthread); + _sx_slock(&map->lock, file, line); } void -vm_map_unlock_read(vm_map_t map) +_vm_map_unlock_read(vm_map_t map, const char *file, int line) { vm_map_printf("locking map LK_RELEASE: %p\n", map); - lockmgr(&(map)->lock, LK_RELEASE, NULL, curthread); + _sx_sunlock(&map->lock, file, line); } -static __inline__ int -_vm_map_lock_upgrade(vm_map_t map, struct thread *td) { - int error; - - vm_map_printf("locking map LK_EXCLUPGRADE: %p\n", map); - error = lockmgr(&map->lock, LK_EXCLUPGRADE, NULL, td); - if (error == 0) - map->timestamp++; - return error; -} - int -vm_map_lock_upgrade(vm_map_t map) +_vm_map_lock_upgrade(vm_map_t map, const char *file, int line) { - return(_vm_map_lock_upgrade(map, curthread)); + vm_map_printf("locking map LK_EXCLUPGRADE: %p\n", map); + if (_sx_try_upgrade(&map->lock, file, line)) { + map->timestamp++; + return (0); + } + return (EWOULDBLOCK); } void -vm_map_lock_downgrade(vm_map_t map) +_vm_map_lock_downgrade(vm_map_t map, const char *file, int line) { vm_map_printf("locking map LK_DOWNGRADE: %p\n", map); - lockmgr(&map->lock, LK_DOWNGRADE, NULL, curthread); -} - -void -vm_map_set_recursive(vm_map_t map) -{ - mtx_lock((map)->lock.lk_interlock); - map->lock.lk_flags |= LK_CANRECURSE; - mtx_unlock((map)->lock.lk_interlock); + _sx_downgrade(&map->lock, file, line); } -void -vm_map_clear_recursive(vm_map_t map) -{ - mtx_lock((map)->lock.lk_interlock); - map->lock.lk_flags &= ~LK_CANRECURSE; - mtx_unlock((map)->lock.lk_interlock); -} - vm_offset_t vm_map_min(vm_map_t map) { @@ -404,7 +392,7 @@ map->first_free = &map->header; map->hint = &map->header; map->timestamp = 0; - lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE); + sx_init(&map->lock, "thrd_sleep"); } void @@ -412,7 +400,7 @@ struct vm_map *map; { GIANT_REQUIRED; - lockdestroy(&map->lock); + sx_destroy(&map->lock); } /* @@ -1493,17 +1481,13 @@ eend = entry->end; /* First we need to allow map modifications */ - vm_map_set_recursive(map); - vm_map_lock_downgrade(map); map->timestamp++; rv = vm_fault_user_wire(map, entry->start, entry->end); if (rv) { - + vm_map_lock(map); entry->wired_count--; entry->eflags &= ~MAP_ENTRY_USER_WIRED; - - vm_map_clear_recursive(map); vm_map_unlock(map); /* @@ -1517,8 +1501,14 @@ return rv; } - vm_map_clear_recursive(map); - if (vm_map_lock_upgrade(map)) { + /* + * XXX- This is only okay because we have the + * Giant lock. If the VM system were to be + * reentrant, we'd know that we really can't + * do this. Still, this behavior is no worse + * than the old recursion... + */ + if (vm_map_try_lock(map)) { vm_map_lock(map); if (vm_map_lookup_entry(map, estart, &entry) == FALSE) { @@ -1760,13 +1750,13 @@ entry = entry->next; } - if (vm_map_pmap(map) == kernel_pmap) { - vm_map_lock(map); - } if (rv) { - vm_map_unlock(map); + if (vm_map_pmap(map) != kernel_pmap) + vm_map_unlock_read(map); (void) vm_map_pageable(map, start, failed, TRUE); return (rv); + } else if (vm_map_pmap(map) == kernel_pmap) { + vm_map_lock(map); } /* * An exclusive lock on the map is needed in order to call @@ -1775,6 +1765,7 @@ */ if (vm_map_pmap(map) != kernel_pmap && vm_map_lock_upgrade(map)) { + vm_map_unlock_read(map); vm_map_lock(map); if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) { @@ -2551,8 +2542,10 @@ * might have intended by limiting the stack size. */ if (grow_amount > stack_entry->start - end) { - if (vm_map_lock_upgrade(map)) + if (vm_map_lock_upgrade(map)) { + vm_map_unlock_read(map); goto Retry; + } stack_entry->avail_ssize = stack_entry->start - end; @@ -2582,8 +2575,10 @@ ctob(vm->vm_ssize); } - if (vm_map_lock_upgrade(map)) + if (vm_map_lock_upgrade(map)) { + vm_map_unlock_read(map); goto Retry; + } /* Get the preliminary new entry start value */ addr = stack_entry->start - grow_amount; @@ -2818,8 +2813,10 @@ * object. */ - if (vm_map_lock_upgrade(map)) + if (vm_map_lock_upgrade(map)) { + vm_map_unlock_read(map); goto RetryLookup; + } vm_object_shadow( &entry->object.vm_object, @@ -2843,8 +2840,10 @@ */ if (entry->object.vm_object == NULL && !map->system_map) { - if (vm_map_lock_upgrade(map)) + if (vm_map_lock_upgrade(map)) { + vm_map_unlock_read(map); goto RetryLookup; + } entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT, atop(entry->end - entry->start)); @@ -3082,7 +3081,10 @@ pmap_remove (map->pmap, uaddr, tend); vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize); - vm_map_lock_upgrade(map); + if (vm_map_lock_upgrade(map)) { + vm_map_unlock_read(map); + vm_map_lock(map); + } if (entry == &map->header) { map->first_free = &map->header; Index: vm_map.h =================================================================== RCS file: /home/freebsd/ncvs/src/sys/vm/vm_map.h,v retrieving revision 1.68 diff -u -r1.68 vm_map.h --- vm_map.h 2001/10/31 03:06:32 1.68 +++ vm_map.h 2001/11/15 16:42:58 @@ -71,7 +71,8 @@ #ifndef _VM_MAP_ #define _VM_MAP_ -#include +#include +#include #ifdef MAP_LOCK_DIAGNOSTIC #include @@ -154,7 +155,7 @@ */ struct vm_map { struct vm_map_entry header; /* List of entries */ - struct lock lock; /* Lock for map data */ + struct sx lock; /* Lock for map data */ int nentries; /* Number of entries */ vm_size_t size; /* virtual size */ u_char system_map; /* Am I a system map? */ @@ -215,14 +216,23 @@ } while(0) #endif -void vm_map_lock(vm_map_t map); -void vm_map_unlock(vm_map_t map); -void vm_map_lock_read(vm_map_t map); -void vm_map_unlock_read(vm_map_t map); -int vm_map_lock_upgrade(vm_map_t map); -void vm_map_lock_downgrade(vm_map_t map); -void vm_map_set_recursive(vm_map_t map); -void vm_map_clear_recursive(vm_map_t map); +void _vm_map_lock(vm_map_t map, const char *file, int line); +int _vm_map_try_lock(vm_map_t map, const char *file, int line); +void _vm_map_unlock(vm_map_t map, const char *file, int line); +void _vm_map_lock_read(vm_map_t map, const char *file, int line); +void _vm_map_unlock_read(vm_map_t map, const char *file, int line); +int _vm_map_lock_upgrade(vm_map_t map, const char *file, int line); +void _vm_map_lock_downgrade(vm_map_t map, const char *file, int line); + +#define vm_map_lock(map) _vm_map_lock(map, LOCK_FILE, LOCK_LINE) +#define vm_map_try_lock(map) _vm_map_try_lock(map, LOCK_FILE, LOCK_LINE) +#define vm_map_unlock(map) _vm_map_unlock(map, LOCK_FILE, LOCK_LINE) +#define vm_map_lock_read(map) _vm_map_lock_read(map, LOCK_FILE, LOCK_LINE) +#define vm_map_unlock_read(map) _vm_map_unlock_read(map, LOCK_FILE, LOCK_LINE) +#define vm_map_lock_upgrade(map) _vm_map_lock_upgrade(map, LOCK_FILE, LOCK_LINE) +#define vm_map_lock_downgrade(map) _vm_map_lock_downgrade(map, LOCK_FILE, \ + LOCK_LINE) + vm_offset_t vm_map_min(vm_map_t map); vm_offset_t vm_map_max(vm_map_t map); struct pmap *vm_map_pmap(vm_map_t map); Index: vm_pageout.c =================================================================== RCS file: /home/freebsd/ncvs/src/sys/vm/vm_pageout.c,v retrieving revision 1.185 diff -u -r1.185 vm_pageout.c --- vm_pageout.c 2001/10/21 06:12:06 1.185 +++ vm_pageout.c 2001/11/15 16:42:58 @@ -547,9 +547,8 @@ int nothingwired; GIANT_REQUIRED; - if (lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT, (void *)0, curthread)) { + if (vm_map_try_lock(map)) return; - } bigobj = NULL; nothingwired = TRUE; Index: vm_zone.c =================================================================== RCS file: /home/freebsd/ncvs/src/sys/vm/vm_zone.c,v retrieving revision 1.48 diff -u -r1.48 vm_zone.c --- vm_zone.c 2001/08/05 03:55:02 1.48 +++ vm_zone.c 2001/11/15 16:42:58 @@ -409,14 +409,14 @@ * map. */ mtx_unlock(&z->zmtx); - if (lockstatus(&kernel_map->lock, NULL)) { - item = (void *) kmem_malloc(kmem_map, nbytes, M_WAITOK); - if (item != NULL) - atomic_add_int(&zone_kmem_pages, z->zalloc); + item = (void *)kmem_alloc(kernel_map, nbytes); + if (item != NULL) { + atomic_add_int(&zone_kern_pages, z->zalloc); } else { - item = (void *) kmem_alloc(kernel_map, nbytes); + item = (void *)kmem_malloc(kmem_map, nbytes, + M_WAITOK); if (item != NULL) - atomic_add_int(&zone_kern_pages, z->zalloc); + atomic_add_int(&zone_kmem_pages, z->zalloc); } if (item != NULL) { bzero(item, nbytes); -- Brian Fundakowski Feldman \ FreeBSD: The Power to Serve! / green@FreeBSD.org `------------------------------' To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-hackers" in the body of the message