From owner-svn-src-user@FreeBSD.ORG Fri Mar 16 15:41:08 2012 Return-Path: Delivered-To: svn-src-user@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 9B0E9106566B; Fri, 16 Mar 2012 15:41:08 +0000 (UTC) (envelope-from attilio@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 77F988FC0A; Fri, 16 Mar 2012 15:41:08 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q2GFf8rY024031; Fri, 16 Mar 2012 15:41:08 GMT (envelope-from attilio@svn.freebsd.org) Received: (from attilio@localhost) by svn.freebsd.org (8.14.4/8.14.4/Submit) id q2GFf8PR024028; Fri, 16 Mar 2012 15:41:08 GMT (envelope-from attilio@svn.freebsd.org) Message-Id: <201203161541.q2GFf8PR024028@svn.freebsd.org> From: Attilio Rao Date: Fri, 16 Mar 2012 15:41:08 +0000 (UTC) To: src-committers@freebsd.org, svn-src-user@freebsd.org X-SVN-Group: user MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r233034 - in user/attilio/vmcontention/sys: kern vm X-BeenThere: svn-src-user@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the experimental " user" src tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 16 Mar 2012 15:41:08 -0000 Author: attilio Date: Fri Mar 16 15:41:07 2012 New Revision: 233034 URL: http://svn.freebsd.org/changeset/base/233034 Log: Fix the nodes allocator in architectures without direct-mapping: - Fix bugs in the free path where the pages were not unwired and relevant locking wasn't acquired. - Introduce the rnode_map, submap of kernel_map, where to allocate from. 
The reason is that, in architectures without direct-mapping, kmem_alloc*() will try to insert the newly created mapping while holding the vm_object lock introducing a LOR or lock recursion. rnode_map is however a leafly-used submap, thus there cannot be any deadlock. Notes: Size the submap in order to be, by default, around 64 MB and decrease the size of the nodes as the allocation will be much smaller (and when the compacting code in the vm_radix will be implemented this will aim for much less space to be used). However note that the size of the submap can be changed at boot time via the hw.rnode_map_scale scaling factor. - Use uma_zone_set_max() covering the size of the submap. Tested by: flo Modified: user/attilio/vmcontention/sys/kern/subr_witness.c user/attilio/vmcontention/sys/vm/vm_radix.c Modified: user/attilio/vmcontention/sys/kern/subr_witness.c ============================================================================== --- user/attilio/vmcontention/sys/kern/subr_witness.c Fri Mar 16 13:46:54 2012 (r233033) +++ user/attilio/vmcontention/sys/kern/subr_witness.c Fri Mar 16 15:41:07 2012 (r233034) @@ -602,6 +602,7 @@ static struct witness_order_list_entry o * VM * */ + { "system map", &lock_class_mtx_sleep }, { "vm object", &lock_class_mtx_sleep }, { "page lock", &lock_class_mtx_sleep }, { "vm page queue mutex", &lock_class_mtx_sleep }, Modified: user/attilio/vmcontention/sys/vm/vm_radix.c ============================================================================== --- user/attilio/vmcontention/sys/vm/vm_radix.c Fri Mar 16 13:46:54 2012 (r233033) +++ user/attilio/vmcontention/sys/vm/vm_radix.c Fri Mar 16 15:41:07 2012 (r233034) @@ -49,12 +49,30 @@ #include #include #include +#ifndef UMA_MD_SMALL_ALLOC +#include +#endif #include #include #include +#ifndef UMA_MD_SMALL_ALLOC +#define VM_RADIX_RNODE_MAP_SCALE (1024 * 1024 / 2) +#define VM_RADIX_WIDTH 4 + +/* + * Bits of height in root. + * The mask of smaller power of 2 containing VM_RADIX_LIMIT. 
+ */ +#define VM_RADIX_HEIGHT 0x1f +#else #define VM_RADIX_WIDTH 5 + +/* See the comment above. */ +#define VM_RADIX_HEIGHT 0xf +#endif + #define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH) #define VM_RADIX_MASK (VM_RADIX_COUNT - 1) #define VM_RADIX_MAXVAL ((vm_pindex_t)-1) @@ -63,9 +81,6 @@ /* Flag bits stored in node pointers. */ #define VM_RADIX_FLAGS 0x3 -/* Bits of height in root. */ -#define VM_RADIX_HEIGHT 0xf - /* Calculates maximum value for a tree of height h. */ #define VM_RADIX_MAX(h) \ ((h) == VM_RADIX_LIMIT ? VM_RADIX_MAXVAL : \ @@ -84,6 +99,9 @@ CTASSERT(sizeof(struct vm_radix_node) < static uma_zone_t vm_radix_node_zone; #ifndef UMA_MD_SMALL_ALLOC +static vm_map_t rnode_map; +static u_long rnode_map_scale; + static void * vm_radix_node_zone_allocf(uma_zone_t zone, int size, uint8_t *flags, int wait) { @@ -91,7 +109,7 @@ vm_radix_node_zone_allocf(uma_zone_t zon vm_page_t m; int pflags; - /* Inform UMA that this allocator uses kernel_map. */ + /* Inform UMA that this allocator uses rnode_map. 
*/ *flags = UMA_SLAB_KERNEL; pflags = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ; @@ -104,7 +122,7 @@ vm_radix_node_zone_allocf(uma_zone_t zon VM_ALLOC_SYSTEM; if ((wait & M_ZERO) != 0) pflags |= VM_ALLOC_ZERO; - addr = kmem_alloc_nofault(kernel_map, size); + addr = kmem_alloc_nofault(rnode_map, size); if (addr == 0) return (NULL); @@ -112,7 +130,7 @@ vm_radix_node_zone_allocf(uma_zone_t zon m = vm_page_alloc(NULL, OFF_TO_IDX(addr - VM_MIN_KERNEL_ADDRESS), pflags); if (m == NULL) { - kmem_free(kernel_map, addr, size); + kmem_free(rnode_map, addr, size); return (NULL); } if ((wait & M_ZERO) != 0 && (m->flags & PG_ZERO) == 0) @@ -133,14 +151,18 @@ vm_radix_node_zone_freef(void *item, int voitem = (vm_offset_t)item; m = PHYS_TO_VM_PAGE(pmap_kextract(voitem)); pmap_qremove(voitem, 1); + vm_page_lock(m); + vm_page_unwire(m, 0); vm_page_free(m); - kmem_free(kernel_map, voitem, size); + vm_page_unlock(m); + kmem_free(rnode_map, voitem, size); } static void init_vm_radix_alloc(void *dummy __unused) { + uma_zone_set_max(vm_radix_node_zone, rnode_map_scale); uma_zone_set_allocf(vm_radix_node_zone, vm_radix_node_zone_allocf); uma_zone_set_freef(vm_radix_node_zone, vm_radix_node_zone_freef); } @@ -193,9 +215,31 @@ vm_radix_slot(vm_pindex_t index, int lev return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK); } +/* + * Initialize the radix node submap (for architectures not supporting + * direct-mapping) and the radix node zone. + * + * WITNESS reports a lock order reversal, for architectures not + * supporting direct-mapping, between the "system map" lock + * and the "vm object" lock. This is because the well established ordering + * "system map" -> "vm object" is not honoured in this case as allocating + * from the radix node submap ends up adding a mapping entry to it, meaning + * it is necessary to lock the submap. 
However, the radix node submap is + * a leaf and self-contained, thus a deadlock cannot happen here and + * adding MTX_NOWITNESS to all map locks would be largely sub-optimal. + */ void vm_radix_init(void) { +#ifndef UMA_MD_SMALL_ALLOC + vm_offset_t maxaddr, minaddr; + + rnode_map_scale = VM_RADIX_RNODE_MAP_SCALE; + TUNABLE_ULONG_FETCH("hw.rnode_map_scale", &rnode_map_scale); + rnode_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, + rnode_map_scale * sizeof(struct vm_radix_node), FALSE); + rnode_map->system_map = 1; +#endif vm_radix_node_zone = uma_zcreate("RADIX NODE", sizeof(struct vm_radix_node), NULL,