From owner-svn-src-head@freebsd.org Sun Aug 18 07:06:33 2019 Return-Path: Delivered-To: svn-src-head@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 1471BBE09C; Sun, 18 Aug 2019 07:06:33 +0000 (UTC) (envelope-from jeff@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) server-signature RSA-PSS (4096 bits) client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 46B7QJ6n5Tz4CD1; Sun, 18 Aug 2019 07:06:32 +0000 (UTC) (envelope-from jeff@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id C6FF826490; Sun, 18 Aug 2019 07:06:32 +0000 (UTC) (envelope-from jeff@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id x7I76WA8044825; Sun, 18 Aug 2019 07:06:32 GMT (envelope-from jeff@FreeBSD.org) Received: (from jeff@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id x7I76W8U044822; Sun, 18 Aug 2019 07:06:32 GMT (envelope-from jeff@FreeBSD.org) Message-Id: <201908180706.x7I76W8U044822@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: jeff set sender to jeff@FreeBSD.org using -f From: Jeff Roberson Date: Sun, 18 Aug 2019 07:06:32 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r351181 - head/sys/vm X-SVN-Group: head X-SVN-Commit-Author: jeff X-SVN-Commit-Paths: head/sys/vm X-SVN-Commit-Revision: 351181 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 18 Aug 2019 07:06:33 -0000 Author: jeff Date: Sun Aug 18 07:06:31 2019 New Revision: 351181 URL: https://svnweb.freebsd.org/changeset/base/351181 Log: Encapsulate phys_avail manipulation in a set of simple routines. Add a NUMA aware boot time memory allocator that will be used to allocate early domain correct structures. Code partially submitted by gallatin. Reviewed by: gallatin, kib Tested by: pho Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D21251 Modified: head/sys/vm/vm_page.c head/sys/vm/vm_phys.c head/sys/vm/vm_phys.h Modified: head/sys/vm/vm_page.c ============================================================================== --- head/sys/vm/vm_page.c Sun Aug 18 04:19:41 2019 (r351180) +++ head/sys/vm/vm_page.c Sun Aug 18 07:06:31 2019 (r351181) @@ -538,7 +538,7 @@ vm_page_startup(vm_offset_t vaddr) char *list, *listend; vm_offset_t mapped; vm_paddr_t end, high_avail, low_avail, new_end, page_range, size; - vm_paddr_t biggestsize, last_pa, pa; + vm_paddr_t last_pa, pa; u_long pagecount; int biggestone, i, segind; #ifdef WITNESS @@ -548,22 +548,10 @@ vm_page_startup(vm_offset_t vaddr) long ii; #endif - biggestsize = 0; - biggestone = 0; vaddr = round_page(vaddr); - for (i = 0; phys_avail[i + 1]; i += 2) { - phys_avail[i] = round_page(phys_avail[i]); - phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); - } - for (i = 0; phys_avail[i + 1]; i += 2) { - size = phys_avail[i + 1] - phys_avail[i]; - if (size > biggestsize) { - biggestone = i; - biggestsize = size; - } - } - + vm_phys_early_startup(); + biggestone = vm_phys_avail_largest(); end = phys_avail[biggestone+1]; /* @@ -776,7 +764,8 @@ vm_page_startup(vm_offset_t vaddr) * physical pages. 
*/ for (i = 0; phys_avail[i + 1] != 0; i += 2) - vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]); + if (vm_phys_avail_size(i) != 0) + vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]); /* * Initialize the physical memory allocator. Modified: head/sys/vm/vm_phys.c ============================================================================== --- head/sys/vm/vm_phys.c Sun Aug 18 04:19:41 2019 (r351180) +++ head/sys/vm/vm_phys.c Sun Aug 18 07:06:31 2019 (r351181) @@ -1101,8 +1101,8 @@ vm_phys_free_pages(vm_page_t m, int order) vm_page_t m_buddy; KASSERT(m->order == VM_NFREEORDER, - ("vm_phys_free_pages: page %p has unexpected order %d", - m, m->order)); + ("vm_phys_free_pages: page %p(%p) has unexpected order %d", + m, (void *)m->phys_addr, m->order)); KASSERT(m->pool < VM_NFREEPOOL, ("vm_phys_free_pages: page %p has unexpected pool %d", m, m->pool)); @@ -1499,6 +1499,222 @@ done: vm_phys_enq_range(&m_ret[npages], npages_end - npages, fl, 0); } return (m_ret); +} + +/* + * Return the index of the first unused slot which may be the terminating + * entry. + */ +static int +vm_phys_avail_count(void) +{ + int i; + + for (i = 0; phys_avail[i + 1]; i += 2) + continue; + if (i > PHYS_AVAIL_ENTRIES) + panic("Improperly terminated phys_avail %d entries", i); + + return (i); +} + +/* + * Assert that a phys_avail entry is valid. + */ +static void +vm_phys_avail_check(int i) +{ + if (phys_avail[i] & PAGE_MASK) + panic("Unaligned phys_avail[%d]: %#jx", i, + (intmax_t)phys_avail[i]); + if (phys_avail[i+1] & PAGE_MASK) + panic("Unaligned phys_avail[%d + 1]: %#jx", i, + (intmax_t)phys_avail[i]); + if (phys_avail[i + 1] < phys_avail[i]) + panic("phys_avail[%d] start %#jx < end %#jx", i, + (intmax_t)phys_avail[i], (intmax_t)phys_avail[i+1]); +} + +/* + * Return the index of an overlapping phys_avail entry or -1. 
+ */ +static int +vm_phys_avail_find(vm_paddr_t pa) +{ + int i; + + for (i = 0; phys_avail[i + 1]; i += 2) + if (phys_avail[i] <= pa && phys_avail[i + 1] > pa) + return (i); + return (-1); +} + +/* + * Return the index of the largest entry. + */ +int +vm_phys_avail_largest(void) +{ + vm_paddr_t sz, largesz; + int largest; + int i; + + largest = 0; + largesz = 0; + for (i = 0; phys_avail[i + 1]; i += 2) { + sz = vm_phys_avail_size(i); + if (sz > largesz) { + largesz = sz; + largest = i; + } + } + + return (largest); +} + +vm_paddr_t +vm_phys_avail_size(int i) +{ + + return (phys_avail[i + 1] - phys_avail[i]); +} + +/* + * Split an entry at the address 'pa'. Return zero on success or errno. + */ +static int +vm_phys_avail_split(vm_paddr_t pa, int i) +{ + int cnt; + + vm_phys_avail_check(i); + if (pa <= phys_avail[i] || pa >= phys_avail[i + 1]) + panic("vm_phys_avail_split: invalid address"); + cnt = vm_phys_avail_count(); + if (cnt >= PHYS_AVAIL_ENTRIES) + return (ENOSPC); + memmove(&phys_avail[i + 2], &phys_avail[i], + (cnt - i) * sizeof(phys_avail[0])); + phys_avail[i + 1] = pa; + phys_avail[i + 2] = pa; + vm_phys_avail_check(i); + vm_phys_avail_check(i+2); + + return (0); +} + +/* + * This routine allocates NUMA node specific memory before the page + * allocator is bootstrapped. + */ +vm_paddr_t +vm_phys_early_alloc(int domain, size_t alloc_size) +{ + int i, mem_index, biggestone; + vm_paddr_t pa, mem_start, mem_end, size, biggestsize, align; + + + /* + * Search the mem_affinity array for the biggest address + * range in the desired domain. This is used to constrain + * the phys_avail selection below. 
+ */ + biggestsize = 0; + mem_index = 0; + mem_start = 0; + mem_end = -1; +#ifdef NUMA + if (mem_affinity != NULL) { + for (i = 0; ; i++) { + size = mem_affinity[i].end - mem_affinity[i].start; + if (size == 0) + break; + if (mem_affinity[i].domain != domain) + continue; + if (size > biggestsize) { + mem_index = i; + biggestsize = size; + } + } + mem_start = mem_affinity[mem_index].start; + mem_end = mem_affinity[mem_index].end; + } +#endif + + /* + * Now find the biggest physical segment within the desired + * NUMA domain. + */ + biggestsize = 0; + biggestone = 0; + for (i = 0; phys_avail[i + 1] != 0; i += 2) { + /* skip regions that are out of range */ + if (phys_avail[i+1] - alloc_size < mem_start || + phys_avail[i+1] > mem_end) + continue; + size = vm_phys_avail_size(i); + if (size > biggestsize) { + biggestone = i; + biggestsize = size; + } + } + alloc_size = round_page(alloc_size); + + /* + * Grab single pages from the front to reduce fragmentation. + */ + if (alloc_size == PAGE_SIZE) { + pa = phys_avail[biggestone]; + phys_avail[biggestone] += PAGE_SIZE; + vm_phys_avail_check(biggestone); + return (pa); + } + + /* + * Naturally align large allocations. + */ + align = phys_avail[biggestone + 1] & (alloc_size - 1); + if (alloc_size + align > biggestsize) + panic("cannot find a large enough size\n"); + if (align != 0 && + vm_phys_avail_split(phys_avail[biggestone + 1] - align, + biggestone) != 0) + /* Wasting memory. */ + phys_avail[biggestone + 1] -= align; + + phys_avail[biggestone + 1] -= alloc_size; + vm_phys_avail_check(biggestone); + pa = phys_avail[biggestone + 1]; + return (pa); +} + +void +vm_phys_early_startup(void) +{ + int i; + + for (i = 0; phys_avail[i + 1] != 0; i += 2) { + phys_avail[i] = round_page(phys_avail[i]); + phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); + } + +#ifdef NUMA + /* Force phys_avail to be split by domain. 
*/ + if (mem_affinity != NULL) { + int idx; + + for (i = 0; mem_affinity[i].end != 0; i++) { + idx = vm_phys_avail_find(mem_affinity[i].start); + if (idx != -1 && + phys_avail[idx] != mem_affinity[i].start) + vm_phys_avail_split(mem_affinity[i].start, idx); + idx = vm_phys_avail_find(mem_affinity[i].end); + if (idx != -1 && + phys_avail[idx] != mem_affinity[i].end) + vm_phys_avail_split(mem_affinity[i].end, idx); + } + } +#endif } #ifdef DDB Modified: head/sys/vm/vm_phys.h ============================================================================== --- head/sys/vm/vm_phys.h Sun Aug 18 04:19:41 2019 (r351180) +++ head/sys/vm/vm_phys.h Sun Aug 18 07:06:31 2019 (r351181) @@ -103,6 +103,11 @@ vm_page_t vm_phys_scan_contig(int domain, u_long npage void vm_phys_set_pool(int pool, vm_page_t m, int order); boolean_t vm_phys_unfree_page(vm_page_t m); int vm_phys_mem_affinity(int f, int t); +vm_paddr_t vm_phys_early_alloc(int domain, size_t alloc_size); +void vm_phys_early_startup(void); +int vm_phys_avail_largest(void); +vm_paddr_t vm_phys_avail_size(int i); + /* *