Date: Sun, 6 Oct 2019 22:13:35 +0000 (UTC)
From: Mateusz Guzik <mjg@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r353149 - head/sys/amd64/amd64
Message-ID: <201910062213.x96MDZv3085523@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mjg Date: Sun Oct 6 22:13:35 2019 New Revision: 353149 URL: https://svnweb.freebsd.org/changeset/base/353149 Log: amd64 pmap: implement per-superpage locks The current 256-lock sized array is a problem in the following ways: - it's way too small - there are 2 locks per cacheline - it is not NUMA-aware Solve these issues by introducing per-superpage locks backed by pages allocated from respective domains. This significantly reduces contention e.g. during poudriere -j 104. See the review for results. Reviewed by: kib Discussed with: jeff Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D21833 Modified: head/sys/amd64/amd64/pmap.c Modified: head/sys/amd64/amd64/pmap.c ============================================================================== --- head/sys/amd64/amd64/pmap.c Sun Oct 6 20:36:25 2019 (r353148) +++ head/sys/amd64/amd64/pmap.c Sun Oct 6 22:13:35 2019 (r353149) @@ -316,13 +316,25 @@ pmap_pku_mask_bit(pmap_t pmap) #define PV_STAT(x) do { } while (0) #endif -#define pa_index(pa) ((pa) >> PDRSHIFT) +#undef pa_index +#define pa_index(pa) ({ \ + KASSERT((pa) <= vm_phys_segs[vm_phys_nsegs - 1].end, \ + ("address %lx beyond the last segment", (pa))); \ + (pa) >> PDRSHIFT; \ +}) +#if VM_NRESERVLEVEL > 0 +#define pa_to_pmdp(pa) (&pv_table[pa_index(pa)]) +#define pa_to_pvh(pa) (&(pa_to_pmdp(pa)->pv_page)) +#define PHYS_TO_PV_LIST_LOCK(pa) \ + (&(pa_to_pmdp(pa)->pv_lock)) +#else #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) #define NPV_LIST_LOCKS MAXCPU #define PHYS_TO_PV_LIST_LOCK(pa) \ (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) +#endif #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ struct rwlock **_lockp = (lockp); \ @@ -400,14 +412,22 @@ static int pmap_initialized; /* * Data for the pv entry allocation mechanism. - * Updates to pv_invl_gen are protected by the pv_list_locks[] - * elements, but reads are not. + * Updates to pv_invl_gen are protected by the pv list lock but reads are not. 
*/ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static struct mtx __exclusive_cache_line pv_chunks_mutex; +#if VM_NRESERVLEVEL > 0 +struct pmap_large_md_page { + struct rwlock pv_lock; + struct md_page pv_page; + u_long pv_invl_gen; +}; +static struct pmap_large_md_page *pv_table; +#else static struct rwlock __exclusive_cache_line pv_list_locks[NPV_LIST_LOCKS]; static u_long pv_invl_gen[NPV_LIST_LOCKS]; static struct md_page *pv_table; +#endif static struct md_page pv_dummy; /* @@ -918,12 +938,21 @@ SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait_slow, CTLFLA "Number of slow invalidation waits for lockless DI"); #endif +#if VM_NRESERVLEVEL > 0 static u_long * pmap_delayed_invl_genp(vm_page_t m) { + return (&pa_to_pmdp(VM_PAGE_TO_PHYS(m))->pv_invl_gen); +} +#else +static u_long * +pmap_delayed_invl_genp(vm_page_t m) +{ + return (&pv_invl_gen[pa_index(VM_PAGE_TO_PHYS(m)) % NPV_LIST_LOCKS]); } +#endif static void pmap_delayed_invl_callout_func(void *arg __unused) @@ -1803,6 +1832,112 @@ pmap_page_init(vm_page_t m) m->md.pat_mode = PAT_WRITE_BACK; } +#if VM_NRESERVLEVEL > 0 +static void +pmap_init_pv_table(void) +{ + struct pmap_large_md_page *pvd; + vm_size_t s; + long start, end, highest, pv_npg; + int domain, i, j, pages; + + /* + * We strongly depend on the size being a power of two, so the assert + * is overzealous. However, should the struct be resized to a + * different power of two, the code below needs to be revisited. + */ + CTASSERT((sizeof(*pvd) == 64)); + + /* + * Calculate the size of the array. + */ + pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, NBPDR); + s = (vm_size_t)pv_npg * sizeof(struct pmap_large_md_page); + s = round_page(s); + pv_table = (struct pmap_large_md_page *)kva_alloc(s); + if (pv_table == NULL) + panic("%s: kva_alloc failed\n", __func__); + + /* + * Iterate physical segments to allocate space for respective pages. 
+ */ + highest = -1; + s = 0; + for (i = 0; i < vm_phys_nsegs; i++) { + start = vm_phys_segs[i].start / NBPDR; + end = vm_phys_segs[i].end / NBPDR; + domain = vm_phys_segs[i].domain; + + if (highest >= end) + continue; + + if (start < highest) { + start = highest + 1; + pvd = &pv_table[start]; + } else { + /* + * The lowest address may land somewhere in the middle + * of our page. Simplify the code by pretending it is + * at the beginning. + */ + pvd = pa_to_pmdp(vm_phys_segs[i].start); + pvd = (struct pmap_large_md_page *)trunc_page(pvd); + start = pvd - pv_table; + } + + pages = end - start + 1; + s = round_page(pages * sizeof(*pvd)); + highest = start + (s / sizeof(*pvd)) - 1; + + for (j = 0; j < s; j += PAGE_SIZE) { + vm_page_t m = vm_page_alloc_domain(NULL, 0, + domain, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ); + if (m == NULL) + panic("vm_page_alloc_domain failed for %lx\n", (vm_offset_t)pvd + j); + pmap_qenter((vm_offset_t)pvd + j, &m, 1); + } + + for (j = 0; j < s / sizeof(*pvd); j++) { + rw_init_flags(&pvd->pv_lock, "pmap pv list", RW_NEW); + TAILQ_INIT(&pvd->pv_page.pv_list); + pvd->pv_page.pv_gen = 0; + pvd->pv_page.pat_mode = 0; + pvd->pv_invl_gen = 0; + pvd++; + } + } + TAILQ_INIT(&pv_dummy.pv_list); +} +#else +static void +pmap_init_pv_table(void) +{ + vm_size_t s; + long i, pv_npg; + + /* + * Initialize the pool of pv list locks. + */ + for (i = 0; i < NPV_LIST_LOCKS; i++) + rw_init(&pv_list_locks[i], "pmap pv list"); + + /* + * Calculate the size of the pv head table for superpages. + */ + pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, NBPDR); + + /* + * Allocate memory for the pv head table for superpages. + */ + s = (vm_size_t)pv_npg * sizeof(struct md_page); + s = round_page(s); + pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO); + for (i = 0; i < pv_npg; i++) + TAILQ_INIT(&pv_table[i].pv_list); + TAILQ_INIT(&pv_dummy.pv_list); +} +#endif + /* * Initialize the pmap module. 
* Called by vm_init, to initialize any structures that the pmap @@ -1813,8 +1948,7 @@ pmap_init(void) { struct pmap_preinit_mapping *ppim; vm_page_t m, mpte; - vm_size_t s; - int error, i, pv_npg, ret, skz63; + int error, i, ret, skz63; /* L1TF, reserve page @0 unconditionally */ vm_page_blacklist_add(0, bootverbose); @@ -1902,26 +2036,7 @@ pmap_init(void) */ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); - /* - * Initialize the pool of pv list locks. - */ - for (i = 0; i < NPV_LIST_LOCKS; i++) - rw_init(&pv_list_locks[i], "pmap pv list"); - - /* - * Calculate the size of the pv head table for superpages. - */ - pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, NBPDR); - - /* - * Allocate memory for the pv head table for superpages. - */ - s = (vm_size_t)(pv_npg * sizeof(struct md_page)); - s = round_page(s); - pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO); - for (i = 0; i < pv_npg; i++) - TAILQ_INIT(&pv_table[i].pv_list); - TAILQ_INIT(&pv_dummy.pv_list); + pmap_init_pv_table(); pmap_initialized = 1; for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) {
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201910062213.x96MDZv3085523>