Date: Sat, 29 Apr 2006 02:00:47 GMT
From: Kip Macy <kmacy@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 96341 for review
Message-ID: <200604290200.k3T20lID042675@repoman.freebsd.org>
http://perforce.freebsd.org/chv.cgi?CH=96341

Change 96341 by kmacy@kmacy_storage:sun4v_rwbuf on 2006/04/29 02:00:02

	lock hash buckets during updates to prevent races with TSB miss handlers
	fix pmap_kextract lookups for second 4MB page of nucleus memory
	recycle contexts

Affected files ...

.. //depot/projects/kmacy_sun4v/src/sys/sun4v/include/tte_hash.h#9 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#38 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tte.c#6 edit
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tte_hash.c#14 edit

Differences ...

==== //depot/projects/kmacy_sun4v/src/sys/sun4v/include/tte_hash.h#9 (text+ko) ====

@@ -16,13 +16,13 @@
 
 void tte_hash_destroy(tte_hash_t th);
 
-int tte_hash_delete(tte_hash_t hash, vm_offset_t va);
+int tte_hash_delete(tte_hash_t hash, vm_offset_t va, int locked);
 
 void tte_hash_delete_all(tte_hash_t hash);
 
 void tte_hash_insert(tte_hash_t hash, vm_offset_t va, tte_t data);
 
-tte_t *tte_hash_lookup(tte_hash_t hash, vm_offset_t va);
+tte_t *tte_hash_lookup(tte_hash_t hash, vm_offset_t va, int leave_locked);
 
 uint64_t tte_hash_set_scratchpad_kernel(tte_hash_t th);
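For reviewers: the two new int arguments pair up across calls. A lookup
that passes leave_locked == TRUE and finds an entry returns with the hash
bucket still held, and the follow-up update must then pass locked == TRUE
so the bucket is not taken twice. A minimal sketch of the intended pairing
(illustrative only; it mirrors the pmap.c call sites below, with pmap and
va being whatever the caller has in hand):

	tte_t *tte, tte_data;

	/* on a hit the bucket is left locked for us */
	tte = tte_hash_lookup(pmap->pm_hash, va, TRUE);
	if (tte != NULL) {
		tte_data = *tte;
		/* locked == TRUE: we already hold the bucket lock */
		tte_hash_delete(pmap->pm_hash, va, TRUE);
	}

On a miss, or with leave_locked == FALSE, the bucket is dropped before
tte_hash_lookup() returns, so the caller has nothing to release.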
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#38 (text+ko) ====

@@ -127,7 +127,11 @@
 static struct vm_object pvzone_obj;
 static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
 int pmap_debug = 0;
-static int context = 1; /* XXX */
+
+static struct mtx pmap_ctx_lock;
+static uint16_t ctx_stack[PMAP_CONTEXT_MAX];
+static int ctx_stack_top;
+
 static int permanent_mappings = 0;
 static uint64_t nucleus_memory;
 static uint64_t nucleus_mappings[2];
@@ -230,6 +234,31 @@
 	return (0);
 }
 
+static __inline void
+free_context(uint16_t ctx)
+{
+	mtx_lock_spin(&pmap_ctx_lock);
+	ctx_stack[ctx_stack_top++] = ctx;
+	mtx_unlock_spin(&pmap_ctx_lock);
+
+	KASSERT(ctx_stack_top < PMAP_CONTEXT_MAX, 
+		("context stack overrun - system error"));
+}
+
+static __inline uint16_t
+get_context(void)
+{
+	uint16_t ctx;
+
+	mtx_lock_spin(&pmap_ctx_lock);
+	ctx = ctx_stack[--ctx_stack_top];
+	mtx_unlock_spin(&pmap_ctx_lock);
+
+	KASSERT(ctx_stack_top > 0,
+		("context stack underrun - need to implement context stealing"));
+
+	return ctx;
+}
 
 static __inline void
 free_pv_entry(pv_entry_t pv)
@@ -290,9 +319,9 @@
 			continue;
 		pmap->pm_stats.resident_count--;
-		tte = tte_hash_lookup(pmap->pm_hash, va);
+		tte = tte_hash_lookup(pmap->pm_hash, va, TRUE);
 		tte_data = *tte;
-		tte_hash_delete(pmap->pm_hash, va);
+		tte_hash_delete(pmap->pm_hash, va, TRUE);
 		KASSERT((tte_data & VTD_WIRED) == 0,
 		    ("get_pv_entry: wired pte %#jx", (uintmax_t)tte_data));
@@ -377,12 +406,9 @@
 	pmap->pm_active |= 1;
 	pmap->pm_tlbactive |= 1;
 #endif
-#if 0
-	tsb_clear(&pmap->pm_tsb);
-	tte_hash_clear(pmap->pm_hash);
-#endif
+	/* XXX Is this necessary? */
+	pmap_invalidate_all(pmap);
 
-	pmap->pm_context = context++;
 	pmap->pm_hashscratch = tte_hash_set_scratchpad_user(pmap->pm_hash, pmap->pm_context);
 	pmap->pm_tsbscratch = tsb_set_scratchpad_user(&pmap->pm_tsb);
 	PCPU_SET(curpmap, pmap);
@@ -515,10 +541,6 @@
 		for (j = 0; phys_avail_tmp[j + 2] != 0; phys_avail[j] = phys_avail_tmp[j],
 			phys_avail[j + 1] = phys_avail_tmp[j + 1], j += 2)
 			;
-#if 0
-		for (i = 0; phys_avail_tmp[i + 1] != 0; i++)
-			phys_avail[i] = phys_avail_tmp[i];
-#endif
 	}
 }
@@ -696,6 +718,7 @@
 	pm->pm_tlbactive = ~0;
 
 	PMAP_LOCK_INIT(kernel_pmap);
+	TAILQ_INIT(&kernel_pmap->pm_pvlist);
 
 	error = hv_set_ctx0(MAX_TSB_INFO, vtophys((vm_offset_t)&kernel_td));
@@ -820,10 +843,10 @@
 	tte_t *src_tte, *dst_tte, tte_data;
 	vm_page_t m;
 
-	src_tte = tte_hash_lookup(src_pmap->pm_hash, addr);
+	src_tte = tte_hash_lookup(src_pmap->pm_hash, addr, FALSE);
 	tte_data = src_tte ? *src_tte : 0;
 	if ((tte_data & VTD_MANAGED) != 0) {
-		if ((dst_tte = tte_hash_lookup(dst_pmap->pm_hash, addr)) == NULL) {
+		if ((dst_tte = tte_hash_lookup(dst_pmap->pm_hash, addr, FALSE)) == NULL) {
 			m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data));
 			tte_hash_insert(dst_pmap->pm_hash, addr, tte_data & ~(VTD_W|VTD_REF));
 			dst_pmap->pm_stats.resident_count++;
@@ -877,7 +900,7 @@
 	sched_pin();
 	tte_data = pa = VM_PAGE_TO_PHYS(m);
-	otte = tte_hash_lookup(pmap->pm_hash, va);
+	otte = tte_hash_lookup(pmap->pm_hash, va, TRUE);
 	otte_data = otte ? *otte : 0;
 	opa = TTE_GET_PA(otte_data);
 	/*
@@ -1037,12 +1060,10 @@
 {
 	vm_paddr_t pa;
 	tte_t *tte;
 
-	PMAP_LOCK(pmap);
-	tte = tte_hash_lookup(pmap->pm_hash, va);
+	tte = tte_hash_lookup(pmap->pm_hash, va, FALSE);
 	pa = TTE_GET_PA(*tte) | (va & TTE_GET_PAGE_MASK(*tte));
 
-	PMAP_UNLOCK(pmap);
 	return (pa);
 }
@@ -1061,7 +1082,7 @@
 	vm_page_lock_queues();
 	PMAP_LOCK(pmap);
 	sched_pin();
-	tte_data = tte_hash_lookup(pmap->pm_hash, va);
+	tte_data = tte_hash_lookup(pmap->pm_hash, va, FALSE);
 	if (tte_data != 0 && 
 	    ((*tte_data & VTD_SW_W) || (prot & VM_PROT_WRITE) == 0)) {
 		m = PHYS_TO_VM_PAGE(TTE_GET_PA(*tte_data));
@@ -1083,9 +1104,15 @@
 void
 pmap_init(void)
 {
+	/* allocate pv_entry zones */
 	int shpgperproc = PMAP_SHPGPERPROC;
 
+	for (ctx_stack_top = 1; ctx_stack_top < PMAP_CONTEXT_MAX; ctx_stack_top++)
+		ctx_stack[ctx_stack_top] = ctx_stack_top;
+
+	mtx_init(&pmap_ctx_lock, "ctx lock", NULL, MTX_SPIN);
+
 	/*
 	 * Initialize the address space (zone) for the pv entries.  Set a
 	 * high water mark so that the system can recover from excessive
@@ -1155,11 +1182,10 @@
 
 	if (cpumask == pmap->pm_tlbactive)
 		return;
-#if 0
+
 	if (pmap != kernel_pmap)
 		active = (pmap->pm_tlbactive & ~cpumask);
 	else 
-#endif
 		active = PCPU_GET(other_cpus);
 #if 1
@@ -1315,7 +1341,7 @@
 boolean_t
 pmap_is_prefaultable(pmap_t pmap, vm_offset_t va)
 {
-	return (tte_hash_lookup(pmap->pm_hash, va) == NULL);
+	return (tte_hash_lookup(pmap->pm_hash, va, FALSE) == NULL);
 }
 
 /*
@@ -1342,12 +1368,12 @@
 	if (va > KERNBASE && va < KERNBASE + nucleus_memory) {
 		uint64_t offset;
 		offset = va - KERNBASE;
-		pa = nucleus_mappings[offset >> 22] + offset;
+		pa = nucleus_mappings[offset >> 22] | (va & PAGE_MASK_4M);
 	}
 	if ((pa == 0) && (tte_data = tsb_lookup_tte(va, 0)) != 0)
 		pa = TTE_GET_PA(tte_data) | (va & TTE_GET_PAGE_MASK(tte_data));
-	if ((pa == 0) && (tte = tte_hash_lookup(kernel_pmap->pm_hash, va)) != NULL)
+	if ((pa == 0) && (tte = tte_hash_lookup(kernel_pmap->pm_hash, va, FALSE)) != NULL)
 		pa = TTE_GET_PA(*tte) | (va & TTE_GET_PAGE_MASK(*tte));
 
 	return pa;
@@ -1356,7 +1382,7 @@
 static void
 pmap_kremove(vm_offset_t va)
 {
-	tte_hash_delete(kernel_pmap->pm_hash, va);
+	tte_hash_delete(kernel_pmap->pm_hash, va, FALSE);
 }
 
 static void
@@ -1486,11 +1512,8 @@
 pmap_pinit(pmap_t pmap)
 {
 
-	pmap->pm_context = context++;
+	pmap->pm_context = get_context();
 
-	KASSERT(context < PMAP_CONTEXT_MAX, 
-		("max context limit hit - need to implement context recycling"));
-
 	pmap->pm_hash = tte_hash_create(pmap->pm_context, &pmap->pm_hashscratch);
 	pmap->pm_tsb_ra = tsb_init(&pmap->pm_tsb, &pmap->pm_tsbscratch);
 	pmap->pm_active = 0;
@@ -1532,7 +1555,7 @@
 	uint64_t otte_data, tte_data;
 	vm_page_t m;
 retry:
-	tte = tte_hash_lookup(pmap->pm_hash, tva);
+	tte = tte_hash_lookup(pmap->pm_hash, tva, TRUE);
 	otte_data = tte_data = tte ? *tte : 0;
 	if (tte_data & VTD_MANAGED) {
 		m = NULL;
@@ -1617,6 +1640,7 @@
 	pmap_lazyfix(pmap);
 	tsb_deinit(&pmap->pm_tsb);
 	tte_hash_destroy(pmap->pm_hash);
+	free_context(pmap->pm_context);
 	PMAP_LOCK_DESTROY(pmap);
 }
@@ -1640,10 +1664,10 @@
 	sched_pin();
 	PMAP_LOCK(pmap);
 	for (tva = start; tva < end; tva += PAGE_SIZE) {
-		if ((tte = tte_hash_lookup(pmap->pm_hash, tva)) == NULL)
+		if ((tte = tte_hash_lookup(pmap->pm_hash, tva, TRUE)) == NULL)
 			continue;
 		pmap_remove_tte(pmap, tte, tva);
-		tte_hash_delete(pmap->pm_hash, tva);
+		tte_hash_delete(pmap->pm_hash, tva, TRUE);
 		invlva = 1;
 	}
@@ -1684,9 +1708,9 @@
 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
 		PMAP_LOCK(pv->pv_pmap);
 		pv->pv_pmap->pm_stats.resident_count--;
-		tte = tte_hash_lookup(pv->pv_pmap->pm_hash, pv->pv_va);
+		tte = tte_hash_lookup(pv->pv_pmap->pm_hash, pv->pv_va, TRUE);
 		tte_data = *tte;
-		tte_hash_delete(pv->pv_pmap->pm_hash, pv->pv_va);
+		tte_hash_delete(pv->pv_pmap->pm_hash, pv->pv_va, TRUE);
 		if (tte_data & VTD_WIRED)
 			pv->pv_pmap->pm_stats.wired_count--;
 		if (tte_data & VTD_REF)
@@ -1755,7 +1779,7 @@
 	PMAP_LOCK(pmap);
 	sched_pin();
 	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
-		tte = tte_hash_lookup(pmap->pm_hash, pv->pv_va);
+		tte = tte_hash_lookup(pmap->pm_hash, pv->pv_va, FALSE);
 		tte_data = tte ? *tte : 0;
 		
 		if (tte_data == 0) {
@@ -1780,7 +1804,6 @@
 			vm_page_dirty(m);
 		}
-
 		npv = TAILQ_NEXT(pv, pv_plist);
 		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
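The pmap_kextract() hunk is easiest to check with concrete numbers. The
nucleus is mapped by two 4MB pages whose physical bases sit in
nucleus_mappings[0] and nucleus_mappings[1]; the old code added the full
offset from KERNBASE onto a base that already corresponds to the selected
4MB page, so every VA in the second page came back 4MB too high. Assuming
PAGE_MASK_4M covers the low 22 bits (the values below are illustrative
only):

	uint64_t va, offset, pa;

	va = KERNBASE + 0x401000;	/* 0x1000 into the second 4MB page */
	offset = va - KERNBASE;		/* 0x401000, so offset >> 22 == 1 */

	/* old: nucleus_mappings[1] + 0x401000 -- 4MB past the right frame */
	/* new: only the offset within the 4MB page is merged in */
	pa = nucleus_mappings[offset >> 22] | (va & PAGE_MASK_4M);
					/* == nucleus_mappings[1] | 0x1000 */

For the first 4MB page the two forms agree, which is why the bug only
showed up on the second page.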
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tte.c#6 (text+ko) ====

@@ -64,7 +64,7 @@
 	if ((m->flags & PG_FICTITIOUS) ||
 	    (flags == VTD_SW_W && (m->flags & PG_WRITEABLE) == 0))
 		return;
-	sched_pin();	
+	sched_pin();
 	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
 	/*
 	 * Loop over all current mappings setting/clearing as appropos If
@@ -79,7 +79,7 @@
 			continue;
 		}
 		PMAP_LOCK(pv->pv_pmap);
-		tte = tte_hash_lookup(pv->pv_pmap->pm_hash, pv->pv_va);
+		tte = tte_hash_lookup(pv->pv_pmap->pm_hash, pv->pv_va, FALSE);
 	retry:
 		tte_data = *tte;
 		if (tte_data & flags) {
@@ -120,12 +120,12 @@
 {
 	tte_t *tte;
 
-	tte = tte_hash_lookup(pmap->pm_hash, va);
+	tte = tte_hash_lookup(pmap->pm_hash, va, FALSE);
 
-	if (tte)
+	if (tte) {
 		atomic_clear_long((u_long *)tte, flags);
-
-	pmap_invalidate_page(pmap, va);
+		pmap_invalidate_page(pmap, va);
+	}
 }
 
 void
@@ -139,7 +139,7 @@
 {
 	tte_t ttedata, *tte;
 
-	tte = tte_hash_lookup(pmap->pm_hash, va);
+	tte = tte_hash_lookup(pmap->pm_hash, va, FALSE);
 	ttedata = tte ? *tte : 0;
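A note on why these paths clear flags with atomic_clear_long() rather than
a plain load and store: the TSB miss handlers that this change locks
against (see the tte_hash.c hunks below) can update the same tte word
concurrently without holding the pmap lock. A non-atomic read-modify-write
opens the classic lost-update window; the sketch below shows the race, not
code from this change:

	/* racy: a concurrent update can land between the load and store */
	tte_data = *tte;
	*tte = tte_data & ~flags;	/* the concurrent update is lost */

	/* what the code does instead: one atomic read-modify-write */
	atomic_clear_long((u_long *)tte, flags);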
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/tte_hash.c#14 (text+ko) ====

@@ -110,13 +110,37 @@
 }
 
-static void
+static __inline void
 free_tte_hash(tte_hash_t th)
 {
 	tte_hash_count--;
	uma_zfree(thzone, th);
 }
 
+static void
+hash_bucket_lock(tte_hash_field_t fields)
+{
+	uint64_t data;
+
+	/* spin until the unlocked -> locked transition succeeds */
+	data = fields[0].tte.data & ~VTD_LOCK;
+	while (atomic_cmpset_long(&fields[0].tte.data, data, data | VTD_LOCK) == 0)
+		data = fields[0].tte.data & ~VTD_LOCK;
+
+	membar(StoreLoad);
+}
+
+static __inline void
+hash_bucket_unlock(tte_hash_field_t fields)
+{
+#ifdef DEBUG
+	if ((fields[0].tte.data & VTD_LOCK) == 0)
+		panic("trying to unlock bucket that isn't locked");
+#endif
+	fields[0].tte.data &= ~VTD_LOCK;
+	membar(StoreLoad);
+}
+
 void 
 tte_hash_init(vm_paddr_t bootmem)
 {
@@ -220,8 +244,9 @@
 	free_tte_hash(th);
 }
 
+
 int
-tte_hash_delete(tte_hash_t th, vm_offset_t va)
+tte_hash_delete(tte_hash_t th, vm_offset_t va, int locked)
 {
 	uint64_t hash_shift, hash_index;
 	tte_hash_field_t fields;
@@ -232,9 +257,12 @@
 	hash_index = (va >> hash_shift) & HASH_MASK(th);
 	fields = (th->th_hashtable[hash_index].the_fields);
-#ifdef DEBUG
-	KASSERT(tte_hash_lookup(th, va) != 0, ("attempting to delete non-existent entry"));
+#if 0
+	KASSERT(tte_hash_lookup(th, va, FALSE) != 0, ("attempting to delete non-existent entry"));
 #endif
+	if (locked == FALSE)
+		hash_bucket_lock(fields);
+
 	for (i = 0; i <= 3; i++)
 		if ((fields[i].tte.tag << TTARGET_VA_SHIFT) == (va & ~PAGE_MASK_4M))
 			break;
@@ -252,6 +280,9 @@
 	fields[lastindex].tte.tag = 0;
 	fields[lastindex].tte.data = 0;
 
+	if (lastindex != 0)
+		hash_bucket_unlock(fields);
+
 	return (vaindex < 4);
 }
@@ -269,21 +300,25 @@
 	uint64_t hash_shift, hash_index, tte_tag;
 	tte_hash_field_t fields;
 	int i;
+
+
+#if 0
 	tte_t *tte;
-
-	tte = tte_hash_lookup(th, va);
+	tte = tte_hash_lookup(th, va, FALSE);
 	if (tte)
 		panic("mapping for va=0x%lx already exists tte_data=0x%lx\n", va, *tte);
-
+#endif
 	/* XXX - only handle 8K pages for now */
 	hash_shift = PAGE_SHIFT;
 	hash_index = (va >> hash_shift) & HASH_MASK(th);
 	fields = (th->th_hashtable[hash_index].the_fields);
+
+	hash_bucket_lock(fields);
 
 	tte_tag = (((uint64_t)th->th_context << TTARGET_CTX_SHIFT)|(va >> TTARGET_VA_SHIFT));
 
 	for (i = 0; i <= 3; i++) {
 		if ((fields[i].tte.tag == 0) || (fields[i].tte.tag == tte_tag)) {
-			fields[i].tte.data = tte_data;
+			fields[i].tte.data = tte_data | (i ? 0 : VTD_LOCK);
 			fields[i].tte.tag = tte_tag;
 			goto done;
 		}
@@ -292,31 +327,46 @@
 	panic("collision handling unimplemented - please re-consider");
 done:
+	hash_bucket_unlock(fields);
 	th->th_entries++;
 }
 
+/*
+ * If leave_locked is true the tte's data field will be returned to 
+ * the caller with the hash bucket left locked.
+ */
+
 tte_t *
-tte_hash_lookup(tte_hash_t th, vm_offset_t va)
+tte_hash_lookup(tte_hash_t th, vm_offset_t va, int leave_locked)
 {
 	uint64_t hash_shift, hash_index;
 	tte_hash_field_t fields;
 	int i;
+	tte_t *entry;
 
 	/* XXX - only handle 8K pages for now */
 	hash_shift = PAGE_SHIFT;
 	hash_index = (va >> hash_shift) & HASH_MASK(th);
 	fields = (th->th_hashtable[hash_index].the_fields);
-
+	entry = NULL;
+
+	hash_bucket_lock(fields);
 	for (i = 0; i <= 3; i++) {
 		if (((fields[i].tte.tag << TTARGET_VA_SHIFT) == (va & ~PAGE_MASK_4M)) &&
-		    (fields[i].tte.data != 0))
-			return &(fields[i].tte.data);
+		    (fields[i].tte.data != 0)) {
+			entry = &(fields[i].tte.data);
+			break;
+		}
 	}
-	/*
+	if (entry == NULL || leave_locked == FALSE)
+		hash_bucket_unlock(fields);
+
+	/* 
 	 * XXX handle the case of collisions > 3
 	 *
 	 */
-	return (NULL);
+	return (entry);
 }
@@ -331,7 +381,7 @@
 	hash_scratch = ((vm_offset_t)th->th_hashtable) | ((vm_offset_t)th->th_size);
 	set_hash_kernel_scratchpad(hash_scratch);
 	
-	return hash_scratch;
+	return (hash_scratch);
 }
 
 uint64_t
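One closing note on the locking scheme above: the bucket lock is a single
bit (VTD_LOCK) in the data word of field 0, so one atomic word serializes
the whole 4-entry bucket and the TSB miss handler can honor it by testing
the same word. A self-contained sketch of the acquire/release protocol,
with the critical section elided:

	uint64_t data;

	/* acquire: spin until we see the bit clear and set it atomically */
	data = fields[0].tte.data & ~VTD_LOCK;
	while (atomic_cmpset_long(&fields[0].tte.data, data, data | VTD_LOCK) == 0)
		data = fields[0].tte.data & ~VTD_LOCK;
	membar(StoreLoad);

	/* ... update tags/data in fields[0..3] ... */

	/* release: clear the bit */
	fields[0].tte.data &= ~VTD_LOCK;
	membar(StoreLoad);

Because the lock bit shares the word with field 0's data, any store that
rewrites fields[0].tte.data implicitly drops the lock. That is why
tte_hash_delete() skips hash_bucket_unlock() when lastindex == 0 (clearing
the entry cleared the bit too) and why tte_hash_insert() ORs VTD_LOCK back
in when it writes slot 0.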