Date:      Thu, 30 Jan 2025 22:11:59 +0000
From:      Ruslan Bukin <br@freebsd.org>
To:        Doug Moore <dougm@freebsd.org>
Cc:        src-committers@freebsd.org, dev-commits-src-all@freebsd.org, dev-commits-src-main@freebsd.org
Subject:   Re: git: 0078df5f0258 - main - vm_phys: reduce touching of page->pool fields
Message-ID:  <Z5v5L+sAjHZeQyEt@bsdpad.com>
In-Reply-To: <202501290914.50T9EcNn077539@gitrepo.freebsd.org>
References:  <202501290914.50T9EcNn077539@gitrepo.freebsd.org>

Hi Doug,

riscv seems to panic after this commit; see the panic message at
https://people.freebsd.org/~br/panic.txt

I think I hit the same panic around a week ago with the previous (now reverted) commit.

Ruslan

On Wed, Jan 29, 2025 at 09:14:38AM +0000, Doug Moore wrote:
> The branch main has been updated by dougm:
> 
> URL: https://cgit.FreeBSD.org/src/commit/?id=0078df5f025854600595462e56fa95d34e732141
> 
> commit 0078df5f025854600595462e56fa95d34e732141
> Author:     Doug Moore <dougm@FreeBSD.org>
> AuthorDate: 2025-01-29 09:13:17 +0000
> Commit:     Doug Moore <dougm@FreeBSD.org>
> CommitDate: 2025-01-29 09:13:17 +0000
> 
>     vm_phys: reduce touching of page->pool fields
>     
>     Change the usage of the pool field in vm_page structs.
>     
>     Currently, every page belongs to a pool, and the pool field identifies
>     that pool, whether the page is allocated or free.
>     
>     With this change, the pool field of the first page of a free block is
>     used by the buddy allocator to identify its pool, but the buddy
>     allocator makes no guarantees about the pool field value for allocated
>     pages. The buddy allocator requires that a pool parameter be passed as
>     part of freeing memory. A function that allocates memory may use the
>     pool field of a page to record what pool to pass as that parameter
>     when the memory is freed, but might not need to do so for every
>     allocated page.
>     
>     Suggested by:   alc
>     Reviewed by:    markj (previous version)
>     Tested by:      pho
>     Differential Revision:  https://reviews.freebsd.org/D45409
> ---
>  sys/vm/vm_kern.c   |   2 +-
>  sys/vm/vm_page.c   |  97 ++++++++++++++++++++++--------------------
>  sys/vm/vm_phys.c   | 123 ++++++++++++++++++++++++++++-------------------------
>  sys/vm/vm_phys.h   |   6 +--
>  sys/vm/vm_reserv.c |  31 +++++++-------
>  5 files changed, 136 insertions(+), 123 deletions(-)
> 
> diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
> index 86ab2529e27f..2b85dbde1dd6 100644
> --- a/sys/vm/vm_kern.c
> +++ b/sys/vm/vm_kern.c
> @@ -953,7 +953,7 @@ kmem_bootstrap_free(vm_offset_t start, vm_size_t size)
>  
>  		vmd = vm_pagequeue_domain(m);
>  		vm_domain_free_lock(vmd);
> -		vm_phys_free_pages(m, 0);
> +		vm_phys_free_pages(m, m->pool, 0);
>  		vm_domain_free_unlock(vmd);
>  
>  		vm_domain_freecnt_inc(vmd, 1);
> diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
> index ba22c7f97f2f..961b32da6599 100644
> --- a/sys/vm/vm_page.c
> +++ b/sys/vm/vm_page.c
> @@ -572,6 +572,7 @@ vm_page_startup(vm_offset_t vaddr)
>  #if defined(__i386__) && defined(VM_PHYSSEG_DENSE)
>  	long ii;
>  #endif
> +	int pool;
>  #ifdef VM_FREEPOOL_LAZYINIT
>  	int lazyinit;
>  #endif
> @@ -651,6 +652,8 @@ vm_page_startup(vm_offset_t vaddr)
>  		dump_add_page(pa);
>  		pa += PAGE_SIZE;
>  	}
> +#else
> +	(void)pa;
>  #endif
>  	/*
>  	 * Compute the number of pages of memory that will be available for
> @@ -755,9 +758,12 @@ vm_page_startup(vm_offset_t vaddr)
>  	 */
>  	vm_phys_init();
>  
> +	pool = VM_FREEPOOL_DEFAULT;
>  #ifdef VM_FREEPOOL_LAZYINIT
>  	lazyinit = 1;
>  	TUNABLE_INT_FETCH("debug.vm.lazy_page_init", &lazyinit);
> +	if (lazyinit)
> +		pool = VM_FREEPOOL_LAZYINIT;
>  #endif
>  
>  	/*
> @@ -777,48 +783,27 @@ vm_page_startup(vm_offset_t vaddr)
>  		seg = &vm_phys_segs[segind];
>  
>  		/*
> -		 * If lazy vm_page initialization is not enabled, simply
> -		 * initialize all of the pages in the segment.  Otherwise, we
> -		 * only initialize:
> -		 * 1. Pages not covered by phys_avail[], since they might be
> -		 *    freed to the allocator at some future point, e.g., by
> -		 *    kmem_bootstrap_free().
> -		 * 2. The first page of each run of free pages handed to the
> -		 *    vm_phys allocator, which in turn defers initialization
> -		 *    of pages until they are needed.
> -		 * This avoids blocking the boot process for long periods, which
> -		 * may be relevant for VMs (which ought to boot as quickly as
> -		 * possible) and/or systems with large amounts of physical
> -		 * memory.
> +		 * Initialize pages not covered by phys_avail[], since they
> +		 * might be freed to the allocator at some future point, e.g.,
> +		 * by kmem_bootstrap_free().
>  		 */
> -#ifdef VM_FREEPOOL_LAZYINIT
> -		if (lazyinit) {
> -			startp = seg->start;
> -			for (i = 0; phys_avail[i + 1] != 0; i += 2) {
> -				if (startp >= seg->end)
> -					break;
> -
> -				if (phys_avail[i + 1] < startp)
> -					continue;
> -				if (phys_avail[i] <= startp) {
> -					startp = phys_avail[i + 1];
> -					continue;
> -				}
> -
> -				m = vm_phys_seg_paddr_to_vm_page(seg, startp);
> -				for (endp = MIN(phys_avail[i], seg->end);
> -				    startp < endp; startp += PAGE_SIZE, m++) {
> -					vm_page_init_page(m, startp, segind,
> -					    VM_FREEPOOL_DEFAULT);
> -				}
> +		startp = seg->start;
> +		for (i = 0; phys_avail[i + 1] != 0; i += 2) {
> +			if (startp >= seg->end)
> +				break;
> +			if (phys_avail[i + 1] < startp)
> +				continue;
> +			if (phys_avail[i] <= startp) {
> +				startp = phys_avail[i + 1];
> +				continue;
>  			}
> -		} else
> -#endif
> -			for (m = seg->first_page, pa = seg->start;
> -			    pa < seg->end; m++, pa += PAGE_SIZE) {
> -				vm_page_init_page(m, pa, segind,
> +			m = vm_phys_seg_paddr_to_vm_page(seg, startp);
> +			for (endp = MIN(phys_avail[i], seg->end);
> +			    startp < endp; startp += PAGE_SIZE, m++) {
> +				vm_page_init_page(m, startp, segind,
>  				    VM_FREEPOOL_DEFAULT);
>  			}
> +		}
>  
>  		/*
>  		 * Add the segment's pages that are covered by one of
> @@ -835,16 +820,30 @@ vm_page_startup(vm_offset_t vaddr)
>  			if (pagecount == 0)
>  				continue;
>  
> +			/*
> +			 * If lazy vm_page initialization is not enabled, simply
> +			 * initialize all of the pages in the segment covered by
> +			 * phys_avail.  Otherwise, initialize only the first
> +			 * page of each run of free pages handed to the vm_phys
> +			 * allocator, which in turn defers initialization of
> +			 * pages until they are needed.
> +			 *
> +			 * This avoids blocking the boot process for long
> +			 * periods, which may be relevant for VMs (which ought
> +			 * to boot as quickly as possible) and/or systems with
> +			 * large amounts of physical memory.
> +			 */
>  			m = vm_phys_seg_paddr_to_vm_page(seg, startp);
> -#ifdef VM_FREEPOOL_LAZYINIT
> -			if (lazyinit) {
> -				vm_page_init_page(m, startp, segind,
> -				    VM_FREEPOOL_LAZYINIT);
> +			vm_page_init_page(m, startp, segind, pool);
> +			if (pool == VM_FREEPOOL_DEFAULT) {
> +				for (int j = 1; j < pagecount; j++) {
> +					vm_page_init_page(&m[j],
> +					    startp + ptoa(j), segind, pool);
> +				}
>  			}
> -#endif
>  			vmd = VM_DOMAIN(seg->domain);
>  			vm_domain_free_lock(vmd);
> -			vm_phys_enqueue_contig(m, pagecount);
> +			vm_phys_enqueue_contig(m, pool, pagecount);
>  			vm_domain_free_unlock(vmd);
>  			vm_domain_freecnt_inc(vmd, pagecount);
>  			vm_cnt.v_page_count += (u_int)pagecount;
> @@ -2341,6 +2340,7 @@ found:
>  	m->flags = flags;
>  	m->a.flags = 0;
>  	m->oflags = (object->flags & OBJ_UNMANAGED) != 0 ? VPO_UNMANAGED : 0;
> +	m->pool = VM_FREEPOOL_DEFAULT;
>  	if ((req & (VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0)
>  		m->busy_lock = VPB_CURTHREAD_EXCLUSIVE;
>  	else if ((req & VM_ALLOC_SBUSY) != 0)
> @@ -2558,6 +2558,7 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
>  			m->ref_count = 1;
>  		m->a.act_count = 0;
>  		m->oflags = oflags;
> +		m->pool = VM_FREEPOOL_DEFAULT;
>  		if (vm_page_iter_insert(&pages, m, object, pindex, mpred)) {
>  			if ((req & VM_ALLOC_WIRED) != 0)
>  				vm_wire_sub(npages);
> @@ -2655,6 +2656,7 @@ found:
>  	m->flags = (m->flags & PG_ZERO) | flags;
>  	m->a.flags = 0;
>  	m->oflags = VPO_UNMANAGED;
> +	m->pool = VM_FREEPOOL_DIRECT;
>  	m->busy_lock = VPB_UNBUSIED;
>  	if ((req & VM_ALLOC_WIRED) != 0) {
>  		vm_wire_add(1);
> @@ -2803,6 +2805,7 @@ vm_page_alloc_noobj_contig_domain(int domain, int req, u_long npages,
>  			m->ref_count = 1;
>  		m->a.act_count = 0;
>  		m->oflags = VPO_UNMANAGED;
> +		m->pool = VM_FREEPOOL_DIRECT;
>  
>  		/*
>  		 * Zero the page before updating any mappings since the page is
> @@ -2881,7 +2884,7 @@ vm_page_zone_release(void *arg, void **store, int cnt)
>  	vm_domain_free_lock(vmd);
>  	for (i = 0; i < cnt; i++) {
>  		m = (vm_page_t)store[i];
> -		vm_phys_free_pages(m, 0);
> +		vm_phys_free_pages(m, pgcache->pool, 0);
>  	}
>  	vm_domain_free_unlock(vmd);
>  	vm_domain_freecnt_inc(vmd, cnt);
> @@ -3266,7 +3269,7 @@ unlock:
>  		do {
>  			MPASS(vm_page_domain(m) == domain);
>  			SLIST_REMOVE_HEAD(&free, plinks.s.ss);
> -			vm_phys_free_pages(m, 0);
> +			vm_phys_free_pages(m, m->pool, 0);
>  			cnt++;
>  		} while ((m = SLIST_FIRST(&free)) != NULL);
>  		vm_domain_free_unlock(vmd);
> @@ -4271,7 +4274,7 @@ vm_page_free_toq(vm_page_t m)
>  		return;
>  	}
>  	vm_domain_free_lock(vmd);
> -	vm_phys_free_pages(m, 0);
> +	vm_phys_free_pages(m, m->pool, 0);
>  	vm_domain_free_unlock(vmd);
>  	vm_domain_freecnt_inc(vmd, 1);
>  }
> diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
> index cf1ed5818b2f..0b7de1d34255 100644
> --- a/sys/vm/vm_phys.c
> +++ b/sys/vm/vm_phys.c
> @@ -187,7 +187,7 @@ SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
>  static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain);
>  static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end);
>  static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
> -    int order, int tail);
> +    int order, int pool, int tail);
>  
>  static bool __diagused
>  vm_phys_pool_valid(int pool)
> @@ -391,10 +391,12 @@ sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS)
>  #endif
>  
>  static void
> -vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
> +vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int pool,
> +    int tail)
>  {
>  
>  	m->order = order;
> +	m->pool = pool;
>  	if (tail)
>  		TAILQ_INSERT_TAIL(&fl[order].pl, m, listq);
>  	else
> @@ -687,7 +689,7 @@ vm_phys_register_domains(int ndomains __numa_used,
>   */
>  static __inline void
>  vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order,
> -    int tail)
> +    int pool, int tail)
>  {
>  	vm_page_t m_buddy;
>  
> @@ -697,19 +699,20 @@ vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order,
>  		KASSERT(m_buddy->order == VM_NFREEORDER,
>  		    ("vm_phys_split_pages: page %p has unexpected order %d",
>  		    m_buddy, m_buddy->order));
> -		vm_freelist_add(fl, m_buddy, oind, tail);
> +		vm_freelist_add(fl, m_buddy, oind, pool, tail);
>          }
>  }
>  
>  static void
> -vm_phys_enq_chunk(struct vm_freelist *fl, vm_page_t m, int order, int tail)
> +vm_phys_enq_chunk(struct vm_freelist *fl, vm_page_t m, int order, int pool,
> +    int tail)
>  {
>  	KASSERT(order >= 0 && order < VM_NFREEORDER,
>  	    ("%s: invalid order %d", __func__, order));
>  
> -	vm_freelist_add(fl, m, order, tail);
> +	vm_freelist_add(fl, m, order, pool, tail);
>  #ifdef VM_FREEPOOL_LAZYINIT
> -	if (__predict_false(m->pool == VM_FREEPOOL_LAZYINIT)) {
> +	if (__predict_false(pool == VM_FREEPOOL_LAZYINIT)) {
>  		vm_page_t m_next;
>  		vm_paddr_t pa;
>  		int npages;
> @@ -738,7 +741,8 @@ vm_phys_enq_chunk(struct vm_freelist *fl, vm_page_t m, int order, int tail)
>   * The physical page m's buddy must not be free.
>   */
>  static void
> -vm_phys_enq_beg(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail)
> +vm_phys_enq_beg(vm_page_t m, u_int npages, struct vm_freelist *fl, int pool,
> +    int tail)
>  {
>          int order;
>  
> @@ -754,7 +758,7 @@ vm_phys_enq_beg(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail)
>  		order = ilog2(npages);
>  		KASSERT(order < VM_NFREEORDER,
>  		    ("%s: order %d is out of range", __func__, order));
> -		vm_phys_enq_chunk(fl, m, order, tail);
> +		vm_phys_enq_chunk(fl, m, order, pool, tail);
>  		m += 1 << order;
>  		npages -= 1 << order;
>  	}
> @@ -774,7 +778,8 @@ vm_phys_enq_beg(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail)
>   * parameter m.  Otherwise, the physical page m's buddy must not be free.
>   */
>  static vm_page_t
> -vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail)
> +vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int pool,
> +    int tail)
>  {
>  	int order;
>  
> @@ -788,7 +793,7 @@ vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail)
>  		    ("vm_phys_enq_range: page %p has unexpected order %d",
>  		    m, m->order));
>  		order = ffs(npages) - 1;
> -		vm_phys_enq_chunk(fl, m, order, tail);
> +		vm_phys_enq_chunk(fl, m, order, pool, tail);
>  		m += 1 << order;
>  		npages -= 1 << order;
>  	}
> @@ -796,33 +801,30 @@ vm_phys_enq_range(vm_page_t m, u_int npages, struct vm_freelist *fl, int tail)
>  }
>  
>  /*
> - * Set the pool for a contiguous, power of two-sized set of physical pages.
> + * Complete initialization of a contiguous, power of two-sized set of physical
> + * pages.
>   *
>   * If the pages currently belong to the lazy init pool, then the corresponding
>   * page structures must be initialized.  In this case it is assumed that the
>   * first page in the run has already been initialized.
>   */
>  static void
> -vm_phys_set_pool(int pool, vm_page_t m, int order)
> +vm_phys_finish_init(vm_page_t m, int order)
>  {
>  #ifdef VM_FREEPOOL_LAZYINIT
>  	if (__predict_false(m->pool == VM_FREEPOOL_LAZYINIT)) {
>  		vm_paddr_t pa;
>  		int segind;
>  
> -		m->pool = pool;
> -
>  		TSENTER();
>  		pa = m->phys_addr + PAGE_SIZE;
>  		segind = m->segind;
>  		for (vm_page_t m_tmp = m + 1; m_tmp < &m[1 << order];
>  		    m_tmp++, pa += PAGE_SIZE)
> -			vm_page_init_page(m_tmp, pa, segind, pool);
> +			vm_page_init_page(m_tmp, pa, segind, VM_NFREEPOOL);
>  		TSEXIT();
> -	} else
> +	}
>  #endif
> -		for (vm_page_t m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
> -			m_tmp->pool = pool;
>  }
>  
>  /*
> @@ -833,7 +835,8 @@ vm_phys_set_pool(int pool, vm_page_t m, int order)
>   * The returned pages may not be physically contiguous.  However, in contrast
>   * to performing multiple, back-to-back calls to vm_phys_alloc_pages(..., 0),
>   * calling this function once to allocate the desired number of pages will
> - * avoid wasted time in vm_phys_split_pages().
> + * avoid wasted time in vm_phys_split_pages().  The allocated pages have no
> + * valid pool field set.
>   *
>   * The free page queues for the specified domain must be locked.
>   */
> @@ -869,7 +872,8 @@ vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
>  					 * Return excess pages to fl.  Its order
>  					 * [0, oind) queues are empty.
>  					 */
> -					vm_phys_enq_range(m, avail - i, fl, 1);
> +					vm_phys_enq_range(m, avail - i, fl,
> +					    pool, 1);
>  					return (npages);
>  				}
>  			}
> @@ -881,7 +885,7 @@ vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
>  				while ((m = TAILQ_FIRST(&alt[oind].pl)) !=
>  				    NULL) {
>  					vm_freelist_rem(alt, m, oind);
> -					vm_phys_set_pool(pool, m, oind);
> +					vm_phys_finish_init(m, oind);
>  					avail = i + (1 << oind);
>  					end = imin(npages, avail);
>  					while (i < end)
> @@ -893,7 +897,7 @@ vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[])
>  						 * are empty.
>  						 */
>  						vm_phys_enq_range(m, avail - i,
> -						    fl, 1);
> +						    fl, pool, 1);
>  						return (npages);
>  					}
>  				}
> @@ -940,7 +944,7 @@ vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
>  		if (m != NULL) {
>  			vm_freelist_rem(fl, m, oind);
>  			/* The order [order, oind) queues are empty. */
> -			vm_phys_split_pages(m, oind, fl, order, 1);
> +			vm_phys_split_pages(m, oind, fl, order, pool, 1);
>  			return (m);
>  		}
>  	}
> @@ -957,9 +961,9 @@ vm_phys_alloc_freelist_pages(int domain, int freelist, int pool, int order)
>  			m = TAILQ_FIRST(&alt[oind].pl);
>  			if (m != NULL) {
>  				vm_freelist_rem(alt, m, oind);
> -				vm_phys_set_pool(pool, m, oind);
> +				vm_phys_finish_init(m, oind);
>  				/* The order [order, oind) queues are empty. */
> -				vm_phys_split_pages(m, oind, fl, order, 1);
> +				vm_phys_split_pages(m, oind, fl, order, pool, 1);
>  				return (m);
>  			}
>  		}
> @@ -1198,11 +1202,12 @@ vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
>  
>  /*
>   * Free a contiguous, power of two-sized set of physical pages.
> + * The pool field in the first page determines the destination pool.
>   *
>   * The free page queues must be locked.
>   */
>  void
> -vm_phys_free_pages(vm_page_t m, int order)
> +vm_phys_free_pages(vm_page_t m, int pool, int order)
>  {
>  	struct vm_freelist *fl;
>  	struct vm_phys_seg *seg;
> @@ -1210,13 +1215,12 @@ vm_phys_free_pages(vm_page_t m, int order)
>  	vm_page_t m_buddy;
>  
>  	KASSERT(m->order == VM_NFREEORDER,
> -	    ("vm_phys_free_pages: page %p has unexpected order %d",
> -	    m, m->order));
> -	KASSERT(vm_phys_pool_valid(m->pool),
> -	    ("vm_phys_free_pages: page %p has unexpected pool %d",
> -	    m, m->pool));
> +	    ("%s: page %p has unexpected order %d",
> +	    __func__, m, m->order));
> +	KASSERT(vm_phys_pool_valid(pool),
> +	    ("%s: unexpected pool param %d", __func__, pool));
>  	KASSERT(order < VM_NFREEORDER,
> -	    ("vm_phys_free_pages: order %d is out of range", order));
> +	    ("%s: order %d is out of range", __func__, order));
>  	seg = &vm_phys_segs[m->segind];
>  	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
>  	if (order < VM_NFREEORDER - 1) {
> @@ -1230,15 +1234,14 @@ vm_phys_free_pages(vm_page_t m, int order)
>  				break;
>  			fl = (*seg->free_queues)[m_buddy->pool];
>  			vm_freelist_rem(fl, m_buddy, order);
> -			if (m_buddy->pool != m->pool)
> -				vm_phys_set_pool(m->pool, m_buddy, order);
> +			vm_phys_finish_init(m_buddy, order);
>  			order++;
>  			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
>  			m = vm_phys_seg_paddr_to_vm_page(seg, pa);
>  		} while (order < VM_NFREEORDER - 1);
>  	}
> -	fl = (*seg->free_queues)[m->pool];
> -	vm_freelist_add(fl, m, order, 1);
> +	fl = (*seg->free_queues)[pool];
> +	vm_freelist_add(fl, m, order, pool, 1);
>  }
>  
>  #ifdef VM_FREEPOOL_LAZYINIT
> @@ -1290,12 +1293,13 @@ vm_phys_lazy_init_domain(int domain, bool locked)
>  				    VM_ALLOC_NORMAL, 1 << oind);
>  				if (unlocked)
>  					vm_domain_free_unlock(vmd);
> -				vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, oind);
> +				vm_phys_finish_init(m, oind);
>  				if (unlocked) {
>  					vm_domain_freecnt_inc(vmd, 1 << oind);
>  					vm_domain_free_lock(vmd);
>  				}
> -				vm_phys_free_pages(m, oind);
> +				vm_phys_free_pages(m, VM_FREEPOOL_DEFAULT,
> +				    oind);
>  			}
>  		}
>  	}
> @@ -1344,12 +1348,12 @@ SYSINIT(vm_phys_lazy_init, SI_SUB_SMP, SI_ORDER_ANY, vm_phys_lazy_sysinit,
>  
>  /*
>   * Free a contiguous, arbitrarily sized set of physical pages, without
> - * merging across set boundaries.
> + * merging across set boundaries.  Assumes no pages have a valid pool field.
>   *
>   * The free page queues must be locked.
>   */
>  void
> -vm_phys_enqueue_contig(vm_page_t m, u_long npages)
> +vm_phys_enqueue_contig(vm_page_t m, int pool, u_long npages)
>  {
>  	struct vm_freelist *fl;
>  	struct vm_phys_seg *seg;
> @@ -1363,14 +1367,15 @@ vm_phys_enqueue_contig(vm_page_t m, u_long npages)
>  	 */
>  	vm_domain_free_assert_locked(vm_pagequeue_domain(m));
>  	seg = &vm_phys_segs[m->segind];
> -	fl = (*seg->free_queues)[m->pool];
> +	fl = (*seg->free_queues)[pool];
>  	m_end = m + npages;
>  	/* Free blocks of increasing size. */
>  	lo = atop(VM_PAGE_TO_PHYS(m));
>  	if (m < m_end &&
>  	    (diff = lo ^ (lo + npages - 1)) != 0) {
>  		order = min(ilog2(diff), VM_NFREEORDER - 1);
> -		m = vm_phys_enq_range(m, roundup2(lo, 1 << order) - lo, fl, 1);
> +		m = vm_phys_enq_range(m, roundup2(lo, 1 << order) - lo, fl,
> +		    pool, 1);
>  	}
>  
>  	/* Free blocks of maximum size. */
> @@ -1379,20 +1384,22 @@ vm_phys_enqueue_contig(vm_page_t m, u_long npages)
>  		KASSERT(seg == &vm_phys_segs[m->segind],
>  		    ("%s: page range [%p,%p) spans multiple segments",
>  		    __func__, m_end - npages, m));
> -		vm_phys_enq_chunk(fl, m, order, 1);
> +		vm_phys_enq_chunk(fl, m, order, pool, 1);
>  		m += 1 << order;
>  	}
>  	/* Free blocks of diminishing size. */
> -	vm_phys_enq_beg(m, m_end - m, fl, 1);
> +	vm_phys_enq_beg(m, m_end - m, fl, pool, 1);
>  }
>  
>  /*
>   * Free a contiguous, arbitrarily sized set of physical pages.
> + * Assumes that every page but the first has no valid pool field.
> + * Uses the pool value in the first page if valid, otherwise default.
>   *
>   * The free page queues must be locked.
>   */
>  void
> -vm_phys_free_contig(vm_page_t m, u_long npages)
> +vm_phys_free_contig(vm_page_t m, int pool, u_long npages)
>  {
>  	vm_paddr_t lo;
>  	vm_page_t m_start, m_end;
> @@ -1416,11 +1423,11 @@ vm_phys_free_contig(vm_page_t m, u_long npages)
>  	 * end of the range last.
>  	 */
>  	if (m_start < m_end)
> -		vm_phys_enqueue_contig(m_start, m_end - m_start);
> +		vm_phys_enqueue_contig(m_start, pool, m_end - m_start);
>  	if (order_start < max_order)
> -		vm_phys_free_pages(m, order_start);
> +		vm_phys_free_pages(m, pool, order_start);
>  	if (order_end < max_order)
> -		vm_phys_free_pages(m_end, order_end);
> +		vm_phys_free_pages(m_end, pool, order_end);
>  }
>  
>  /*
> @@ -1474,15 +1481,15 @@ vm_phys_unfree_page(vm_paddr_t pa)
>  	struct vm_phys_seg *seg;
>  	vm_paddr_t pa_half;
>  	vm_page_t m, m_set, m_tmp;
> -	int order;
> +	int order, pool;
>  
>  	seg = vm_phys_paddr_to_seg(pa);
>  	vm_domain_free_assert_locked(VM_DOMAIN(seg->domain));
>  
> +#ifdef VM_FREEPOOL_LAZYINIT
>  	/*
>  	 * The pages on the free lists must be initialized.
>  	 */
> -#ifdef VM_FREEPOOL_LAZYINIT
>  	vm_phys_lazy_init_domain(seg->domain, true);
>  #endif
>  
> @@ -1515,7 +1522,8 @@ vm_phys_unfree_page(vm_paddr_t pa)
>  	 * is larger than a page, shrink "m_set" by returning the half
>  	 * of "m_set" that does not contain "m" to the free lists.
>  	 */
> -	fl = (*seg->free_queues)[m_set->pool];
> +	pool = m_set->pool;
> +	fl = (*seg->free_queues)[pool];
>  	order = m_set->order;
>  	vm_freelist_rem(fl, m_set, order);
>  	while (order > 0) {
> @@ -1527,7 +1535,7 @@ vm_phys_unfree_page(vm_paddr_t pa)
>  			m_tmp = m_set;
>  			m_set = vm_phys_seg_paddr_to_vm_page(seg, pa_half);
>  		}
> -		vm_freelist_add(fl, m_tmp, order, 0);
> +		vm_freelist_add(fl, m_tmp, order, pool, 0);
>  	}
>  	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
>  	return (true);
> @@ -1668,7 +1676,8 @@ vm_phys_find_queues_contig(
>   * alignment of the first physical page in the set.  If the given value
>   * "boundary" is non-zero, then the set of physical pages cannot cross
>   * any physical address boundary that is a multiple of that value.  Both
> - * "alignment" and "boundary" must be a power of two.
> + * "alignment" and "boundary" must be a power of two.  Sets the pool
> + * field to DEFAULT in the first allocated page.
>   */
>  vm_page_t
>  vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
> @@ -1727,12 +1736,12 @@ vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high,
>  		fl = (*queues)[m->pool];
>  		oind = m->order;
>  		vm_freelist_rem(fl, m, oind);
> -		if (m->pool != VM_FREEPOOL_DEFAULT)
> -			vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m, oind);
> +		vm_phys_finish_init(m, oind);
>  	}
>  	/* Return excess pages to the free lists. */
>  	fl = (*queues)[VM_FREEPOOL_DEFAULT];
> -	vm_phys_enq_range(&m_run[npages], m - &m_run[npages], fl, 0);
> +	vm_phys_enq_range(&m_run[npages], m - &m_run[npages], fl,
> +	    VM_FREEPOOL_DEFAULT, 0);
>  
>  	/* Return page verified to satisfy conditions of request. */
>  	pa_start = VM_PAGE_TO_PHYS(m_run);
> diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h
> index 43d94a9420f2..046fe26b476d 100644
> --- a/sys/vm/vm_phys.h
> +++ b/sys/vm/vm_phys.h
> @@ -64,15 +64,15 @@ vm_page_t vm_phys_alloc_contig(int domain, u_long npages, vm_paddr_t low,
>  int vm_phys_alloc_npages(int domain, int pool, int npages, vm_page_t ma[]);
>  vm_page_t vm_phys_alloc_pages(int domain, int pool, int order);
>  int vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high);
> -void vm_phys_enqueue_contig(vm_page_t m, u_long npages);
> +void vm_phys_enqueue_contig(vm_page_t m, int pool, u_long npages);
>  int vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
>      vm_memattr_t memattr);
>  void vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end);
>  vm_page_t vm_phys_fictitious_to_vm_page(vm_paddr_t pa);
>  int vm_phys_find_range(vm_page_t bounds[], int segind, int domain,
>      u_long npages, vm_paddr_t low, vm_paddr_t high);
> -void vm_phys_free_contig(vm_page_t m, u_long npages);
> -void vm_phys_free_pages(vm_page_t m, int order);
> +void vm_phys_free_contig(vm_page_t m, int pool, u_long npages);
> +void vm_phys_free_pages(vm_page_t m, int pool, int order);
>  void vm_phys_init(void);
>  vm_page_t vm_phys_paddr_to_vm_page(vm_paddr_t pa);
>  vm_page_t vm_phys_seg_paddr_to_vm_page(struct vm_phys_seg *seg, vm_paddr_t pa);
> diff --git a/sys/vm/vm_reserv.c b/sys/vm/vm_reserv.c
> index ad513962d50d..04453e050a38 100644
> --- a/sys/vm/vm_reserv.c
> +++ b/sys/vm/vm_reserv.c
> @@ -480,7 +480,7 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
>  	if (rv->popcnt == 0) {
>  		vm_reserv_remove(rv);
>  		vm_domain_free_lock(vmd);
> -		vm_phys_free_pages(rv->pages, VM_LEVEL_0_ORDER);
> +		vm_phys_free_pages(rv->pages, rv->pages->pool, VM_LEVEL_0_ORDER);
>  		vm_domain_free_unlock(vmd);
>  		counter_u64_add(vm_reserv_freed, 1);
>  	}
> @@ -943,7 +943,7 @@ static void
>  vm_reserv_break(vm_reserv_t rv)
>  {
>  	vm_page_t m;
> -	int hi, lo, pos;
> +	int pos, pos0, pos1;
>  
>  	vm_reserv_assert_locked(rv);
>  	CTR5(KTR_VM, "%s: rv %p object %p popcnt %d inpartpop %d",
> @@ -954,23 +954,24 @@ vm_reserv_break(vm_reserv_t rv)
>  	for (; m < rv->pages + VM_LEVEL_0_NPAGES; m += VM_SUBLEVEL_0_NPAGES)
>  #endif
>  		m->psind = 0;
> -	hi = lo = -1;
> -	pos = 0;
> -	for (;;) {
> -		bit_ff_at(rv->popmap, pos, VM_LEVEL_0_NPAGES, lo != hi, &pos);
> -		if (lo == hi) {
> -			if (pos == -1)
> -				break;
> -			lo = pos;
> -			continue;
> -		}
> +	pos0 = bit_test(rv->popmap, 0) ? -1 : 0;
> +	pos1 = -1 - pos0;
> +	for (pos = 0; pos < VM_LEVEL_0_NPAGES; ) {
> +		/* Find the first different bit after pos. */
> +		bit_ff_at(rv->popmap, pos + 1, VM_LEVEL_0_NPAGES,
> +		    pos1 < pos0, &pos);
>  		if (pos == -1)
>  			pos = VM_LEVEL_0_NPAGES;
> -		hi = pos;
> +		if (pos0 < pos1) {
> +			pos0 = pos;
> +			continue;
> +		}
> +		/* Free unused pages from pos0 to pos. */
> +		pos1 = pos;
>  		vm_domain_free_lock(VM_DOMAIN(rv->domain));
> -		vm_phys_enqueue_contig(&rv->pages[lo], hi - lo);
> +		vm_phys_enqueue_contig(&rv->pages[pos0], VM_FREEPOOL_DEFAULT,
> +		    pos1 - pos0);
>  		vm_domain_free_unlock(VM_DOMAIN(rv->domain));
> -		lo = hi;
>  	}
>  	bit_nclear(rv->popmap, 0, VM_LEVEL_0_NPAGES - 1);
>  	rv->popcnt = 0;
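
For context, a minimal sketch (not part of the commit) of what a caller looks
like under the changed interface described in the commit message: the
allocating side may record the pool in m->pool as a hint, and the freeing side
must now pass the pool explicitly to vm_phys_free_pages().  The helper names
example_alloc_page() and example_free_page() are hypothetical; the free path
mirrors the kmem_bootstrap_free() hunk above, and the sketch omits the
vm_domain free-count bookkeeping a real allocation path would do (e.g., via
vm_domain_allocate()).

#include <sys/param.h>
#include <sys/systm.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_pagequeue.h>
#include <vm/vm_phys.h>

/*
 * Hypothetical caller sketch.  The buddy allocator no longer maintains
 * m->pool for allocated pages, so a caller that wants to free back to
 * the same pool records it itself.
 */
static vm_page_t
example_alloc_page(int domain)
{
	struct vm_domain *vmd;
	vm_page_t m;

	vmd = VM_DOMAIN(domain);
	/* Free-count accounting (vm_domain_allocate()) omitted for brevity. */
	vm_domain_free_lock(vmd);
	m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT, 0);
	vm_domain_free_unlock(vmd);
	if (m != NULL)
		m->pool = VM_FREEPOOL_DEFAULT;	/* caller-maintained hint */
	return (m);
}

static void
example_free_page(vm_page_t m)
{
	struct vm_domain *vmd;

	vmd = vm_pagequeue_domain(m);
	vm_domain_free_lock(vmd);
	/* The destination pool is now an explicit parameter to the free. */
	vm_phys_free_pages(m, m->pool, 0);
	vm_domain_free_unlock(vmd);
	vm_domain_freecnt_inc(vmd, 1);
}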



