Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 14 Jun 2018 23:54:52 +0100
From:      Steven Hartland <steven.hartland@multiplay.co.uk>
To:        Konstantin Belousov <kib@FreeBSD.org>, src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   Re: svn commit: r335171 - head/sys/vm
Message-ID:  <c241bd08-eb94-152e-c1f1-d77dc6987908@multiplay.co.uk>
In-Reply-To: <201806141941.w5EJf2qa069373@repo.freebsd.org>
References:  <201806141941.w5EJf2qa069373@repo.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
Out of interest, how would this exhibit itself?

On 14/06/2018 20:41, Konstantin Belousov wrote:
> Author: kib
> Date: Thu Jun 14 19:41:02 2018
> New Revision: 335171
> URL: https://svnweb.freebsd.org/changeset/base/335171
>
> Log:
>    Handle the race between fork/vm_object_split() and faults.
>    
>    If fault started before vmspace_fork() locked the map, and then during
>    fork, vm_map_copy_entry()->vm_object_split() is executed, it is
>    possible that the fault instantiate the page into the original object
>    when the page was already copied into the new object (see
>    vm_map_split() for the orig/new objects terminology). This can happen
>    if split found a busy page (e.g. from the fault) and slept dropping
>    the objects lock, which allows the swap pager to instantiate
>    read-behind pages for the fault.  Then the restart of the scan can see
>    a page in the scanned range, where it was already copied to the upper
>    object.
>    
>    Fix it by instantiating the read-ahead pages before
>    swap_pager_getpages() method drops the lock to allocate pbuf.  The
>    object scan would see the whole range prefilled with the busy pages
>    and not proceed the range.
>    
>    Note that vm_fault rechecks the map generation count after the object
>    unlock, so that it restarts the handling if raced with split, and
>    re-lookups the right page from the upper object.
>    
>    In collaboration with:	alc
>    Tested by:	pho
>    Sponsored by:	The FreeBSD Foundation
>    MFC after:	1 week
>
> Modified:
>    head/sys/vm/swap_pager.c
>
> Modified: head/sys/vm/swap_pager.c
> ==============================================================================
> --- head/sys/vm/swap_pager.c	Thu Jun 14 19:01:40 2018	(r335170)
> +++ head/sys/vm/swap_pager.c	Thu Jun 14 19:41:02 2018	(r335171)
> @@ -1096,21 +1096,24 @@ swap_pager_getpages(vm_object_t object, vm_page_t *ma,
>       int *rahead)
>   {
>   	struct buf *bp;
> -	vm_page_t mpred, msucc, p;
> +	vm_page_t bm, mpred, msucc, p;
>   	vm_pindex_t pindex;
>   	daddr_t blk;
> -	int i, j, maxahead, maxbehind, reqcount, shift;
> +	int i, maxahead, maxbehind, reqcount;
>   
>   	reqcount = count;
>   
> -	VM_OBJECT_WUNLOCK(object);
> -	bp = getpbuf(&nsw_rcount);
> -	VM_OBJECT_WLOCK(object);
> -
> -	if (!swap_pager_haspage(object, ma[0]->pindex, &maxbehind, &maxahead)) {
> -		relpbuf(bp, &nsw_rcount);
> +	/*
> +	 * Determine the final number of read-behind pages and
> +	 * allocate them BEFORE releasing the object lock.  Otherwise,
> +	 * there can be a problematic race with vm_object_split().
> +	 * Specifically, vm_object_split() might first transfer pages
> +	 * that precede ma[0] in the current object to a new object,
> +	 * and then this function incorrectly recreates those pages as
> +	 * read-behind pages in the current object.
> +	 */
> +	if (!swap_pager_haspage(object, ma[0]->pindex, &maxbehind, &maxahead))
>   		return (VM_PAGER_FAIL);
> -	}
>   
>   	/*
>   	 * Clip the readahead and readbehind ranges to exclude resident pages.
> @@ -1132,35 +1135,31 @@ swap_pager_getpages(vm_object_t object, vm_page_t *ma,
>   			*rbehind = pindex - mpred->pindex - 1;
>   	}
>   
> +	bm = ma[0];
> +	for (i = 0; i < count; i++)
> +		ma[i]->oflags |= VPO_SWAPINPROG;
> +
>   	/*
>   	 * Allocate readahead and readbehind pages.
>   	 */
> -	shift = rbehind != NULL ? *rbehind : 0;
> -	if (shift != 0) {
> -		for (i = 1; i <= shift; i++) {
> +	if (rbehind != NULL) {
> +		for (i = 1; i <= *rbehind; i++) {
>   			p = vm_page_alloc(object, ma[0]->pindex - i,
>   			    VM_ALLOC_NORMAL);
> -			if (p == NULL) {
> -				/* Shift allocated pages to the left. */
> -				for (j = 0; j < i - 1; j++)
> -					bp->b_pages[j] =
> -					    bp->b_pages[j + shift - i + 1];
> +			if (p == NULL)
>   				break;
> -			}
> -			bp->b_pages[shift - i] = p;
> +			p->oflags |= VPO_SWAPINPROG;
> +			bm = p;
>   		}
> -		shift = i - 1;
> -		*rbehind = shift;
> +		*rbehind = i - 1;
>   	}
> -	for (i = 0; i < reqcount; i++)
> -		bp->b_pages[i + shift] = ma[i];
>   	if (rahead != NULL) {
>   		for (i = 0; i < *rahead; i++) {
>   			p = vm_page_alloc(object,
>   			    ma[reqcount - 1]->pindex + i + 1, VM_ALLOC_NORMAL);
>   			if (p == NULL)
>   				break;
> -			bp->b_pages[shift + reqcount + i] = p;
> +			p->oflags |= VPO_SWAPINPROG;
>   		}
>   		*rahead = i;
>   	}
> @@ -1171,15 +1170,18 @@ swap_pager_getpages(vm_object_t object, vm_page_t *ma,
>   
>   	vm_object_pip_add(object, count);
>   
> -	for (i = 0; i < count; i++)
> -		bp->b_pages[i]->oflags |= VPO_SWAPINPROG;
> -
> -	pindex = bp->b_pages[0]->pindex;
> +	pindex = bm->pindex;
>   	blk = swp_pager_meta_ctl(object, pindex, 0);
>   	KASSERT(blk != SWAPBLK_NONE,
>   	    ("no swap blocking containing %p(%jx)", object, (uintmax_t)pindex));
>   
>   	VM_OBJECT_WUNLOCK(object);
> +	bp = getpbuf(&nsw_rcount);
> +	/* Pages cannot leave the object while busy. */
> +	for (i = 0, p = bm; i < count; i++, p = TAILQ_NEXT(p, listq)) {
> +		MPASS(p->pindex == bm->pindex + i);
> +		bp->b_pages[i] = p;
> +	}
>   
>   	bp->b_flags |= B_PAGING;
>   	bp->b_iocmd = BIO_READ;
>




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?c241bd08-eb94-152e-c1f1-d77dc6987908>