Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 04 Nov 2011 10:09:09 -0500
From:      Alan Cox <alc@rice.edu>
To:        Kostik Belousov <kostikbel@gmail.com>
Cc:        "K. Macy" <kmacy@freebsd.org>, freebsd-current@freebsd.org, Penta Upa <bsdboot@gmail.com>, Andriy Gapon <avg@freebsd.org>, Benjamin Kaduk <kaduk@mit.edu>
Subject:   Re: panic at vm_page_wire with FreeBSD 9.0 Beta 3
Message-ID:  <4EB40015.5040100@rice.edu>
In-Reply-To: <20111104100828.GG50300@deviant.kiev.zoral.com.ua>
References:  <CAMsyT5QgHLqgSrt7qU_4FOVocW_GwmVWgZQ1A_CNrzkHQRTm4w@mail.gmail.com> <CAHM0Q_OWrQS_gnwupdJDwRFV9M3dKRN-SzkHgz6gJEedkvTPKQ@mail.gmail.com> <CAMsyT5Q5kMHRJQqFUdCCqqvKvFS_i5bvR8sHW6vNti_boD0nfA@mail.gmail.com> <alpine.GSO.1.10.1111020203230.882@multics.mit.edu> <4EB11C32.80106@FreeBSD.org> <4EB22938.4050803@rice.edu> <20111103132437.GV50300@deviant.kiev.zoral.com.ua> <4EB2D48E.1030102@rice.edu> <20111104100828.GG50300@deviant.kiev.zoral.com.ua>

next in thread | previous in thread | raw e-mail | index | archive | help
On 11/04/2011 05:08, Kostik Belousov wrote:
> On Thu, Nov 03, 2011 at 12:51:10PM -0500, Alan Cox wrote:
>> On 11/03/2011 08:24, Kostik Belousov wrote:
>>> On Thu, Nov 03, 2011 at 12:40:08AM -0500, Alan Cox wrote:
>>>> On 11/02/2011 05:32, Andriy Gapon wrote:
>>>>> [restored cc: to the original poster]
>>>>> As Bruce Evans has pointed to me privately [I am not sure why privately],
>>>>> there
>>>>> is already an example in i386 and amd64 atomic.h, where operations are
>>>>> inlined
>>>>> for a kernel build, but presented as real (external) functions for a
>>>>> module
>>>>> build.  You can search e.g. sys/amd64/include/atomic.h for KLD_MODULE.
>>>>>
>>>>> I think that the same treatment could/should be applied to vm_page_*lock*
>>>>> operations defined in sys/vm/vm_page.h.
>>>> *snip*
>>>>
>>>> Yes, it should be.  There are without question legitimate reasons for a
>>>> module to acquire a page lock.
>>> I agree. Also, I think that we should use the opportunity to also isolate
>>> the modules from the struct vm_page layout changes. As example, I converted
>>> nfsclient.ko.
>>>
>> I would suggest introducing the vm_page_bits_t change first.  If, at the
>> same time, you change the return type from the function vm_page_bits()
>> to use vm_page_bits_t, then I believe it is straightforward to fix all
>> of the places in vm_page.c that don't properly handle a 32 KB page size.
> Ok, I think this is orhtohonal to the ABI issue. The vm_page_bits_t
> applied.

Agreed, which is why I wanted to separate the two things.

I've made a few comments below.

> diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
> index f14da4a..f22c34a 100644
> --- a/sys/vm/vm_page.c
> +++ b/sys/vm/vm_page.c
> @@ -137,7 +137,7 @@ SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD,
>
>   static uma_zone_t fakepg_zone;
>
> -static void vm_page_clear_dirty_mask(vm_page_t m, int pagebits);
> +static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits);
>   static void vm_page_queue_remove(int queue, vm_page_t m);
>   static void vm_page_enqueue(int queue, vm_page_t m);
>   static void vm_page_init_fakepg(void *dummy);
> @@ -2350,7 +2350,7 @@ retrylookup:
>    *
>    * Inputs are required to range within a page.
>    */
> -int
> +vm_page_bits_t
>   vm_page_bits(int base, int size)
>   {
>   	int first_bit;
> @@ -2367,7 +2367,8 @@ vm_page_bits(int base, int size)
>   	first_bit = base>>  DEV_BSHIFT;
>   	last_bit = (base + size - 1)>>  DEV_BSHIFT;
>
> -	return ((2<<  last_bit) - (1<<  first_bit));
> +	return (((vm_page_bits_t)2<<  last_bit) -
> +	    ((vm_page_bits_t)1<<  first_bit));
>   }
>
>   /*
> @@ -2426,7 +2427,7 @@ vm_page_set_valid(vm_page_t m, int base, int size)
>    * Clear the given bits from the specified page's dirty field.
>    */
>   static __inline void
> -vm_page_clear_dirty_mask(vm_page_t m, int pagebits)
> +vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits)
>   {
>   	uintptr_t addr;
>   #if PAGE_SIZE<  16384
> @@ -2455,7 +2456,6 @@ vm_page_clear_dirty_mask(vm_page_t m, int pagebits)
>   		 */
>   		addr = (uintptr_t)&m->dirty;
>   #if PAGE_SIZE == 32768
> -#error pagebits too short
>   		atomic_clear_64((uint64_t *)addr, pagebits);
>   #elif PAGE_SIZE == 16384
>   		atomic_clear_32((uint32_t *)addr, pagebits);
> @@ -2492,8 +2492,8 @@ vm_page_clear_dirty_mask(vm_page_t m, int pagebits)
>   void
>   vm_page_set_validclean(vm_page_t m, int base, int size)
>   {
> -	u_long oldvalid;
> -	int endoff, frag, pagebits;
> +	vm_page_bits_t oldvalid, pagebits;
> +	int endoff, frag;
>
>   	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
>   	if (size == 0)	/* handle degenerate case */
> @@ -2505,7 +2505,7 @@ vm_page_set_validclean(vm_page_t m, int base, int size)
>   	 * first block.
>   	 */
>   	if ((frag = base&  ~(DEV_BSIZE - 1)) != base&&
> -	    (m->valid&  (1<<  (base>>  DEV_BSHIFT))) == 0)
> +	    (m->valid&  ((vm_page_bits_t)1<<  (base>>  DEV_BSHIFT))) == 0)
>   		pmap_zero_page_area(m, frag, base - frag);
>
>   	/*
> @@ -2515,7 +2515,7 @@ vm_page_set_validclean(vm_page_t m, int base, int size)
>   	 */
>   	endoff = base + size;
>   	if ((frag = endoff&  ~(DEV_BSIZE - 1)) != endoff&&
> -	    (m->valid&  (1<<  (endoff>>  DEV_BSHIFT))) == 0)
> +	    (m->valid&  ((vm_page_bits_t)1<<  (endoff>>  DEV_BSHIFT))) == 0)
>   		pmap_zero_page_area(m, endoff,
>   		    DEV_BSIZE - (endoff&  (DEV_BSIZE - 1)));
>
> @@ -2585,7 +2585,7 @@ vm_page_clear_dirty(vm_page_t m, int base, int size)
>   void
>   vm_page_set_invalid(vm_page_t m, int base, int size)
>   {
> -	int bits;
> +	vm_page_bits_t bits;
>
>   	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
>   	KASSERT((m->oflags&  VPO_BUSY) == 0,

I believe that a cast is still needed in vm_page_zero_invalid():

                     (m->valid & (1 << i))

> @@ -2656,9 +2656,10 @@ vm_page_zero_invalid(vm_page_t m, boolean_t setvalid)
>   int
>   vm_page_is_valid(vm_page_t m, int base, int size)
>   {
> -	int bits = vm_page_bits(base, size);
> +	vm_page_bits_t bits;
>
>   	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
> +	bits = vm_page_bits(base, size);
>   	if (m->valid&&  ((m->valid&  bits) == bits))
>   		return 1;
>   	else
> diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
> index 23637bb..7d1a529 100644
> --- a/sys/vm/vm_page.h
> +++ b/sys/vm/vm_page.h
> @@ -113,6 +113,21 @@
>
>   TAILQ_HEAD(pglist, vm_page);
>
> +#if PAGE_SIZE == 4096
> +#define VM_PAGE_BITS_ALL 0xffu
> +typedef uint8_t vm_page_bits_t;
> +#elif PAGE_SIZE == 8192
> +#define VM_PAGE_BITS_ALL 0xffffu
> +typedef uint16_t vm_page_bits_t;
> +#elif PAGE_SIZE == 16384
> +#define VM_PAGE_BITS_ALL 0xffffffffu
> +typedef uint32_t vm_page_bits_t;
> +#elif PAGE_SIZE == 32768
> +#define VM_PAGE_BITS_ALL 0xfffffffffffffffflu
> +typedef uint64_t vm_page_bits_t;
> +#endif
> +
> +

Is there any reason for the extra blank line?

>   struct vm_page {
>   	TAILQ_ENTRY(vm_page) pageq;	/* queue info for FIFO queue or free list (Q) */
>   	TAILQ_ENTRY(vm_page) listq;	/* pages in same object (O) 	*/
> @@ -138,19 +153,8 @@ struct vm_page {
>   	/* NOTE that these must support one bit per DEV_BSIZE in a page!!! */
>   	/* so, on normal X86 kernels, they must be at least 8 bits wide */
>   	/* In reality, support for 32KB pages is not fully implemented. */

The previous comment can now be eliminated.

> -#if PAGE_SIZE == 4096
> -	uint8_t	valid;			/* map of valid DEV_BSIZE chunks (O) */
> -	uint8_t	dirty;			/* map of dirty DEV_BSIZE chunks (M) */
> -#elif PAGE_SIZE == 8192
> -	uint16_t valid;			/* map of valid DEV_BSIZE chunks (O) */
> -	uint16_t dirty;			/* map of dirty DEV_BSIZE chunks (M) */
> -#elif PAGE_SIZE == 16384
> -	uint32_t valid;			/* map of valid DEV_BSIZE chunks (O) */
> -	uint32_t dirty;			/* map of dirty DEV_BSIZE chunks (M) */
> -#elif PAGE_SIZE == 32768
> -	uint64_t valid;			/* map of valid DEV_BSIZE chunks (O) */
> -	uint64_t dirty;			/* map of dirty DEV_BSIZE chunks (M) */
> -#endif
> +	vm_page_bits_t valid;		/* map of valid DEV_BSIZE chunks (O) */
> +	vm_page_bits_t dirty;		/* map of dirty DEV_BSIZE chunks (M) */
>   };
>
>   /*
> @@ -403,7 +407,7 @@ void vm_page_clear_dirty (vm_page_t, int, int);
>   void vm_page_set_invalid (vm_page_t, int, int);
>   int vm_page_is_valid (vm_page_t, int, int);
>   void vm_page_test_dirty (vm_page_t);
> -int vm_page_bits (int, int);
> +vm_page_bits_t vm_page_bits (int, int);

Might as well change this to: vm_page_bits(int base, int size)

>   void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid);
>   void vm_page_free_toq(vm_page_t m);
>   void vm_page_zero_idle_wakeup(void);
> diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
> index e3222cb..cd2658d 100644
> --- a/sys/vm/vnode_pager.c
> +++ b/sys/vm/vnode_pager.c
> @@ -486,15 +486,16 @@ vnode_pager_input_smlfs(object, m)
>   	vm_object_t object;
>   	vm_page_t m;
>   {
> -	int bits, i;
>   	struct vnode *vp;
>   	struct bufobj *bo;
>   	struct buf *bp;
>   	struct sf_buf *sf;
>   	daddr_t fileaddr;
>   	vm_offset_t bsize;
> -	int error = 0;
> +	vm_page_bits_t bits;
> +	int error, i;
>
> +	error = 0;
>   	vp = object->handle;
>   	if (vp->v_iflag&  VI_DOOMED)
>   		return VM_PAGER_BAD;
>

Looks good.

Alan




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4EB40015.5040100>