Date: Fri, 04 Nov 2011 10:09:09 -0500 From: Alan Cox <alc@rice.edu> To: Kostik Belousov <kostikbel@gmail.com> Cc: "K. Macy" <kmacy@freebsd.org>, freebsd-current@freebsd.org, Penta Upa <bsdboot@gmail.com>, Andriy Gapon <avg@freebsd.org>, Benjamin Kaduk <kaduk@mit.edu> Subject: Re: panic at vm_page_wire with FreeBSD 9.0 Beta 3 Message-ID: <4EB40015.5040100@rice.edu> In-Reply-To: <20111104100828.GG50300@deviant.kiev.zoral.com.ua> References: <CAMsyT5QgHLqgSrt7qU_4FOVocW_GwmVWgZQ1A_CNrzkHQRTm4w@mail.gmail.com> <CAHM0Q_OWrQS_gnwupdJDwRFV9M3dKRN-SzkHgz6gJEedkvTPKQ@mail.gmail.com> <CAMsyT5Q5kMHRJQqFUdCCqqvKvFS_i5bvR8sHW6vNti_boD0nfA@mail.gmail.com> <alpine.GSO.1.10.1111020203230.882@multics.mit.edu> <4EB11C32.80106@FreeBSD.org> <4EB22938.4050803@rice.edu> <20111103132437.GV50300@deviant.kiev.zoral.com.ua> <4EB2D48E.1030102@rice.edu> <20111104100828.GG50300@deviant.kiev.zoral.com.ua>
next in thread | previous in thread | raw e-mail | index | archive | help
On 11/04/2011 05:08, Kostik Belousov wrote: > On Thu, Nov 03, 2011 at 12:51:10PM -0500, Alan Cox wrote: >> On 11/03/2011 08:24, Kostik Belousov wrote: >>> On Thu, Nov 03, 2011 at 12:40:08AM -0500, Alan Cox wrote: >>>> On 11/02/2011 05:32, Andriy Gapon wrote: >>>>> [restored cc: to the original poster] >>>>> As Bruce Evans has pointed to me privately [I am not sure why privately], >>>>> there >>>>> is already an example in i386 and amd64 atomic.h, where operations are >>>>> inlined >>>>> for a kernel build, but presented as real (external) functions for a >>>>> module >>>>> build. You can search e.g. sys/amd64/include/atomic.h for KLD_MODULE. >>>>> >>>>> I think that the same treatment could/should be applied to vm_page_*lock* >>>>> operations defined in sys/vm/vm_page.h. >>>> *snip* >>>> >>>> Yes, it should be. There are without question legitimate reasons for a >>>> module to acquire a page lock. >>> I agree. Also, I think that we should use the opportunity to also isolate >>> the modules from the struct vm_page layout changes. As example, I converted >>> nfsclient.ko. >>> >> I would suggest introducing the vm_page_bits_t change first. If, at the >> same time, you change the return type from the function vm_page_bits() >> to use vm_page_bits_t, then I believe it is straightforward to fix all >> of the places in vm_page.c that don't properly handle a 32 KB page size. > Ok, I think this is orhtohonal to the ABI issue. The vm_page_bits_t > applied. Agreed, which is why I wanted to separate the two things. I've made a few comments below. > diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c > index f14da4a..f22c34a 100644 > --- a/sys/vm/vm_page.c > +++ b/sys/vm/vm_page.c > @@ -137,7 +137,7 @@ SYSCTL_INT(_vm, OID_AUTO, tryrelock_restart, CTLFLAG_RD, > > static uma_zone_t fakepg_zone; > > -static void vm_page_clear_dirty_mask(vm_page_t m, int pagebits); > +static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); > static void vm_page_queue_remove(int queue, vm_page_t m); > static void vm_page_enqueue(int queue, vm_page_t m); > static void vm_page_init_fakepg(void *dummy); > @@ -2350,7 +2350,7 @@ retrylookup: > * > * Inputs are required to range within a page. > */ > -int > +vm_page_bits_t > vm_page_bits(int base, int size) > { > int first_bit; > @@ -2367,7 +2367,8 @@ vm_page_bits(int base, int size) > first_bit = base>> DEV_BSHIFT; > last_bit = (base + size - 1)>> DEV_BSHIFT; > > - return ((2<< last_bit) - (1<< first_bit)); > + return (((vm_page_bits_t)2<< last_bit) - > + ((vm_page_bits_t)1<< first_bit)); > } > > /* > @@ -2426,7 +2427,7 @@ vm_page_set_valid(vm_page_t m, int base, int size) > * Clear the given bits from the specified page's dirty field. > */ > static __inline void > -vm_page_clear_dirty_mask(vm_page_t m, int pagebits) > +vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits) > { > uintptr_t addr; > #if PAGE_SIZE< 16384 > @@ -2455,7 +2456,6 @@ vm_page_clear_dirty_mask(vm_page_t m, int pagebits) > */ > addr = (uintptr_t)&m->dirty; > #if PAGE_SIZE == 32768 > -#error pagebits too short > atomic_clear_64((uint64_t *)addr, pagebits); > #elif PAGE_SIZE == 16384 > atomic_clear_32((uint32_t *)addr, pagebits); > @@ -2492,8 +2492,8 @@ vm_page_clear_dirty_mask(vm_page_t m, int pagebits) > void > vm_page_set_validclean(vm_page_t m, int base, int size) > { > - u_long oldvalid; > - int endoff, frag, pagebits; > + vm_page_bits_t oldvalid, pagebits; > + int endoff, frag; > > VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); > if (size == 0) /* handle degenerate case */ > @@ -2505,7 +2505,7 @@ vm_page_set_validclean(vm_page_t m, int base, int size) > * first block. > */ > if ((frag = base& ~(DEV_BSIZE - 1)) != base&& > - (m->valid& (1<< (base>> DEV_BSHIFT))) == 0) > + (m->valid& ((vm_page_bits_t)1<< (base>> DEV_BSHIFT))) == 0) > pmap_zero_page_area(m, frag, base - frag); > > /* > @@ -2515,7 +2515,7 @@ vm_page_set_validclean(vm_page_t m, int base, int size) > */ > endoff = base + size; > if ((frag = endoff& ~(DEV_BSIZE - 1)) != endoff&& > - (m->valid& (1<< (endoff>> DEV_BSHIFT))) == 0) > + (m->valid& ((vm_page_bits_t)1<< (endoff>> DEV_BSHIFT))) == 0) > pmap_zero_page_area(m, endoff, > DEV_BSIZE - (endoff& (DEV_BSIZE - 1))); > > @@ -2585,7 +2585,7 @@ vm_page_clear_dirty(vm_page_t m, int base, int size) > void > vm_page_set_invalid(vm_page_t m, int base, int size) > { > - int bits; > + vm_page_bits_t bits; > > VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); > KASSERT((m->oflags& VPO_BUSY) == 0, I believe that a cast is still needed in vm_page_zero_invalid(): (m->valid & (1 << i)) > @@ -2656,9 +2656,10 @@ vm_page_zero_invalid(vm_page_t m, boolean_t setvalid) > int > vm_page_is_valid(vm_page_t m, int base, int size) > { > - int bits = vm_page_bits(base, size); > + vm_page_bits_t bits; > > VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); > + bits = vm_page_bits(base, size); > if (m->valid&& ((m->valid& bits) == bits)) > return 1; > else > diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h > index 23637bb..7d1a529 100644 > --- a/sys/vm/vm_page.h > +++ b/sys/vm/vm_page.h > @@ -113,6 +113,21 @@ > > TAILQ_HEAD(pglist, vm_page); > > +#if PAGE_SIZE == 4096 > +#define VM_PAGE_BITS_ALL 0xffu > +typedef uint8_t vm_page_bits_t; > +#elif PAGE_SIZE == 8192 > +#define VM_PAGE_BITS_ALL 0xffffu > +typedef uint16_t vm_page_bits_t; > +#elif PAGE_SIZE == 16384 > +#define VM_PAGE_BITS_ALL 0xffffffffu > +typedef uint32_t vm_page_bits_t; > +#elif PAGE_SIZE == 32768 > +#define VM_PAGE_BITS_ALL 0xfffffffffffffffflu > +typedef uint64_t vm_page_bits_t; > +#endif > + > + Is there any reason for the extra blank line? > struct vm_page { > TAILQ_ENTRY(vm_page) pageq; /* queue info for FIFO queue or free list (Q) */ > TAILQ_ENTRY(vm_page) listq; /* pages in same object (O) */ > @@ -138,19 +153,8 @@ struct vm_page { > /* NOTE that these must support one bit per DEV_BSIZE in a page!!! */ > /* so, on normal X86 kernels, they must be at least 8 bits wide */ > /* In reality, support for 32KB pages is not fully implemented. */ The previous comment can now be eliminated. > -#if PAGE_SIZE == 4096 > - uint8_t valid; /* map of valid DEV_BSIZE chunks (O) */ > - uint8_t dirty; /* map of dirty DEV_BSIZE chunks (M) */ > -#elif PAGE_SIZE == 8192 > - uint16_t valid; /* map of valid DEV_BSIZE chunks (O) */ > - uint16_t dirty; /* map of dirty DEV_BSIZE chunks (M) */ > -#elif PAGE_SIZE == 16384 > - uint32_t valid; /* map of valid DEV_BSIZE chunks (O) */ > - uint32_t dirty; /* map of dirty DEV_BSIZE chunks (M) */ > -#elif PAGE_SIZE == 32768 > - uint64_t valid; /* map of valid DEV_BSIZE chunks (O) */ > - uint64_t dirty; /* map of dirty DEV_BSIZE chunks (M) */ > -#endif > + vm_page_bits_t valid; /* map of valid DEV_BSIZE chunks (O) */ > + vm_page_bits_t dirty; /* map of dirty DEV_BSIZE chunks (M) */ > }; > > /* > @@ -403,7 +407,7 @@ void vm_page_clear_dirty (vm_page_t, int, int); > void vm_page_set_invalid (vm_page_t, int, int); > int vm_page_is_valid (vm_page_t, int, int); > void vm_page_test_dirty (vm_page_t); > -int vm_page_bits (int, int); > +vm_page_bits_t vm_page_bits (int, int); Might as well change this to: vm_page_bits(int base, int size) > void vm_page_zero_invalid(vm_page_t m, boolean_t setvalid); > void vm_page_free_toq(vm_page_t m); > void vm_page_zero_idle_wakeup(void); > diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c > index e3222cb..cd2658d 100644 > --- a/sys/vm/vnode_pager.c > +++ b/sys/vm/vnode_pager.c > @@ -486,15 +486,16 @@ vnode_pager_input_smlfs(object, m) > vm_object_t object; > vm_page_t m; > { > - int bits, i; > struct vnode *vp; > struct bufobj *bo; > struct buf *bp; > struct sf_buf *sf; > daddr_t fileaddr; > vm_offset_t bsize; > - int error = 0; > + vm_page_bits_t bits; > + int error, i; > > + error = 0; > vp = object->handle; > if (vp->v_iflag& VI_DOOMED) > return VM_PAGER_BAD; > Looks good. Alan
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4EB40015.5040100>