Date: Fri, 12 Oct 2001 13:26:22 -0700 (PDT) From: Matt Dillon <dillon@earth.backplane.com> To: Mike Silbersack <silby@silby.com> Cc: <cvs-committers@FreeBSD.ORG>, <cvs-all@FreeBSD.ORG> Subject: Re: cvs commit: src/sys/vm vnode_pager.c Message-ID: <200110122026.f9CKQMS35969@earth.backplane.com> References: <20011012145142.L29945-100000@achilles.silby.com>
next in thread | previous in thread | raw e-mail | index | archive | help
:
:
:On Fri, 12 Oct 2001, Matt Dillon wrote:
:
:> dillon 2001/10/12 11:17:34 PDT
:>
:> Modified files:
:> sys/vm vnode_pager.c
:> Log:
:> Finally fix the VM bug where a file whose EOF occurs in the middle of a page
:> would sometimes prevent a dirty page from being cleaned, even when synced,
:> resulting in the dirty page being re-flushed to disk every 30-60 seconds or
:> so, forever. The problem is that when the filesystem flushes a page to
:
:How commonly did this occur?
:
:Mike "Silby" Silbersack
It depends heavily on the situation. A file typically must be written
through an mmap(), or a recently-write()en file must be mmap()'d and
then accessed via the mmap() before the dirty buffer is flushed. Only
the last page of a file can get into this state and typically only
if it is stored as a single fragment by the filesystem - i.e. the
physical I/O executed by the filesystem is less than a page.
The biggest effect occurs when people are manipulating a large number of
small files. Once a vm_page gets into this state it's stuck in it
until the associated file is either removed or further extended.
Nothing else will clean up the state of the vm_page, which means that
vm_pages in this state can accumulate over time until you have hundreds
or even thousands of them (if your activity is operating on hundreds or
thousands of files). When you get to that point the syncer generates
a huge amount of repeated disk I/O every 30-60 seconds and, of course,
the system is unable to reclaim the affected pages for other uses.
In recent years the use of mmap() has increased hugely. For example,
'cp', 'install', and 'tail' now use it, as does 'samba' and 'apache'.
I only know of one or two 'severe' cases. 99% of installations will
not be affected because they simply do not access a large enough number
of files or do not access files in a way that hits the bug for it to
become a problem. For example, my home system has been up 69 days
and not a single page is in this state. One of our production servers
with an uptime of 4 days has one page in this state. Another production
server up 185 days has 0 pages in this state.
You can test your own boxes by running the vm pagelist dump program
included below and grepping for 'dirty' values in weird states (not 00
and not fc) that don't get cleaned up when you do a 'sync'.
-Matt
#!/bin/tcsh -f
#
# Build script for the pagelist program: compiles pagelist.c, links it
# against libkvm (-lkvm) and installs the binary as /usr/local/bin/pagelist.
#
# MODIFY AS APPROPRIATE. Must point to a compile/XXX directory to get
# the various config option header files.  The second -I path below names
# one particular kernel build directory (APOLLO); replace it with the
# compile directory of the kernel configuration you are running.
cc pagelist.c -o /usr/local/bin/pagelist -I/usr/src/sys -I/usr/src/sys/compile/APOLLO -lkvm
/*
* PAGELIST.C
*
* ./pagelist | egrep -v 'drty 00|drty ff'
*/
#include <sys/param.h>
/*#include <sys/systm.h>*/
/*#include <sys/kernel.h>*/
#include <sys/proc.h>
/*#include <sys/resourcevar.h>*/
#include <sys/malloc.h>
/*#include <sys/kernel.h>*/
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
/*#include <vm/vm_pageout.h>*/
#include <vm/vm_kern.h>
#include <vm/swap_pager.h>
#include <vm/vnode_pager.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <kvm.h>
#include <nlist.h>
/*
 * Kernel symbols looked up with kvm_nlist().  main() refers to the entries
 * by position, so the order of this table must not change:
 *
 *   Nl[0]  read into the local vm_page_queues[] array
 *   Nl[1]  read as a vm_page_t, the starting pointer for the full dump
 *   Nl[2]  read as an int, the number of pages to dump
 *
 * The { NULL } entry terminates the list.
 */
struct nlist Nl[] = {
{ "_vm_page_queues" },
{ "_vm_page_array" },
{ "_vm_page_array_size" },
{ NULL }
};
/*
 * Read nbytes from kernel address addr into buf via kvm_read(); prints an
 * error and exits with status 1 if the full amount cannot be read.
 */
void kkread(kvm_t *kd, u_long addr, void *buf, size_t nbytes);
/*
 * Print a one-line summary of the vm_page copy pointed to by page; kva is
 * the value printed in the first column of that line.
 */
void showpage(long kva, vm_page_t page);
main(int ac, char **av)
{
struct pglist vm_page_queues[PQ_COUNT];
vm_page_t vptr;
kvm_t *kd;
short ShowAll = 0;
short ShowInactive = 0;
short ShowActive = 0;
short ShowCache = 0;
short ShowFree = 0;
long ShowAddr = 0;
{
int i;
for (i = 1; i < ac; ++i) {
if (strncmp(av[i], "ina", 3) == 0) {
ShowInactive = 1;
}
if (strncmp(av[i], "act", 3) == 0) {
ShowActive = 1;
}
if (strncmp(av[i], "cac", 3) == 0) {
ShowCache = 1;
}
if (strncmp(av[i], "fre", 3) == 0) {
ShowFree = 1;
}
if (strncmp(av[i], "0x", 2) == 0) {
ShowAddr = (long)strtoul(av[i], NULL, 0);
}
}
if (ac == 1) {
ShowAll = 1;
}
}
if ((kd = kvm_open(NULL, NULL, NULL, O_RDONLY, "kvm:")) == NULL) {
perror("kvm_open");
exit(1);
}
if (kvm_nlist(kd, Nl) != 0) {
perror("kvm_nlist");
exit(1);
}
kkread(kd, Nl[0].n_value, &vm_page_queues[0], sizeof(vm_page_queues));
if (ShowAddr) {
struct vm_page vm_page;
kkread(kd, (u_long)ShowAddr, &vm_page, sizeof(vm_page));
showpage((long)ShowAddr, &vm_page);
}
if (ShowInactive) {
vptr = vm_page_queues[PQ_INACTIVE].tqh_first;
while (vptr != NULL) {
struct vm_page vm_page;
kkread(kd, (u_long)vptr, &vm_page, sizeof(vm_page));
/*
if ((vm_page.flags & PG_INACTIVE) == 0) {
printf("<lost chain>\n");
break;
}
*/
showpage((long)vptr, &vm_page);
vptr = vm_page.pageq.tqe_next;
}
puts("");
}
if (ShowActive) {
vptr = vm_page_queues[PQ_ACTIVE].tqh_first;
while (vptr != NULL) {
struct vm_page vm_page;
kkread(kd, (u_long)vptr, &vm_page, sizeof(vm_page));
/*
if ((vm_page.flags & PG_ACTIVE) == 0) {
printf("<lost chain>\n");
break;
}
*/
showpage((long)vptr, &vm_page);
vptr = vm_page.pageq.tqe_next;
}
puts("");
}
if (ShowCache) {
vptr = vm_page_queues[PQ_CACHE].tqh_first;
while (vptr != NULL) {
struct vm_page vm_page;
kkread(kd, (u_long)vptr, &vm_page, sizeof(vm_page));
/*
if ((vm_page.flags & PG_CACHE) == 0) {
printf("<lost chain>\n");
break;
}
*/
showpage((long)vptr, &vm_page);
vptr = vm_page.pageq.tqe_next;
}
puts("");
}
if (ShowFree) {
vptr = vm_page_queues[PQ_FREE].tqh_first;
while (vptr != NULL) {
struct vm_page vm_page;
kkread(kd, (u_long)vptr, &vm_page, sizeof(vm_page));
/*
if ((vm_page.flags & PG_FREE) == 0) {
printf("<lost chain>\n");
break;
}
*/
showpage((long)vptr, &vm_page);
vptr = vm_page.pageq.tqe_next;
}
puts("");
}
if (ShowAll) {
int count;
kkread(kd, Nl[1].n_value, &vptr, sizeof(vptr));
kkread(kd, Nl[2].n_value, &count, sizeof(count));
while (count) {
struct vm_page vm_page;
kkread(kd, (u_long)vptr, &vm_page, sizeof(vm_page));
showpage((long)vptr, &vm_page);
--count;
++vptr;
}
}
kvm_close(kd);
return(0);
}
/*
 * Print a one-line summary of a vm_page.
 *
 *   kva   value printed in the first column (callers pass the kernel
 *         address the page was read from)
 *   page  user-space copy of the vm_page to describe
 *
 * Columns, in order: kva, phys_addr, pc, object, pindex, busy, hold_count,
 * wire_count, dirty, valid, act_count, then three one-character indicators
 * ('b' if busy is non-zero, 'v' if valid is non-zero, 'd' if dirty is
 * non-zero, '-' otherwise) and a tag for each PG_* flag that is set.
 *
 * Every variadic argument is cast to the exact type its conversion
 * specifier expects.  The field types are typedefs from the kernel headers,
 * so passing them through uncast makes the call depend on their widths.
 */
void
showpage(long kva, vm_page_t page)
{
	printf("%08lx phys %08lx pc %02x obj %08lx pi %-5ld bsy %d hld %d wir %2d drty %02x val %02x act %-3d %c%c%c flags:%s%s%s%s%s%s%s%s%s\n",
	    (unsigned long)kva,
	    (unsigned long)page->phys_addr,
	    (unsigned int)page->pc,
	    (unsigned long)page->object,
	    (long)page->pindex,
	    (int)page->busy,
	    (int)page->hold_count,
	    (int)page->wire_count,
	    (unsigned int)page->dirty,
	    (unsigned int)page->valid,
	    (int)page->act_count,
	    ((page->busy) ? 'b' : '-'),
	    ((page->valid) ? 'v' : '-'),
	    ((page->dirty) ? 'd' : '-'),
	    ((page->flags & PG_BUSY) ? " BSY" : ""),
	    ((page->flags & PG_WANTED) ? " WNT" : ""),
	    ((page->flags & PG_FICTITIOUS) ? " FIC" : ""),
	    ((page->flags & PG_WRITEABLE) ? " WRT" : ""),
	    ((page->flags & PG_MAPPED) ? " MAP" : ""),
	    ((page->flags & PG_ZERO) ? " ZRO" : ""),
	    ((page->flags & PG_REFERENCED) ? " REF" : ""),
	    ((page->flags & PG_CLEANCHK) ? " CCHK" : ""),
	    ((page->flags & PG_SWAPINPROG) ? " SWP" : "")
	);
}
/*
 * Copy nbytes from kernel address addr into buf using kvm_read().
 *
 * Returns only when the whole request was satisfied.  On an error return
 * or a short read it calls perror("kvm_read") and exits with status 1.
 */
void
kkread(kvm_t *kd, u_long addr, void *buf, size_t nbytes)
{
	ssize_t got;

	got = kvm_read(kd, addr, buf, nbytes);
	if (got >= 0 && (size_t)got == nbytes)
		return;

	perror("kvm_read");
	exit(1);
}
To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe cvs-all" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200110122026.f9CKQMS35969>
