Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 5 Feb 2013 18:13:35 +0200
From:      Konstantin Belousov <kostikbel@gmail.com>
To:        mdf@FreeBSD.org
Cc:        davide@freebsd.org, alc@freebsd.org, avg@freebsd.org, rank1seeker@gmail.com, hackers@freebsd.org, Neel Natu <neelnatu@gmail.com>
Subject:   Re: dynamically calculating NKPT [was: Re: huge ktr buffer]
Message-ID:  <20130205161335.GM2522@kib.kiev.ua>
In-Reply-To: <CAMBSHm_%2B4k765JMgOKgqzc52bE9dijdC6gpBzAgF3xEm1JeRfQ@mail.gmail.com>
References:  <CAFgRE9F4JMutV9jJ_m7_9va67xiX4YXMT%2BRm6rUoDPMPymsg4w@mail.gmail.com> <20130205151413.GL2522@kib.kiev.ua> <CAMBSHm_%2B4k765JMgOKgqzc52bE9dijdC6gpBzAgF3xEm1JeRfQ@mail.gmail.com>

next in thread | previous in thread | raw e-mail | index | archive | help

--ZKQlerlNKW0xCYkU
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On Tue, Feb 05, 2013 at 07:45:24AM -0800, mdf@FreeBSD.org wrote:
> On Tue, Feb 5, 2013 at 7:14 AM, Konstantin Belousov <kostikbel@gmail.com> wrote:
> > On Mon, Feb 04, 2013 at 03:05:15PM -0800, Neel Natu wrote:
> >> Hi,
> >>
> >> I have a patch to dynamically calculate NKPT for amd64 kernels. This
> >> should fix the various issues that people pointed out in the email
> >> thread.
> >>
> >> Please review and let me know if there are any objections to committing this.
> >>
> >> Also, thanks to Alan (alc@) for reviewing and providing feedback on
> >> the initial version of the patch.
> >>
> >> Patch (also available at http://people.freebsd.org/~neel/patches/nkpt_diff.txt):
> >>
> >> Index: sys/amd64/include/pmap.h
> >> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> >> --- sys/amd64/include/pmap.h  (revision 246277)
> >> +++ sys/amd64/include/pmap.h  (working copy)
> >> @@ -113,13 +113,7 @@
> >>       ((unsigned long)(l2) << PDRSHIFT) | \
> >>       ((unsigned long)(l1) << PAGE_SHIFT))
> >>
> >> -/* Initial number of kernel page tables. */
> >> -#ifndef NKPT
> >> -#define      NKPT            32
> >> -#endif
> >> -
> >>  #define NKPML4E              1               /* number of kernel PML4=
 slots */
> >> -#define NKPDPE               howmany(NKPT, NPDEPG)/* number of kernel=
 PDP slots */
> >>
> >>  #define      NUPML4E         (NPML4EPG/2)    /* number of userland PM=
L4 pages */
> >>  #define      NUPDPE          (NUPML4E*NPDPEPG)/* number of userland P=
DP pages */
> >> @@ -181,6 +175,7 @@
> >>  #define      PML4map         ((pd_entry_t *)(addr_PML4map))
> >>  #define      PML4pml4e       ((pd_entry_t *)(addr_PML4pml4e))
> >>
> >> +extern int nkpt;             /* Initial number of kernel page tables =
*/
> >>  extern u_int64_t KPDPphys;   /* physical address of kernel level 3 */
> >>  extern u_int64_t KPML4phys;  /* physical address of kernel level 4 */
> >>
> >> Index: sys/amd64/amd64/minidump_machdep.c
> >> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> >> --- sys/amd64/amd64/minidump_machdep.c        (revision 246277)
> >> +++ sys/amd64/amd64/minidump_machdep.c        (working copy)
> >> @@ -232,7 +232,7 @@
> >>       /* Walk page table pages, set bits in vm_page_dump */
> >>       pmapsize =3D 0;
> >>       pdp =3D (uint64_t *)PHYS_TO_DMAP(KPDPphys);
> >> -     for (va =3D VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NB=
PDR,
> >> +     for (va =3D VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NB=
PDR,
> >>           kernel_vm_end); ) {
> >>               /*
> >>                * We always write a page, even if it is zero. Each
> >> @@ -364,7 +364,7 @@
> >>       /* Dump kernel page directory pages */
> >>       bzero(fakepd, sizeof(fakepd));
> >>       pdp =3D (uint64_t *)PHYS_TO_DMAP(KPDPphys);
> >> -     for (va =3D VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + NKPT * NB=
PDR,
> >> +     for (va =3D VM_MIN_KERNEL_ADDRESS; va < MAX(KERNBASE + nkpt * NB=
PDR,
> >>           kernel_vm_end); va +=3D NBPDP) {
> >>               i =3D (va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1);
> >>
> >> Index: sys/amd64/amd64/pmap.c
> >> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> >> --- sys/amd64/amd64/pmap.c    (revision 246277)
> >> +++ sys/amd64/amd64/pmap.c    (working copy)
> >> @@ -202,6 +202,10 @@
> >>  vm_offset_t virtual_avail;   /* VA of first avail page (after kernel =
bss) */
> >>  vm_offset_t virtual_end;     /* VA of last avail page (end of kernel =
AS) */
> >>
> >> +int nkpt;
> >> +SYSCTL_INT(_machdep, OID_AUTO, nkpt, CTLFLAG_RD, &nkpt, 0,
> >> +    "Number of kernel page table pages allocated on bootup");
> >> +
> >>  static int ndmpdp;
> >>  static vm_paddr_t dmaplimit;
> >>  vm_offset_t kernel_vm_end =3D VM_MIN_KERNEL_ADDRESS;
> >> @@ -495,17 +499,42 @@
> >>
> >>  CTASSERT(powerof2(NDMPML4E));
> >>
> >> +/* number of kernel PDP slots */
> >> +#define      NKPDPE(ptpgs)           howmany((ptpgs), NPDEPG)
> >> +
> >>  static void
> >> +nkpt_init(vm_paddr_t addr)
> >> +{
> >> +     int pt_pages;
> >> +
> >> +#ifdef NKPT
> >> +     pt_pages =3D NKPT;
> >> +#else
> >> +     pt_pages =3D howmany(addr, 1 << PDRSHIFT);
> >> +     pt_pages +=3D NKPDPE(pt_pages);
> >> +
> >> +     /*
> >> +      * Add some slop beyond the bare minimum required for bootstrapp=
ing
> >> +      * the kernel.
> >> +      *
> >> +      * This is quite important when allocating KVA for kernel module=
s.
> >> +      * The modules are required to be linked in the negative 2GB of
> >> +      * the address space.  If we run out of KVA in this region then
> >> +      * pmap_growkernel() will need to allocate page table pages to m=
ap
> >> +      * the entire 512GB of KVA space which is an unnecessary tax on
> >> +      * physical memory.
> >> +      */
> >> +     pt_pages +=3D 4;          /* 8MB additional slop for kernel modu=
les */
> > 8MB might be too low. I just checked one of my machines with a fully
> > modularized kernel; it takes slightly more than 6 MB to load 50 modules.
> > I think that 16MB would be safer, but it probably needs to be scaled
> > down based on the available phys memory. An amd64 kernel could still be
> > booted on a 128MB machine.
>
> Is there no way to not map the entire 512GB?  Otherwise this patch
> could really hose some vendors.  E.g. the kernel module for the OneFS
> file system is around 8MB all by itself.
No, I do not think that this patch would hose somebody with the 8MB
module, esp. if the slack is increased.

But yes, I believe it is possible to note that the growth happens after
the KERNBASE point and only allocate the page tables for this region.
We would then need to not update kernel_vm_end, probably creating
some other variable to keep track of the other tail.

>
> I found when we moved from FreeBSD 6 to 7 that the NKPT of 32 was
> insufficient for our system to even boot so I put it back to 240 (I
> didn't want to spend a lot of time playing).  At that time our module
> was loaded by the boot loader; now we do it during init to save some
> seconds on boot.  But we're probably not the only ones with a large
> kernel module.
>
> Cheers,
> matthew

--ZKQlerlNKW0xCYkU
Content-Type: application/pgp-signature

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.19 (FreeBSD)

iQIcBAEBAgAGBQJRES+vAAoJEJDCuSvBvK1BMmwP/1k8k4vPKEcDet3jQUN4JZu9
KAOoffF0GcJpRMEwvnm5ouGtNleCNeHYk2nlaQQaEUgh9A9qMBHtAE20+cLcTSvw
TLjwTu+3GUxB1hljNobyNgDuNYaT32M8JPecH0VBY3z+uPd8ZM2i/eUHM6a25n2C
0SI1+FmXApDJh27nONzUwwgnkIE/Ak40STJVpXSW035QLoun+wYrlq2ut0jKFTAW
2tzx5+D//Zc/PsBnTxVPnk8PjTuj0lXnPBYG+ODYuXZe3QkGf7P3zfNLKD1BL8JQ
NAd53eduaoGQYkSgBn87HPW5UW9f3lprgSqkcZxh5x3iZoEWOc/dZXnFTssmbPbA
ox+JPwENq3P//SWWwv7C7Um3UIoagGLeiCk2Gq9dkW3HQwd3rpAlJ66BkbAnZ+lR
+RsRiiESPR4uwq0jgGbW+OtTHfNUAtpk1TZkAlJx7wNZdVsJY0UmIpeDXW6mAj8v
WlUMmW9a7DTIrFcctZ+NdFphM96Tk43lZ/9zyJg5qJNdASq+Aw4RMN9onZ0ssNvl
0v/aQCsF2bjDSFuv5uuUtst68oNCrB7BUs1mXqyXFKSq7BEonTuIfoUKOUX7UrVb
puQCTH/bhMsfqpA6EtWp0iN+hCzwZwrSPq8R0C3wejNtqwozJ5mToMBUjZpFO9EF
9iClrroqW5FCHGfUfRPz
=AJvn
-----END PGP SIGNATURE-----

--ZKQlerlNKW0xCYkU--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20130205161335.GM2522>