From owner-freebsd-fs@FreeBSD.ORG Wed Dec 9 15:19:10 2009 Return-Path: Delivered-To: freebsd-fs@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 4257110656A3 for ; Wed, 9 Dec 2009 15:19:10 +0000 (UTC) (envelope-from jhb@freebsd.org) Received: from cyrus.watson.org (cyrus.watson.org [65.122.17.42]) by mx1.freebsd.org (Postfix) with ESMTP id DD28B8FC2D for ; Wed, 9 Dec 2009 15:19:09 +0000 (UTC) Received: from bigwig.baldwin.cx (66.111.2.69.static.nyinternet.net [66.111.2.69]) by cyrus.watson.org (Postfix) with ESMTPSA id 71D7F46B45; Wed, 9 Dec 2009 10:19:09 -0500 (EST) Received: from jhbbsd.localnet (unknown [209.249.190.9]) by bigwig.baldwin.cx (Postfix) with ESMTPA id 974158A020; Wed, 9 Dec 2009 10:19:08 -0500 (EST) From: John Baldwin To: Matt Reimer Date: Wed, 9 Dec 2009 10:15:15 -0500 User-Agent: KMail/1.12.1 (FreeBSD/7.2-CBSD-20091103; KDE/4.3.1; amd64; ; ) References: <200912080813.58600.jhb@freebsd.org> In-Reply-To: MIME-Version: 1.0 Content-Type: Text/Plain; charset="iso-8859-15" Content-Transfer-Encoding: 7bit Message-Id: <200912091015.15450.jhb@freebsd.org> X-Greylist: Sender succeeded SMTP AUTH, not delayed by milter-greylist-4.0.1 (bigwig.baldwin.cx); Wed, 09 Dec 2009 10:19:08 -0500 (EST) X-Virus-Scanned: clamav-milter 0.95.1 at bigwig.baldwin.cx X-Virus-Status: Clean X-Spam-Status: No, score=-2.5 required=4.2 tests=AWL,BAYES_00,RDNS_NONE autolearn=no version=3.2.5 X-Spam-Checker-Version: SpamAssassin 3.2.5 (2008-06-10) on bigwig.baldwin.cx Cc: freebsd-fs@freebsd.org Subject: Re: PATCH: increase heap size for (gpt)zfsboot X-BeenThere: freebsd-fs@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: Filesystems List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 09 Dec 2009 15:19:10 -0000 On Tuesday 08 December 2009 6:06:14 pm Matt Reimer wrote: > On Tue, Dec 8, 2009 at 5:13 AM, John Baldwin wrote: > > On Monday 07 December 2009 7:01:03 pm Matt Reimer wrote: > >> Enlarge the heap size for gptzfsboot and zfsboot. This is necessary so > >> the ZFS code has enough memory to handle decompression and error > >> recovery. > >> > >> Before this patch the heap grew from the end of the (gpt)zfsboot code > >> and static data up to 640KB, possibly overrunning the stack. Now the > >> heap is located at 16MB-64MB. > >> > >> Note that this requires machines with at least 64MB RAM, but this is > >> not likely to be a problem on any machine running ZFS. > >> > >> Sponsored by: VPOP Technologies, Inc. > >> > >> Matt Reimer > > > > Unfortunately the 16M - 64M range may not all be useable RAM. It may contain > > ACPI tables, etc. A robust approach would involve walking the SMAP, etc. I > > just committed some changes to sys/boot/i386/loader/biosmem.c that make it > > smarter about choosing a heap region. I suggest adopting that algorithm for > > figuring out a safe heap range. zfsboot and gptzfsboot should have enough > > free space to take the bios_getmem() function. You can increase the minimum > > heap size beyond 3M if desired (though 3M is the minimum the loader will > > guarantee currently, and if ZFS needs more than that it likely needs to be > > changed in both places). > > Thanks John. I dropped the bios_getmem() into zfsboot.c and it works > for me. How's the attached patch look to you? I tweaked it slightly (memtop* aren't needed for gptboot, and memsize() isn't used anymore). Can you test the updated version below? Index: i386/zfsboot/zfsboot.c =================================================================== --- i386/zfsboot/zfsboot.c (revision 200271) +++ i386/zfsboot/zfsboot.c (working copy) @@ -27,6 +27,7 @@ #include #include +#include #include #include @@ -90,8 +91,6 @@ #define ARGS 0x900 #define NOPT 14 #define NDEV 3 -#define MEM_BASE 0x12 -#define MEM_EXT 0x15 #define V86_CY(x) ((x) & 1) #define V86_ZR(x) ((x) & 0x40) @@ -149,6 +148,19 @@ static uint32_t bootdev; static uint8_t ioctrl = IO_KEYBOARD; +vm_offset_t high_heap_base; +uint32_t bios_basemem, bios_extmem, high_heap_size; + +static struct bios_smap smap; + +/* + * The minimum amount of memory to reserve in bios_extmem for the heap. + */ +#define HEAP_MIN (3 * 1024 * 1024) + +static char *heap_next; +static char *heap_end; + /* Buffers that must not span a 64k boundary. */ #define READ_BUF_SIZE 8192 struct dmadat { @@ -162,7 +174,7 @@ static int parse(void); static void printf(const char *,...); static void putchar(int); -static uint32_t memsize(void); +static void bios_getmem(void); static int drvread(struct dsk *, void *, daddr_t, unsigned); static int keyhit(unsigned); static int xputc(int); @@ -237,14 +249,6 @@ static void * malloc(size_t n) { - static char *heap_next; - static char *heap_end; - - if (!heap_next) { - heap_next = (char *) dmadat + sizeof(*dmadat); - heap_end = (char *) (640*1024); - } - char *p = heap_next; if (p + n > heap_end) { printf("malloc failure\n"); @@ -344,15 +348,92 @@ return 0; } -static inline uint32_t -memsize(void) +static inline void +bios_getmem(void) { - v86.addr = MEM_EXT; - v86.eax = 0x8800; - v86int(); - return v86.eax; -} + uint64_t size; + /* Parse system memory map */ + v86.ebx = 0; + do { + v86.ctl = V86_FLAGS; + v86.addr = 0x15; /* int 0x15 function 0xe820*/ + v86.eax = 0xe820; + v86.ecx = sizeof(struct bios_smap); + v86.edx = SMAP_SIG; + v86.es = VTOPSEG(&smap); + v86.edi = VTOPOFF(&smap); + v86int(); + if ((v86.efl & 1) || (v86.eax != SMAP_SIG)) + break; + /* look for a low-memory segment that's large enough */ + if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0) && + (smap.length >= (512 * 1024))) + bios_basemem = smap.length; + /* look for the first segment in 'extended' memory */ + if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0x100000)) { + bios_extmem = smap.length; + } + + /* + * Look for the largest segment in 'extended' memory beyond + * 1MB but below 4GB. + */ + if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base > 0x100000) && + (smap.base < 0x100000000ull)) { + size = smap.length; + + /* + * If this segment crosses the 4GB boundary, truncate it. + */ + if (smap.base + size > 0x100000000ull) + size = 0x100000000ull - smap.base; + + if (size > high_heap_size) { + high_heap_size = size; + high_heap_base = smap.base; + } + } + } while (v86.ebx != 0); + + /* Fall back to the old compatibility function for base memory */ + if (bios_basemem == 0) { + v86.ctl = 0; + v86.addr = 0x12; /* int 0x12 */ + v86int(); + + bios_basemem = (v86.eax & 0xffff) * 1024; + } + + /* Fall back through several compatibility functions for extended memory */ + if (bios_extmem == 0) { + v86.ctl = V86_FLAGS; + v86.addr = 0x15; /* int 0x15 function 0xe801*/ + v86.eax = 0xe801; + v86int(); + if (!(v86.efl & 1)) { + bios_extmem = ((v86.ecx & 0xffff) + ((v86.edx & 0xffff) * 64)) * 1024; + } + } + if (bios_extmem == 0) { + v86.ctl = 0; + v86.addr = 0x15; /* int 0x15 function 0x88*/ + v86.eax = 0x8800; + v86int(); + bios_extmem = (v86.eax & 0xffff) * 1024; + } + + /* + * If we have extended memory and did not find a suitable heap + * region in the SMAP, use the last 3MB of 'extended' memory as a + * high heap candidate. + */ + if (bios_extmem >= HEAP_MIN && high_heap_size < HEAP_MIN) { + high_heap_size = HEAP_MIN; + high_heap_base = bios_extmem + 0x100000 - HEAP_MIN; + } +} + static inline void getstr(void) { @@ -536,6 +617,16 @@ off_t off; struct dsk *dsk; + bios_getmem(); + + if (high_heap_size > 0) { + heap_end = PTOV(high_heap_base + high_heap_size); + heap_next = PTOV(high_heap_base); + } else { + heap_next = (char *) dmadat + sizeof(*dmadat); + heap_end = (char *) PTOV(bios_basemem); + } + dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base); v86.ctl = V86_FLAGS; @@ -550,8 +641,8 @@ bootinfo.bi_version = BOOTINFO_VERSION; bootinfo.bi_size = sizeof(bootinfo); - bootinfo.bi_basemem = 0; /* XXX will be filled by loader or kernel */ - bootinfo.bi_extmem = memsize(); + bootinfo.bi_basemem = bios_basemem / 1024; + bootinfo.bi_extmem = bios_extmem / 1024; bootinfo.bi_memsizes_valid++; bootinfo.bi_bios_dev = dsk->drive; Index: zfs/zfsimpl.c =================================================================== --- zfs/zfsimpl.c (revision 200271) +++ zfs/zfsimpl.c (working copy) @@ -51,7 +51,7 @@ static char *zap_scratch; static char *zfs_temp_buf, *zfs_temp_end, *zfs_temp_ptr; -#define TEMP_SIZE (1*SPA_MAXBLOCKSIZE) +#define TEMP_SIZE (1024 * 1024) static int zio_read(spa_t *spa, const blkptr_t *bp, void *buf); -- John Baldwin