Date: Mon, 17 Oct 2011 10:24:51 +0200 From: Maurizio Vairani <maurizio.vairani@cloverinformatica.it> To: Gleb Kurtsou <gleb.kurtsou@gmail.com>, freebsd-fs@freebsd.org Subject: [TMPFS] patch for FreeBSD 8.2-RELEASE Message-ID: <4E9BE653.7070008@cloverinformatica.it>
next in thread | raw e-mail | index | archive | help
Hi list, Gleb Kurtsou in this thread http://lists.freebsd.org/pipermail/freebsd-fs/2011-October/012650.html proposes a patch for solving the well-known TMPFS problem: the free space drops down to zero when ZFS consumes the kernel memory and there isn't enough free swap space. Unfortunately the patch is not directly applicable to FreeBSD 8.2-RELEASE, so I have modified the source code using Gleb's patch as a reference, recompiled and installed the new driver. I have been testing it for a week on my AMD64 16G RAM server, reducing the swap space from 28G to 8G, 4G or none, and it seems that the problem is solved. Regards -Maurizio /sys/fs/tmpfs/tmpfs.h =================================================================== --- tmpfs.h.orig 2010-12-21 18:09:00.000000000 +0100 (v 1.17.2.2.2.1) +++ tmpfs.h 2011-10-13 15:16:26.900043000 +0200 (working copy) @@ -304,10 +304,30 @@ #define TMPFS_NODE_LOCK(node) mtx_lock(&(node)->tn_interlock) #define TMPFS_NODE_UNLOCK(node) mtx_unlock(&(node)->tn_interlock) -#define TMPFS_NODE_MTX(node) (&(node)->tn_interlock) +#define TMPFS_NODE_MTX(node) (&(node)->tn_interlock) + +#ifdef INVARIANTS +#define TMPFS_ASSERT_LOCKED(node) do { \ + MPASS(node != NULL); \ + MPASS(node->tn_vnode != NULL); \ + if (!VOP_ISLOCKED(node->tn_vnode) && \ + !mtx_owned(TMPFS_NODE_MTX(node))) \ + panic("tmpfs: node is not locked: %p", node); \ + } while (0) +#define TMPFS_ASSERT_ELOCKED(node) do { \ + MPASS((node) != NULL); \ + MPASS((node)->tn_vnode != NULL); \ + mtx_assert(TMPFS_NODE_MTX(node), MA_OWNED); \ + ASSERT_VOP_LOCKED((node)->tn_vnode, "tmpfs"); \ + } while (0) +#else +#define TMPFS_ASSERT_LOCKED(node) (void)0 +#define TMPFS_ASSERT_ELOCKED(node) (void)0 +#endif #define TMPFS_VNODE_ALLOCATING 1 #define TMPFS_VNODE_WANT 2 +#define TMPFS_VNODE_DOOMED 4 /* --------------------------------------------------------------------- */ /* @@ -467,65 +487,30 @@ * Memory management stuff. */ -/* Amount of memory pages to reserve for the system (e.g., to not use by - * tmpfs). 
- * XXX: Should this be tunable through sysctl, for instance? */ -#define TMPFS_PAGES_RESERVED (4 * 1024 * 1024 / PAGE_SIZE) - /* - * Returns information about the number of available memory pages, - * including physical and virtual ones. - * - * If 'total' is TRUE, the value returned is the total amount of memory - * pages configured for the system (either in use or free). - * If it is FALSE, the value returned is the amount of free memory pages. - * - * Remember to remove TMPFS_PAGES_RESERVED from the returned value to avoid - * excessive memory usage. - * + * Number of reserved swap pages should not be lower than + * swap_pager_almost_full high water mark. */ +#define TMPFS_SWAP_MINRESERVED 1024 + static __inline size_t -tmpfs_mem_info(void) +tmpfs_pages_max(struct tmpfs_mount *tmp) { - size_t size; - - size = swap_pager_avail + cnt.v_free_count + cnt.v_inactive_count; - size -= size > cnt.v_wire_count ? cnt.v_wire_count : size; - return size; + return (tmp->tm_pages_max); } -/* Returns the maximum size allowed for a tmpfs file system. This macro - * must be used instead of directly retrieving the value from tm_pages_max. - * The reason is that the size of a tmpfs file system is dynamic: it lets - * the user store files as long as there is enough free memory (including - * physical memory and swap space). Therefore, the amount of memory to be - * used is either the limit imposed by the user during mount time or the - * amount of available memory, whichever is lower. To avoid consuming all - * the memory for a given mount point, the system will always reserve a - * minimum of TMPFS_PAGES_RESERVED pages, which is also taken into account - * by this macro (see above). */ static __inline size_t -TMPFS_PAGES_MAX(struct tmpfs_mount *tmp) +tmpfs_pages_used(struct tmpfs_mount *tmp) { - size_t freepages; - - freepages = tmpfs_mem_info(); - freepages -= freepages < TMPFS_PAGES_RESERVED ? 
- freepages : TMPFS_PAGES_RESERVED; - - return MIN(tmp->tm_pages_max, freepages + tmp->tm_pages_used); + const size_t node_size = sizeof(struct tmpfs_node) + + sizeof(struct tmpfs_dirent); + size_t meta_pages; + + meta_pages = howmany((uintmax_t)tmp->tm_nodes_inuse * node_size, + PAGE_SIZE); + return (meta_pages + tmp->tm_pages_used); } -/* Returns the available space for the given file system. */ -#define TMPFS_META_PAGES(tmp) (howmany((tmp)->tm_nodes_inuse * (sizeof(struct tmpfs_node) \ - + sizeof(struct tmpfs_dirent)), PAGE_SIZE)) -#define TMPFS_FILE_PAGES(tmp) ((tmp)->tm_pages_used) - -#define TMPFS_PAGES_AVAIL(tmp) (TMPFS_PAGES_MAX(tmp) > \ - TMPFS_META_PAGES(tmp)+TMPFS_FILE_PAGES(tmp)? \ - TMPFS_PAGES_MAX(tmp) - TMPFS_META_PAGES(tmp) \ - - TMPFS_FILE_PAGES(tmp):0) - #endif /* --------------------------------------------------------------------- */ /sys/fs/tmpfs/tmpfs_subr.c =================================================================== --- tmpfs_subr.c.orig 2010-12-21 18:09:00.000000000 +0100 (v 1.23.2.2.2.1) +++ tmpfs_subr.c 2011-10-06 14:31:26.007163000 +0200 (working copy) @@ -41,6 +41,7 @@ #include <sys/priv.h> #include <sys/proc.h> #include <sys/stat.h> +#include <sys/sysctl.h> #include <sys/systm.h> #include <sys/vnode.h> #include <sys/vmmeter.h> @@ -55,6 +56,60 @@ #include <fs/tmpfs/tmpfs_fifoops.h> #include <fs/tmpfs/tmpfs_vnops.h> +static long tmpfs_swap_reserved = TMPFS_SWAP_MINRESERVED * 2; + +SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "tmpfs memory file system"); + +static int +sysctl_swap_reserved(SYSCTL_HANDLER_ARGS) +{ + int error; + long pages, bytes; + + pages = *(long *)arg1; + bytes = pages * PAGE_SIZE; + + error = sysctl_handle_long(oidp, &bytes, 0, req); + if (error || !req->newptr) + return (error); + + pages = bytes / PAGE_SIZE; + if (pages < TMPFS_SWAP_MINRESERVED) + return (EINVAL); + + *(long *)arg1 = pages; + return (0); +} + +SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, swap_reserved, CTLTYPE_LONG|CTLFLAG_RW, + 
&tmpfs_swap_reserved, 0, sysctl_swap_reserved, "L", "reserved swap space"); + +static __inline size_t +tmpfs_pages_avail(struct tmpfs_mount *tmp, size_t req_pages) +{ + vm_ooffset_t avail; + + if (tmpfs_pages_max(tmp) < tmpfs_pages_used(tmp) + req_pages) + return (0); + + if (!vm_page_count_target()) + return (1); + + /* + * Fail if pagedaemon wasn't able to free desired number of pages and + * we are running out of swap. + */ + avail = swap_pager_avail - vm_paging_target() - req_pages; + if (avail < tmpfs_swap_reserved) { /* avail is signed */ + printf("tmpfs: low memory: available %jd, " + "paging target %d, requested %zd\n", + (intmax_t)swap_pager_avail, vm_paging_target(), req_pages); + return (0); + } + + return (1); +} + /* --------------------------------------------------------------------- */ /* @@ -95,6 +150,8 @@ if (tmp->tm_nodes_inuse > tmp->tm_nodes_max) return (ENOSPC); + if (tmpfs_pages_avail(tmp, 1) == 0) + return (ENOSPC); nnode = (struct tmpfs_node *)uma_zalloc_arg( tmp->tm_node_pool, tmp, M_WAITOK); @@ -882,7 +939,7 @@ newpages = round_page(newsize) / PAGE_SIZE; if (newpages > oldpages && - newpages - oldpages > TMPFS_PAGES_AVAIL(tmp)) { + tmpfs_pages_avail(tmp, newpages - oldpages) == 0) { error = ENOSPC; goto out; } /sys/fs/tmpfs/tmpfs_vfsops.c =================================================================== --- tmpfs_vfsops.c.orig 2010-12-21 18:09:00.000000000 +0100 (v 1.21.2.1.6.1) +++ tmpfs_vfsops.c 2011-10-07 14:10:15.137747000 +0200 (working copy) @@ -85,53 +85,6 @@ #define SWI_MAXMIB 3 -static u_int -get_swpgtotal(void) -{ - struct xswdev xsd; - char *sname = "vm.swap_info"; - int soid[SWI_MAXMIB], oid[2]; - u_int unswdev, total, dmmax, nswapdev; - size_t mibi, len; - - total = 0; - - len = sizeof(dmmax); - if (kernel_sysctlbyname(curthread, "vm.dmmax", &dmmax, &len, - NULL, 0, NULL, 0) != 0) - return total; - - len = sizeof(nswapdev); - if (kernel_sysctlbyname(curthread, "vm.nswapdev", - &nswapdev, &len, - NULL, 0, NULL, 0) != 0) - 
return total; - - mibi = (SWI_MAXMIB - 1) * sizeof(int); - oid[0] = 0; - oid[1] = 3; - - if (kernel_sysctl(curthread, oid, 2, - soid, &mibi, (void *)sname, strlen(sname), - NULL, 0) != 0) - return total; - - mibi = (SWI_MAXMIB - 1); - for (unswdev = 0; unswdev < nswapdev; ++unswdev) { - soid[mibi] = unswdev; - len = sizeof(struct xswdev); - if (kernel_sysctl(curthread, - soid, mibi + 1, &xsd, &len, NULL, 0, - NULL, 0) != 0) - return total; - if (len == sizeof(struct xswdev)) - total += (xsd.xsw_nblks - dmmax); - } - - /* Not Reached */ - return total; -} - /* --------------------------------------------------------------------- */ static int tmpfs_node_ctor(void *mem, int size, void *arg, int flags) @@ -179,14 +132,13 @@ static int tmpfs_mount(struct mount *mp) { + const size_t nodes_per_page = howmany(PAGE_SIZE, + sizeof(struct tmpfs_dirent) + sizeof(struct tmpfs_node)); struct tmpfs_mount *tmp; struct tmpfs_node *root; - size_t pages, mem_size; - ino_t nodes; + u_quad_t pages; + u_quad_t nodes_max, size_max, maxfilesize; int error; - /* Size counters. */ - ino_t nodes_max; - size_t size_max; /* Root node attributes. */ uid_t root_uid; @@ -223,42 +175,55 @@ if (mp->mnt_cred->cr_ruid != 0 || vfs_scanopt(mp->mnt_optnew, "mode", "%ho", &root_mode) != 1) root_mode = va.va_mode; - if (vfs_scanopt(mp->mnt_optnew, "inodes", "%d", &nodes_max) != 1) + if (vfs_scanopt(mp->mnt_optnew, "inodes", "%qu", &nodes_max) != 1) nodes_max = 0; if (vfs_scanopt(mp->mnt_optnew, "size", "%qu", &size_max) != 1) size_max = 0; - - /* Do not allow mounts if we do not have enough memory to preserve - * the minimum reserved pages. */ - mem_size = cnt.v_free_count + cnt.v_inactive_count + get_swpgtotal(); - mem_size -= mem_size > cnt.v_wire_count ? 
cnt.v_wire_count : mem_size; - if (mem_size < TMPFS_PAGES_RESERVED) + if (vfs_scanopt(mp->mnt_optnew, "maxfilesize", "%qu", &maxfilesize) != 0) + maxfilesize = 0; + /* + * XXX Deny mounts if pagedaemon wasn't able to recovery desired + * number of pages. + */ + if (vm_page_count_target()) return ENOSPC; /* Get the maximum number of memory pages this file system is * allowed to use, based on the maximum size the user passed in - * the mount structure. A value of zero is treated as if the - * maximum available space was requested. */ - if (size_max < PAGE_SIZE || size_max >= SIZE_MAX) - pages = SIZE_MAX; + * the mount structure. Use half of RAM by default. */ + if (size_max < PAGE_SIZE*4 || size_max > SIZE_MAX - PAGE_SIZE) + pages = cnt.v_page_count / 2; else pages = howmany(size_max, PAGE_SIZE); MPASS(pages > 0); + MPASS(pages < SIZE_MAX); - if (nodes_max <= 3) - nodes = 3 + pages * PAGE_SIZE / 1024; + if (pages < SIZE_MAX / PAGE_SIZE) + size_max = pages * PAGE_SIZE; else - nodes = nodes_max; - MPASS(nodes >= 3); + size_max = SIZE_MAX; + + if (nodes_max <= 3) { + if (pages < UINT32_MAX / nodes_per_page) + nodes_max = pages * nodes_per_page; + else + nodes_max = UINT32_MAX; + } + if (nodes_max > UINT32_MAX) + nodes_max = UINT32_MAX; + MPASS(nodes_max >= 3); + + if (maxfilesize < PAGE_SIZE || maxfilesize > size_max) + maxfilesize = size_max; /* Allocate the tmpfs mount structure and fill it. 
*/ tmp = (struct tmpfs_mount *)malloc(sizeof(struct tmpfs_mount), M_TMPFSMNT, M_WAITOK | M_ZERO); mtx_init(&tmp->allnode_lock, "tmpfs allnode lock", NULL, MTX_DEF); - tmp->tm_nodes_max = nodes; + tmp->tm_nodes_max = nodes_max; tmp->tm_nodes_inuse = 0; - tmp->tm_maxfilesize = (u_int64_t)(cnt.v_page_count + get_swpgtotal()) * PAGE_SIZE; + tmp->tm_maxfilesize = maxfilesize; LIST_INIT(&tmp->tm_nodes_used); tmp->tm_pages_max = pages; @@ -427,22 +392,23 @@ static int tmpfs_statfs(struct mount *mp, struct statfs *sbp) { - fsfilcnt_t freenodes; struct tmpfs_mount *tmp; + size_t used; tmp = VFS_TO_TMPFS(mp); sbp->f_iosize = PAGE_SIZE; sbp->f_bsize = PAGE_SIZE; - sbp->f_blocks = TMPFS_PAGES_MAX(tmp); - sbp->f_bavail = sbp->f_bfree = TMPFS_PAGES_AVAIL(tmp); - - freenodes = MIN(tmp->tm_nodes_max - tmp->tm_nodes_inuse, - TMPFS_PAGES_AVAIL(tmp) * PAGE_SIZE / sizeof(struct tmpfs_node)); - - sbp->f_files = freenodes + tmp->tm_nodes_inuse; - sbp->f_ffree = freenodes; + sbp->f_blocks = tmpfs_pages_max(tmp); + used = tmpfs_pages_used(tmp); + if (tmpfs_pages_max(tmp) <= used) + sbp->f_bavail = 0; + else + sbp->f_bavail = tmpfs_pages_max(tmp) - used; + sbp->f_bfree = sbp->f_bavail; + sbp->f_files = tmp->tm_nodes_max; + sbp->f_ffree = tmp->tm_nodes_max - tmp->tm_nodes_inuse; /* sbp->f_owner = tmp->tn_uid; */ return 0;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4E9BE653.7070008>