From owner-svn-src-head@freebsd.org Fri Jul 24 13:23:33 2020 Return-Path: Delivered-To: svn-src-head@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 494E2365566; Fri, 24 Jul 2020 13:23:33 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4BCqdx1Bd7z4WMh; Fri, 24 Jul 2020 13:23:33 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 0C2A8B215; Fri, 24 Jul 2020 13:23:33 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id 06ODNWvg008517; Fri, 24 Jul 2020 13:23:32 GMT (envelope-from mjg@FreeBSD.org) Received: (from mjg@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id 06ODNWUr008515; Fri, 24 Jul 2020 13:23:32 GMT (envelope-from mjg@FreeBSD.org) Message-Id: <202007241323.06ODNWUr008515@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: mjg set sender to mjg@FreeBSD.org using -f From: Mateusz Guzik Date: Fri, 24 Jul 2020 13:23:32 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r363472 - head/sys/vm X-SVN-Group: head X-SVN-Commit-Author: mjg X-SVN-Commit-Paths: head/sys/vm X-SVN-Commit-Revision: 363472 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.33 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 24 Jul 2020 13:23:33 -0000 Author: mjg Date: Fri Jul 24 13:23:32 2020 New Revision: 363472 URL: https://svnweb.freebsd.org/changeset/base/363472 Log: vm: fix swap reservation leak and clean up surrounding code The code did not subtract from the global counter if per-uid reservation failed. Cleanup highlights: - load overcommit once - move per-uid manipulation to dedicated routines - don't fetch wire count if requested size is below the limit - convert return type from int to bool - ifdef the routines with _KERNEL to keep vm.h compilable by userspace Reviewed by: kib (previous version) Differential Revision: https://reviews.freebsd.org/D25787 Modified: head/sys/vm/swap_pager.c head/sys/vm/vm.h Modified: head/sys/vm/swap_pager.c ============================================================================== --- head/sys/vm/swap_pager.c Fri Jul 24 08:40:04 2020 (r363471) +++ head/sys/vm/swap_pager.c Fri Jul 24 13:23:32 2020 (r363472) @@ -202,101 +202,141 @@ sysctl_page_shift(SYSCTL_HANDLER_ARGS) return (sysctl_handle_64(oidp, &newval, 0, req)); } -int +static bool +swap_reserve_by_cred_rlimit(u_long pincr, struct ucred *cred, int oc) +{ + struct uidinfo *uip; + u_long prev; + + uip = cred->cr_ruidinfo; + + prev = atomic_fetchadd_long(&uip->ui_vmsize, pincr); + if ((oc & SWAP_RESERVE_RLIMIT_ON) != 0 && + prev + pincr > lim_cur(curthread, RLIMIT_SWAP) && + priv_check(curthread, PRIV_VM_SWAP_NORLIMIT) != 0) { + prev = atomic_fetchadd_long(&uip->ui_vmsize, -pincr); + KASSERT(prev >= pincr, ("negative vmsize for uid = %d\n", uip->ui_uid)); + return (false); + } + return (true); +} + +static void +swap_release_by_cred_rlimit(u_long pdecr, struct ucred *cred) +{ + struct uidinfo *uip; +#ifdef INVARIANTS + u_long prev; +#endif + + uip = cred->cr_ruidinfo; + +#ifdef INVARIANTS + prev = atomic_fetchadd_long(&uip->ui_vmsize, -pdecr); + KASSERT(prev >= pdecr, ("negative vmsize for uid = %d\n", uip->ui_uid)); +#else + atomic_subtract_long(&uip->ui_vmsize, pdecr); +#endif +} + +static void +swap_reserve_force_rlimit(u_long pincr, struct ucred *cred) +{ + struct uidinfo *uip; + + uip = cred->cr_ruidinfo; + atomic_add_long(&uip->ui_vmsize, pincr); +} + +bool swap_reserve(vm_ooffset_t incr) { return (swap_reserve_by_cred(incr, curthread->td_ucred)); } -int +bool swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred) { u_long r, s, prev, pincr; - int res, error; +#ifdef RACCT + int error; +#endif + int oc; static int curfail; static struct timeval lastfail; - struct uidinfo *uip; - uip = cred->cr_ruidinfo; - KASSERT((incr & PAGE_MASK) == 0, ("%s: incr: %ju & PAGE_MASK", __func__, (uintmax_t)incr)); #ifdef RACCT - if (racct_enable) { + if (RACCT_ENABLED()) { PROC_LOCK(curproc); error = racct_add(curproc, RACCT_SWAP, incr); PROC_UNLOCK(curproc); if (error != 0) - return (0); + return (false); } #endif pincr = atop(incr); - res = 0; prev = atomic_fetchadd_long(&swap_reserved, pincr); r = prev + pincr; - if (overcommit & SWAP_RESERVE_ALLOW_NONWIRED) { - s = vm_cnt.v_page_count - vm_cnt.v_free_reserved - + s = swap_total; + oc = atomic_load_int(&overcommit); + if (r > s && (oc & SWAP_RESERVE_ALLOW_NONWIRED) != 0) { + s += vm_cnt.v_page_count - vm_cnt.v_free_reserved - vm_wire_count(); - } else - s = 0; - s += swap_total; - if ((overcommit & SWAP_RESERVE_FORCE_ON) == 0 || r <= s || - (error = priv_check(curthread, PRIV_VM_SWAP_NOQUOTA)) == 0) { - res = 1; - } else { + } + if ((oc & SWAP_RESERVE_FORCE_ON) != 0 && r > s && + priv_check(curthread, PRIV_VM_SWAP_NOQUOTA) != 0) { prev = atomic_fetchadd_long(&swap_reserved, -pincr); - if (prev < pincr) - panic("swap_reserved < incr on overcommit fail"); + KASSERT(prev >= pincr, ("swap_reserved < incr on overcommit fail")); + goto out_error; } - if (res) { - prev = atomic_fetchadd_long(&uip->ui_vmsize, pincr); - if ((overcommit & SWAP_RESERVE_RLIMIT_ON) != 0 && - prev + pincr > lim_cur(curthread, RLIMIT_SWAP) && - priv_check(curthread, PRIV_VM_SWAP_NORLIMIT)) { - res = 0; - prev = atomic_fetchadd_long(&uip->ui_vmsize, -pincr); - if (prev < pincr) - panic("uip->ui_vmsize < incr on overcommit fail"); - } + + if (!swap_reserve_by_cred_rlimit(pincr, cred, oc)) { + prev = atomic_fetchadd_long(&swap_reserved, -pincr); + KASSERT(prev >= pincr, ("swap_reserved < incr on overcommit fail")); + goto out_error; } - if (!res && ppsratecheck(&lastfail, &curfail, 1)) { + + return (true); + +out_error: + if (ppsratecheck(&lastfail, &curfail, 1)) { printf("uid %d, pid %d: swap reservation for %jd bytes failed\n", - uip->ui_uid, curproc->p_pid, incr); + cred->cr_ruidinfo->ui_uid, curproc->p_pid, incr); } - #ifdef RACCT - if (racct_enable && !res) { + if (RACCT_ENABLED()) { PROC_LOCK(curproc); racct_sub(curproc, RACCT_SWAP, incr); PROC_UNLOCK(curproc); } #endif - return (res); + return (false); } void swap_reserve_force(vm_ooffset_t incr) { - struct uidinfo *uip; u_long pincr; KASSERT((incr & PAGE_MASK) == 0, ("%s: incr: %ju & PAGE_MASK", __func__, (uintmax_t)incr)); - PROC_LOCK(curproc); #ifdef RACCT - if (racct_enable) + if (RACCT_ENABLED()) { + PROC_LOCK(curproc); racct_add_force(curproc, RACCT_SWAP, incr); + PROC_UNLOCK(curproc); + } #endif pincr = atop(incr); atomic_add_long(&swap_reserved, pincr); - uip = curproc->p_ucred->cr_ruidinfo; - atomic_add_long(&uip->ui_vmsize, pincr); - PROC_UNLOCK(curproc); + swap_reserve_force_rlimit(pincr, curthread->td_ucred); } void @@ -313,22 +353,23 @@ swap_release(vm_ooffset_t decr) void swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred) { - u_long prev, pdecr; - struct uidinfo *uip; + u_long pdecr; +#ifdef INVARIANTS + u_long prev; +#endif - uip = cred->cr_ruidinfo; - KASSERT((decr & PAGE_MASK) == 0, ("%s: decr: %ju & PAGE_MASK", __func__, (uintmax_t)decr)); pdecr = atop(decr); +#ifdef INVARIANTS prev = atomic_fetchadd_long(&swap_reserved, -pdecr); - if (prev < pdecr) - panic("swap_reserved < decr"); + KASSERT(prev >= pdecr, ("swap_reserved < decr")); +#else + atomic_subtract_long(&swap_reserved, pdecr); +#endif - prev = atomic_fetchadd_long(&uip->ui_vmsize, -pdecr); - if (prev < pdecr) - printf("negative vmsize for uid = %d\n", uip->ui_uid); + swap_release_by_cred_rlimit(pdecr, cred); #ifdef RACCT if (racct_enable) racct_sub_cred(cred, RACCT_SWAP, decr); Modified: head/sys/vm/vm.h ============================================================================== --- head/sys/vm/vm.h Fri Jul 24 08:40:04 2020 (r363471) +++ head/sys/vm/vm.h Fri Jul 24 13:23:32 2020 (r363472) @@ -150,13 +150,15 @@ extern int old_mlock; extern int vm_ndomains; +#ifdef _KERNEL struct ucred; -int swap_reserve(vm_ooffset_t incr); -int swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred); +bool swap_reserve(vm_ooffset_t incr); +bool swap_reserve_by_cred(vm_ooffset_t incr, struct ucred *cred); void swap_reserve_force(vm_ooffset_t incr); void swap_release(vm_ooffset_t decr); void swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred); void swapper(void); +#endif #endif /* VM_H */