Skip site navigation (1) Skip section navigation (2)
Date:      Tue, 21 Apr 2026 15:43:14 +0000
Message-ID:  <69e79b12.35f92.3882be48@gitrepo.freebsd.org>

index | next in thread | raw e-mail

The branch stable/14 has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=4c0e5e3cc441cf7354d15361a0903120cdd23fd9

commit 4c0e5e3cc441cf7354d15361a0903120cdd23fd9
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2026-03-31 13:37:43 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2026-04-21 15:43:09 +0000

    pkru: Fix handling of 1GB largepage mappings
    
    pmap_pkru_update_range() did not handle the case where a PDPE has PG_PS
    set.  More generally, the SET_PKRU and CLEAR_PKRU sysarch
    implementations did not check whether the request covers a "boundary" vm
    map entry.  Fix this, add the missing PG_PS test, and add some tests.
    
    Approved by:    so
    Security:       FreeBSD-SA-26:11.amd64
    Security:       CVE-2026-6386
    Reported by:    Nicholas Carlini <npc@anthropic.com>
    Reviewed by:    kib, alc
    Differential Revision:  https://reviews.freebsd.org/D56184
---
 lib/libc/x86/sys/pkru.3            |   3 +
 sys/amd64/amd64/pmap.c             |  20 +++-
 sys/amd64/amd64/sys_machdep.c      |  43 +++++++--
 sys/vm/vm_map.c                    |  32 +++++++
 sys/vm/vm_map.h                    |   1 +
 tests/sys/posixshm/posixshm_test.c | 187 +++++++++++++++++++++++++++++++++++++
 6 files changed, 274 insertions(+), 12 deletions(-)

diff --git a/lib/libc/x86/sys/pkru.3 b/lib/libc/x86/sys/pkru.3
index 95bc66c979ac..033dc07c4b06 100644
--- a/lib/libc/x86/sys/pkru.3
+++ b/lib/libc/x86/sys/pkru.3
@@ -179,6 +179,9 @@ The supplied
 argument for
 .Fn x86_pkru_protect_range
 has reserved bits set.
+.It Bq Er EINVAL
+The range of the request partially covers a mapping of an object created by
+.Xr shm_create_largepage 3 .
 .It Bq Er EFAULT
 The supplied address range does not completely fit into the user-managed
 address range.
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 9fd6c4091c95..bae3735eeafe 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -11598,7 +11598,7 @@ pmap_pkru_update_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
     u_int keyidx)
 {
 	pml4_entry_t *pml4e;
-	pdp_entry_t *pdpe;
+	pdp_entry_t newpdpe, *pdpe;
 	pd_entry_t newpde, ptpaddr, *pde;
 	pt_entry_t newpte, *ptep, pte;
 	vm_offset_t va, va_next;
@@ -11624,6 +11624,22 @@ pmap_pkru_update_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
 				va_next = eva;
 			continue;
 		}
+		if ((*pdpe & PG_PS) != 0) {
+			va_next = (va + NBPDP) & ~PDPMASK;
+			if (va_next < va)
+				va_next = eva;
+			KASSERT(va_next <= eva,
+			    ("partial update of non-transparent 1G mapping "
+			    "pdpe %#lx va %#lx eva %#lx va_next %#lx",
+			    *pdpe, va, eva, va_next));
+			newpdpe = (*pdpe & ~X86_PG_PKU_MASK) |
+			    X86_PG_PKU(keyidx);
+			if (newpdpe != *pdpe) {
+				*pdpe = newpdpe;
+				changed = true;
+			}
+			continue;
+		}
 
 		va_next = (va + NBPDR) & ~PDRMASK;
 		if (va_next < va)
@@ -11676,8 +11692,6 @@ pmap_pkru_check_uargs(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
 	if (pmap->pm_type != PT_X86 || keyidx > PMAP_MAX_PKRU_IDX ||
 	    (flags & ~(AMD64_PKRU_PERSIST | AMD64_PKRU_EXCL)) != 0)
 		return (EINVAL);
-	if (eva <= sva || eva > VM_MAXUSER_ADDRESS)
-		return (EFAULT);
 	if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0)
 		return (ENOTSUP);
 	return (0);
diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c
index a7ee1f49887c..74b182e73dc2 100644
--- a/sys/amd64/amd64/sys_machdep.c
+++ b/sys/amd64/amd64/sys_machdep.c
@@ -32,7 +32,6 @@
  *	from: @(#)sys_machdep.c	5.5 (Berkeley) 1/19/91
  */
 
-#include <sys/cdefs.h>
 #include "opt_capsicum.h"
 #include "opt_ktrace.h"
 
@@ -371,32 +370,58 @@ sysarch(struct thread *td, struct sysarch_args *uap)
 		break;
 
 	case I386_SET_PKRU:
-	case AMD64_SET_PKRU:
+	case AMD64_SET_PKRU: {
+		vm_offset_t addr, start, end;
+		vm_size_t len;
+
+		addr = (uintptr_t)a64pkru.addr;
+		len = a64pkru.len;
+
 		/*
 		 * Read-lock the map to synchronize with parallel
 		 * pmap_vmspace_copy() on fork.
 		 */
 		map = &td->td_proc->p_vmspace->vm_map;
 		vm_map_lock_read(map);
-		error = pmap_pkru_set(PCPU_GET(curpmap),
-		    (vm_offset_t)a64pkru.addr, (vm_offset_t)a64pkru.addr +
-		    a64pkru.len, a64pkru.keyidx, a64pkru.flags);
+		if (len == 0 || !vm_map_check_boundary(map, addr, addr + len)) {
+			vm_map_unlock_read(map);
+			error = EINVAL;
+			break;
+		}
+		start = trunc_page(addr);
+		end = round_page(addr + len);
+		error = pmap_pkru_set(PCPU_GET(curpmap), start, end,
+		    a64pkru.keyidx, a64pkru.flags);
 		vm_map_unlock_read(map);
 		break;
+	}
 
 	case I386_CLEAR_PKRU:
-	case AMD64_CLEAR_PKRU:
+	case AMD64_CLEAR_PKRU: {
+		vm_offset_t addr, start, end;
+		vm_size_t len;
+
 		if (a64pkru.flags != 0 || a64pkru.keyidx != 0) {
 			error = EINVAL;
 			break;
 		}
+
+		addr = (uintptr_t)a64pkru.addr;
+		len = a64pkru.len;
+
 		map = &td->td_proc->p_vmspace->vm_map;
 		vm_map_lock_read(map);
-		error = pmap_pkru_clear(PCPU_GET(curpmap),
-		    (vm_offset_t)a64pkru.addr,
-		    (vm_offset_t)a64pkru.addr + a64pkru.len);
+		if (len == 0 || !vm_map_check_boundary(map, addr, addr + len)) {
+			vm_map_unlock_read(map);
+			error = EINVAL;
+			break;
+		}
+		start = trunc_page(addr);
+		end = round_page(addr + len);
+		error = pmap_pkru_clear(PCPU_GET(curpmap), start, end);
 		vm_map_unlock_read(map);
 		break;
+	}
 
 	case AMD64_DISABLE_TLSBASE:
 		clear_pcb_flags(pcb, PCB_TLSBASE);
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index cf028c0c47ef..631a5e302879 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -4140,6 +4140,38 @@ vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
 	return (TRUE);
 }
 
+/*
+ * Check whether the specified range partially overlaps a map entry with
+ * fixed boundaries, and return false if so.
+ *
+ * The map must be locked.
+ */
+bool
+vm_map_check_boundary(vm_map_t map, vm_offset_t start, vm_offset_t end)
+{
+	vm_map_entry_t entry;
+	int bdry_idx;
+
+	if (!vm_map_range_valid(map, start, end))
+		return (false);
+	if (start == end)
+		return (true);
+
+	if (vm_map_lookup_entry(map, start, &entry)) {
+		bdry_idx = MAP_ENTRY_SPLIT_BOUNDARY_INDEX(entry);
+		if (bdry_idx != 0 &&
+		    (start & (pagesizes[bdry_idx] - 1)) != 0)
+			return (false);
+	}
+	if (vm_map_lookup_entry(map, end - 1, &entry)) {
+		bdry_idx = MAP_ENTRY_SPLIT_BOUNDARY_INDEX(entry);
+		if (bdry_idx != 0 &&
+		    (end & (pagesizes[bdry_idx] - 1)) != 0)
+			return (false);
+	}
+	return (true);
+}
+
 /*
  *
  *	vm_map_copy_swap_object:
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index 5d41f892e83d..66a787b0d2d8 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -471,6 +471,7 @@ vm_map_entry_read_succ(void *token, struct vm_map_entry *const clone,
 #endif				/* ! _KERNEL */
 
 #ifdef _KERNEL
+bool vm_map_check_boundary(vm_map_t, vm_offset_t, vm_offset_t);
 boolean_t vm_map_check_protection (vm_map_t, vm_offset_t, vm_offset_t, vm_prot_t);
 int vm_map_delete(vm_map_t, vm_offset_t, vm_offset_t);
 int vm_map_find(vm_map_t, vm_object_t, vm_ooffset_t, vm_offset_t *, vm_size_t,
diff --git a/tests/sys/posixshm/posixshm_test.c b/tests/sys/posixshm/posixshm_test.c
index 406ad5011884..b8db1edd5c2d 100644
--- a/tests/sys/posixshm/posixshm_test.c
+++ b/tests/sys/posixshm/posixshm_test.c
@@ -39,10 +39,17 @@
 #include <sys/sysctl.h>
 #include <sys/wait.h>
 
+#ifdef __amd64__
+#include <machine/sysarch.h>
+#endif
+
 #include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <paths.h>
+#include <setjmp.h>
 #include <signal.h>
+#include <stdatomic.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -1890,6 +1897,183 @@ ATF_TC_BODY(largepage_pipe, tc)
 	}
 }
 
+#ifdef __amd64__
+static sigjmp_buf jmpbuf;
+static _Atomic(void *) faultaddr;
+static _Atomic(int) faultsig;
+
+#define	KEY_RW	1
+#define	KEY_RO	2
+#define KEY_WO	3
+#define KEY_NO	4
+#define	VAL	0xdeadfacec0debeef
+static void
+set_keys(void)
+{
+	int error;
+
+	error = x86_pkru_set_perm(KEY_RW, 1, 1);
+	ATF_REQUIRE(error == 0);
+	error = x86_pkru_set_perm(KEY_RO, 1, 0);
+	ATF_REQUIRE(error == 0);
+	error = x86_pkru_set_perm(KEY_WO, 0, 1);
+	ATF_REQUIRE(error == 0);
+	error = x86_pkru_set_perm(KEY_NO, 0, 0);
+	ATF_REQUIRE(error == 0);
+}
+
+static void
+sigsegv(int sig, siginfo_t *si, void *uc __unused)
+{
+	faultsig = sig;
+	faultaddr = si->si_addr;
+	siglongjmp(jmpbuf, 1);
+}
+
+static bool
+try_read(volatile uint64_t *p, uint64_t *outp)
+{
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		*outp = *p;
+		return (true);
+	} else {
+		atomic_signal_fence(memory_order_relaxed);
+		ATF_REQUIRE(faultsig == SIGSEGV);
+		ATF_REQUIRE(faultaddr == p);
+		set_keys(); /* PKRU is not restored by siglongjmp? */
+		return (false);
+	}
+}
+
+static bool
+try_write(volatile uint64_t *p, uint64_t val)
+{
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		*p = val;
+		return (true);
+	} else {
+		atomic_signal_fence(memory_order_relaxed);
+		ATF_REQUIRE(faultsig == SIGSEGV);
+		ATF_REQUIRE(faultaddr == p);
+		set_keys(); /* PKRU is not restored by siglongjmp? */
+		return (false);
+	}
+}
+
+ATF_TC_WITHOUT_HEAD(largepage_pkru);
+ATF_TC_BODY(largepage_pkru, tc)
+{
+	size_t ps[MAXPAGESIZES];
+	struct sigaction sa;
+	char *addr, *addr1;
+	int error, fd, pscnt;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = sigsegv;
+	sa.sa_flags = SA_SIGINFO;
+	sigemptyset(&sa.sa_mask);
+	error = sigaction(SIGSEGV, &sa, NULL);
+	ATF_REQUIRE(error == 0);
+
+	pscnt = pagesizes(ps, true);
+
+	for (int i = 1; i < pscnt; i++) {
+		uint64_t val;
+
+		fd = shm_open_large(i, SHM_LARGEPAGE_ALLOC_DEFAULT, ps[i]);
+		addr = mmap(NULL, ps[i], PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+		    0);
+		ATF_REQUIRE_MSG(addr != MAP_FAILED,
+		    "mmap(%zu bytes) failed; error=%d", ps[i], errno);
+
+		/*
+		 * Ensure that the page is faulted into the pmap.
+		 */
+		memset(addr, 0, ps[i]);
+
+		set_keys();
+
+		/*
+		 * Make sure we can't partially cover a largepage mapping.
+		 */
+		error = x86_pkru_protect_range(addr, PAGE_SIZE, KEY_RW, 0);
+		ATF_REQUIRE_ERRNO(EINVAL, error != 0);
+		error = x86_pkru_protect_range(addr, ps[i] - PAGE_SIZE, KEY_RW,
+		    0);
+		ATF_REQUIRE_ERRNO(EINVAL, error != 0);
+		error = x86_pkru_protect_range(addr + PAGE_SIZE, ps[i] - PAGE_SIZE,
+		    KEY_RW, 0);
+		ATF_REQUIRE_ERRNO(EINVAL, error != 0);
+		error = x86_pkru_protect_range(addr + 1, ps[i], KEY_RW, 0);
+		ATF_REQUIRE_ERRNO(EINVAL, error != 0);
+
+		/*
+		 * Make sure that protections are honoured.
+		 */
+		for (int j = 1; j <= 4; j++) {
+			volatile uint64_t *addr64;
+
+			error = x86_pkru_protect_range(addr, ps[i], 0, 0);
+			ATF_REQUIRE(error == 0);
+
+			addr64 = (volatile uint64_t *)(void *)addr;
+			*addr64 = VAL;
+
+			error = x86_pkru_protect_range(addr, ps[i], j, 0);
+			ATF_REQUIRE(error == 0);
+			switch (j) {
+			case KEY_RW:
+				ATF_REQUIRE(try_write(addr64, VAL));
+				ATF_REQUIRE(try_read(addr64, &val));
+				ATF_REQUIRE(val == VAL);
+				break;
+			case KEY_RO:
+				ATF_REQUIRE(try_read(addr64, &val));
+				ATF_REQUIRE(val == VAL);
+				ATF_REQUIRE(!try_write(addr64, VAL));
+				break;
+			case KEY_WO:
+				/* !access implies !modify */
+			case KEY_NO:
+				ATF_REQUIRE(!try_read(addr64, &val));
+				ATF_REQUIRE(!try_write(addr64, VAL));
+				break;
+			default:
+				__unreachable();
+			}
+		}
+		error = munmap(addr, ps[i]);
+		ATF_CHECK(error == 0);
+
+		/*
+		 * Try mapping a large page in a region partially covered by a
+		 * key.
+		 *
+		 * Rather than detecting the mismatch when the logical mapping
+		 * is created, we currently only fail once pmap_enter() is
+		 * called from the fault handler.  This is not ideal and might
+		 * be improved in the future.
+		 */
+		error = x86_pkru_protect_range(addr, ps[i], 0, 0);
+		ATF_REQUIRE(error == 0);
+		error = x86_pkru_protect_range(addr + PAGE_SIZE,
+		    ps[i] - PAGE_SIZE, KEY_RW, 0);
+		ATF_REQUIRE(error == 0);
+
+		addr1 = mmap(addr, ps[i], PROT_READ | PROT_WRITE,
+		    MAP_SHARED | MAP_FIXED, fd, 0);
+		ATF_REQUIRE(addr1 != MAP_FAILED);
+		ATF_REQUIRE(addr == addr1);
+		ATF_REQUIRE(!try_read((volatile uint64_t *)(void *)addr, &val));
+		ATF_REQUIRE(!try_write((volatile uint64_t *)(void *)addr, VAL));
+	}
+}
+#undef KEY_RW
+#undef KEY_RO
+#undef KEY_WO
+#undef KEY_NO
+#endif
+
 ATF_TC_WITHOUT_HEAD(largepage_reopen);
 ATF_TC_BODY(largepage_reopen, tc)
 {
@@ -1980,6 +2164,9 @@ ATF_TP_ADD_TCS(tp)
 	ATF_TP_ADD_TC(tp, largepage_mprotect);
 	ATF_TP_ADD_TC(tp, largepage_minherit);
 	ATF_TP_ADD_TC(tp, largepage_pipe);
+#ifdef __amd64__
+	ATF_TP_ADD_TC(tp, largepage_pkru);
+#endif
 	ATF_TP_ADD_TC(tp, largepage_reopen);
 
 	return (atf_no_error());


home | help

Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?69e79b12.35f92.3882be48>