Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 3 Aug 2017 00:38:13 +0000 (UTC)
From:      Mark Johnston <markj@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r321963 - in head: . cddl/contrib/opensolaris/cmd/lockstat share/man/man4 sys/dev/ksyms sys/sys
Message-ID:  <201708030038.v730cD76021861@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: markj
Date: Thu Aug  3 00:38:13 2017
New Revision: 321963
URL: https://svnweb.freebsd.org/changeset/base/321963

Log:
  Rework and simplify the ksyms(4) implementation.
  
  - Store the symbol table contents in an anonymous swap-backed object. Have
    mmap(/dev/ksyms) map that object, and stop mapping the symbol table into
    the calling process in ksyms_open(). Previously we would cache a pointer
    to the pmap of the opening process, and mmap(/dev/ksyms) would create a
    mapping using the physical address found by a pmap lookup at the initial
    mapping address. However, this assumes that the cached pmap is valid,
    which may not be the case. [1]
  - Remove the ksyms ioctl interface. It appears to have been added to work
    around a limitation in libelf that no longer exists; see r321842.
    Moreover, the interface is difficult to support and isn't present in
    illumos. Since ksyms was added specifically to support lockstat(1), it
    is expected that this removal won't have any real impact.
  - Simplify ksyms_read() to avoid unnecessary copying.
  - Don't call the device handle destructor if we fail to capture a snapshot
    of the kernel's symbol table. devfs will do that for us.
  
  Reported by:	Ilja van Sprundel <ivansprundel@ioactive.com> [1]
  Reviewed by:	kib (previous revision)
  MFC after:	1 week
  Differential Revision:	https://reviews.freebsd.org/D11789

Deleted:
  head/sys/sys/ksyms.h
Modified:
  head/ObsoleteFiles.inc
  head/cddl/contrib/opensolaris/cmd/lockstat/sym.c
  head/share/man/man4/ksyms.4
  head/sys/dev/ksyms/ksyms.c

Modified: head/ObsoleteFiles.inc
==============================================================================
--- head/ObsoleteFiles.inc	Thu Aug  3 00:35:35 2017	(r321962)
+++ head/ObsoleteFiles.inc	Thu Aug  3 00:38:13 2017	(r321963)
@@ -38,6 +38,9 @@
 #   xargs -n1 | sort | uniq -d;
 # done
 
+# 20170802: ksyms(4) ioctl interface was removed
+OLD_FILES+=usr/include/sys/ksyms.h
+
 # 20170722: new clang import which bumps version from 4.0.0 to 5.0.0.
 OLD_FILES+=usr/lib/clang/4.0.0/include/sanitizer/allocator_interface.h
 OLD_FILES+=usr/lib/clang/4.0.0/include/sanitizer/asan_interface.h

Modified: head/cddl/contrib/opensolaris/cmd/lockstat/sym.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/lockstat/sym.c	Thu Aug  3 00:35:35 2017	(r321962)
+++ head/cddl/contrib/opensolaris/cmd/lockstat/sym.c	Thu Aug  3 00:38:13 2017	(r321963)
@@ -48,7 +48,6 @@
 #include <kstat.h>
 #else
 #include <sys/elf.h>
-#include <sys/ksyms.h>
 #include <sys/param.h>
 #include <sys/module.h>
 #include <sys/linker.h>

Modified: head/share/man/man4/ksyms.4
==============================================================================
--- head/share/man/man4/ksyms.4	Thu Aug  3 00:35:35 2017	(r321962)
+++ head/share/man/man4/ksyms.4	Thu Aug  3 00:38:13 2017	(r321963)
@@ -27,7 +27,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd April 5, 2009
+.Dd August 2, 2017
 .Dt KSYMS 4
 .Os
 .Sh NAME
@@ -69,24 +69,6 @@ driver does not block the loading or unloading of modu
 while the
 .Pa /dev/ksyms
 file is open but may contain stale data.
-.Sh IOCTLS
-The
-.Xr ioctl 2
-command codes below are defined in
-.Aq Pa sys/ksyms.h .
-.Pp
-The (third) argument to the
-.Xr ioctl 2
-should be a pointer to the type indicated.
-.Bl -tag -width indent -offset indent
-.It Dv KIOCGSIZE (size_t)
-Returns the total size of the current symbol table.
-This can be used when allocating a buffer to make a copy of
-the kernel symbol table.
-.It Dv KIOCGADDR (void *)
-Returns the address of the kernel symbol table mapped in
-the process memory.
-.El
 .Sh FILES
 .Bl -tag -width /dev/ksymsX
 .It Pa /dev/ksyms
@@ -112,7 +94,6 @@ This may occur if the kernel was in the process of loa
 unloading a module.
 .El
 .Sh SEE ALSO
-.Xr ioctl 2 ,
 .Xr nlist 3 ,
 .Xr elf 5 ,
 .Xr kldload 8
@@ -152,12 +133,3 @@ file once at a time.
 The process must close the
 .Pa /dev/ksyms
 before it is allowed to open it again.
-.Pp
-The
-.Nm
-driver uses the calling process' memory address space to store the snapshot.
-.Xr ioctl 2
-can be used to get the memory address where the symbol table is stored to
-save kernel memory.
-.Xr mmap 2
-may also be used but it will map it to another address.

Modified: head/sys/dev/ksyms/ksyms.c
==============================================================================
--- head/sys/dev/ksyms/ksyms.c	Thu Aug  3 00:35:35 2017	(r321962)
+++ head/sys/dev/ksyms/ksyms.c	Thu Aug  3 00:38:13 2017	(r321963)
@@ -32,16 +32,15 @@
 
 #include <sys/conf.h>
 #include <sys/elf.h>
-#include <sys/ksyms.h>
 #include <sys/linker.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/module.h>
-#include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
 #include <sys/resourcevar.h>
 #include <sys/stat.h>
+#include <sys/sx.h>
 #include <sys/uio.h>
 
 #include <machine/elf.h>
@@ -49,7 +48,7 @@
 #include <vm/pmap.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
-#include <vm/vm_map.h>
+#include <vm/vm_object.h>
 
 #include "linker_if.h"
 
@@ -68,18 +67,14 @@
 
 static d_open_t ksyms_open;
 static d_read_t ksyms_read;
-static d_close_t ksyms_close;
-static d_ioctl_t ksyms_ioctl;
-static d_mmap_t ksyms_mmap;
+static d_mmap_single_t ksyms_mmap_single;
 
 static struct cdevsw ksyms_cdevsw = {
 	.d_version =	D_VERSION,
 	.d_flags =	D_TRACKCLOSE,
 	.d_open =	ksyms_open,
-	.d_close =	ksyms_close,
 	.d_read =	ksyms_read,
-	.d_ioctl =	ksyms_ioctl,
-	.d_mmap =	ksyms_mmap,
+	.d_mmap_single = ksyms_mmap_single,
 	.d_name =	KSYMS_DNAME
 };
 
@@ -87,11 +82,12 @@ struct ksyms_softc {
 	LIST_ENTRY(ksyms_softc)	sc_list;
 	vm_offset_t		sc_uaddr;
 	size_t			sc_usize;
-	pmap_t			sc_pmap;
+	vm_object_t		sc_obj;
+	vm_size_t		sc_objsz;
 	struct proc	       *sc_proc;
 };
 
-static struct mtx		 ksyms_mtx;
+static struct sx		 ksyms_mtx;
 static struct cdev		*ksyms_dev;
 static LIST_HEAD(, ksyms_softc)	 ksyms_list = LIST_HEAD_INITIALIZER(ksyms_list);
 
@@ -112,6 +108,7 @@ struct tsizes {
 };
 
 struct toffsets {
+	struct ksyms_softc *to_sc;
 	vm_offset_t	to_symoff;
 	vm_offset_t	to_stroff;
 	unsigned	to_stridx;
@@ -155,11 +152,25 @@ ksyms_size_calc(struct tsizes *ts)
 	(void)linker_file_foreach(ksyms_size_permod, ts);
 }
 
-#define KSYMS_EMIT(src, des, sz) do {			\
-	copyout(src, (void *)des, sz);			\
-	des += sz;					\
-} while (0)
+static int
+ksyms_emit(struct ksyms_softc *sc, void *buf, off_t off, size_t sz)
+{
+	struct iovec iov;
+	struct uio uio;
 
+	iov.iov_base = buf;
+	iov.iov_len = sz;
+	uio.uio_iov = &iov;
+	uio.uio_iovcnt = 1;
+	uio.uio_offset = off;
+	uio.uio_resid = (ssize_t)sz;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_rw = UIO_WRITE;
+	uio.uio_td = curthread;
+
+	return (uiomove_object(sc->sc_obj, sc->sc_objsz, &uio));
+}
+
 #define SYMBLKSZ	(256 * sizeof(Elf_Sym))
 
 /*
@@ -170,24 +181,24 @@ static int
 ksyms_add(linker_file_t lf, void *arg)
 {
 	char *buf;
+	struct ksyms_softc *sc;
 	struct toffsets *to;
 	const Elf_Sym *symtab;
 	Elf_Sym *symp;
 	caddr_t strtab;
-	long symsz;
-	size_t strsz, numsyms;
+	size_t len, numsyms, strsz, symsz;
 	linker_symval_t symval;
-	int i, nsyms, len;
+	int error, i, nsyms;
 
+	buf = malloc(SYMBLKSZ, M_KSYMS, M_WAITOK);
 	to = arg;
+	sc = to->to_sc;
 
 	MOD_SLOCK;
 	numsyms =  LINKER_SYMTAB_GET(lf, &symtab);
 	strsz = LINKER_STRTAB_GET(lf, &strtab);
 	symsz = numsyms * sizeof(Elf_Sym);
 
-	buf = malloc(SYMBLKSZ, M_KSYMS, M_WAITOK);
-
 	while (symsz > 0) {
 		len = min(SYMBLKSZ, symsz);
 		bcopy(symtab, buf, len);
@@ -213,7 +224,13 @@ ksyms_add(linker_file_t lf, void *arg)
 			return (ENXIO);
 		}
 		to->to_resid -= len;
-		KSYMS_EMIT(buf, to->to_symoff, len);
+		error = ksyms_emit(sc, buf, to->to_symoff, len);
+		to->to_symoff += len;
+		if (error != 0) {
+			MOD_SUNLOCK;
+			free(buf, M_KSYMS);
+			return (error);
+		}
 
 		symtab += nsyms;
 		symsz -= len;
@@ -224,10 +241,11 @@ ksyms_add(linker_file_t lf, void *arg)
 	if (strsz > to->to_resid)
 		return (ENXIO);
 	to->to_resid -= strsz;
-	KSYMS_EMIT(strtab, to->to_stroff, strsz);
+	error = ksyms_emit(sc, strtab, to->to_stroff, strsz);
+	to->to_stroff += strsz;
 	to->to_stridx += strsz;
 
-	return (0);
+	return (error);
 }
 
 /*
@@ -236,11 +254,11 @@ ksyms_add(linker_file_t lf, void *arg)
  * 0 on success, otherwise error.
  */
 static int
-ksyms_snapshot(struct tsizes *ts, vm_offset_t uaddr, size_t resid)
+ksyms_snapshot(struct ksyms_softc *sc, struct tsizes *ts)
 {
-	struct ksyms_hdr *hdr;
 	struct toffsets	to;
-	int error = 0;
+	struct ksyms_hdr *hdr;
+	int error;
 
 	hdr = malloc(sizeof(*hdr), M_KSYMS, M_WAITOK | M_ZERO);
 
@@ -334,39 +352,41 @@ ksyms_snapshot(struct tsizes *ts, vm_offset_t uaddr, s
 	/* Copy shstrtab into the header. */
 	bcopy(ksyms_shstrtab, hdr->kh_shstrtab, sizeof(ksyms_shstrtab));
 
-	to.to_symoff = uaddr + hdr->kh_shdr[SHDR_SYMTAB].sh_offset;
-	to.to_stroff = uaddr + hdr->kh_shdr[SHDR_STRTAB].sh_offset;
+	to.to_sc = sc;
+	to.to_symoff = hdr->kh_shdr[SHDR_SYMTAB].sh_offset;
+	to.to_stroff = hdr->kh_shdr[SHDR_STRTAB].sh_offset;
 	to.to_stridx = 0;
-	if (sizeof(struct ksyms_hdr) > resid) {
-		free(hdr, M_KSYMS);
-		return (ENXIO);
-	}
-	to.to_resid = resid - sizeof(struct ksyms_hdr);
+	to.to_resid = sc->sc_objsz - sizeof(struct ksyms_hdr);
 
 	/* emit header */
-	copyout(hdr, (void *)uaddr, sizeof(struct ksyms_hdr));
-
+	error = ksyms_emit(sc, hdr, 0, sizeof(*hdr));
 	free(hdr, M_KSYMS);
+	if (error != 0)
+		return (error);
 
 	/* Add symbol and string tables for each kernel module. */
 	error = linker_file_foreach(ksyms_add, &to);
-
+	if (error != 0)
+		return (error);
 	if (to.to_resid != 0)
 		return (ENXIO);
-
-	return (error);
+	return (0);
 }
 
 static void
 ksyms_cdevpriv_dtr(void *data)
 {
 	struct ksyms_softc *sc;
+	vm_object_t obj;
 
 	sc = (struct ksyms_softc *)data;
 
-	mtx_lock(&ksyms_mtx);
+	sx_xlock(&ksyms_mtx);
 	LIST_REMOVE(sc, sc_list);
-	mtx_unlock(&ksyms_mtx);
+	sx_xunlock(&ksyms_mtx);
+	obj = sc->sc_obj;
+	if (obj != NULL)
+		vm_object_deallocate(obj);
 	free(sc, M_KSYMS);
 }
 
@@ -375,34 +395,31 @@ ksyms_open(struct cdev *dev, int flags, int fmt __unus
 {
 	struct tsizes ts;
 	struct ksyms_softc *sc;
-	size_t total_elf_sz;
+	vm_size_t elfsz;
 	int error, try;
 
 	/*
 	 * Limit one open() per process. The process must close()
 	 * before open()'ing again.
 	 */
-	mtx_lock(&ksyms_mtx);
+	sx_xlock(&ksyms_mtx);
 	LIST_FOREACH(sc, &ksyms_list, sc_list) {
 		if (sc->sc_proc == td->td_proc) {
-			mtx_unlock(&ksyms_mtx);
+			sx_xunlock(&ksyms_mtx);
 			return (EBUSY);
 		}
 	}
 
-	sc = malloc(sizeof(*sc), M_KSYMS, M_NOWAIT | M_ZERO);
-	if (sc == NULL) {
-		mtx_unlock(&ksyms_mtx);
-		return (ENOMEM);
-	}
+	sc = malloc(sizeof(*sc), M_KSYMS, M_WAITOK | M_ZERO);
 	sc->sc_proc = td->td_proc;
-	sc->sc_pmap = &td->td_proc->p_vmspace->vm_pmap;
 	LIST_INSERT_HEAD(&ksyms_list, sc, sc_list);
-	mtx_unlock(&ksyms_mtx);
+	sx_xunlock(&ksyms_mtx);
 
 	error = devfs_set_cdevpriv(sc, ksyms_cdevpriv_dtr);
-	if (error != 0)
-		goto failed;
+	if (error != 0) {
+		ksyms_cdevpriv_dtr(sc);
+		return (error);
+	}
 
 	/*
 	 * MOD_SLOCK doesn't work here (because of a lock reversal with
@@ -412,32 +429,20 @@ ksyms_open(struct cdev *dev, int flags, int fmt __unus
 	 * time.
 	 */
 	for (try = 0; try < 3; try++) {
-		/*
-		 * Map a buffer in the calling process memory space and
-		 * create a snapshot of the kernel symbol table in it.
-		 */
-
-		/* Compute the size of buffer needed. */
 		ksyms_size_calc(&ts);
-		total_elf_sz = sizeof(struct ksyms_hdr) + ts.ts_symsz +
-		    ts.ts_strsz;
+		elfsz = sizeof(struct ksyms_hdr) + ts.ts_symsz + ts.ts_strsz;
 
-		error = copyout_map(td, &sc->sc_uaddr, (vm_size_t)total_elf_sz);
-		if (error != 0)
-			break;
-		sc->sc_usize = total_elf_sz;
+		sc->sc_obj = vm_object_allocate(OBJT_DEFAULT,
+		    OFF_TO_IDX(round_page(elfsz)));
+		sc->sc_objsz = elfsz;
 
-		error = ksyms_snapshot(&ts, sc->sc_uaddr, total_elf_sz);
+		error = ksyms_snapshot(sc, &ts);
 		if (error == 0)
-			/* successful snapshot */
-			return (0);
+			break;
 
-		/* Snapshot failed, unmap the memory and try again. */
-		(void)copyout_unmap(td, sc->sc_uaddr, sc->sc_usize);
+		vm_object_deallocate(sc->sc_obj);
+		sc->sc_obj = NULL;
 	}
-
-failed:
-	ksyms_cdevpriv_dtr(sc);
 	return (error);
 }
 
@@ -445,125 +450,38 @@ static int
 ksyms_read(struct cdev *dev, struct uio *uio, int flags __unused)
 {
 	struct ksyms_softc *sc;
-	char *buf;
-	off_t off;
-	size_t len, sz;
-	vm_size_t ubase;
 	int error;
 
 	error = devfs_get_cdevpriv((void **)&sc);
 	if (error != 0)
 		return (error);
-
-	off = uio->uio_offset;
-	len = uio->uio_resid;
-
-	if (off < 0 || off > sc->sc_usize)
-		return (EFAULT);
-
-	if (len > sc->sc_usize - off)
-		len = sc->sc_usize - off;
-	if (len == 0)
-		return (0);
-
-	/*
-	 * Since the snapshot buffer is in the user space we have to copy it
-	 * in to the kernel and then back out.  The extra copy saves valuable
-	 * kernel memory.
-	 */
-	buf = malloc(PAGE_SIZE, M_KSYMS, M_WAITOK);
-	ubase = sc->sc_uaddr + off;
-
-	while (len) {
-		sz = min(PAGE_SIZE, len);
-		if (copyin((void *)ubase, buf, sz) != 0)
-			error = EFAULT;
-		else
-			error = uiomove(buf, sz, uio);
-		if (error != 0)
-			break;
-
-		len -= sz;
-		ubase += sz;
-	}
-	free(buf, M_KSYMS);
-
-	return (error);
+	return (uiomove_object(sc->sc_obj, sc->sc_objsz, uio));
 }
 
 static int
-ksyms_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int32_t flag __unused,
-    struct thread *td __unused)
+ksyms_mmap_single(struct cdev *dev, vm_ooffset_t *offset, vm_size_t size,
+    vm_object_t *objp, int nprot)
 {
 	struct ksyms_softc *sc;
+	vm_object_t obj;
 	int error;
 
 	error = devfs_get_cdevpriv((void **)&sc);
 	if (error != 0)
 		return (error);
 
-	switch (cmd) {
-	case KIOCGSIZE:
-		/*
-		 * Return the size (in bytes) of the symbol table
-		 * snapshot.
-		 */
-		*(size_t *)data = sc->sc_usize;
-		break;
-	case KIOCGADDR:
-		/*
-		 * Return the address of the symbol table snapshot.
-		 * XXX - compat32 version of this?
-		 */
-		*(void **)data = (void *)sc->sc_uaddr;
-		break;
-	default:
-		error = ENOTTY;
-		break;
-	}
+	if (*offset < 0 || *offset >= round_page(sc->sc_objsz) ||
+	    size > round_page(sc->sc_objsz) - *offset ||
+	    (nprot & ~PROT_READ) != 0)
+		return (EINVAL);
 
-	return (error);
-}
-
-static int
-ksyms_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
-    int prot __unused, vm_memattr_t *memattr __unused)
-{
-	struct ksyms_softc *sc;
-	int error;
-
-	error = devfs_get_cdevpriv((void **)&sc);
-	if (error != 0)
-		return (error);
-
-	/*
-	 * XXX mmap() will actually map the symbol table into the process
-	 * address space again.
-	 */
-	if (offset > round_page(sc->sc_usize) ||
-	    (*paddr = pmap_extract(sc->sc_pmap,
-	    (vm_offset_t)sc->sc_uaddr + offset)) == 0)
-		return (-1);
-
+	obj = sc->sc_obj;
+	vm_object_reference(obj);
+	*objp = obj;
 	return (0);
 }
 
 static int
-ksyms_close(struct cdev *dev, int flags __unused, int fmt __unused,
-    struct thread *td)
-{
-	struct ksyms_softc *sc;
-	int error;
-	
-	error = devfs_get_cdevpriv((void **)&sc);
-	if (error != 0)
-		return (error);
-
-	/* Unmap the buffer from the process address space. */
-	return (copyout_unmap(td, sc->sc_uaddr, sc->sc_usize));
-}
-
-static int
 ksyms_modevent(module_t mod __unused, int type, void *data __unused)
 {
 	int error;
@@ -571,7 +489,7 @@ ksyms_modevent(module_t mod __unused, int type, void *
 	error = 0;
 	switch (type) {
 	case MOD_LOAD:
-		mtx_init(&ksyms_mtx, "KSyms mtx", NULL, MTX_DEF);
+		sx_init(&ksyms_mtx, "KSyms mtx");
 		ksyms_dev = make_dev(&ksyms_cdevsw, 0, UID_ROOT, GID_WHEEL,
 		    0400, KSYMS_DNAME);
 		break;
@@ -579,7 +497,7 @@ ksyms_modevent(module_t mod __unused, int type, void *
 		if (!LIST_EMPTY(&ksyms_list))
 			return (EBUSY);
 		destroy_dev(ksyms_dev);
-		mtx_destroy(&ksyms_mtx);
+		sx_destroy(&ksyms_mtx);
 		break;
 	case MOD_SHUTDOWN:
 		break;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201708030038.v730cD76021861>