Date: Thu, 3 Aug 2017 00:38:13 +0000 (UTC) From: Mark Johnston <markj@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r321963 - in head: . cddl/contrib/opensolaris/cmd/lockstat share/man/man4 sys/dev/ksyms sys/sys Message-ID: <201708030038.v730cD76021861@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: markj Date: Thu Aug 3 00:38:13 2017 New Revision: 321963 URL: https://svnweb.freebsd.org/changeset/base/321963 Log: Rework and simplify the ksyms(4) implementation. - Store the symbol table contents in an anonymous swap-backed object. Have mmap(/dev/ksyms) map that object, and stop mapping the symbol table into the calling process in ksyms_open(). Previously we would cache a pointer to the pmap of the opening process, and mmap(/dev/ksyms) would create a mapping using the physical address found by a pmap lookup at the initial mapping address. However, this assumes that the cached pmap is valid, which may not be the case. [1] - Remove the ksyms ioctl interface. It appears to have been added to work around a limitation in libelf that no longer exists; see r321842. Moreover, the interface is difficult to support and isn't present in illumos. Since ksyms was added specifically to support lockstat(1), it is expected that this removal won't have any real impact. - Simplify ksyms_read() to avoid unnecessary copying. - Don't call the device handle destructor if we fail to capture a snapshot of the kernel's symbol table. devfs will do that for us. Reported by: Ilja van Sprundel <ivansprundel@ioactive.com> [1] Reviewed by: kib (previous revision) MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D11789 Deleted: head/sys/sys/ksyms.h Modified: head/ObsoleteFiles.inc head/cddl/contrib/opensolaris/cmd/lockstat/sym.c head/share/man/man4/ksyms.4 head/sys/dev/ksyms/ksyms.c Modified: head/ObsoleteFiles.inc ============================================================================== --- head/ObsoleteFiles.inc Thu Aug 3 00:35:35 2017 (r321962) +++ head/ObsoleteFiles.inc Thu Aug 3 00:38:13 2017 (r321963) @@ -38,6 +38,9 @@ # xargs -n1 | sort | uniq -d; # done +# 20170802: ksyms(4) ioctl interface was removed +OLD_FILES+=usr/include/sys/ksyms.h + # 20170722: new clang import which bumps version from 4.0.0 to 5.0.0. OLD_FILES+=usr/lib/clang/4.0.0/include/sanitizer/allocator_interface.h OLD_FILES+=usr/lib/clang/4.0.0/include/sanitizer/asan_interface.h Modified: head/cddl/contrib/opensolaris/cmd/lockstat/sym.c ============================================================================== --- head/cddl/contrib/opensolaris/cmd/lockstat/sym.c Thu Aug 3 00:35:35 2017 (r321962) +++ head/cddl/contrib/opensolaris/cmd/lockstat/sym.c Thu Aug 3 00:38:13 2017 (r321963) @@ -48,7 +48,6 @@ #include <kstat.h> #else #include <sys/elf.h> -#include <sys/ksyms.h> #include <sys/param.h> #include <sys/module.h> #include <sys/linker.h> Modified: head/share/man/man4/ksyms.4 ============================================================================== --- head/share/man/man4/ksyms.4 Thu Aug 3 00:35:35 2017 (r321962) +++ head/share/man/man4/ksyms.4 Thu Aug 3 00:38:13 2017 (r321963) @@ -27,7 +27,7 @@ .\" .\" $FreeBSD$ .\" -.Dd April 5, 2009 +.Dd August 2, 2017 .Dt KSYMS 4 .Os .Sh NAME @@ -69,24 +69,6 @@ driver does not block the loading or unloading of modu while the .Pa /dev/ksyms file is open but may contain stale data. -.Sh IOCTLS -The -.Xr ioctl 2 -command codes below are defined in -.Aq Pa sys/ksyms.h . -.Pp -The (third) argument to the -.Xr ioctl 2 -should be a pointer to the type indicated. -.Bl -tag -width indent -offset indent -.It Dv KIOCGSIZE (size_t) -Returns the total size of the current symbol table. -This can be used when allocating a buffer to make a copy of -the kernel symbol table. -.It Dv KIOCGADDR (void *) -Returns the address of the kernel symbol table mapped in -the process memory. -.El .Sh FILES .Bl -tag -width /dev/ksymsX .It Pa /dev/ksyms @@ -112,7 +94,6 @@ This may occur if the kernel was in the process of loa unloading a module. .El .Sh SEE ALSO -.Xr ioctl 2 , .Xr nlist 3 , .Xr elf 5 , .Xr kldload 8 @@ -152,12 +133,3 @@ file once at a time. The process must close the .Pa /dev/ksyms before it is allowed to open it again. -.Pp -The -.Nm -driver uses the calling process' memory address space to store the snapshot. -.Xr ioctl 2 -can be used to get the memory address where the symbol table is stored to -save kernel memory. -.Xr mmap 2 -may also be used but it will map it to another address. Modified: head/sys/dev/ksyms/ksyms.c ============================================================================== --- head/sys/dev/ksyms/ksyms.c Thu Aug 3 00:35:35 2017 (r321962) +++ head/sys/dev/ksyms/ksyms.c Thu Aug 3 00:38:13 2017 (r321963) @@ -32,16 +32,15 @@ #include <sys/conf.h> #include <sys/elf.h> -#include <sys/ksyms.h> #include <sys/linker.h> #include <sys/malloc.h> #include <sys/mman.h> #include <sys/module.h> -#include <sys/mutex.h> #include <sys/proc.h> #include <sys/queue.h> #include <sys/resourcevar.h> #include <sys/stat.h> +#include <sys/sx.h> #include <sys/uio.h> #include <machine/elf.h> @@ -49,7 +48,7 @@ #include <vm/pmap.h> #include <vm/vm.h> #include <vm/vm_extern.h> -#include <vm/vm_map.h> +#include <vm/vm_object.h> #include "linker_if.h" @@ -68,18 +67,14 @@ static d_open_t ksyms_open; static d_read_t ksyms_read; -static d_close_t ksyms_close; -static d_ioctl_t ksyms_ioctl; -static d_mmap_t ksyms_mmap; +static d_mmap_single_t ksyms_mmap_single; static struct cdevsw ksyms_cdevsw = { .d_version = D_VERSION, .d_flags = D_TRACKCLOSE, .d_open = ksyms_open, - .d_close = ksyms_close, .d_read = ksyms_read, - .d_ioctl = ksyms_ioctl, - .d_mmap = ksyms_mmap, + .d_mmap_single = ksyms_mmap_single, .d_name = KSYMS_DNAME }; @@ -87,11 +82,12 @@ struct ksyms_softc { LIST_ENTRY(ksyms_softc) sc_list; vm_offset_t sc_uaddr; size_t sc_usize; - pmap_t sc_pmap; + vm_object_t sc_obj; + vm_size_t sc_objsz; struct proc *sc_proc; }; -static struct mtx ksyms_mtx; +static struct sx ksyms_mtx; static struct cdev *ksyms_dev; static LIST_HEAD(, ksyms_softc) ksyms_list = LIST_HEAD_INITIALIZER(ksyms_list); @@ -112,6 +108,7 @@ struct tsizes { }; struct toffsets { + struct ksyms_softc *to_sc; vm_offset_t to_symoff; vm_offset_t to_stroff; unsigned to_stridx; @@ -155,11 +152,25 @@ ksyms_size_calc(struct tsizes *ts) (void)linker_file_foreach(ksyms_size_permod, ts); } -#define KSYMS_EMIT(src, des, sz) do { \ - copyout(src, (void *)des, sz); \ - des += sz; \ -} while (0) +static int +ksyms_emit(struct ksyms_softc *sc, void *buf, off_t off, size_t sz) +{ + struct iovec iov; + struct uio uio; + iov.iov_base = buf; + iov.iov_len = sz; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_offset = off; + uio.uio_resid = (ssize_t)sz; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_rw = UIO_WRITE; + uio.uio_td = curthread; + + return (uiomove_object(sc->sc_obj, sc->sc_objsz, &uio)); +} + #define SYMBLKSZ (256 * sizeof(Elf_Sym)) /* @@ -170,24 +181,24 @@ static int ksyms_add(linker_file_t lf, void *arg) { char *buf; + struct ksyms_softc *sc; struct toffsets *to; const Elf_Sym *symtab; Elf_Sym *symp; caddr_t strtab; - long symsz; - size_t strsz, numsyms; + size_t len, numsyms, strsz, symsz; linker_symval_t symval; - int i, nsyms, len; + int error, i, nsyms; + buf = malloc(SYMBLKSZ, M_KSYMS, M_WAITOK); to = arg; + sc = to->to_sc; MOD_SLOCK; numsyms = LINKER_SYMTAB_GET(lf, &symtab); strsz = LINKER_STRTAB_GET(lf, &strtab); symsz = numsyms * sizeof(Elf_Sym); - buf = malloc(SYMBLKSZ, M_KSYMS, M_WAITOK); - while (symsz > 0) { len = min(SYMBLKSZ, symsz); bcopy(symtab, buf, len); @@ -213,7 +224,13 @@ ksyms_add(linker_file_t lf, void *arg) return (ENXIO); } to->to_resid -= len; - KSYMS_EMIT(buf, to->to_symoff, len); + error = ksyms_emit(sc, buf, to->to_symoff, len); + to->to_symoff += len; + if (error != 0) { + MOD_SUNLOCK; + free(buf, M_KSYMS); + return (error); + } symtab += nsyms; symsz -= len; @@ -224,10 +241,11 @@ ksyms_add(linker_file_t lf, void *arg) if (strsz > to->to_resid) return (ENXIO); to->to_resid -= strsz; - KSYMS_EMIT(strtab, to->to_stroff, strsz); + error = ksyms_emit(sc, strtab, to->to_stroff, strsz); + to->to_stroff += strsz; to->to_stridx += strsz; - return (0); + return (error); } /* @@ -236,11 +254,11 @@ ksyms_add(linker_file_t lf, void *arg) * 0 on success, otherwise error. */ static int -ksyms_snapshot(struct tsizes *ts, vm_offset_t uaddr, size_t resid) +ksyms_snapshot(struct ksyms_softc *sc, struct tsizes *ts) { - struct ksyms_hdr *hdr; struct toffsets to; - int error = 0; + struct ksyms_hdr *hdr; + int error; hdr = malloc(sizeof(*hdr), M_KSYMS, M_WAITOK | M_ZERO); @@ -334,39 +352,41 @@ ksyms_snapshot(struct tsizes *ts, vm_offset_t uaddr, s /* Copy shstrtab into the header. */ bcopy(ksyms_shstrtab, hdr->kh_shstrtab, sizeof(ksyms_shstrtab)); - to.to_symoff = uaddr + hdr->kh_shdr[SHDR_SYMTAB].sh_offset; - to.to_stroff = uaddr + hdr->kh_shdr[SHDR_STRTAB].sh_offset; + to.to_sc = sc; + to.to_symoff = hdr->kh_shdr[SHDR_SYMTAB].sh_offset; + to.to_stroff = hdr->kh_shdr[SHDR_STRTAB].sh_offset; to.to_stridx = 0; - if (sizeof(struct ksyms_hdr) > resid) { - free(hdr, M_KSYMS); - return (ENXIO); - } - to.to_resid = resid - sizeof(struct ksyms_hdr); + to.to_resid = sc->sc_objsz - sizeof(struct ksyms_hdr); /* emit header */ - copyout(hdr, (void *)uaddr, sizeof(struct ksyms_hdr)); - + error = ksyms_emit(sc, hdr, 0, sizeof(*hdr)); free(hdr, M_KSYMS); + if (error != 0) + return (error); /* Add symbol and string tables for each kernel module. */ error = linker_file_foreach(ksyms_add, &to); - + if (error != 0) + return (error); if (to.to_resid != 0) return (ENXIO); - - return (error); + return (0); } static void ksyms_cdevpriv_dtr(void *data) { struct ksyms_softc *sc; + vm_object_t obj; sc = (struct ksyms_softc *)data; - mtx_lock(&ksyms_mtx); + sx_xlock(&ksyms_mtx); LIST_REMOVE(sc, sc_list); - mtx_unlock(&ksyms_mtx); + sx_xunlock(&ksyms_mtx); + obj = sc->sc_obj; + if (obj != NULL) + vm_object_deallocate(obj); free(sc, M_KSYMS); } @@ -375,34 +395,31 @@ ksyms_open(struct cdev *dev, int flags, int fmt __unus { struct tsizes ts; struct ksyms_softc *sc; - size_t total_elf_sz; + vm_size_t elfsz; int error, try; /* * Limit one open() per process. The process must close() * before open()'ing again. */ - mtx_lock(&ksyms_mtx); + sx_xlock(&ksyms_mtx); LIST_FOREACH(sc, &ksyms_list, sc_list) { if (sc->sc_proc == td->td_proc) { - mtx_unlock(&ksyms_mtx); + sx_xunlock(&ksyms_mtx); return (EBUSY); } } - sc = malloc(sizeof(*sc), M_KSYMS, M_NOWAIT | M_ZERO); - if (sc == NULL) { - mtx_unlock(&ksyms_mtx); - return (ENOMEM); - } + sc = malloc(sizeof(*sc), M_KSYMS, M_WAITOK | M_ZERO); sc->sc_proc = td->td_proc; - sc->sc_pmap = &td->td_proc->p_vmspace->vm_pmap; LIST_INSERT_HEAD(&ksyms_list, sc, sc_list); - mtx_unlock(&ksyms_mtx); + sx_xunlock(&ksyms_mtx); error = devfs_set_cdevpriv(sc, ksyms_cdevpriv_dtr); - if (error != 0) - goto failed; + if (error != 0) { + ksyms_cdevpriv_dtr(sc); + return (error); + } /* * MOD_SLOCK doesn't work here (because of a lock reversal with @@ -412,32 +429,20 @@ ksyms_open(struct cdev *dev, int flags, int fmt __unus * time. */ for (try = 0; try < 3; try++) { - /* - * Map a buffer in the calling process memory space and - * create a snapshot of the kernel symbol table in it. - */ - - /* Compute the size of buffer needed. */ ksyms_size_calc(&ts); - total_elf_sz = sizeof(struct ksyms_hdr) + ts.ts_symsz + - ts.ts_strsz; + elfsz = sizeof(struct ksyms_hdr) + ts.ts_symsz + ts.ts_strsz; - error = copyout_map(td, &sc->sc_uaddr, (vm_size_t)total_elf_sz); - if (error != 0) - break; - sc->sc_usize = total_elf_sz; + sc->sc_obj = vm_object_allocate(OBJT_DEFAULT, + OFF_TO_IDX(round_page(elfsz))); + sc->sc_objsz = elfsz; - error = ksyms_snapshot(&ts, sc->sc_uaddr, total_elf_sz); + error = ksyms_snapshot(sc, &ts); if (error == 0) - /* successful snapshot */ - return (0); + break; - /* Snapshot failed, unmap the memory and try again. */ - (void)copyout_unmap(td, sc->sc_uaddr, sc->sc_usize); + vm_object_deallocate(sc->sc_obj); + sc->sc_obj = NULL; } - -failed: - ksyms_cdevpriv_dtr(sc); return (error); } @@ -445,125 +450,38 @@ static int ksyms_read(struct cdev *dev, struct uio *uio, int flags __unused) { struct ksyms_softc *sc; - char *buf; - off_t off; - size_t len, sz; - vm_size_t ubase; int error; error = devfs_get_cdevpriv((void **)&sc); if (error != 0) return (error); - - off = uio->uio_offset; - len = uio->uio_resid; - - if (off < 0 || off > sc->sc_usize) - return (EFAULT); - - if (len > sc->sc_usize - off) - len = sc->sc_usize - off; - if (len == 0) - return (0); - - /* - * Since the snapshot buffer is in the user space we have to copy it - * in to the kernel and then back out. The extra copy saves valuable - * kernel memory. - */ - buf = malloc(PAGE_SIZE, M_KSYMS, M_WAITOK); - ubase = sc->sc_uaddr + off; - - while (len) { - sz = min(PAGE_SIZE, len); - if (copyin((void *)ubase, buf, sz) != 0) - error = EFAULT; - else - error = uiomove(buf, sz, uio); - if (error != 0) - break; - - len -= sz; - ubase += sz; - } - free(buf, M_KSYMS); - - return (error); + return (uiomove_object(sc->sc_obj, sc->sc_objsz, uio)); } static int -ksyms_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int32_t flag __unused, - struct thread *td __unused) +ksyms_mmap_single(struct cdev *dev, vm_ooffset_t *offset, vm_size_t size, + vm_object_t *objp, int nprot) { struct ksyms_softc *sc; + vm_object_t obj; int error; error = devfs_get_cdevpriv((void **)&sc); if (error != 0) return (error); - switch (cmd) { - case KIOCGSIZE: - /* - * Return the size (in bytes) of the symbol table - * snapshot. - */ - *(size_t *)data = sc->sc_usize; - break; - case KIOCGADDR: - /* - * Return the address of the symbol table snapshot. - * XXX - compat32 version of this? - */ - *(void **)data = (void *)sc->sc_uaddr; - break; - default: - error = ENOTTY; - break; - } + if (*offset < 0 || *offset >= round_page(sc->sc_objsz) || + size > round_page(sc->sc_objsz) - *offset || + (nprot & ~PROT_READ) != 0) + return (EINVAL); - return (error); -} - -static int -ksyms_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, - int prot __unused, vm_memattr_t *memattr __unused) -{ - struct ksyms_softc *sc; - int error; - - error = devfs_get_cdevpriv((void **)&sc); - if (error != 0) - return (error); - - /* - * XXX mmap() will actually map the symbol table into the process - * address space again. - */ - if (offset > round_page(sc->sc_usize) || - (*paddr = pmap_extract(sc->sc_pmap, - (vm_offset_t)sc->sc_uaddr + offset)) == 0) - return (-1); - + obj = sc->sc_obj; + vm_object_reference(obj); + *objp = obj; return (0); } static int -ksyms_close(struct cdev *dev, int flags __unused, int fmt __unused, - struct thread *td) -{ - struct ksyms_softc *sc; - int error; - - error = devfs_get_cdevpriv((void **)&sc); - if (error != 0) - return (error); - - /* Unmap the buffer from the process address space. */ - return (copyout_unmap(td, sc->sc_uaddr, sc->sc_usize)); -} - -static int ksyms_modevent(module_t mod __unused, int type, void *data __unused) { int error; @@ -571,7 +489,7 @@ ksyms_modevent(module_t mod __unused, int type, void * error = 0; switch (type) { case MOD_LOAD: - mtx_init(&ksyms_mtx, "KSyms mtx", NULL, MTX_DEF); + sx_init(&ksyms_mtx, "KSyms mtx"); ksyms_dev = make_dev(&ksyms_cdevsw, 0, UID_ROOT, GID_WHEEL, 0400, KSYMS_DNAME); break; @@ -579,7 +497,7 @@ ksyms_modevent(module_t mod __unused, int type, void * if (!LIST_EMPTY(&ksyms_list)) return (EBUSY); destroy_dev(ksyms_dev); - mtx_destroy(&ksyms_mtx); + sx_destroy(&ksyms_mtx); break; case MOD_SHUTDOWN: break;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201708030038.v730cD76021861>