From owner-svn-src-head@FreeBSD.ORG Mon Jan 12 07:48:23 2015 Return-Path: Delivered-To: svn-src-head@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [8.8.178.115]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 2CB1CCB7; Mon, 12 Jan 2015 07:48:23 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 0E867314; Mon, 12 Jan 2015 07:48:23 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.9/8.14.9) with ESMTP id t0C7mMZ5048708; Mon, 12 Jan 2015 07:48:22 GMT (envelope-from kib@FreeBSD.org) Received: (from kib@localhost) by svn.freebsd.org (8.14.9/8.14.9/Submit) id t0C7mMWc048707; Mon, 12 Jan 2015 07:48:22 GMT (envelope-from kib@FreeBSD.org) Message-Id: <201501120748.t0C7mMWc048707@svn.freebsd.org> X-Authentication-Warning: svn.freebsd.org: kib set sender to kib@FreeBSD.org using -f From: Konstantin Belousov Date: Mon, 12 Jan 2015 07:48:22 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r277051 - head/sys/amd64/amd64 X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.18-1 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 12 Jan 2015 07:48:23 -0000 Author: kib Date: Mon Jan 12 07:48:22 2015 New Revision: 277051 URL: https://svnweb.freebsd.org/changeset/base/277051 Log: Fix several issues with /dev/mem and /dev/kmem devices on amd64. For /dev/mem, when requested physical address is not accessible by the direct map, do temporal remaping with the caching attribute 'uncached'. Limit the accessible addresses by MAXPHYADDR, since the architecture disallowes writing non-zero into reserved bits of ptes (or setting garbage into NX). For /dev/kmem, only access existing kernel mappings for direct map region. For all other addresses, obtain a physical address of the mapping and fall back to the /dev/mem mechanism. This ensures that /dev/kmem i/o does not fault even if the accessed region is changed in parallel, by using either direct map or temporal mapping. For both devices, operate on one page by iteration. Do not return error if any bytes were moved around, return the (partial) bytes count to userspace. Reviewed by: alc Tested by: pho Sponsored by: The FreeBSD Foundation MFC after: 1 week Modified: head/sys/amd64/amd64/mem.c Modified: head/sys/amd64/amd64/mem.c ============================================================================== --- head/sys/amd64/amd64/mem.c Mon Jan 12 07:43:19 2015 (r277050) +++ head/sys/amd64/amd64/mem.c Mon Jan 12 07:48:22 2015 (r277051) @@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include @@ -77,13 +78,15 @@ int memrw(struct cdev *dev, struct uio *uio, int flags) { struct iovec *iov; - u_long c, v, vd; - int error, o, sflags; - vm_offset_t addr, eaddr; + void *p; + ssize_t orig_resid; + u_long v, vd; + u_int c; + int error, sflags; error = 0; - c = 0; sflags = curthread_pflags_set(TDP_DEVMEMIO); + orig_resid = uio->uio_resid; while (uio->uio_resid > 0 && error == 0) { iov = uio->uio_iov; if (iov->iov_len == 0) { @@ -93,63 +96,68 @@ memrw(struct cdev *dev, struct uio *uio, panic("memrw"); continue; } - if (dev2unit(dev) == CDEV_MINOR_MEM) { - v = uio->uio_offset; -kmemphys: - o = v & PAGE_MASK; - c = min(uio->uio_resid, (u_int)(PAGE_SIZE - o)); - vd = PHYS_TO_DMAP_RAW(v); - if (vd < DMAP_MIN_ADDRESS || - (vd > DMAP_MIN_ADDRESS + dmaplimit && - vd <= DMAP_MAX_ADDRESS) || - (pmap_kextract(vd) == 0 && (v & PG_FRAME) != 0)) { - error = EFAULT; - goto ret; - } - error = uiomove((void *)vd, (int)c, uio); - continue; - } else if (dev2unit(dev) == CDEV_MINOR_KMEM) { - v = uio->uio_offset; + v = uio->uio_offset; + c = ulmin(iov->iov_len, PAGE_SIZE - (u_int)(v & PAGE_MASK)); - if (v >= DMAP_MIN_ADDRESS && v < DMAP_MAX_ADDRESS) { - v = DMAP_TO_PHYS_RAW(v); - goto kmemphys; + switch (dev2unit(dev)) { + case CDEV_MINOR_KMEM: + /* + * Since c is clamped to be less or equal than + * PAGE_SIZE, the uiomove() call does not + * access past the end of the direct map. + */ + if (v >= DMAP_MIN_ADDRESS && + v < DMAP_MIN_ADDRESS + dmaplimit) { + error = uiomove((void *)v, c, uio); + break; } - c = iov->iov_len; + if (!kernacc((void *)v, c, uio->uio_rw == UIO_READ ? + VM_PROT_READ : VM_PROT_WRITE)) { + error = EFAULT; + break; + } /* - * Make sure that all of the pages are currently - * resident so that we don't create any zero-fill - * pages. + * If the extracted address is not accessible + * through the direct map, then we make a + * private (uncached) mapping because we can't + * depend on the existing kernel mapping + * remaining valid until the completion of + * uiomove(). + * + * XXX We cannot provide access to the + * physical page 0 mapped into KVA. */ - addr = trunc_page(v); - eaddr = round_page(v + c); - - if (addr < VM_MIN_KERNEL_ADDRESS) { + v = pmap_extract(kernel_pmap, v); + if (v == 0) { error = EFAULT; - goto ret; + break; } - for (; addr < eaddr; addr += PAGE_SIZE) { - if (pmap_extract(kernel_pmap, addr) == 0) { - error = EFAULT; - goto ret; - } + /* FALLTHROUGH */ + case CDEV_MINOR_MEM: + if (v < dmaplimit) { + vd = PHYS_TO_DMAP(v); + error = uiomove((void *)vd, c, uio); + break; } - if (!kernacc((caddr_t)(long)v, c, - uio->uio_rw == UIO_READ ? - VM_PROT_READ : VM_PROT_WRITE)) { + if (v >= (1ULL << cpu_maxphyaddr)) { error = EFAULT; - goto ret; + break; } - - error = uiomove((caddr_t)(long)v, (int)c, uio); - continue; + p = pmap_mapdev(v, PAGE_SIZE); + error = uiomove(p, c, uio); + pmap_unmapdev((vm_offset_t)p, PAGE_SIZE); + break; } - /* else panic! */ } -ret: curthread_pflags_restore(sflags); + /* + * Don't return error if any byte was written. Read and write + * can return error only if no i/o was performed. + */ + if (uio->uio_resid != orig_resid) + error = 0; return (error); }