Date: Tue, 17 Mar 2026 19:50:28 +0000 From: John Baldwin <jhb@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: 2cf15144daf7 - main - lindebugfs: Pass user buffer pointers to the read/write file operations Message-ID: <69b9b084.3b9bc.65156cd1@gitrepo.freebsd.org>
index | next in thread | raw e-mail
The branch main has been updated by jhb: URL: https://cgit.FreeBSD.org/src/commit/?id=2cf15144daf7ec44cdcd9bf3ef007939b79c361e commit 2cf15144daf7ec44cdcd9bf3ef007939b79c361e Author: John Baldwin <jhb@FreeBSD.org> AuthorDate: 2026-03-17 19:45:34 +0000 Commit: John Baldwin <jhb@FreeBSD.org> CommitDate: 2026-03-17 19:45:34 +0000 lindebugfs: Pass user buffer pointers to the read/write file operations The Linux file_operations API expects the read and write operations to take a single user buffer pointer (along with the length and the file offset as an in/out parameter). However, the debugfs_fill function was violating this part of the contract as it was passing down kernel pointers instead. An earlier commit (5668c22a13c6befa9b8486387d38457c40ce7af4) hacked around this by modifying simple_read_from_buffer() to treat its user pointer argument as a kernel pointer instead. However, other commits keep tripping over this same API mismatch (e.g. 78e25e65bf381303c8bdac9a713ab7b26a854b8c passes a kernel pointer to copy_from_user in fops_str_write). Instead, change debugfs_fill to use the "raw" pseudofs mode where the uio is passed down to directly to the fill callback rather than an sbuf. debufs_fill now iterates over the iovec in the uio similar to the implementation of uiomove invoking the read or write operation on each user pointer. This also fixes a tiny bug where the initial file offset from uio_offset was ignored. Instead, the operations were always invoked with a file offset of 0. As part of this, revert the the changes to simple_read_from_buffer() from commit 5668c22a13c6befa9b8486387d38457c40ce7af4. Also as part of this, the simple_attr_read/write methods and seq_read now also need to accept and handle user pointers (also matching the API in Linux). For simple_attr_write*(), copy the user buffer into a kernel buffer before parsing. Also, do not permit writes at an offset as it's unclear what the semantics for those would even be (perhaps you would write out the formatted value into a buffer first and then allow the copy_from_user to overwrite/extend that buffer and then re-parse the integer value?). The old handling of *ppos for writes was definitely wrong before and only worked for an offset of 0 anyway. Reviewed by: bz Sponsored by: AFRL, DARPA Differential Revision: https://reviews.freebsd.org/D55833 --- sys/compat/lindebugfs/lindebugfs.c | 83 ++++++++++++++-------- sys/compat/linuxkpi/common/include/linux/fs.h | 30 ++++---- .../linuxkpi/common/include/linux/seq_file.h | 2 +- sys/compat/linuxkpi/common/src/linux_seq_file.c | 2 +- sys/compat/linuxkpi/common/src/linux_simple_attr.c | 35 +++++---- 5 files changed, 94 insertions(+), 58 deletions(-) diff --git a/sys/compat/lindebugfs/lindebugfs.c b/sys/compat/lindebugfs/lindebugfs.c index 857546f61e55..88b92afd374a 100644 --- a/sys/compat/lindebugfs/lindebugfs.c +++ b/sys/compat/lindebugfs/lindebugfs.c @@ -117,9 +117,14 @@ debugfs_fill(PFS_FILL_ARGS) struct dentry_meta *d; struct linux_file lf = {}; struct vnode vn; - char *buf; - int rc; - off_t off = 0; + struct iovec *iov; + size_t cnt, orig_resid; + ssize_t rc; + off_t off; + + /* Linux file operations assume a pointer to a user buffer. */ + if (uio->uio_segflg != UIO_USERSPACE) + return (EOPNOTSUPP); if ((rc = linux_set_current_flags(curthread, M_NOWAIT))) return (rc); @@ -130,42 +135,64 @@ debugfs_fill(PFS_FILL_ARGS) rc = d->dm_fops->open(&vn, &lf); if (rc < 0) { #ifdef INVARIANTS - printf("%s:%d open failed with %d\n", __func__, __LINE__, rc); + printf("%s:%d open failed with %zd\n", __func__, __LINE__, rc); #endif return (-rc); } - rc = -ENODEV; - switch (uio->uio_rw) { - case UIO_READ: - if (d->dm_fops->read != NULL) { - rc = -ENOMEM; - buf = malloc(sb->s_size, M_DFSINT, M_ZERO | M_NOWAIT); - if (buf != NULL) { - rc = d->dm_fops->read(&lf, buf, sb->s_size, - &off); - if (rc > 0) - sbuf_bcpy(sb, buf, strlen(buf)); - - free(buf, M_DFSINT); - } + off = uio->uio_offset; + orig_resid = uio->uio_resid; + while (uio->uio_resid > 0) { + KASSERT(uio->uio_iovcnt > 0, + ("%s: uio %p iovcnt underflow", __func__, uio)); + + iov = uio->uio_iov; + cnt = iov->iov_len; + if (cnt == 0) { + uio->uio_iov++; + uio->uio_iovcnt--; + continue; } - break; - case UIO_WRITE: - if (d->dm_fops->write != NULL) { - sbuf_finish(sb); - rc = d->dm_fops->write(&lf, sbuf_data(sb), sbuf_len(sb), - &off); + if (cnt > uio->uio_resid) + cnt = uio->uio_resid; + + switch (uio->uio_rw) { + case UIO_READ: + if (d->dm_fops->read != NULL) + rc = d->dm_fops->read(&lf, iov->iov_base, cnt, + &off); + else + rc = -ENODEV; + break; + case UIO_WRITE: + if (d->dm_fops->write != NULL) + rc = d->dm_fops->write(&lf, iov->iov_base, cnt, + &off); + else + rc = -ENODEV; + break; } - break; + + if (rc <= 0) + break; + + iov->iov_base = (char *)iov->iov_base + rc; + iov->iov_len -= rc; + uio->uio_resid -= rc; + uio->uio_offset = off; } if (d->dm_fops->release) d->dm_fops->release(&vn, &lf); + /* Return success for short operations. */ + if (orig_resid != uio->uio_resid) + rc = 0; + if (rc < 0) { #ifdef INVARIANTS - printf("%s:%d read/write failed with %d\n", __func__, __LINE__, rc); + printf("%s:%d read/write failed with %zd\n", __func__, __LINE__, + rc); #endif return (-rc); } @@ -207,7 +234,7 @@ debugfs_create_file(const char *name, umode_t mode, flags = fops->write ? PFS_RDWR : PFS_RD; pfs_create_file(pnode, &dnode->d_pfs_node, name, debugfs_fill, - debugfs_attr, NULL, debugfs_destroy, flags | PFS_NOWAIT); + debugfs_attr, NULL, debugfs_destroy, flags | PFS_RAW | PFS_NOWAIT); if (dnode->d_pfs_node == NULL) { free(dm, M_DFSINT); return (NULL); @@ -671,7 +698,7 @@ fops_str_read(struct file *filp, char __user *ubuf, size_t read_size, } static ssize_t -fops_str_write(struct file *filp, const char *buf, size_t write_size, +fops_str_write(struct file *filp, const char __user *buf, size_t write_size, loff_t *ppos) { char *old, *new; diff --git a/sys/compat/linuxkpi/common/include/linux/fs.h b/sys/compat/linuxkpi/common/include/linux/fs.h index f1568ad6282d..40e1b396fe86 100644 --- a/sys/compat/linuxkpi/common/include/linux/fs.h +++ b/sys/compat/linuxkpi/common/include/linux/fs.h @@ -364,8 +364,9 @@ static inline ssize_t simple_read_from_buffer(void __user *dest, size_t read_size, loff_t *ppos, void *orig, size_t buf_size) { - void *p, *read_pos = ((char *) orig) + *ppos; + void *read_pos = ((char *) orig) + *ppos; size_t buf_remain = buf_size - *ppos; + ssize_t num_read; if (buf_remain < 0 || buf_remain > buf_size) return -EINVAL; @@ -373,18 +374,13 @@ simple_read_from_buffer(void __user *dest, size_t read_size, loff_t *ppos, if (read_size > buf_remain) read_size = buf_remain; - /* - * XXX At time of commit only debugfs consumers could be - * identified. If others will use this function we may - * have to revise this: normally we would call copy_to_user() - * here but lindebugfs will return the result and the - * copyout is done elsewhere for us. - */ - p = memcpy(dest, read_pos, read_size); - if (p != NULL) - *ppos += read_size; - - return (read_size); + /* copy_to_user returns number of bytes NOT read */ + num_read = read_size - copy_to_user(dest, read_pos, read_size); + if (num_read == 0) + return -EFAULT; + *ppos += num_read; + + return (num_read); } MALLOC_DECLARE(M_LSATTR); @@ -415,11 +411,13 @@ int simple_attr_open(struct inode *inode, struct file *filp, int simple_attr_release(struct inode *inode, struct file *filp); -ssize_t simple_attr_read(struct file *filp, char *buf, size_t read_size, loff_t *ppos); +ssize_t simple_attr_read(struct file *filp, char __user *buf, size_t read_size, + loff_t *ppos); -ssize_t simple_attr_write(struct file *filp, const char *buf, size_t write_size, loff_t *ppos); +ssize_t simple_attr_write(struct file *filp, const char __user *buf, + size_t write_size, loff_t *ppos); -ssize_t simple_attr_write_signed(struct file *filp, const char *buf, +ssize_t simple_attr_write_signed(struct file *filp, const char __user *buf, size_t write_size, loff_t *ppos); #endif /* _LINUXKPI_LINUX_FS_H_ */ diff --git a/sys/compat/linuxkpi/common/include/linux/seq_file.h b/sys/compat/linuxkpi/common/include/linux/seq_file.h index 3c7862890c67..786c09bd6a20 100644 --- a/sys/compat/linuxkpi/common/include/linux/seq_file.h +++ b/sys/compat/linuxkpi/common/include/linux/seq_file.h @@ -85,7 +85,7 @@ struct seq_operations { int (*show) (struct seq_file *m, void *v); }; -ssize_t seq_read(struct linux_file *, char *, size_t, off_t *); +ssize_t seq_read(struct linux_file *, char __user *, size_t, off_t *); int seq_write(struct seq_file *seq, const void *data, size_t len); void seq_putc(struct seq_file *m, char c); void seq_puts(struct seq_file *m, const char *str); diff --git a/sys/compat/linuxkpi/common/src/linux_seq_file.c b/sys/compat/linuxkpi/common/src/linux_seq_file.c index b1d53aa2db60..eae414ea696e 100644 --- a/sys/compat/linuxkpi/common/src/linux_seq_file.c +++ b/sys/compat/linuxkpi/common/src/linux_seq_file.c @@ -40,7 +40,7 @@ MALLOC_DEFINE(M_LSEQ, "seq_file", "seq_file"); ssize_t -seq_read(struct linux_file *f, char *ubuf, size_t size, off_t *ppos) +seq_read(struct linux_file *f, char __user *ubuf, size_t size, off_t *ppos) { struct seq_file *m; struct sbuf *sbuf; diff --git a/sys/compat/linuxkpi/common/src/linux_simple_attr.c b/sys/compat/linuxkpi/common/src/linux_simple_attr.c index 88fa908e47bb..e5514194cb33 100644 --- a/sys/compat/linuxkpi/common/src/linux_simple_attr.c +++ b/sys/compat/linuxkpi/common/src/linux_simple_attr.c @@ -99,7 +99,8 @@ simple_attr_release(struct inode *inode, struct file *filp) * On failure, negative signed ERRNO */ ssize_t -simple_attr_read(struct file *filp, char *buf, size_t read_size, loff_t *ppos) +simple_attr_read(struct file *filp, char __user *buf, size_t read_size, + loff_t *ppos) { struct simple_attr *sattr; uint64_t data; @@ -146,29 +147,38 @@ unlock: * On failure, negative signed ERRNO */ static ssize_t -simple_attr_write_common(struct file *filp, const char *buf, size_t write_size, - loff_t *ppos, bool is_signed) +simple_attr_write_common(struct file *filp, const char __user *ubuf, + size_t write_size, loff_t *ppos, bool is_signed) { struct simple_attr *sattr; unsigned long long data; - size_t bufsize; + char *buf; ssize_t ret; sattr = filp->private_data; - bufsize = strlen(buf) + 1; if (sattr->set == NULL) return (-EFAULT); - if (*ppos >= bufsize || write_size < 1) + if (*ppos != 0 || write_size < 1) return (-EINVAL); + buf = malloc(write_size, M_LSATTR, M_WAITOK); + if (copy_from_user(buf, ubuf, write_size) != 0) { + free(buf, M_LSATTR); + return (-EFAULT); + } + if (strnlen(buf, write_size) == write_size) { + free(buf, M_LSATTR); + return (-EINVAL); + } + mutex_lock(&sattr->mutex); if (is_signed) - ret = kstrtoll(buf + *ppos, 0, &data); + ret = kstrtoll(buf, 0, &data); else - ret = kstrtoull(buf + *ppos, 0, &data); + ret = kstrtoull(buf, 0, &data); if (ret) goto unlock; @@ -176,23 +186,24 @@ simple_attr_write_common(struct file *filp, const char *buf, size_t write_size, if (ret) goto unlock; - ret = bufsize - *ppos; + ret = write_size; unlock: mutex_unlock(&sattr->mutex); + free(buf, M_LSATTR); return (ret); } ssize_t -simple_attr_write(struct file *filp, const char *buf, size_t write_size, +simple_attr_write(struct file *filp, const char __user *buf, size_t write_size, loff_t *ppos) { return (simple_attr_write_common(filp, buf, write_size, ppos, false)); } ssize_t -simple_attr_write_signed(struct file *filp, const char *buf, size_t write_size, - loff_t *ppos) +simple_attr_write_signed(struct file *filp, const char __user *buf, + size_t write_size, loff_t *ppos) { return (simple_attr_write_common(filp, buf, write_size, ppos, true)); }home | help
Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?69b9b084.3b9bc.65156cd1>
