Date: Mon, 01 Dec 2025 16:38:12 +0000 From: Mark Johnston <markj@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org Subject: git: f15e54924ccb - stable/15 - file: Add a fdclose method Message-ID: <692dc474.3750c.791912b3@gitrepo.freebsd.org>
index | next in thread | raw e-mail
The branch stable/15 has been updated by markj: URL: https://cgit.FreeBSD.org/src/commit/?id=f15e54924ccbf405972eeacd3c2ad65df29c4a8d commit f15e54924ccbf405972eeacd3c2ad65df29c4a8d Author: Mark Johnston <markj@FreeBSD.org> AuthorDate: 2025-11-16 15:49:39 +0000 Commit: Mark Johnston <markj@FreeBSD.org> CommitDate: 2025-12-01 16:37:57 +0000 file: Add a fdclose method Consider a program that creates a unix socket pair, transmits both sockets from one to the other using an SCM_RIGHTS message, and then closes both sockets without externalizing the message. unp_gc() is supposed to handle cleanup, but it is only triggered by uipc_detach(), which runs when a unix socket is destroyed. Because the two sockets are internalized, their refcounts are positive, so uipc_detach() isn't called. As a result, a userspace program can create an unbounded amount of garbage without triggering reclaim. Let's trigger garbage collection whenever a unix socket is close()d. To implement this, add new a fdclose file op and protocol op, and implement them accordingly. Since mqueuefs has a hack to hook into the file close path, convert it to use the new op as well. Now, userspace can't create garbage without triggering reclamation. Reviewed by: glebius, kib MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D53744 (cherry picked from commit 1390bba42caf53a00fa370f3844cd7b3725ed4ec) --- sys/kern/kern_descrip.c | 8 ++----- sys/kern/sys_socket.c | 12 ++++++++++ sys/kern/uipc_mqueue.c | 59 +++++++++++++++++++++++-------------------------- sys/kern/uipc_socket.c | 3 ++- sys/kern/uipc_usrreq.c | 27 ++++++++++++++++++---- sys/sys/file.h | 2 ++ sys/sys/mqueue.h | 5 ----- sys/sys/protosw.h | 7 ++++-- 8 files changed, 74 insertions(+), 49 deletions(-) diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 58b048d780f1..ebc82b8dc427 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -194,7 +194,6 @@ struct filedesc0 { */ static int __exclusive_cache_line openfiles; /* actual number of open files */ struct mtx sigio_lock; /* mtx to protect pointers to sigio */ -void __read_mostly (*mq_fdclose)(struct thread *td, int fd, struct file *fp); /* * If low >= size, just return low. Otherwise find the first zero bit in the @@ -1404,11 +1403,8 @@ closefp_impl(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, if (__predict_false(!TAILQ_EMPTY(&fdp->fd_kqlist))) knote_fdclose(td, fd); - /* - * We need to notify mqueue if the object is of type mqueue. - */ - if (__predict_false(fp->f_type == DTYPE_MQUEUE)) - mq_fdclose(td, fd, fp); + if (fp->f_ops->fo_fdclose != NULL) + fp->f_ops->fo_fdclose(fp, fd, td); FILEDESC_XUNLOCK(fdp); #ifdef AUDIT diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c index c221106ae067..a326b9935290 100644 --- a/sys/kern/sys_socket.c +++ b/sys/kern/sys_socket.c @@ -90,6 +90,7 @@ static fo_poll_t soo_poll; static fo_kqfilter_t soo_kqfilter; static fo_stat_t soo_stat; static fo_close_t soo_close; +static fo_fdclose_t soo_fdclose; static fo_chmod_t soo_chmod; static fo_fill_kinfo_t soo_fill_kinfo; static fo_aio_queue_t soo_aio_queue; @@ -105,6 +106,7 @@ const struct fileops socketops = { .fo_kqfilter = soo_kqfilter, .fo_stat = soo_stat, .fo_close = soo_close, + .fo_fdclose = soo_fdclose, .fo_chmod = soo_chmod, .fo_chown = invfo_chown, .fo_sendfile = invfo_sendfile, @@ -362,6 +364,16 @@ soo_close(struct file *fp, struct thread *td) return (error); } +static void +soo_fdclose(struct file *fp, int fd __unused, struct thread *td) +{ + struct socket *so; + + so = fp->f_data; + if (so->so_proto->pr_fdclose != NULL) + so->so_proto->pr_fdclose(so); +} + static int soo_chmod(struct file *fp, mode_t mode, struct ucred *cred, struct thread *td) { diff --git a/sys/kern/uipc_mqueue.c b/sys/kern/uipc_mqueue.c index b808da088ffd..e26a21a6efe0 100644 --- a/sys/kern/uipc_mqueue.c +++ b/sys/kern/uipc_mqueue.c @@ -267,7 +267,6 @@ static int _mqueue_send(struct mqueue *mq, struct mqueue_msg *msg, static int _mqueue_recv(struct mqueue *mq, struct mqueue_msg **msg, int timo); static void mqueue_send_notification(struct mqueue *mq); -static void mqueue_fdclose(struct thread *td, int fd, struct file *fp); static void mq_proc_exit(void *arg, struct proc *p); /* @@ -688,7 +687,6 @@ mqfs_init(struct vfsconf *vfc) mqfs_fixup_dir(root); exit_tag = EVENTHANDLER_REGISTER(process_exit, mq_proc_exit, NULL, EVENTHANDLER_PRI_ANY); - mq_fdclose = mqueue_fdclose; p31b_setcfg(CTL_P1003_1B_MESSAGE_PASSING, _POSIX_MESSAGE_PASSING); mqfs_osd_jail_slot = osd_jail_register(NULL, methods); return (0); @@ -2472,35 +2470,6 @@ sys_kmq_notify(struct thread *td, struct kmq_notify_args *uap) return (kern_kmq_notify(td, uap->mqd, evp)); } -static void -mqueue_fdclose(struct thread *td, int fd, struct file *fp) -{ - struct mqueue *mq; -#ifdef INVARIANTS - struct filedesc *fdp; - - fdp = td->td_proc->p_fd; - FILEDESC_LOCK_ASSERT(fdp); -#endif - - if (fp->f_ops == &mqueueops) { - mq = FPTOMQ(fp); - mtx_lock(&mq->mq_mutex); - notifier_remove(td->td_proc, mq, fd); - - /* have to wakeup thread in same process */ - if (mq->mq_flags & MQ_RSEL) { - mq->mq_flags &= ~MQ_RSEL; - selwakeup(&mq->mq_rsel); - } - if (mq->mq_flags & MQ_WSEL) { - mq->mq_flags &= ~MQ_WSEL; - selwakeup(&mq->mq_wsel); - } - mtx_unlock(&mq->mq_mutex); - } -} - static void mq_proc_exit(void *arg __unused, struct proc *p) { @@ -2566,6 +2535,33 @@ mqf_close(struct file *fp, struct thread *td) return (0); } +static void +mqf_fdclose(struct file *fp, int fd, struct thread *td) +{ + struct mqueue *mq; +#ifdef INVARIANTS + struct filedesc *fdp; + + fdp = td->td_proc->p_fd; + FILEDESC_LOCK_ASSERT(fdp); +#endif + + mq = FPTOMQ(fp); + mtx_lock(&mq->mq_mutex); + notifier_remove(td->td_proc, mq, fd); + + /* have to wakeup thread in same process */ + if (mq->mq_flags & MQ_RSEL) { + mq->mq_flags &= ~MQ_RSEL; + selwakeup(&mq->mq_rsel); + } + if (mq->mq_flags & MQ_WSEL) { + mq->mq_flags &= ~MQ_WSEL; + selwakeup(&mq->mq_wsel); + } + mtx_unlock(&mq->mq_mutex); +} + static int mqf_stat(struct file *fp, struct stat *st, struct ucred *active_cred) { @@ -2694,6 +2690,7 @@ static const struct fileops mqueueops = { .fo_kqfilter = mqf_kqfilter, .fo_stat = mqf_stat, .fo_close = mqf_close, + .fo_fdclose = mqf_fdclose, .fo_chmod = mqf_chmod, .fo_chown = mqf_chown, .fo_sendfile = invfo_sendfile, diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index eb9544628137..00aa5f9309b2 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -54,7 +54,8 @@ * consumer of a socket is starting to tear down the socket, and that the * protocol should terminate the connection. Historically, pr_abort() also * detached protocol state from the socket state, but this is no longer the - * case. + * case. pr_fdclose() is called when userspace invokes close(2) on a socket + * file descriptor. * * socreate() creates a socket and attaches protocol state. This is a public * interface that may be used by socket layer consumers to create new diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 05e267b8ae2b..6569422a3710 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -792,13 +792,19 @@ uipc_connect2(struct socket *so1, struct socket *so2) return (0); } +static void +maybe_schedule_gc(void) +{ + if (atomic_load_int(&unp_rights) != 0) + taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1); +} + static void uipc_detach(struct socket *so) { struct unpcb *unp, *unp2; struct mtx *vplock; struct vnode *vp; - int local_unp_rights; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_detach: unp == NULL")); @@ -854,7 +860,6 @@ uipc_detach(struct socket *so) UNP_REF_LIST_UNLOCK(); UNP_PCB_LOCK(unp); - local_unp_rights = unp_rights; unp->unp_socket->so_pcb = NULL; unp->unp_socket = NULL; free(unp->unp_addr, M_SONAME); @@ -865,8 +870,7 @@ uipc_detach(struct socket *so) mtx_unlock(vplock); vrele(vp); } - if (local_unp_rights) - taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1); + maybe_schedule_gc(); switch (so->so_type) { case SOCK_STREAM: @@ -902,6 +906,18 @@ uipc_disconnect(struct socket *so) return (0); } +static void +uipc_fdclose(struct socket *so __unused) +{ + /* + * Ensure that userspace can't create orphaned file descriptors without + * triggering garbage collection. Triggering GC from uipc_detach() is + * not sufficient, since that's only closed once a socket reference + * count drops to zero. + */ + maybe_schedule_gc(); +} + static int uipc_listen(struct socket *so, int backlog, struct thread *td) { @@ -4374,6 +4390,7 @@ static struct protosw streamproto = { .pr_connect2 = uipc_connect2, .pr_detach = uipc_detach, .pr_disconnect = uipc_disconnect, + .pr_fdclose = uipc_fdclose, .pr_listen = uipc_listen, .pr_peeraddr = uipc_peeraddr, .pr_send = uipc_sendfile, @@ -4404,6 +4421,7 @@ static struct protosw dgramproto = { .pr_connect2 = uipc_connect2, .pr_detach = uipc_detach, .pr_disconnect = uipc_disconnect, + .pr_fdclose = uipc_fdclose, .pr_peeraddr = uipc_peeraddr, .pr_sosend = uipc_sosend_dgram, .pr_sense = uipc_sense, @@ -4428,6 +4446,7 @@ static struct protosw seqpacketproto = { .pr_connect2 = uipc_connect2, .pr_detach = uipc_detach, .pr_disconnect = uipc_disconnect, + .pr_fdclose = uipc_fdclose, .pr_listen = uipc_listen, .pr_peeraddr = uipc_peeraddr, .pr_sense = uipc_sense, diff --git a/sys/sys/file.h b/sys/sys/file.h index b2bd62f244da..b537daa27ff5 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -114,6 +114,7 @@ typedef int fo_kqfilter_t(struct file *fp, struct knote *kn); typedef int fo_stat_t(struct file *fp, struct stat *sb, struct ucred *active_cred); typedef int fo_close_t(struct file *fp, struct thread *td); +typedef void fo_fdclose_t(struct file *fp, int fd, struct thread *td); typedef int fo_chmod_t(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td); typedef int fo_chown_t(struct file *fp, uid_t uid, gid_t gid, @@ -151,6 +152,7 @@ struct fileops { fo_kqfilter_t *fo_kqfilter; fo_stat_t *fo_stat; fo_close_t *fo_close; + fo_fdclose_t *fo_fdclose; fo_chmod_t *fo_chmod; fo_chown_t *fo_chown; fo_sendfile_t *fo_sendfile; diff --git a/sys/sys/mqueue.h b/sys/sys/mqueue.h index 50f6681ce218..211f09d57427 100644 --- a/sys/sys/mqueue.h +++ b/sys/sys/mqueue.h @@ -37,9 +37,4 @@ struct mq_attr { long __reserved[4]; /* Ignored for input, zeroed for output */ }; -#ifdef _KERNEL -struct thread; -struct file; -extern void (*mq_fdclose)(struct thread *td, int fd, struct file *fp); -#endif #endif diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h index 7fb8dfdc7208..392ff7cf678e 100644 --- a/sys/sys/protosw.h +++ b/sys/sys/protosw.h @@ -94,6 +94,7 @@ typedef int pr_sopoll_t(struct socket *, int, struct thread *); typedef int pr_kqfilter_t(struct socket *, struct knote *); typedef void pr_sosetlabel_t(struct socket *); typedef void pr_close_t(struct socket *); +typedef void pr_fdclose_t(struct socket *); typedef int pr_bindat_t(int, struct socket *, struct sockaddr *, struct thread *); typedef int pr_connectat_t(int, struct socket *, struct sockaddr *, @@ -120,8 +121,8 @@ struct protosw { pr_detach_t *pr_detach; /* destruction: sofree() */ pr_connect_t *pr_connect; /* connect(2) */ pr_disconnect_t *pr_disconnect; /* sodisconnect() */ - pr_close_t *pr_close; /* close(2) */ - pr_shutdown_t *pr_shutdown; /* shutdown(2) */ + pr_close_t *pr_close; /* soclose(), socket refcount is 0 */ + pr_fdclose_t *pr_fdclose; /* close(2) */ pr_rcvd_t *pr_rcvd; /* soreceive_generic() if PR_WANTRCVD */ pr_aio_queue_t *pr_aio_queue; /* aio(9) */ /* Cache line #3 */ @@ -142,7 +143,9 @@ struct protosw { pr_sosetlabel_t *pr_sosetlabel; /* MAC, XXXGL: remove */ pr_setsbopt_t *pr_setsbopt; /* Socket buffer ioctls */ pr_chmod_t *pr_chmod; /* fchmod(2) */ +/* Cache line #5 */ pr_kqfilter_t *pr_kqfilter; /* kevent(2) */ + pr_shutdown_t *pr_shutdown; /* shutdown(2) */ }; #endif /* defined(_KERNEL) || defined(_WANT_PROTOSW) */ #ifdef _KERNELhelp
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?692dc474.3750c.791912b3>
