Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 21 Sep 2023 04:13:04 GMT
From:      Zhenlei Huang <zlei@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: cf7974fd9e55 - main - sysctl: Update 'master' copy of vnet SYSCTLs on kernel environment variables change
Message-ID:  <202309210413.38L4D42w076513@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by zlei:

URL: https://cgit.FreeBSD.org/src/commit/?id=cf7974fd9e554552989237c3d6bc736d672ac7c6

commit cf7974fd9e554552989237c3d6bc736d672ac7c6
Author:     Zhenlei Huang <zlei@FreeBSD.org>
AuthorDate: 2023-09-21 04:11:28 +0000
Commit:     Zhenlei Huang <zlei@FreeBSD.org>
CommitDate: 2023-09-21 04:11:28 +0000

    sysctl: Update 'master' copy of vnet SYSCTLs on kernel environment variables change
    
    Complete phase three of 3da1cf1e88f8.
    
    With commit 110113bc086f, vnet sysctl variables can be loader tunable
    but the feature is limited. Once the kernel modules have been initialized,
    any changes (e.g. via kenv) to kernel environment variables will not affect
    subsequently created VNETs.
    
    This change relaxes the limitation by listening for kernel environment
    variable set / unset events, and then updating the 'master' copy of the
    vnet SYSCTL or restoring it to its initial value.
    
    With this change, TUNABLE_XXX_FETCH can be largely eliminated for vnet
    loader tunables.
    
    Reviewed by:    glebius
    Fixes:  110113bc086f sysctl(9): Enable vnet sysctl variables to be loader tunable
    MFC after:      2 weeks
    Differential Revision:  https://reviews.freebsd.org/D41825
---
 sys/kern/kern_environment.c |   3 ++
 sys/kern/kern_sysctl.c      | 107 +++++++++++++++++++++++++++++++++++++++++++-
 sys/kern/link_elf.c         |   2 +
 sys/kern/link_elf_obj.c     |   8 ++++
 sys/net/vnet.c              |  33 ++++++++++++++
 sys/net/vnet.h              |   6 +++
 sys/sys/eventhandler.h      |   5 +++
 7 files changed, 162 insertions(+), 2 deletions(-)

diff --git a/sys/kern/kern_environment.c b/sys/kern/kern_environment.c
index 761734674bdf..a0967d044a96 100644
--- a/sys/kern/kern_environment.c
+++ b/sys/kern/kern_environment.c
@@ -38,6 +38,7 @@
 
 #include <sys/cdefs.h>
 #include <sys/param.h>
+#include <sys/eventhandler.h>
 #include <sys/systm.h>
 #include <sys/kenv.h>
 #include <sys/kernel.h>
@@ -666,6 +667,7 @@ kern_setenv(const char *name, const char *value)
 		kenvp[i + 1] = NULL;
 		mtx_unlock(&kenv_lock);
 	}
+	EVENTHANDLER_INVOKE(setenv, name);
 	return (0);
 }
 
@@ -689,6 +691,7 @@ kern_unsetenv(const char *name)
 		kenvp[i] = NULL;
 		mtx_unlock(&kenv_lock);
 		zfree(oldenv, M_KENV);
+		EVENTHANDLER_INVOKE(unsetenv, name);
 		return (0);
 	}
 	mtx_unlock(&kenv_lock);
diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
index a1d502d58bff..780eb6099b07 100644
--- a/sys/kern/kern_sysctl.c
+++ b/sys/kern/kern_sysctl.c
@@ -127,6 +127,7 @@ static int	sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del,
 		    int recurse);
 static int	sysctl_old_kernel(struct sysctl_req *, const void *, size_t);
 static int	sysctl_new_kernel(struct sysctl_req *, void *, size_t);
+static int	name2oid(char *, int *, int *, struct sysctl_oid **);
 
 static struct sysctl_oid *
 sysctl_find_oidname(const char *name, struct sysctl_oid_list *list)
@@ -512,8 +513,14 @@ sysctl_register_oid(struct sysctl_oid *oidp)
 	if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE &&
 	    (oidp->oid_kind & CTLFLAG_TUN) != 0 &&
 	    (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) {
-		/* only fetch value once */
-		oidp->oid_kind |= CTLFLAG_NOFETCH;
+#ifdef VIMAGE
+		/*
+		 * Can fetch value multiple times for VNET loader tunables.
+		 * Only fetch once for non-VNET loader tunables.
+		 */
+		if ((oidp->oid_kind & CTLFLAG_VNET) == 0)
+#endif
+			oidp->oid_kind |= CTLFLAG_NOFETCH;
 		/* try to fetch value from kernel environment */
 		sysctl_load_tunable_by_oid_locked(oidp);
 	}
@@ -969,6 +976,102 @@ sysctl_register_all(void *arg)
 }
 SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, NULL);
 
+#ifdef VIMAGE
+static void
+sysctl_setenv_vnet(void *arg __unused, char *name)
+{
+	struct sysctl_oid *oidp;
+	int oid[CTL_MAXNAME];
+	int error, nlen;
+
+	SYSCTL_WLOCK();
+	error = name2oid(name, oid, &nlen, &oidp);
+	if (error)
+		goto out;
+
+	if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE &&
+	    (oidp->oid_kind & CTLFLAG_VNET) != 0 &&
+	    (oidp->oid_kind & CTLFLAG_TUN) != 0 &&
+	    (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) {
+		/* Update value from kernel environment */
+		sysctl_load_tunable_by_oid_locked(oidp);
+	}
+out:
+	SYSCTL_WUNLOCK();
+}
+
+static void
+sysctl_unsetenv_vnet(void *arg __unused, char *name)
+{
+	struct sysctl_oid *oidp;
+	int oid[CTL_MAXNAME];
+	int error, nlen;
+
+	SYSCTL_WLOCK();
+	/*
+	 * The setenv / unsetenv event handlers are invoked by kern_setenv() /
+	 * kern_unsetenv() without exclusive locks. It is rare but still possible
+	 * that the invocation order of the event handlers differs from that of
+	 * kern_setenv() and kern_unsetenv().
+	 * Re-check environment variable string to make sure it is unset.
+	 */
+	if (testenv(name))
+		goto out;
+	error = name2oid(name, oid, &nlen, &oidp);
+	if (error)
+		goto out;
+
+	if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE &&
+	    (oidp->oid_kind & CTLFLAG_VNET) != 0 &&
+	    (oidp->oid_kind & CTLFLAG_TUN) != 0 &&
+	    (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) {
+		size_t size;
+
+		switch (oidp->oid_kind & CTLTYPE) {
+		case CTLTYPE_INT:
+		case CTLTYPE_UINT:
+			size = sizeof(int);
+			break;
+		case CTLTYPE_LONG:
+		case CTLTYPE_ULONG:
+			size = sizeof(long);
+			break;
+		case CTLTYPE_S8:
+		case CTLTYPE_U8:
+			size = sizeof(int8_t);
+			break;
+		case CTLTYPE_S16:
+		case CTLTYPE_U16:
+			size = sizeof(int16_t);
+			break;
+		case CTLTYPE_S32:
+		case CTLTYPE_U32:
+			size = sizeof(int32_t);
+			break;
+		case CTLTYPE_S64:
+		case CTLTYPE_U64:
+			size = sizeof(int64_t);
+			break;
+		case CTLTYPE_STRING:
+			MPASS(oidp->oid_arg2 > 0);
+			size = oidp->oid_arg2;
+			break;
+		default:
+			goto out;
+		}
+		vnet_restore_init(oidp->oid_arg1, size);
+	}
+out:
+	SYSCTL_WUNLOCK();
+}
+
+/*
+ * Register the kernel's setenv / unsetenv events.
+ */
+EVENTHANDLER_DEFINE(setenv, sysctl_setenv_vnet, NULL, EVENTHANDLER_PRI_ANY);
+EVENTHANDLER_DEFINE(unsetenv, sysctl_unsetenv_vnet, NULL, EVENTHANDLER_PRI_ANY);
+#endif
+
 /*
  * "Staff-functions"
  *
diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c
index 568f1e1dbd95..eb7ce3828deb 100644
--- a/sys/kern/link_elf.c
+++ b/sys/kern/link_elf.c
@@ -506,6 +506,7 @@ link_elf_init(void* arg)
 	TAILQ_INIT(&set_pcpu_list);
 #ifdef VIMAGE
 	TAILQ_INIT(&set_vnet_list);
+	vnet_save_init((void *)VNET_START, VNET_STOP - VNET_START);
 #endif
 }
 
@@ -767,6 +768,7 @@ parse_vnet(elf_file_t ef)
 		return (ENOSPC);
 	}
 	memcpy((void *)ef->vnet_base, (void *)ef->vnet_start, size);
+	vnet_save_init((void *)ef->vnet_base, size);
 	elf_set_add(&set_vnet_list, ef->vnet_start, ef->vnet_stop,
 	    ef->vnet_base);
 
diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c
index d4ad963e8181..0b2befc02c1a 100644
--- a/sys/kern/link_elf_obj.c
+++ b/sys/kern/link_elf_obj.c
@@ -547,6 +547,8 @@ link_elf_link_preload(linker_class_t cls, const char *filename,
 				memcpy(vnet_data, ef->progtab[pb].addr,
 				    ef->progtab[pb].size);
 				ef->progtab[pb].addr = vnet_data;
+				vnet_save_init(ef->progtab[pb].addr,
+				    ef->progtab[pb].size);
 #endif
 			} else if ((ef->progtab[pb].name != NULL &&
 			    strcmp(ef->progtab[pb].name, ".ctors") == 0) ||
@@ -1120,6 +1122,12 @@ link_elf_load_file(linker_class_t cls, const char *filename,
 			} else
 				bzero(ef->progtab[pb].addr, shdr[i].sh_size);
 
+#ifdef VIMAGE
+			if (ef->progtab[pb].addr != (void *)mapbase &&
+			    strcmp(ef->progtab[pb].name, VNET_SETNAME) == 0)
+				vnet_save_init(ef->progtab[pb].addr,
+				    ef->progtab[pb].size);
+#endif
 			/* Update all symbol values with the offset. */
 			for (j = 0; j < ef->ddbsymcnt; j++) {
 				es = &ef->ddbsymtab[j];
diff --git a/sys/net/vnet.c b/sys/net/vnet.c
index c4a623698341..ac937125a19d 100644
--- a/sys/net/vnet.c
+++ b/sys/net/vnet.c
@@ -178,6 +178,11 @@ static MALLOC_DEFINE(M_VNET_DATA, "vnet_data", "VNET data");
  */
 VNET_DEFINE_STATIC(char, modspace[VNET_MODMIN] __aligned(__alignof(void *)));
 
+/*
+ * A copy of the initial values of all virtualized global variables.
+ */
+static uintptr_t vnet_init_var;
+
 /*
  * Global lists of subsystem constructor and destructors for vnets.  They are
  * registered via VNET_SYSINIT() and VNET_SYSUNINIT().  Both lists are
@@ -356,6 +361,7 @@ vnet_data_startup(void *dummy __unused)
 	df->vnd_len = VNET_MODMIN;
 	TAILQ_INSERT_HEAD(&vnet_data_free_head, df, vnd_link);
 	sx_init(&vnet_data_free_lock, "vnet_data alloc lock");
+	vnet_init_var = (uintptr_t)malloc(VNET_BYTES, M_VNET_DATA, M_WAITOK);
 }
 SYSINIT(vnet_data, SI_SUB_KLD, SI_ORDER_FIRST, vnet_data_startup, NULL);
 
@@ -473,6 +479,33 @@ vnet_data_copy(void *start, int size)
 	VNET_LIST_RUNLOCK();
 }
 
+/*
+ * Save a copy of the initial values of virtualized global variables.
+ */
+void
+vnet_save_init(void *start, size_t size)
+{
+	MPASS(vnet_init_var != 0);
+	MPASS(VNET_START <= (uintptr_t)start &&
+	    (uintptr_t)start + size <= VNET_STOP);
+	memcpy((void *)(vnet_init_var + ((uintptr_t)start - VNET_START)),
+	    start, size);
+}
+
+/*
+ * Restore the 'master' copies of virtualized global variables to their
+ * initial values.
+ */
+void
+vnet_restore_init(void *start, size_t size)
+{
+	MPASS(vnet_init_var != 0);
+	MPASS(VNET_START <= (uintptr_t)start &&
+	    (uintptr_t)start + size <= VNET_STOP);
+	memcpy(start,
+	    (void *)(vnet_init_var + ((uintptr_t)start - VNET_START)), size);
+}
+
 /*
  * Support for special SYSINIT handlers registered via VNET_SYSINIT()
  * and VNET_SYSUNINIT().
diff --git a/sys/net/vnet.h b/sys/net/vnet.h
index 1d37fe85eec3..5485889ceaa7 100644
--- a/sys/net/vnet.h
+++ b/sys/net/vnet.h
@@ -311,6 +311,12 @@ void	*vnet_data_alloc(int size);
 void	 vnet_data_copy(void *start, int size);
 void	 vnet_data_free(void *start_arg, int size);
 
+/*
+ * Interfaces to manipulate the initial values of virtualized global variables.
+ */
+void    vnet_save_init(void *, size_t);
+void    vnet_restore_init(void *, size_t);
+
 /*
  * Virtual sysinit mechanism, allowing network stack components to declare
  * startup and shutdown methods to be run when virtual network stack
diff --git a/sys/sys/eventhandler.h b/sys/sys/eventhandler.h
index 47024ecf87a9..c0d9811dd1b9 100644
--- a/sys/sys/eventhandler.h
+++ b/sys/sys/eventhandler.h
@@ -326,4 +326,9 @@ struct ifaddr;
 typedef void (*rt_addrmsg_fn)(void *, struct ifaddr *, int);
 EVENTHANDLER_DECLARE(rt_addrmsg, rt_addrmsg_fn);
 
+/* Kernel environment variable change event */
+typedef void (*env_change_fn)(void *, const char *);
+EVENTHANDLER_DECLARE(setenv, env_change_fn);
+EVENTHANDLER_DECLARE(unsetenv, env_change_fn);
+
 #endif /* _SYS_EVENTHANDLER_H_ */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202309210413.38L4D42w076513>