Date:      Fri, 27 Dec 2019 11:19:57 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r356119 - in head: share/man/man9 sys/kern sys/sys
Message-ID:  <201912271119.xBRBJvFF007947@repo.freebsd.org>

Author: mjg
Date: Fri Dec 27 11:19:57 2019
New Revision: 356119
URL: https://svnweb.freebsd.org/changeset/base/356119

Log:
  Add read-mostly sleepable locks
  
  To be used like rmlocks, except when sleeping for readers needs to be
  allowed. See the manpage for more information.
  
  Reviewed by:	kib (previous version)
  Differential Revision:	https://reviews.freebsd.org/D22823

Modified:
  head/share/man/man9/rmlock.9
  head/sys/kern/kern_rmlock.c
  head/sys/sys/_rmlock.h
  head/sys/sys/rmlock.h

Modified: head/share/man/man9/rmlock.9
==============================================================================
--- head/share/man/man9/rmlock.9	Fri Dec 27 05:01:13 2019	(r356118)
+++ head/share/man/man9/rmlock.9	Fri Dec 27 11:19:57 2019	(r356119)
@@ -26,7 +26,7 @@
 .\" $FreeBSD$
 .\"
 .\" Based on rwlock.9 man page
-.Dd November 11, 2017
+.Dd December 27, 2019
 .Dt RMLOCK 9
 .Os
 .Sh NAME
@@ -43,7 +43,13 @@
 .Nm rm_sleep ,
 .Nm rm_assert ,
 .Nm RM_SYSINIT ,
-.Nm RM_SYSINIT_FLAGS
+.Nm RM_SYSINIT_FLAGS ,
+.Nm rms_init ,
+.Nm rms_destroy ,
+.Nm rms_rlock ,
+.Nm rms_wlock ,
+.Nm rms_runlock ,
+.Nm rms_wunlock
 .Nd kernel reader/writer lock optimized for read-mostly access patterns
 .Sh SYNOPSIS
 .In sys/param.h
@@ -77,6 +83,18 @@
 .In sys/kernel.h
 .Fn RM_SYSINIT "name" "struct rmlock *rm" "const char *desc"
 .Fn RM_SYSINIT_FLAGS "name" "struct rmlock *rm" "const char *desc" "int flags"
+.Ft void
+.Fn rms_init "struct rmslock *rms" "const char *name"
+.Ft void
+.Fn rms_destroy "struct rmslock *rms"
+.Ft void
+.Fn rms_rlock "struct rmslock *rms"
+.Ft void
+.Fn rms_wlock "struct rmslock *rms"
+.Ft void
+.Fn rms_runlock "struct rmslock *rms"
+.Ft void
+.Fn rms_wunlock "struct rmslock *rms"
 .Sh DESCRIPTION
 Read-mostly locks allow shared access to protected data by multiple threads,
 or exclusive access by a single thread.
@@ -113,22 +131,22 @@ Readers can recurse if the lock is initialized with th
 option;
 however, writers are never allowed to recurse.
 .Pp
-Sleepable read-mostly locks are created by passing
+Sleeping for writers can be allowed by passing
 .Dv RM_SLEEPABLE
 to
 .Fn rm_init_flags .
-Unlike normal read-mostly locks,
-sleepable read-mostly locks follow the same lock ordering rules as
+It changes the lock ordering rules to be the same as for
 .Xr sx 9
 locks.
-Sleepable read-mostly locks do not propagate priority to writers,
-but they do propagate priority to readers.
-Writers are permitted to sleep while holding a read-mostly lock,
-but readers are not.
-Unlike other sleepable locks such as
+They do not propagate priority to writers, but they do propagate priority to
+readers.
+Note that readers are not permitted to sleep regardless of the flag.
+.Pp
+Sleepable read-mostly locks (created with
+.Fn rms_init )
+allow sleeping for both readers and writers, but do not do priority
+propagation for either.
+They follow
 .Xr sx 9
-locks,
-readers must use try operations on other sleepable locks to avoid sleeping.
+lock ordering.
 .Ss Macros and Functions
 .Bl -tag -width indent
 .It Fn rm_init "struct rmlock *rm" "const char *name"
@@ -286,6 +304,43 @@ Assert that the current thread does not hold a recursi
 .Fa rm .
 .El
 .El
+.Bl -tag -width indent
+.It Fn rms_init "struct rmslock *rms" "const char *name"
+Initialize the sleepable read-mostly lock
+.Fa rms .
+The
+.Fa name
+description is used as the
+.Fa wmesg
+parameter to the
+.Xr msleep 9
+routine.
+This function must be called before any other operations on the lock.
+.It Fn rms_rlock "struct rmslock *rms"
+Lock
+.Fa rms
+as a reader.
+If any thread holds this lock exclusively, the current thread blocks.
+.It Fn rms_wlock "struct rmslock *rms"
+Lock
+.Fa rms
+as a writer.
+If the lock is already taken, the current thread blocks.
+The
+.Fn rms_wlock
+function cannot be called recursively.
+.It Fn rms_runlock "struct rmslock *rms"
+This function releases a shared lock previously acquired by
+.Fn rms_rlock .
+.It Fn rms_wunlock "struct rmslock *rms"
+This function releases an exclusive lock previously acquired by
+.Fn rms_wlock .
+.It Fn rms_destroy "struct rmslock *rms"
+This function destroys a lock previously initialized with
+.Fn rms_init .
+The
+.Fa rms
+lock must be unlocked.
+.El
 .Sh SEE ALSO
 .Xr locking 9 ,
 .Xr mutex 9 ,
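
To make the interface documented above concrete, here is a minimal usage
sketch; the foo_* names and the protected integer are hypothetical, with
includes taken from the SYNOPSIS:

/*
 * Minimal usage sketch for the rms_* interface documented above.
 * The foo_* names and the protected integer are hypothetical.
 */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

static struct rmslock foo_lock;
static int foo_value;

static void
foo_setup(void)
{

	rms_init(&foo_lock, "foolock");	/* "foolock" becomes the wmesg */
}

static int
foo_read(void)
{
	int v;

	rms_rlock(&foo_lock);	/* cheap; the reader may sleep if needed */
	v = foo_value;
	rms_runlock(&foo_lock);
	return (v);
}

static void
foo_write(int v)
{

	rms_wlock(&foo_lock);	/* expected to be rare and expensive */
	foo_value = v;
	rms_wunlock(&foo_lock);
}

static void
foo_teardown(void)
{

	rms_destroy(&foo_lock);	/* must be unlocked at this point */
}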

Modified: head/sys/kern/kern_rmlock.c
==============================================================================
--- head/sys/kern/kern_rmlock.c	Fri Dec 27 05:01:13 2019	(r356118)
+++ head/sys/kern/kern_rmlock.c	Fri Dec 27 11:19:57 2019	(r356119)
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/turnstile.h>
 #include <sys/lock_profile.h>
 #include <machine/cpu.h>
+#include <vm/uma.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
@@ -853,3 +854,241 @@ db_show_rm(const struct lock_object *lock)
 	lc->lc_ddb_show(&rm->rm_wlock_object);
 }
 #endif
+
+/*
+ * Read-mostly sleepable locks.
+ *
+ * These primitives allow both readers and writers to sleep. However, neither
+ * readers nor writers are tracked and subsequently there is no priority
+ * propagation.
+ *
+ * They are intended to be only used when write-locking is almost never needed
+ * (e.g., they can guard against unloading a kernel module) while read-locking
+ * happens all the time.
+ *
+ * Concurrent writers take turns taking the lock while going off cpu. If this is
+ * of concern for your usecase, this is not the right primitive.
+ *
+ * Neither rms_rlock nor rms_runlock use fences. Instead compiler barriers are
+ * inserted to prevent reordering of generated code. Execution ordering is
+ * provided with the use of an IPI handler.
+ */
+
+void
+rms_init(struct rmslock *rms, const char *name)
+{
+
+	rms->writers = 0;
+	rms->readers = 0;
+	mtx_init(&rms->mtx, name, NULL, MTX_DEF | MTX_NEW);
+	rms->readers_pcpu = uma_zalloc_pcpu(pcpu_zone_int, M_WAITOK | M_ZERO);
+	rms->readers_influx = uma_zalloc_pcpu(pcpu_zone_int, M_WAITOK | M_ZERO);
+}
+
+void
+rms_destroy(struct rmslock *rms)
+{
+
+	MPASS(rms->writers == 0);
+	MPASS(rms->readers == 0);
+	mtx_destroy(&rms->mtx);
+	uma_zfree_pcpu(pcpu_zone_int, rms->readers_pcpu);
+	uma_zfree_pcpu(pcpu_zone_int, rms->readers_influx);
+}
+
+static void __noinline
+rms_rlock_fallback(struct rmslock *rms)
+{
+
+	(*zpcpu_get(rms->readers_influx)) = 0;
+	critical_exit();
+
+	mtx_lock(&rms->mtx);
+	MPASS(*zpcpu_get(rms->readers_pcpu) == 0);
+	while (rms->writers > 0)
+		msleep(&rms->readers, &rms->mtx, PUSER - 1, mtx_name(&rms->mtx), 0);
+	(*zpcpu_get(rms->readers_pcpu))++;
+	mtx_unlock(&rms->mtx);
+}
+
+void
+rms_rlock(struct rmslock *rms)
+{
+	int *influx;
+
+	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
+
+	critical_enter();
+	influx = zpcpu_get(rms->readers_influx);
+	__compiler_membar();
+	*influx = 1;
+	__compiler_membar();
+	if (__predict_false(rms->writers > 0)) {
+		rms_rlock_fallback(rms);
+		return;
+	}
+	__compiler_membar();
+	(*zpcpu_get(rms->readers_pcpu))++;
+	__compiler_membar();
+	*influx = 0;
+	critical_exit();
+}
+
+static void __noinline
+rms_runlock_fallback(struct rmslock *rms)
+{
+
+	(*zpcpu_get(rms->readers_influx)) = 0;
+	critical_exit();
+
+	mtx_lock(&rms->mtx);
+	MPASS(*zpcpu_get(rms->readers_pcpu) == 0);
+	MPASS(rms->writers > 0);
+	MPASS(rms->readers > 0);
+	rms->readers--;
+	if (rms->readers == 0)
+		wakeup_one(&rms->writers);
+	mtx_unlock(&rms->mtx);
+}
+
+void
+rms_runlock(struct rmslock *rms)
+{
+	int *influx;
+
+	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
+
+	critical_enter();
+	influx = zpcpu_get(rms->readers_influx);
+	__compiler_membar();
+	*influx = 1;
+	__compiler_membar();
+	if (__predict_false(rms->writers > 0)) {
+		rms_runlock_fallback(rms);
+		return;
+	}
+	__compiler_membar();
+	(*zpcpu_get(rms->readers_pcpu))--;
+	__compiler_membar();
+	*influx = 0;
+	critical_exit();
+}
+
+struct rmslock_ipi {
+	struct rmslock *rms;
+	cpuset_t signal;
+};
+
+static void
+rms_wlock_IPI(void *arg)
+{
+	struct rmslock_ipi *rmsipi;
+	struct rmslock *rms;
+	int readers;
+
+	rmsipi = arg;
+	rms = rmsipi->rms;
+
+	if (*zpcpu_get(rms->readers_influx))
+		return;
+	readers = zpcpu_replace(rms->readers_pcpu, 0);
+	if (readers != 0)
+		atomic_add_int(&rms->readers, readers);
+	CPU_CLR_ATOMIC(curcpu, &rmsipi->signal);
+}
+
+static void
+rms_wlock_switch(struct rmslock *rms)
+{
+	struct rmslock_ipi rmsipi;
+	int *in_op;
+	int cpu;
+
+	MPASS(rms->readers == 0);
+	MPASS(rms->writers == 1);
+
+	rmsipi.rms = rms;
+
+	/*
+	 * Publishes rms->writers. rlock and runlock will get this ordered
+	 * via IPI in the worst case.
+	 */
+	atomic_thread_fence_rel();
+
+	/*
+	 * Collect reader counts from all CPUs using an IPI. The handler may
+	 * interrupt a CPU in the middle of rlock or runlock, in which case
+	 * it fails.
+	 *
+	 * Successful attempts clear the cpu id in the bitmap.
+	 *
+	 * In case of failure we wait for each failing CPU to clear its
+	 * influx marker before making the next attempt. Note that threads
+	 * which have the marker set have preemption disabled.  Setting of
+	 * readers_influx only uses compiler barriers making these loads
+	 * unreliable, which is fine -- the IPI handler will always see the
+	 * correct result.
+	 *
+	 * We retry until all counts are collected. Forward progress is
+	 * guaranteed by the fact that the total number of threads which can
+	 * be caught like this is finite and they all are going to block on
+	 * their own.
+	 */
+	CPU_COPY(&all_cpus, &rmsipi.signal);
+	for (;;) {
+		smp_rendezvous_cpus(
+		    rmsipi.signal,
+		    smp_no_rendezvous_barrier,
+		    rms_wlock_IPI,
+		    smp_no_rendezvous_barrier,
+		    &rmsipi);
+
+		if (CPU_EMPTY(&rmsipi.signal))
+			break;
+
+		CPU_FOREACH(cpu) {
+			if (!CPU_ISSET(cpu, &rmsipi.signal))
+				continue;
+			in_op = zpcpu_get_cpu(rms->readers_influx, cpu);
+			while (atomic_load_int(in_op))
+				cpu_spinwait();
+		}
+	}
+}
+
+void
+rms_wlock(struct rmslock *rms)
+{
+
+	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
+
+	mtx_lock(&rms->mtx);
+	rms->writers++;
+	if (rms->writers > 1) {
+		msleep(&rms->writers, &rms->mtx, (PUSER - 1) | PDROP,
+		    mtx_name(&rms->mtx), 0);
+		MPASS(rms->readers == 0);
+		return;
+	}
+
+	rms_wlock_switch(rms);
+
+	if (rms->readers > 0)
+		msleep(&rms->writers, &rms->mtx, (PUSER - 1) | PDROP,
+		    mtx_name(&rms->mtx), 0);
+	else
+		mtx_unlock(&rms->mtx);
+	MPASS(rms->readers == 0);
+}
+
+void
+rms_wunlock(struct rmslock *rms)
+{
+
+	mtx_lock(&rms->mtx);
+	MPASS(rms->writers >= 1);
+	MPASS(rms->readers == 0);
+	rms->writers--;
+	if (rms->writers > 0)
+		wakeup_one(&rms->writers);
+	else
+		wakeup(&rms->readers);
+	mtx_unlock(&rms->mtx);
+}
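
The comment at the top of this block names guarding against kernel module
unload as the intended use. A hedged sketch of that pattern (all hook_*
names are made up for illustration):

/*
 * Illustrative sketch of the use case named in the comment above:
 * readers call through a hook under rms_rlock(), while the rare
 * teardown path uses rms_wlock() to wait out and fence off readers.
 * All hook_* names are hypothetical.
 */
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

static struct rmslock hook_lock;
static int (*hook_fn)(void *);

static int
hook_call(void *arg)
{
	int error;

	rms_rlock(&hook_lock);		/* scales with readers; may sleep */
	if (hook_fn != NULL)
		error = hook_fn(arg);
	else
		error = ENXIO;
	rms_runlock(&hook_lock);
	return (error);
}

static void
hook_unregister(void)
{

	rms_wlock(&hook_lock);		/* drains all readers, goes off cpu */
	hook_fn = NULL;
	rms_wunlock(&hook_lock);
}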

Modified: head/sys/sys/_rmlock.h
==============================================================================
--- head/sys/sys/_rmlock.h	Fri Dec 27 05:01:13 2019	(r356118)
+++ head/sys/sys/_rmlock.h	Fri Dec 27 11:19:57 2019	(r356119)
@@ -68,4 +68,14 @@ struct rm_priotracker {
 	LIST_ENTRY(rm_priotracker) rmp_qentry;
 };
 
+#include <sys/_mutex.h>
+
+struct rmslock {
+	struct mtx mtx;
+	int	writers;
+	int	readers;
+	int	*readers_pcpu;
+	int	*readers_influx;
+};
+
 #endif /* !_SYS__RMLOCK_H_ */
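
For reference, the roles of the new fields as they are used by
kern_rmlock.c above (an annotated copy; the comments are editorial):

/*
 * Annotated copy of the new structure; the comments summarize how
 * kern_rmlock.c above uses each field.
 */
struct rmslock {
	struct mtx mtx;		/* serializes writers and the slow paths */
	int	writers;	/* pending/active writers; > 0 diverts new
				   readers to the mtx-protected slow path */
	int	readers;	/* readers collected off the per-CPU counters
				   by rms_wlock_switch(); the writer sleeps
				   until this drops to 0 */
	int	*readers_pcpu;	/* per-CPU reader counts (fast path) */
	int	*readers_influx;/* per-CPU flag: this CPU is currently
				   inside rms_rlock()/rms_runlock() */
};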

Modified: head/sys/sys/rmlock.h
==============================================================================
--- head/sys/sys/rmlock.h	Fri Dec 27 05:01:13 2019	(r356118)
+++ head/sys/sys/rmlock.h	Fri Dec 27 11:19:57 2019	(r356119)
@@ -133,5 +133,12 @@ struct rm_args {
 #define	rm_assert(rm, what)
 #endif
 
+void	rms_init(struct rmslock *rms, const char *name);
+void	rms_destroy(struct rmslock *rms);
+void	rms_rlock(struct rmslock *rms);
+void	rms_runlock(struct rmslock *rms);
+void	rms_wlock(struct rmslock *rms);
+void	rms_wunlock(struct rmslock *rms);
+
 #endif /* _KERNEL */
 #endif /* !_SYS_RMLOCK_H_ */


