Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 12 Jul 2019 18:43:24 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r349951 - in head: share/man/man9 sys/amd64/amd64 sys/arm/arm sys/arm64/arm64 sys/i386/i386 sys/kern sys/mips/mips sys/powerpc/powerpc sys/riscv/riscv sys/sparc64/sparc64
Message-ID:  <201907121843.x6CIhOPG044234@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kib
Date: Fri Jul 12 18:43:24 2019
New Revision: 349951
URL: https://svnweb.freebsd.org/changeset/base/349951

Log:
  Provide protection against starvation of the ll/sc loops when accessing userpace.
  
  Casueword(9) on ll/sc architectures must be prepared for userspace
  constantly modifying the same cache line as containing the CAS word,
  and not loop infinitely.  Otherwise, rogue userspace livelocks the
  kernel.
  
  To fix the issue, change casueword(9) interface to return new value 1
  indicating that either comparision or store failed, instead of relying
  on the oldval == *oldvalp comparison.  The primitive no longer retries
  the operation if it failed spuriously.  Modify callers of
  casueword(9), all in kern_umtx.c, to handle retries, and react to
  stops and requests to terminate between retries.
  
  On x86, despite cmpxchg should not return spurious failures, we can
  take advantage of the new interface and just return PSL.ZF.
  
  Reviewed by:	andrew (arm64, previous version), markj
  Tested by:	pho
  Reported by:	https://xenbits.xen.org/xsa/advisory-295.txt
  Sponsored by:	The FreeBSD Foundation
  MFC after:	2 weeks
  Differential revision:	https://reviews.freebsd.org/D20772

Modified:
  head/share/man/man9/casuword.9
  head/sys/amd64/amd64/support.S
  head/sys/arm/arm/fusu.S
  head/sys/arm64/arm64/support.S
  head/sys/i386/i386/copyout.c
  head/sys/kern/kern_umtx.c
  head/sys/mips/mips/support.S
  head/sys/powerpc/powerpc/copyinout.c
  head/sys/riscv/riscv/support.S
  head/sys/sparc64/sparc64/support.S
  head/sys/sparc64/sparc64/vm_machdep.c

Modified: head/share/man/man9/casuword.9
==============================================================================
--- head/share/man/man9/casuword.9	Fri Jul 12 18:39:41 2019	(r349950)
+++ head/share/man/man9/casuword.9	Fri Jul 12 18:43:24 2019	(r349951)
@@ -1,4 +1,4 @@
-.\" Copyright (c) 2014 The FreeBSD Foundation
+.\" Copyright (c) 2014, 2019 The FreeBSD Foundation
 .\" All rights reserved.
 .\"
 .\" Part of this documentation was written by
@@ -28,7 +28,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 21, 2014
+.Dd April 19, 2019
 .Dt CASU 9
 .Os
 .Sh NAME
@@ -106,7 +106,9 @@ The
 .Fn casueword
 and
 .Fn casueword32
-functions return 0 on success and -1 on failure.
+functions return 0 on success, -1 on failure to access memory,
+and 1 when comparison or store failed.
+The store can fail on load-linked/store-conditional architectures.
 .Sh SEE ALSO
 .Xr atomic 9 ,
 .Xr fetch 9 ,

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S	Fri Jul 12 18:39:41 2019	(r349950)
+++ head/sys/amd64/amd64/support.S	Fri Jul 12 18:43:24 2019	(r349951)
@@ -811,6 +811,7 @@ ENTRY(casueword32_nosmap)
 	lock
 #endif
 	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
+	setne	%cl
 
 	/*
 	 * The old value is in %eax.  If the store succeeded it will be the
@@ -828,6 +829,7 @@ ENTRY(casueword32_nosmap)
 	 */
 	movl	%esi,(%rdx)			/* oldp = %rdx */
 	POP_FRAME_POINTER
+	movzbl	%cl, %eax
 	ret
 END(casueword32_nosmap)
 
@@ -847,6 +849,7 @@ ENTRY(casueword32_smap)
 #endif
 	cmpxchgl %ecx,(%rdi)			/* new = %ecx */
 	clac
+	setne	%cl
 
 	/*
 	 * The old value is in %eax.  If the store succeeded it will be the
@@ -864,6 +867,7 @@ ENTRY(casueword32_smap)
 	 */
 	movl	%esi,(%rdx)			/* oldp = %rdx */
 	POP_FRAME_POINTER
+	movzbl	%cl, %eax
 	ret
 END(casueword32_smap)
 
@@ -886,6 +890,7 @@ ENTRY(casueword_nosmap)
 	lock
 #endif
 	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
+	setne	%cl
 
 	/*
 	 * The old value is in %rax.  If the store succeeded it will be the
@@ -897,6 +902,7 @@ ENTRY(casueword_nosmap)
 	movq	%rax,PCB_ONFAULT(%r8)
 	movq	%rsi,(%rdx)
 	POP_FRAME_POINTER
+	movzbl	%cl, %eax
 	ret
 END(casueword_nosmap)
 
@@ -916,6 +922,7 @@ ENTRY(casueword_smap)
 #endif
 	cmpxchgq %rcx,(%rdi)			/* new = %rcx */
 	clac
+	setne	%cl
 
 	/*
 	 * The old value is in %rax.  If the store succeeded it will be the
@@ -927,6 +934,7 @@ ENTRY(casueword_smap)
 	movq	%rax,PCB_ONFAULT(%r8)
 	movq	%rsi,(%rdx)
 	POP_FRAME_POINTER
+	movzbl	%cl, %eax
 	ret
 END(casueword_smap)
 

Modified: head/sys/arm/arm/fusu.S
==============================================================================
--- head/sys/arm/arm/fusu.S	Fri Jul 12 18:39:41 2019	(r349950)
+++ head/sys/arm/arm/fusu.S	Fri Jul 12 18:43:24 2019	(r349951)
@@ -63,7 +63,7 @@ EENTRY_NP(casueword32)
 	ldr	r4, =(VM_MAXUSER_ADDRESS-3)
 	cmp	r0, r4
 	mvncs	r0, #0
-	bcs	2f
+	bcs	1f
 
 	GET_PCB(r6)
 	ldr	r6, [r6]
@@ -78,12 +78,10 @@ EENTRY_NP(casueword32)
 	str	r4, [r6, #PCB_ONFAULT]
 
 #if __ARM_ARCH >= 6
-1:
+	mov	r5, #1
 	ldrex	r4, [r0]
 	cmp	r4, r1
 	strexeq	r5, r3, [r0]
-	cmpeq	r5, #1
-	beq	1b
 #else
 	ldrt	r4, [r0]
 	cmp	r4, r1
@@ -92,7 +90,10 @@ EENTRY_NP(casueword32)
 	str	r4, [r2]
 	mov	r0, #0
 	str	r0, [r6, #PCB_ONFAULT]
-2:
+#if __ARM_ARCH >= 6
+	mov	r0, r5
+#endif
+1:
 	ldmfd	sp!, {r4, r5, r6}
 	RET
 EEND(casueword32)

Modified: head/sys/arm64/arm64/support.S
==============================================================================
--- head/sys/arm64/arm64/support.S	Fri Jul 12 18:39:41 2019	(r349950)
+++ head/sys/arm64/arm64/support.S	Fri Jul 12 18:43:24 2019	(r349951)
@@ -57,17 +57,17 @@ ENTRY(casueword32)
 	cmp	x0, x4
 	b.cs	fsu_fault_nopcb
 	adr	x6, fsu_fault		/* Load the fault handler */
+	mov	w5, #1
 	SET_FAULT_HANDLER(x6, x4)	/* And set it */
 	ENTER_USER_ACCESS(w6, x4)
 1:	ldxr	w4, [x0]		/* Load-exclusive the data */
 	cmp	w4, w1			/* Compare */
 	b.ne	2f			/* Not equal, exit */
 	stxr	w5, w3, [x0]		/* Store the new data */
-	cbnz	w5, 1b			/* Retry on failure */
 2:	EXIT_USER_ACCESS(w6)
-	SET_FAULT_HANDLER(xzr, x5)	/* Reset the fault handler */
+	SET_FAULT_HANDLER(xzr, x6)	/* Reset the fault handler */
 	str	w4, [x2]		/* Store the read data */
-	mov	x0, #0			/* Success */
+	mov	w0, w5			/* Result same as store status */
 	ret				/* Return */
 END(casueword32)
 
@@ -79,17 +79,17 @@ ENTRY(casueword)
 	cmp	x0, x4
 	b.cs	fsu_fault_nopcb
 	adr	x6, fsu_fault		/* Load the fault handler */
+	mov	w5, #1
 	SET_FAULT_HANDLER(x6, x4)	/* And set it */
 	ENTER_USER_ACCESS(w6, x4)
 1:	ldxr	x4, [x0]		/* Load-exclusive the data */
 	cmp	x4, x1			/* Compare */
 	b.ne	2f			/* Not equal, exit */
 	stxr	w5, x3, [x0]		/* Store the new data */
-	cbnz	w5, 1b			/* Retry on failure */
 2:	EXIT_USER_ACCESS(w6)
-	SET_FAULT_HANDLER(xzr, x5)	/* Reset the fault handler */
+	SET_FAULT_HANDLER(xzr, x6)	/* Reset the fault handler */
 	str	x4, [x2]		/* Store the read data */
-	mov	x0, #0			/* Success */
+	mov	w0, w5			/* Result same as store status */
 	ret				/* Return */
 END(casueword)
 

Modified: head/sys/i386/i386/copyout.c
==============================================================================
--- head/sys/i386/i386/copyout.c	Fri Jul 12 18:39:41 2019	(r349950)
+++ head/sys/i386/i386/copyout.c	Fri Jul 12 18:43:24 2019	(r349951)
@@ -428,6 +428,7 @@ suword32(volatile void *base, int32_t word)
 struct casueword_arg0 {
 	uint32_t oldval;
 	uint32_t newval;
+	int res;
 };
 
 static void
@@ -436,7 +437,8 @@ casueword_slow0(vm_offset_t kva, void *arg)
 	struct casueword_arg0 *ca;
 
 	ca = arg;
-	atomic_fcmpset_int((u_int *)kva, &ca->oldval, ca->newval);
+	ca->res = 1 - atomic_fcmpset_int((u_int *)kva, &ca->oldval,
+	    ca->newval);
 }
 
 int
@@ -452,7 +454,7 @@ casueword32(volatile uint32_t *base, uint32_t oldval, 
 	    casueword_slow0, &ca);
 	if (res == 0) {
 		*oldvalp = ca.oldval;
-		return (0);
+		return (ca.res);
 	}
 	return (-1);
 }
@@ -469,7 +471,7 @@ casueword(volatile u_long *base, u_long oldval, u_long
 	    casueword_slow0, &ca);
 	if (res == 0) {
 		*oldvalp = ca.oldval;
-		return (0);
+		return (ca.res);
 	}
 	return (-1);
 }

Modified: head/sys/kern/kern_umtx.c
==============================================================================
--- head/sys/kern/kern_umtx.c	Fri Jul 12 18:39:41 2019	(r349950)
+++ head/sys/kern/kern_umtx.c	Fri Jul 12 18:43:24 2019	(r349951)
@@ -690,8 +690,26 @@ umtxq_count_pi(struct umtx_key *key, struct umtx_q **f
 	return (0);
 }
 
+/*
+ * Check for possible stops and suspensions while executing a umtx
+ * locking operation.
+ *
+ * The sleep argument controls whether the function can handle a stop
+ * request itself or it should return ERESTART and the request is
+ * proceed at the kernel/user boundary in ast.
+ *
+ * Typically, when retrying due to casueword(9) failure (rv == 1), we
+ * should handle the stop requests there, with exception of cases when
+ * the thread busied the umtx key, or when functions return
+ * immediately if umtxq_check_susp() returned non-zero.  On the other
+ * hand, retrying the whole lock operation, we better not stop there
+ * but delegate the handling to ast.
+ *
+ * If the request is for thread termination P_SINGLE_EXIT, we cannot
+ * handle it at all, and simply return EINTR.
+ */
 static int
-umtxq_check_susp(struct thread *td)
+umtxq_check_susp(struct thread *td, bool sleep)
 {
 	struct proc *p;
 	int error;
@@ -710,7 +728,7 @@ umtxq_check_susp(struct thread *td)
 		if (p->p_flag & P_SINGLE_EXIT)
 			error = EINTR;
 		else
-			error = ERESTART;
+			error = sleep ? thread_suspend_check(0) : ERESTART;
 	}
 	PROC_UNLOCK(p);
 	return (error);
@@ -1049,9 +1067,12 @@ do_lock_normal(struct thread *td, struct umutex *m, ui
 				    id | UMUTEX_CONTESTED);
 				if (rv == -1)
 					return (EFAULT);
-				if (owner == UMUTEX_RB_OWNERDEAD)
+				if (rv == 0) {
+					MPASS(owner == UMUTEX_RB_OWNERDEAD);
 					return (EOWNERDEAD); /* success */
-				rv = umtxq_check_susp(td);
+				}
+				MPASS(rv == 1);
+				rv = umtxq_check_susp(td, false);
 				if (rv != 0)
 					return (rv);
 				continue;
@@ -1070,13 +1091,16 @@ do_lock_normal(struct thread *td, struct umutex *m, ui
 				return (EFAULT);
 
 			/* The acquire succeeded. */
-			if (owner == UMUTEX_UNOWNED)
+			if (rv == 0) {
+				MPASS(owner == UMUTEX_UNOWNED);
 				return (0);
+			}
 
 			/*
 			 * If no one owns it but it is contested try
 			 * to acquire it.
 			 */
+			MPASS(rv == 1);
 			if (owner == UMUTEX_CONTESTED) {
 				rv = casueword32(&m->m_owner,
 				    UMUTEX_CONTESTED, &owner,
@@ -1084,20 +1108,27 @@ do_lock_normal(struct thread *td, struct umutex *m, ui
 				/* The address was invalid. */
 				if (rv == -1)
 					return (EFAULT);
-
-				if (owner == UMUTEX_CONTESTED)
+				if (rv == 0) {
+					MPASS(owner == UMUTEX_CONTESTED);
 					return (0);
+				}
+				if (rv == 1) {
+					rv = umtxq_check_susp(td, false);
+					if (rv != 0)
+						return (rv);
+				}
 
-				rv = umtxq_check_susp(td);
-				if (rv != 0)
-					return (rv);
-
 				/*
 				 * If this failed the lock has
 				 * changed, restart.
 				 */
 				continue;
 			}
+
+			/* rv == 1 but not contested, likely store failure */
+			rv = umtxq_check_susp(td, false);
+			if (rv != 0)
+				return (rv);
 		}
 
 		if (mode == _UMUTEX_TRY)
@@ -1128,14 +1159,21 @@ do_lock_normal(struct thread *td, struct umutex *m, ui
 		rv = casueword32(&m->m_owner, owner, &old,
 		    owner | UMUTEX_CONTESTED);
 
-		/* The address was invalid. */
-		if (rv == -1) {
+		/* The address was invalid or casueword failed to store. */
+		if (rv == -1 || rv == 1) {
 			umtxq_lock(&uq->uq_key);
 			umtxq_remove(uq);
 			umtxq_unbusy(&uq->uq_key);
 			umtxq_unlock(&uq->uq_key);
 			umtx_key_release(&uq->uq_key);
-			return (EFAULT);
+			if (rv == -1)
+				return (EFAULT);
+			if (rv == 1) {
+				rv = umtxq_check_susp(td, false);
+				if (rv != 0)
+					return (rv);
+			}
+			continue;
 		}
 
 		/*
@@ -1145,15 +1183,15 @@ do_lock_normal(struct thread *td, struct umutex *m, ui
 		 */
 		umtxq_lock(&uq->uq_key);
 		umtxq_unbusy(&uq->uq_key);
-		if (old == owner)
-			error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
-			    NULL : &timo);
+		MPASS(old == owner);
+		error = umtxq_sleep(uq, "umtxn", timeout == NULL ?
+		    NULL : &timo);
 		umtxq_remove(uq);
 		umtxq_unlock(&uq->uq_key);
 		umtx_key_release(&uq->uq_key);
 
 		if (error == 0)
-			error = umtxq_check_susp(td);
+			error = umtxq_check_susp(td, false);
 	}
 
 	return (0);
@@ -1170,6 +1208,8 @@ do_unlock_normal(struct thread *td, struct umutex *m, 
 	int error, count;
 
 	id = td->td_tid;
+
+again:
 	/*
 	 * Make sure we own this mtx.
 	 */
@@ -1185,9 +1225,14 @@ do_unlock_normal(struct thread *td, struct umutex *m, 
 		error = casueword32(&m->m_owner, owner, &old, newlock);
 		if (error == -1)
 			return (EFAULT);
-		if (old == owner)
-			return (0);
-		owner = old;
+		if (error == 1) {
+			error = umtxq_check_susp(td, false);
+			if (error != 0)
+				return (error);
+			goto again;
+		}
+		MPASS(old == owner);
+		return (0);
 	}
 
 	/* We should only ever be in here for contested locks */
@@ -1215,8 +1260,14 @@ do_unlock_normal(struct thread *td, struct umutex *m, 
 	umtx_key_release(&key);
 	if (error == -1)
 		return (EFAULT);
-	if (old != owner)
-		return (EINVAL);
+	if (error == 1) {
+		if (old != owner)
+			return (EINVAL);
+		error = umtxq_check_susp(td, false);
+		if (error != 0)
+			return (error);
+		goto again;
+	}
 	return (0);
 }
 
@@ -1233,6 +1284,7 @@ do_wake_umutex(struct thread *td, struct umutex *m)
 	int error;
 	int count;
 
+again:
 	error = fueword32(&m->m_owner, &owner);
 	if (error == -1)
 		return (EFAULT);
@@ -1259,14 +1311,27 @@ do_wake_umutex(struct thread *td, struct umutex *m)
 	    owner != UMUTEX_RB_NOTRECOV) {
 		error = casueword32(&m->m_owner, UMUTEX_CONTESTED, &owner,
 		    UMUTEX_UNOWNED);
-		if (error == -1)
+		if (error == -1) {
 			error = EFAULT;
+		} else if (error == 1) {
+			umtxq_lock(&key);
+			umtxq_unbusy(&key);
+			umtxq_unlock(&key);
+			umtx_key_release(&key);
+			error = umtxq_check_susp(td, false);
+			if (error != 0)
+				return (error);
+			goto again;
+		}
 	}
 
 	umtxq_lock(&key);
-	if (error == 0 && count != 0 && ((owner & ~UMUTEX_CONTESTED) == 0 ||
-	    owner == UMUTEX_RB_OWNERDEAD || owner == UMUTEX_RB_NOTRECOV))
+	if (error == 0 && count != 0) {
+		MPASS((owner & ~UMUTEX_CONTESTED) == 0 ||
+		    owner == UMUTEX_RB_OWNERDEAD ||
+		    owner == UMUTEX_RB_NOTRECOV);
 		umtxq_signal(&key, 1);
+	}
 	umtxq_unbusy(&key);
 	umtxq_unlock(&key);
 	umtx_key_release(&key);
@@ -1314,49 +1379,32 @@ do_wake2_umutex(struct thread *td, struct umutex *m, u
 	umtxq_busy(&key);
 	count = umtxq_count(&key);
 	umtxq_unlock(&key);
+
+	error = fueword32(&m->m_owner, &owner);
+	if (error == -1)
+		error = EFAULT;
+
 	/*
-	 * Only repair contention bit if there is a waiter, this means the mutex
-	 * is still being referenced by userland code, otherwise don't update
-	 * any memory.
+	 * Only repair contention bit if there is a waiter, this means
+	 * the mutex is still being referenced by userland code,
+	 * otherwise don't update any memory.
 	 */
-	if (count > 1) {
-		error = fueword32(&m->m_owner, &owner);
-		if (error == -1)
+	while (error == 0 && (owner & UMUTEX_CONTESTED) == 0 &&
+	    (count > 1 || (count == 1 && (owner & ~UMUTEX_CONTESTED) != 0))) {
+		error = casueword32(&m->m_owner, owner, &old,
+		    owner | UMUTEX_CONTESTED);
+		if (error == -1) {
 			error = EFAULT;
-		while (error == 0 && (owner & UMUTEX_CONTESTED) == 0) {
-			error = casueword32(&m->m_owner, owner, &old,
-			    owner | UMUTEX_CONTESTED);
-			if (error == -1) {
-				error = EFAULT;
-				break;
-			}
-			if (old == owner)
-				break;
-			owner = old;
-			error = umtxq_check_susp(td);
-			if (error != 0)
-				break;
+			break;
 		}
-	} else if (count == 1) {
-		error = fueword32(&m->m_owner, &owner);
-		if (error == -1)
-			error = EFAULT;
-		while (error == 0 && (owner & ~UMUTEX_CONTESTED) != 0 &&
-		    (owner & UMUTEX_CONTESTED) == 0) {
-			error = casueword32(&m->m_owner, owner, &old,
-			    owner | UMUTEX_CONTESTED);
-			if (error == -1) {
-				error = EFAULT;
-				break;
-			}
-			if (old == owner)
-				break;
-			owner = old;
-			error = umtxq_check_susp(td);
-			if (error != 0)
-				break;
+		if (error == 0) {
+			MPASS(old == owner);
+			break;
 		}
+		owner = old;
+		error = umtxq_check_susp(td, false);
 	}
+
 	umtxq_lock(&key);
 	if (error == EFAULT) {
 		umtxq_signal(&key, INT_MAX);
@@ -1842,9 +1890,9 @@ do_lock_pi(struct thread *td, struct umutex *m, uint32
 			error = EFAULT;
 			break;
 		}
-
 		/* The acquire succeeded. */
-		if (owner == UMUTEX_UNOWNED) {
+		if (rv == 0) {
+			MPASS(owner == UMUTEX_UNOWNED);
 			error = 0;
 			break;
 		}
@@ -1854,6 +1902,16 @@ do_lock_pi(struct thread *td, struct umutex *m, uint32
 			break;
 		}
 
+		/*
+		 * Avoid overwriting a possible error from sleep due
+		 * to the pending signal with suspension check result.
+		 */
+		if (error == 0) {
+			error = umtxq_check_susp(td, true);
+			if (error != 0)
+				break;
+		}
+
 		/* If no one owns it but it is contested try to acquire it. */
 		if (owner == UMUTEX_CONTESTED || owner == UMUTEX_RB_OWNERDEAD) {
 			old_owner = owner;
@@ -1864,36 +1922,40 @@ do_lock_pi(struct thread *td, struct umutex *m, uint32
 				error = EFAULT;
 				break;
 			}
-
-			if (owner == old_owner) {
-				umtxq_lock(&uq->uq_key);
-				umtxq_busy(&uq->uq_key);
-				error = umtx_pi_claim(pi, td);
-				umtxq_unbusy(&uq->uq_key);
-				umtxq_unlock(&uq->uq_key);
-				if (error != 0) {
-					/*
-					 * Since we're going to return an
-					 * error, restore the m_owner to its
-					 * previous, unowned state to avoid
-					 * compounding the problem.
-					 */
-					(void)casuword32(&m->m_owner,
-					    id | UMUTEX_CONTESTED,
-					    old_owner);
+			if (rv == 1) {
+				if (error == 0) {
+					error = umtxq_check_susp(td, true);
+					if (error != 0)
+						return (error);
 				}
-				if (error == 0 &&
-				    old_owner == UMUTEX_RB_OWNERDEAD)
-					error = EOWNERDEAD;
-				break;
+
+				/*
+				 * If this failed the lock could
+				 * changed, restart.
+				 */
+				continue;
 			}
 
-			error = umtxq_check_susp(td);
-			if (error != 0)
-				break;
-
-			/* If this failed the lock has changed, restart. */
-			continue;
+			MPASS(rv == 0);
+			MPASS(owner == old_owner);
+			umtxq_lock(&uq->uq_key);
+			umtxq_busy(&uq->uq_key);
+			error = umtx_pi_claim(pi, td);
+			umtxq_unbusy(&uq->uq_key);
+			umtxq_unlock(&uq->uq_key);
+			if (error != 0) {
+				/*
+				 * Since we're going to return an
+				 * error, restore the m_owner to its
+				 * previous, unowned state to avoid
+				 * compounding the problem.
+				 */
+				(void)casuword32(&m->m_owner,
+				    id | UMUTEX_CONTESTED, old_owner);
+			}
+			if (error == 0 && old_owner == UMUTEX_RB_OWNERDEAD)
+				error = EOWNERDEAD;
+			break;
 		}
 
 		if ((owner & ~UMUTEX_CONTESTED) == id) {
@@ -1932,28 +1994,33 @@ do_lock_pi(struct thread *td, struct umutex *m, uint32
 			error = EFAULT;
 			break;
 		}
-
-		umtxq_lock(&uq->uq_key);
-		/*
-		 * We set the contested bit, sleep. Otherwise the lock changed
-		 * and we need to retry or we lost a race to the thread
-		 * unlocking the umtx.  Note that the UMUTEX_RB_OWNERDEAD
-		 * value for owner is impossible there.
-		 */
-		if (old == owner) {
-			error = umtxq_sleep_pi(uq, pi,
-			    owner & ~UMUTEX_CONTESTED,
-			    "umtxpi", timeout == NULL ? NULL : &timo,
-			    (flags & USYNC_PROCESS_SHARED) != 0);
+		if (rv == 1) {
+			umtxq_unbusy_unlocked(&uq->uq_key);
+			error = umtxq_check_susp(td, true);
 			if (error != 0)
-				continue;
-		} else {
-			umtxq_unbusy(&uq->uq_key);
-			umtxq_unlock(&uq->uq_key);
+				break;
+
+			/*
+			 * The lock changed and we need to retry or we
+			 * lost a race to the thread unlocking the
+			 * umtx.  Note that the UMUTEX_RB_OWNERDEAD
+			 * value for owner is impossible there.
+			 */
+			continue;
 		}
 
-		error = umtxq_check_susp(td);
+		umtxq_lock(&uq->uq_key);
+
+		/* We set the contested bit, sleep. */
+		MPASS(old == owner);
+		error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
+		    "umtxpi", timeout == NULL ? NULL : &timo,
+		    (flags & USYNC_PROCESS_SHARED) != 0);
 		if (error != 0)
+			continue;
+
+		error = umtxq_check_susp(td, false);
+		if (error != 0)
 			break;
 	}
 
@@ -1978,6 +2045,8 @@ do_unlock_pi(struct thread *td, struct umutex *m, uint
 	int count, error, pri;
 
 	id = td->td_tid;
+
+usrloop:
 	/*
 	 * Make sure we own this mtx.
 	 */
@@ -1995,6 +2064,12 @@ do_unlock_pi(struct thread *td, struct umutex *m, uint
 		error = casueword32(&m->m_owner, owner, &old, new_owner);
 		if (error == -1)
 			return (EFAULT);
+		if (error == 1) {
+			error = umtxq_check_susp(td, true);
+			if (error != 0)
+				return (error);
+			goto usrloop;
+		}
 		if (old == owner)
 			return (0);
 		owner = old;
@@ -2074,15 +2149,20 @@ do_unlock_pi(struct thread *td, struct umutex *m, uint
 
 	if (count > 1)
 		new_owner |= UMUTEX_CONTESTED;
+again:
 	error = casueword32(&m->m_owner, owner, &old, new_owner);
-
+	if (error == 1) {
+		error = umtxq_check_susp(td, false);
+		if (error == 0)
+			goto again;
+	}
 	umtxq_unbusy_unlocked(&key);
 	umtx_key_release(&key);
 	if (error == -1)
 		return (EFAULT);
-	if (old != owner)
+	if (error == 0 && old != owner)
 		return (EINVAL);
-	return (0);
+	return (error);
 }
 
 /*
@@ -2149,31 +2229,49 @@ do_lock_pp(struct thread *td, struct umutex *m, uint32
 			error = EFAULT;
 			break;
 		}
-
-		if (owner == UMUTEX_CONTESTED) {
+		if (rv == 0) {
+			MPASS(owner == UMUTEX_CONTESTED);
 			error = 0;
 			break;
-		} else if (owner == UMUTEX_RB_OWNERDEAD) {
+		}
+		/* rv == 1 */
+		if (owner == UMUTEX_RB_OWNERDEAD) {
 			rv = casueword32(&m->m_owner, UMUTEX_RB_OWNERDEAD,
 			    &owner, id | UMUTEX_CONTESTED);
 			if (rv == -1) {
 				error = EFAULT;
 				break;
 			}
-			if (owner == UMUTEX_RB_OWNERDEAD) {
+			if (rv == 0) {
+				MPASS(owner == UMUTEX_RB_OWNERDEAD);
 				error = EOWNERDEAD; /* success */
 				break;
 			}
-			error = 0;
+
+			/*
+			 *  rv == 1, only check for suspension if we
+			 *  did not already catched a signal.  If we
+			 *  get an error from the check, the same
+			 *  condition is checked by the umtxq_sleep()
+			 *  call below, so we should obliterate the
+			 *  error to not skip the last loop iteration.
+			 */
+			if (error == 0) {
+				error = umtxq_check_susp(td, false);
+				if (error == 0) {
+					if (try != 0)
+						error = EBUSY;
+					else
+						continue;
+				}
+				error = 0;
+			}
 		} else if (owner == UMUTEX_RB_NOTRECOV) {
 			error = ENOTRECOVERABLE;
-			break;
 		}
 
-		if (try != 0) {
+		if (try != 0)
 			error = EBUSY;
-			break;
-		}
 
 		/*
 		 * If we caught a signal, we have retried and now
@@ -2668,11 +2766,12 @@ do_rw_rdlock(struct thread *td, struct urwlock *rwlock
 				umtx_key_release(&uq->uq_key);
 				return (EFAULT);
 			}
-			if (oldstate == state) {
+			if (rv == 0) {
+				MPASS(oldstate == state);
 				umtx_key_release(&uq->uq_key);
 				return (0);
 			}
-			error = umtxq_check_susp(td);
+			error = umtxq_check_susp(td, true);
 			if (error != 0)
 				break;
 			state = oldstate;
@@ -2703,10 +2802,12 @@ do_rw_rdlock(struct thread *td, struct urwlock *rwlock
 				error = EFAULT;
 				break;
 			}
-			if (oldstate == state)
+			if (rv == 0) {
+				MPASS(oldstate == state);
 				goto sleep;
+			}
 			state = oldstate;
-			error = umtxq_check_susp(td);
+			error = umtxq_check_susp(td, false);
 			if (error != 0)
 				break;
 		}
@@ -2718,7 +2819,7 @@ do_rw_rdlock(struct thread *td, struct urwlock *rwlock
 		/* state is changed while setting flags, restart */
 		if (!(state & wrflags)) {
 			umtxq_unbusy_unlocked(&uq->uq_key);
-			error = umtxq_check_susp(td);
+			error = umtxq_check_susp(td, true);
 			if (error != 0)
 				break;
 			continue;
@@ -2781,10 +2882,12 @@ sleep:
 					error = EFAULT;
 					break;
 				}
-				if (oldstate == state)
+				if (rv == 0) {
+					MPASS(oldstate == state);
 					break;
+				}
 				state = oldstate;
-				error1 = umtxq_check_susp(td);
+				error1 = umtxq_check_susp(td, false);
 				if (error1 != 0) {
 					if (error == 0)
 						error = error1;
@@ -2840,22 +2943,25 @@ do_rw_wrlock(struct thread *td, struct urwlock *rwlock
 				umtx_key_release(&uq->uq_key);
 				return (EFAULT);
 			}
-			if (oldstate == state) {
+			if (rv == 0) {
+				MPASS(oldstate == state);
 				umtx_key_release(&uq->uq_key);
 				return (0);
 			}
 			state = oldstate;
-			error = umtxq_check_susp(td);
+			error = umtxq_check_susp(td, true);
 			if (error != 0)
 				break;
 		}
 
 		if (error) {
-			if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
+			if ((state & (URWLOCK_WRITE_OWNER |
+			    URWLOCK_WRITE_WAITERS)) == 0 &&
 			    blocked_readers != 0) {
 				umtxq_lock(&uq->uq_key);
 				umtxq_busy(&uq->uq_key);
-				umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
+				umtxq_signal_queue(&uq->uq_key, INT_MAX,
+				    UMTX_SHARED_QUEUE);
 				umtxq_unbusy(&uq->uq_key);
 				umtxq_unlock(&uq->uq_key);
 			}
@@ -2885,10 +2991,12 @@ do_rw_wrlock(struct thread *td, struct urwlock *rwlock
 				error = EFAULT;
 				break;
 			}
-			if (oldstate == state)
+			if (rv == 0) {
+				MPASS(oldstate == state);
 				goto sleep;
+			}
 			state = oldstate;
-			error = umtxq_check_susp(td);
+			error = umtxq_check_susp(td, false);
 			if (error != 0)
 				break;
 		}
@@ -2900,7 +3008,7 @@ do_rw_wrlock(struct thread *td, struct urwlock *rwlock
 		if ((state & URWLOCK_WRITE_OWNER) == 0 &&
 		    URWLOCK_READER_COUNT(state) == 0) {
 			umtxq_unbusy_unlocked(&uq->uq_key);
-			error = umtxq_check_susp(td);
+			error = umtxq_check_susp(td, false);
 			if (error != 0)
 				break;
 			continue;
@@ -2958,10 +3066,12 @@ sleep:
 					error = EFAULT;
 					break;
 				}
-				if (oldstate == state)
+				if (rv == 0) {
+					MPASS(oldstate == state);
 					break;
+				}
 				state = oldstate;
-				error1 = umtxq_check_susp(td);
+				error1 = umtxq_check_susp(td, false);
 				/*
 				 * We are leaving the URWLOCK_WRITE_WAITERS
 				 * behind, but this should not harm the
@@ -3021,13 +3131,13 @@ do_rw_unlock(struct thread *td, struct urwlock *rwlock
 				error = EFAULT;
 				goto out;
 			}
-			if (oldstate != state) {
+			if (rv == 1) {
 				state = oldstate;
 				if (!(oldstate & URWLOCK_WRITE_OWNER)) {
 					error = EPERM;
 					goto out;
 				}
-				error = umtxq_check_susp(td);
+				error = umtxq_check_susp(td, true);
 				if (error != 0)
 					goto out;
 			} else
@@ -3041,13 +3151,13 @@ do_rw_unlock(struct thread *td, struct urwlock *rwlock
 				error = EFAULT;
 				goto out;
 			}
-			if (oldstate != state) {
+			if (rv == 1) {
 				state = oldstate;
 				if (URWLOCK_READER_COUNT(oldstate) == 0) {
 					error = EPERM;
 					goto out;
 				}
-				error = umtxq_check_susp(td);
+				error = umtxq_check_susp(td, true);
 				if (error != 0)
 					goto out;
 			} else
@@ -3097,7 +3207,7 @@ do_sem_wait(struct thread *td, struct _usem *sem, stru
 	struct abs_timeout timo;
 	struct umtx_q *uq;
 	uint32_t flags, count, count1;
-	int error, rv;
+	int error, rv, rv1;
 
 	uq = td->td_umtxq;
 	error = fueword32(&sem->_flags, &flags);
@@ -3110,20 +3220,30 @@ do_sem_wait(struct thread *td, struct _usem *sem, stru
 	if (timeout != NULL)
 		abs_timeout_init2(&timo, timeout);
 
+again:
 	umtxq_lock(&uq->uq_key);
 	umtxq_busy(&uq->uq_key);
 	umtxq_insert(uq);
 	umtxq_unlock(&uq->uq_key);
 	rv = casueword32(&sem->_has_waiters, 0, &count1, 1);
 	if (rv == 0)
-		rv = fueword32(&sem->_count, &count);
-	if (rv == -1 || count != 0) {
+		rv1 = fueword32(&sem->_count, &count);
+	if (rv == -1 || (rv == 0 && (rv1 == -1 || count != 0)) || rv == 1) {
 		umtxq_lock(&uq->uq_key);
 		umtxq_unbusy(&uq->uq_key);
 		umtxq_remove(uq);
 		umtxq_unlock(&uq->uq_key);
-		umtx_key_release(&uq->uq_key);
-		return (rv == -1 ? EFAULT : 0);
+		if (rv == 1) {
+			rv = umtxq_check_susp(td, true);
+			if (rv == 0)
+				goto again;
+			error = rv;
+			goto out;
+		}
+		if (rv == 0)
+			rv = rv1;
+		error = rv == -1 ? EFAULT : 0;
+		goto out;
 	}
 	umtxq_lock(&uq->uq_key);
 	umtxq_unbusy(&uq->uq_key);
@@ -3140,6 +3260,7 @@ do_sem_wait(struct thread *td, struct _usem *sem, stru
 			error = EINTR;
 	}
 	umtxq_unlock(&uq->uq_key);
+out:
 	umtx_key_release(&uq->uq_key);
 	return (error);
 }
@@ -3201,6 +3322,7 @@ do_sem2_wait(struct thread *td, struct _usem2 *sem, st
 	if (timeout != NULL)
 		abs_timeout_init2(&timo, timeout);
 
+again:
 	umtxq_lock(&uq->uq_key);
 	umtxq_busy(&uq->uq_key);
 	umtxq_insert(uq);
@@ -3226,16 +3348,19 @@ do_sem2_wait(struct thread *td, struct _usem2 *sem, st
 		if (count == USEM_HAS_WAITERS)
 			break;
 		rv = casueword32(&sem->_count, 0, &count, USEM_HAS_WAITERS);
-		if (rv == -1) {
-			umtxq_lock(&uq->uq_key);
-			umtxq_unbusy(&uq->uq_key);
-			umtxq_remove(uq);
-			umtxq_unlock(&uq->uq_key);
-			umtx_key_release(&uq->uq_key);
-			return (EFAULT);
-		}
-		if (count == 0)
+		if (rv == 0)

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201907121843.x6CIhOPG044234>