Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 13 Aug 2009 13:17:50 +0000 (UTC)
From:      Lawrence Stewart <lstewart@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r196180 - in projects/tcp_ffcaia2008_8.x/sys: kern modules modules/alq sys
Message-ID:  <200908131317.n7DDHoNE001757@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: lstewart
Date: Thu Aug 13 13:17:50 2009
New Revision: 196180
URL: http://svn.freebsd.org/changeset/base/196180

Log:
  Import variable length ALQ(9) work from my user branch/SIFTR development
  repository. This is a prerequisite for importing SIFTR into the tree.
  
  This patch allows the ALQ(9) framework to be compiled as a kernel module. It
  also extends the KPI to support logging variable length messages by replacing
  the underlying storage with a circular byte based buffer and adding new
  alq_writen/alq_getn functions.
  
  Some additional minor structural work is required to remove the patch's current
  use of malloc in alq_getn.
  
  Sponsored by:	FreeBSD Foundation
  Discussed with:	jeff@, rwatson@ (quite some time ago now)

Added:
  projects/tcp_ffcaia2008_8.x/sys/modules/alq/
Modified:
  projects/tcp_ffcaia2008_8.x/sys/kern/kern_alq.c
  projects/tcp_ffcaia2008_8.x/sys/modules/Makefile
  projects/tcp_ffcaia2008_8.x/sys/sys/alq.h

Modified: projects/tcp_ffcaia2008_8.x/sys/kern/kern_alq.c
==============================================================================
--- projects/tcp_ffcaia2008_8.x/sys/kern/kern_alq.c	Thu Aug 13 12:28:30 2009	(r196179)
+++ projects/tcp_ffcaia2008_8.x/sys/kern/kern_alq.c	Thu Aug 13 13:17:50 2009	(r196180)
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
+ * Copyright (c) 2008-2009, Lawrence Stewart <lstewart@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -27,6 +28,8 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_mac.h"
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
@@ -43,20 +46,23 @@ __FBSDID("$FreeBSD$");
 #include <sys/fcntl.h>
 #include <sys/eventhandler.h>
 
+#if (__FreeBSD_version >= 700000)
 #include <security/mac/mac_framework.h>
+#endif
 
 /* Async. Logging Queue */
 struct alq {
 	int	aq_entmax;		/* Max entries */
 	int	aq_entlen;		/* Entry length */
+	int	aq_freebytes;		/* Bytes available in buffer */
+	int	aq_buflen;		/* Total length of our buffer */
 	char	*aq_entbuf;		/* Buffer for stored entries */
+	int	aq_writehead;
+	int	aq_writetail;
 	int	aq_flags;		/* Queue flags */
 	struct mtx	aq_mtx;		/* Queue lock */
 	struct vnode	*aq_vp;		/* Open vnode handle */
 	struct ucred	*aq_cred;	/* Credentials of the opening thread */
-	struct ale	*aq_first;	/* First ent */
-	struct ale	*aq_entfree;	/* First free ent */
-	struct ale	*aq_entvalid;	/* First ent valid for writing */
 	LIST_ENTRY(alq)	aq_act;		/* List of active queues */
 	LIST_ENTRY(alq)	aq_link;	/* List of all queues */
 };
@@ -69,6 +75,8 @@ struct alq {
 #define	ALQ_LOCK(alq)	mtx_lock_spin(&(alq)->aq_mtx)
 #define	ALQ_UNLOCK(alq)	mtx_unlock_spin(&(alq)->aq_mtx)
 
+#define ALQ_HAS_PENDING_DATA(alq) ((alq)->aq_freebytes != (alq)->aq_buflen)
+
 static MALLOC_DEFINE(M_ALD, "ALD", "ALD");
 
 /*
@@ -78,7 +86,6 @@ static struct mtx ald_mtx;
 static LIST_HEAD(, alq) ald_queues;
 static LIST_HEAD(, alq) ald_active;
 static int ald_shutingdown = 0;
-struct thread *ald_thread;
 static struct proc *ald_proc;
 
 #define	ALD_LOCK()	mtx_lock(&ald_mtx)
@@ -172,16 +179,25 @@ ald_daemon(void)
 	int needwakeup;
 	struct alq *alq;
 
-	ald_thread = FIRST_THREAD_IN_PROC(ald_proc);
-
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, ald_shutdown, NULL,
 	    SHUTDOWN_PRI_FIRST);
 
 	ALD_LOCK();
 
 	for (;;) {
-		while ((alq = LIST_FIRST(&ald_active)) == NULL)
+		while ((alq = LIST_FIRST(&ald_active)) == NULL
+		    && !ald_shutingdown)
+#if (__FreeBSD_version >= 700000)
+			mtx_sleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);
+#else
 			msleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);
+#endif
+
+		/* Don't shutdown until all active alq's are flushed */
+		if (ald_shutingdown && alq == NULL) {
+			ALD_UNLOCK();
+			break;
+		}
 
 		ALQ_LOCK(alq);
 		ald_deactivate(alq);
@@ -189,9 +205,18 @@ ald_daemon(void)
 		needwakeup = alq_doio(alq);
 		ALQ_UNLOCK(alq);
 		if (needwakeup)
-			wakeup(alq);
+			wakeup_one(alq);
 		ALD_LOCK();
 	}
+
+#if (__FreeBSD_version < 800000)
+#if (__FreeBSD_version < 700000)
+	wakeup(ald_proc);
+#endif
+	kthread_exit(0);
+#else
+	kproc_exit(0);
+#endif
 }
 
 static void
@@ -200,14 +225,32 @@ ald_shutdown(void *arg, int howto)
 	struct alq *alq;
 
 	ALD_LOCK();
+
+	/* Ensure no new queues can be created */
 	ald_shutingdown = 1;
 
+	/* Shutdown all alqs prior to terminating the ald_daemon */
 	while ((alq = LIST_FIRST(&ald_queues)) != NULL) {
 		LIST_REMOVE(alq, aq_link);
 		ALD_UNLOCK();
 		alq_shutdown(alq);
 		ALD_LOCK();
 	}
+
+	/* At this point, all alqs are flushed and shutdown */
+
+	/*
+	 * Wake ald_daemon so that it exits. It won't be able to do
+	 * anything until we mtx_sleep because we hold the ald_mtx
+	 */
+	wakeup(&ald_active);
+
+	/* Wait for ald_daemon to exit */
+#if (__FreeBSD_version >= 700000)
+	mtx_sleep(ald_proc, &ald_mtx, PWAIT, "aldslp", 0);
+#else
+	msleep(ald_proc, &ald_mtx, PWAIT, "aldslp", 0);
+#endif
 	ALD_UNLOCK();
 }
 
@@ -219,15 +262,30 @@ alq_shutdown(struct alq *alq)
 	/* Stop any new writers. */
 	alq->aq_flags |= AQ_SHUTDOWN;
 
+	/*
+	 * If the alq isn't active but has unwritten data (possible if
+	 * the ALQ_NOACTIVATE flag has been used), explicitly activate the
+	 * alq here so that the pending data gets flushed by the ald_daemon.
+	 */
+	if (!(alq->aq_flags & AQ_ACTIVE) &&
+	    ALQ_HAS_PENDING_DATA(alq)) {
+		alq->aq_flags |= AQ_ACTIVE;
+		ALQ_UNLOCK(alq);
+		ALD_LOCK();
+		ald_activate(alq);
+		ALD_UNLOCK();
+		ALQ_LOCK(alq);
+	}
+
 	/* Drain IO */
-	while (alq->aq_flags & (AQ_FLUSHING|AQ_ACTIVE)) {
+	while (alq->aq_flags & AQ_ACTIVE) {
 		alq->aq_flags |= AQ_WANTED;
 		msleep_spin(alq, &alq->aq_mtx, "aldclose", 0);
 	}
+
 	ALQ_UNLOCK(alq);
 
-	vn_close(alq->aq_vp, FWRITE, alq->aq_cred,
-	    curthread);
+	vn_close(alq->aq_vp, FWRITE, alq->aq_cred, curthread);
 	crfree(alq->aq_cred);
 }
 
@@ -242,46 +300,52 @@ alq_doio(struct alq *alq)
 	struct vnode *vp;
 	struct uio auio;
 	struct iovec aiov[2];
-	struct ale *ale;
-	struct ale *alstart;
 	int totlen;
 	int iov;
 	int vfslocked;
 
+	KASSERT((ALQ_HAS_PENDING_DATA(alq)),
+		("%s: queue emtpy!", __func__)
+	);
+
 	vp = alq->aq_vp;
 	td = curthread;
 	totlen = 0;
-	iov = 0;
-
-	alstart = ale = alq->aq_entvalid;
-	alq->aq_entvalid = NULL;
+	iov = 1;
 
 	bzero(&aiov, sizeof(aiov));
 	bzero(&auio, sizeof(auio));
 
-	do {
-		if (aiov[iov].iov_base == NULL)
-			aiov[iov].iov_base = ale->ae_data;
-		aiov[iov].iov_len += alq->aq_entlen;
-		totlen += alq->aq_entlen;
-		/* Check to see if we're wrapping the buffer */
-		if (ale->ae_data + alq->aq_entlen != ale->ae_next->ae_data)
-			iov++;
-		ale->ae_flags &= ~AE_VALID;
-		ale = ale->ae_next;
-	} while (ale->ae_flags & AE_VALID);
+	/* Start the write from the location of our buffer tail pointer. */
+	aiov[0].iov_base = alq->aq_entbuf + alq->aq_writetail;
+
+	if (alq->aq_writetail < alq->aq_writehead) {
+		/* Buffer not wrapped */
+		totlen = aiov[0].iov_len = alq->aq_writehead - alq->aq_writetail;
+	} else if (alq->aq_writehead == 0) {
+		/* Buffer not wrapped (special case to avoid an empty iov) */
+		totlen = aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail;;
+	} else {
+		/*
+		 * Buffer wrapped, requires 2 aiov entries:
+		 * - first is from writetail to end of buffer
+		 * - second is from start of buffer to writehead
+		 */
+		aiov[0].iov_len = alq->aq_buflen - alq->aq_writetail;
+		iov++;
+		aiov[1].iov_base = alq->aq_entbuf;
+		aiov[1].iov_len =  alq->aq_writehead;
+		totlen = aiov[0].iov_len + aiov[1].iov_len;
+	}
 
 	alq->aq_flags |= AQ_FLUSHING;
 	ALQ_UNLOCK(alq);
 
-	if (iov == 2 || aiov[iov].iov_base == NULL)
-		iov--;
-
 	auio.uio_iov = &aiov[0];
 	auio.uio_offset = 0;
 	auio.uio_segflg = UIO_SYSSPACE;
 	auio.uio_rw = UIO_WRITE;
-	auio.uio_iovcnt = iov + 1;
+	auio.uio_iovcnt = iov;
 	auio.uio_resid = totlen;
 	auio.uio_td = td;
 
@@ -290,7 +354,12 @@ alq_doio(struct alq *alq)
 	 */
 	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 	vn_start_write(vp, &mp, V_WAIT);
+#if (__FreeBSD_version < 800000)
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
+#else
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+#endif
+
 	/*
 	 * XXX: VOP_WRITE error checks are ignored.
 	 */
@@ -298,15 +367,32 @@ alq_doio(struct alq *alq)
 	if (mac_vnode_check_write(alq->aq_cred, NOCRED, vp) == 0)
 #endif
 		VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
+#if (__FreeBSD_version < 800000)
+	VOP_UNLOCK(vp, 0, td);
+#else
 	VOP_UNLOCK(vp, 0);
+#endif
 	vn_finished_write(mp);
 	VFS_UNLOCK_GIANT(vfslocked);
 
 	ALQ_LOCK(alq);
 	alq->aq_flags &= ~AQ_FLUSHING;
 
-	if (alq->aq_entfree == NULL)
-		alq->aq_entfree = alstart;
+	/* Adjust writetail as required, taking into account wrapping. */
+	alq->aq_writetail = (alq->aq_writetail + totlen) % alq->aq_buflen;
+	alq->aq_freebytes += totlen;
+
+	/*
+	 * If we just flushed the buffer completely,
+	 * reset indexes to 0 to minimise buffer wraps.
+	 * This is also required to ensure alq_getn() can't wedge itself.
+	 */
+	if (!ALQ_HAS_PENDING_DATA(alq))
+		alq->aq_writehead = alq->aq_writetail = 0;
+
+	KASSERT((alq->aq_writetail >= 0 && alq->aq_writetail < alq->aq_buflen),
+		("%s: aq_writetail < 0 || aq_writetail >= aq_buflen", __func__)
+	);
 
 	if (alq->aq_flags & AQ_WANTED) {
 		alq->aq_flags &= ~AQ_WANTED;
@@ -337,13 +423,13 @@ alq_open(struct alq **alqp, const char *
 {
 	struct thread *td;
 	struct nameidata nd;
-	struct ale *ale;
-	struct ale *alp;
 	struct alq *alq;
-	char *bufp;
 	int flags;
 	int error;
-	int i, vfslocked;
+	int vfslocked;
+
+	KASSERT((size > 0), ("%s: size <= 0", __func__));
+	KASSERT((count >= 0), ("%s: count < 0", __func__));
 
 	*alqp = NULL;
 	td = curthread;
@@ -351,42 +437,48 @@ alq_open(struct alq **alqp, const char *
 	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, file, td);
 	flags = FWRITE | O_NOFOLLOW | O_CREAT;
 
+#if (__FreeBSD_version < 700000)
+	error = vn_open_cred(&nd, &flags, cmode, cred, 0);
+#elif (__FreeBSD_version < 800098)
+	error = vn_open_cred(&nd, &flags, cmode, cred, NULL);
+#else
 	error = vn_open_cred(&nd, &flags, cmode, 0, cred, NULL);
+#endif
 	if (error)
 		return (error);
 
 	vfslocked = NDHASGIANT(&nd);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
-	/* We just unlock so we hold a reference */
+	/* We just unlock so we hold a reference. */
+#if (__FreeBSD_version < 800000)
+	VOP_UNLOCK(nd.ni_vp, 0, td);
+#else
 	VOP_UNLOCK(nd.ni_vp, 0);
+#endif
 	VFS_UNLOCK_GIANT(vfslocked);
 
 	alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
-	alq->aq_entbuf = malloc(count * size, M_ALD, M_WAITOK|M_ZERO);
-	alq->aq_first = malloc(sizeof(*ale) * count, M_ALD, M_WAITOK|M_ZERO);
 	alq->aq_vp = nd.ni_vp;
 	alq->aq_cred = crhold(cred);
-	alq->aq_entmax = count;
-	alq->aq_entlen = size;
-	alq->aq_entfree = alq->aq_first;
 
 	mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);
 
-	bufp = alq->aq_entbuf;
-	ale = alq->aq_first;
-	alp = NULL;
-
-	/* Match up entries with buffers */
-	for (i = 0; i < count; i++) {
-		if (alp)
-			alp->ae_next = ale;
-		ale->ae_data = bufp;
-		alp = ale;
-		ale++;
-		bufp += size;
+	if (count > 0) {
+		/* Fixed length messages. */
+		alq->aq_buflen = size * count;
+		alq->aq_entmax = count;
+		alq->aq_entlen = size;
+	} else {
+		/* Variable length messages. */
+		alq->aq_buflen = size;
+		alq->aq_entmax = 0;
+		alq->aq_entlen = 0;
 	}
 
-	alp->ae_next = alq->aq_first;
+	alq->aq_freebytes = alq->aq_buflen;
+	alq->aq_entbuf = malloc(alq->aq_buflen, M_ALD, M_WAITOK|M_ZERO);
+
+	alq->aq_writehead = alq->aq_writetail = 0;
 
 	if ((error = ald_add(alq)) != 0)
 		return (error);
@@ -400,72 +492,240 @@ alq_open(struct alq **alqp, const char *
  * wait or return an error depending on the value of waitok.
  */
 int
-alq_write(struct alq *alq, void *data, int waitok)
+alq_write(struct alq *alq, void *data, int flags)
 {
-	struct ale *ale;
+	/* Should only be called in fixed length message (legacy) mode. */
+	KASSERT((alq->aq_entmax > 0 && alq->aq_entlen > 0),
+		("%s: fixed length write on variable length queue", __func__)
+	);
+	return (alq_writen(alq, data, alq->aq_entlen, flags));
+}
+
+int
+alq_writen(struct alq *alq, void *data, int len, int flags)
+{
+	int activate = 0;
+	int copy = len;
+
+	KASSERT((len > 0 && len <= alq->aq_buflen),
+		("%s: len <= 0 || len > aq_buflen", __func__)
+	);
 
-	if ((ale = alq_get(alq, waitok)) == NULL)
+	ALQ_LOCK(alq);
+
+	/*
+	 * If the message is larger than our underlying buffer or
+	 * there is not enough free space in our underlying buffer
+	 * to accept the message and the user can't wait, return.
+	 */
+	if ((len > alq->aq_buflen) ||
+	    ((flags & ALQ_NOWAIT) && (alq->aq_freebytes < len))) {
+		ALQ_UNLOCK(alq);
 		return (EWOULDBLOCK);
+	}
 
-	bcopy(data, ale->ae_data, alq->aq_entlen);
-	alq_post(alq, ale);
+	/*
+	 * ALQ_WAITOK or alq->aq_freebytes > len, either spin until
+	 * we have enough free bytes (former) or skip (latter).
+	 */
+	while (alq->aq_freebytes < len && (alq->aq_flags & AQ_SHUTDOWN) == 0) {
+		alq->aq_flags |= AQ_WANTED;
+		msleep_spin(alq, &alq->aq_mtx, "alqwriten", 0);
+	}
+
+	/*
+	 * We need to serialise wakups to ensure records remain in order...
+	 * Therefore, wakeup the next thread in the queue waiting for
+	 * alq resources to be available.
+	 * (technically this is only required if we actually entered the above
+	 * while loop)
+	 */
+	wakeup_one(alq);
+
+	/* Bail if we're shutting down. */
+	if (alq->aq_flags & AQ_SHUTDOWN) {
+		ALQ_UNLOCK(alq);
+		return (EWOULDBLOCK);
+	}
+
+	/*
+	 * If we need to wrap the buffer to accommodate the write,
+	 * we'll need 2 calls to bcopy.
+	 */
+	if ((alq->aq_buflen - alq->aq_writehead) < len)
+		copy = alq->aq_buflen - alq->aq_writehead;
+
+	/* Copy message (or part thereof if wrap required) to the buffer. */
+	bcopy(data, alq->aq_entbuf + alq->aq_writehead, copy);
+	alq->aq_writehead += copy;
+
+	if (alq->aq_writehead >= alq->aq_buflen) {
+		KASSERT((alq->aq_writehead == alq->aq_buflen),
+		    ("alq->aq_writehead (%d) > alq->aq_buflen (%d)",
+		    alq->aq_writehead,
+		    alq->aq_buflen)
+		);
+		alq->aq_writehead = 0;
+	}
+
+	if (copy != len) {
+		/*
+		 * Wrap the buffer by copying the remainder of our message
+		 * to the start of the buffer and resetting aq_writehead.
+		 */
+		bcopy(((uint8_t *)data)+copy, alq->aq_entbuf, len - copy);
+		alq->aq_writehead = len - copy;
+	}
+
+	KASSERT((alq->aq_writehead >= 0 && alq->aq_writehead < alq->aq_buflen),
+		("%s: aq_writehead < 0 || aq_writehead >= aq_buflen", __func__)
+	);
+
+	alq->aq_freebytes -= len;
+
+	if (((alq->aq_flags & AQ_ACTIVE) == 0) &&
+		((flags & ALQ_NOACTIVATE) == 0)) {
+		alq->aq_flags |= AQ_ACTIVE;
+		activate = 1;
+	}
+
+	ALQ_UNLOCK(alq);
+
+	if (activate) {
+		ALD_LOCK();
+		ald_activate(alq);
+		ALD_UNLOCK();
+	}
 
 	return (0);
 }
 
 struct ale *
-alq_get(struct alq *alq, int waitok)
+alq_get(struct alq *alq, int flags)
+{
+	/* Should only be called in fixed length message (legacy) mode. */
+	KASSERT((alq->aq_entmax > 0 && alq->aq_entlen > 0),
+		("%s: fixed length get on variable length queue", __func__)
+	);
+	return (alq_getn(alq, alq->aq_entlen, flags));
+}
+
+struct ale *
+alq_getn(struct alq *alq, int len, int flags)
 {
 	struct ale *ale;
-	struct ale *aln;
+	int contigbytes;
 
-	ale = NULL;
+	KASSERT((len > 0 && len <= alq->aq_buflen),
+		("%s: len <= 0 || len > alq->aq_buflen", __func__)
+	);
+
+	ale = malloc(	sizeof(struct ale),
+			M_ALD,
+			(flags & ALQ_NOWAIT) ? M_NOWAIT : M_WAITOK
+	);
+
+	if (ale == NULL)
+		return (NULL);
 
 	ALQ_LOCK(alq);
 
-	/* Loop until we get an entry or we're shutting down */
-	while ((alq->aq_flags & AQ_SHUTDOWN) == 0 && 
-	    (ale = alq->aq_entfree) == NULL &&
-	    (waitok & ALQ_WAITOK)) {
-		alq->aq_flags |= AQ_WANTED;
-		msleep_spin(alq, &alq->aq_mtx, "alqget", 0);
+	/*
+	 * Determine the number of free contiguous bytes.
+	 * We ensure elsewhere that if aq_writehead == aq_writetail because
+	 * the buffer is empty, they will both be set to 0 and therefore
+	 * aq_freebytes == aq_buflen and is fully contiguous.
+	 * If they are equal and the buffer is not empty, aq_freebytes will
+	 * be 0 indicating the buffer is full.
+	 */
+	if (alq->aq_writehead <= alq->aq_writetail)
+		contigbytes = alq->aq_freebytes;
+	else
+		contigbytes = alq->aq_buflen - alq->aq_writehead;
+
+	/*
+	 * If the message is larger than our underlying buffer or
+	 * there is not enough free contiguous space in our underlying buffer
+	 * to accept the message and the user can't wait, return.
+	 */
+	if ((len > alq->aq_buflen) ||
+		((flags & ALQ_NOWAIT) && (contigbytes < len))) {
+		ALQ_UNLOCK(alq);
+		free(ale, M_ALD);
+		return (NULL);
 	}
 
-	if (ale != NULL) {
-		aln = ale->ae_next;
-		if ((aln->ae_flags & AE_VALID) == 0)
-			alq->aq_entfree = aln;
+	/*
+	 * ALQ_WAITOK or contigbytes >= len,
+	 * either spin until we have enough free contiguous bytes (former)
+	 * or skip (latter).
+	 */
+	while (contigbytes < len && (alq->aq_flags & AQ_SHUTDOWN) == 0) {
+		alq->aq_flags |= AQ_WANTED;
+		msleep_spin(alq, &alq->aq_mtx, "alqgetn", 0);
+		if (alq->aq_writehead <= alq->aq_writetail)
+			contigbytes = alq->aq_freebytes;
 		else
-			alq->aq_entfree = NULL;
-	} else
+			contigbytes = alq->aq_buflen - alq->aq_writehead;
+	}
+
+	/*
+	 * We need to serialise wakups to ensure records remain in order.
+	 * Therefore, wakeup the next thread in the queue waiting for
+	 * alq resources to be available.
+	 * (technically this is only required if we actually entered the above
+	 * while loop)
+	 */
+	wakeup_one(alq);
+
+	/* Bail if we're shutting down */
+	if (alq->aq_flags & AQ_SHUTDOWN) {
 		ALQ_UNLOCK(alq);
+		free(ale, M_ALD);
+		return (NULL);
+	}
 
+	/*
+	 * If we are here, we have a contiguous number of bytes >= len
+	 * available in our buffer starting at aq_writehead.
+	 */
+	ale->ae_data = alq->aq_entbuf + alq->aq_writehead;
+	ale->ae_datalen = len;
+	alq->aq_writehead += len;
+	alq->aq_freebytes -= len;
+
+	/* Wrap aq_writehead if we've filled to the end of the buffer. */
+	if (alq->aq_writehead == alq->aq_buflen)
+		alq->aq_writehead = 0;
+
+	KASSERT((alq->aq_writehead >= 0 && alq->aq_writehead < alq->aq_buflen),
+		("%s: aq_writehead < 0 || aq_writehead >= aq_buflen", __func__)
+	);
 
 	return (ale);
 }
 
 void
-alq_post(struct alq *alq, struct ale *ale)
+alq_post(struct alq *alq, struct ale *ale, int flags)
 {
 	int activate;
 
-	ale->ae_flags |= AE_VALID;
-
-	if (alq->aq_entvalid == NULL)
-		alq->aq_entvalid = ale;
-
-	if ((alq->aq_flags & AQ_ACTIVE) == 0) {
+	if (((alq->aq_flags & AQ_ACTIVE) == 0) &&
+		((flags & ALQ_NOACTIVATE) == 0)) {
 		alq->aq_flags |= AQ_ACTIVE;
 		activate = 1;
 	} else
 		activate = 0;
 
 	ALQ_UNLOCK(alq);
+
 	if (activate) {
 		ALD_LOCK();
 		ald_activate(alq);
 		ALD_UNLOCK();
 	}
+
+	free(ale, M_ALD);
 }
 
 void
@@ -475,16 +735,23 @@ alq_flush(struct alq *alq)
 
 	ALD_LOCK();
 	ALQ_LOCK(alq);
-	if (alq->aq_flags & AQ_ACTIVE) {
+
+	if (alq->aq_flags & AQ_ACTIVE)
 		ald_deactivate(alq);
-		ALD_UNLOCK();
+
+	ALD_UNLOCK();
+
+	/*
+	 * Pull the lever iff there is data to flush and we're
+	 * not already in the middle of a flush operation.
+	 */
+	if (ALQ_HAS_PENDING_DATA(alq) && (alq->aq_flags & AQ_FLUSHING) == 0)
 		needwakeup = alq_doio(alq);
-	} else
-		ALD_UNLOCK();
+
 	ALQ_UNLOCK(alq);
 
 	if (needwakeup)
-		wakeup(alq);
+		wakeup_one(alq);
 }
 
 /*
@@ -506,7 +773,49 @@ alq_close(struct alq *alq)
 	alq_shutdown(alq);
 
 	mtx_destroy(&alq->aq_mtx);
-	free(alq->aq_first, M_ALD);
 	free(alq->aq_entbuf, M_ALD);
 	free(alq, M_ALD);
 }
+
+static int alq_load_handler(module_t mod, int what, void *arg)
+{
+	int ret = 0;
+
+	switch(what) {
+		case MOD_LOAD:
+		case MOD_UNLOAD:
+		case MOD_SHUTDOWN:
+			break;
+		
+		case MOD_QUIESCE:
+			ALD_LOCK();
+			/* only allow unload if there are no open queues */
+			if (LIST_FIRST(&ald_queues) == NULL) {
+				ald_shutingdown = 1;
+				ALD_UNLOCK();
+				ald_shutdown(NULL, 0);
+				mtx_destroy(&ald_mtx);
+			} else {
+				ALD_UNLOCK();
+				ret = EBUSY;
+			}
+			break;
+		
+		default:
+			ret = EINVAL;
+			break;
+	}
+
+	return (ret);
+}
+
+/* basic module data */
+static moduledata_t alq_mod =
+{
+	"alq",
+	alq_load_handler, /* execution entry point for the module */
+	NULL
+};
+
+DECLARE_MODULE(alq, alq_mod, SI_SUB_SMP, SI_ORDER_ANY);
+MODULE_VERSION(alq, 1);

Modified: projects/tcp_ffcaia2008_8.x/sys/modules/Makefile
==============================================================================
--- projects/tcp_ffcaia2008_8.x/sys/modules/Makefile	Thu Aug 13 12:28:30 2009	(r196179)
+++ projects/tcp_ffcaia2008_8.x/sys/modules/Makefile	Thu Aug 13 13:17:50 2009	(r196180)
@@ -22,6 +22,7 @@ SUBDIR=	${_3dfx} \
 	${_amdtemp} \
 	alc \
 	ale \
+	alq \
 	amr \
 	${_an} \
 	${_aout} \

Modified: projects/tcp_ffcaia2008_8.x/sys/sys/alq.h
==============================================================================
--- projects/tcp_ffcaia2008_8.x/sys/sys/alq.h	Thu Aug 13 12:28:30 2009	(r196179)
+++ projects/tcp_ffcaia2008_8.x/sys/sys/alq.h	Thu Aug 13 13:17:50 2009	(r196180)
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
+ * Copyright (c) 2008-2009, Lawrence Stewart <lstewart@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,7 +27,7 @@
  * $FreeBSD$
  *
  */
-#ifndef _SYS_ALQ_H_
+#ifndef	_SYS_ALQ_H_
 #define	_SYS_ALQ_H_
 
 /*
@@ -41,17 +42,14 @@ extern struct thread *ald_thread;
  * Async. Logging Entry
  */
 struct ale {
-	struct ale	*ae_next;	/* Next Entry */
 	char		*ae_data;	/* Entry buffer */
-	int		ae_flags;	/* Entry flags */
+	int		ae_datalen;	/* Length of buffer */
 };
 
-#define	AE_VALID	0x0001		/* Entry has valid data */
- 
-
-/* waitok options */
+/* flags options */
 #define	ALQ_NOWAIT	0x0001
 #define	ALQ_WAITOK	0x0002
+#define	ALQ_NOACTIVATE	0x0004
 
 /* Suggested mode for file creation. */
 #define	ALQ_DEFAULT_CMODE	0600
@@ -64,7 +62,8 @@ struct ale {
  *	file	The filename to open for logging.
  *	cred	Credential to authorize open and I/O with.
  *	cmode	Creation mode for file, if new.
- *	size	The size of each entry in the queue.
+ *	size	The size of each entry in the queue, or the size of the queue
+ *		itself in bytes if count=0 (variable length queues).
  *	count	The number of items in the buffer, this should be large enough
  *		to store items over the period of a disk write.
  * Returns:
@@ -88,7 +87,8 @@ int alq_open(struct alq **, const char *
  *		The system is shutting down.
  *	0 on success.
  */
-int alq_write(struct alq *alq, void *data, int waitok);
+int alq_write(struct alq *alq, void *data, int flags);
+int alq_writen(struct alq *alq, void *data, int len, int flags);
 
 /*
  * alq_flush:  Flush the queue out to disk
@@ -115,13 +115,14 @@ void alq_close(struct alq *alq);
  *
  * This leaves the queue locked until a subsequent alq_post.
  */
-struct ale *alq_get(struct alq *alq, int waitok);
+struct ale *alq_get(struct alq *alq, int flags);
+struct ale *alq_getn(struct alq *alq, int len, int flags);
 
 /*
  * alq_post:  Schedule the ale retrieved by alq_get for writing.
  *	alq	The queue to post the entry to.
  *	ale	An asynch logging entry returned by alq_get.
  */
-void alq_post(struct alq *, struct ale *);
+void alq_post(struct alq *alq, struct ale *ale, int flags);
 
 #endif	/* _SYS_ALQ_H_ */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200908131317.n7DDHoNE001757>