Date:      Sun, 17 Dec 2000 10:22:36 -0800
From:      Julian Elischer <julian@elischer.org>
To:        archie@freebsd.org, phk@freebsd.org, brian@freebsd.org, smp@freebsd.org
Subject:   Re: Netgraph locking primitives. take 1.
Message-ID:  <3A3D046C.76A001FE@elischer.org>
References:  <3A3CFF88.6B98890@elischer.org>


Julian Elischer wrote:
> 
> here is a first pass at some locking primitives for netgraph.
> 
> 
Weird.
I attached the file but it didn't make it back to me here,
so it's at:
http://www.freebsd.org/~julian/locks.c





>   --------------------------------------------------------------------------------
>  [Image]

Why was it attached as an image??
I'll try again anyhow.

-- 
      __--_|\  Julian Elischer
     /       \ julian@elischer.org
    (   OZ    ) World tour 2000
---> X_.---._/  presently in:  Budapest
            v
[Attachment: locks.c (sent as Content-Type: image/x-xbitmap)]

#define SMP
#define _KERNEL

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/mutex.h>



#ifndef NULL
#define NULL ((void *)0)
#endif

struct ng_qelem {
	struct ng_qelem *next;
	u_int	flags;
/*
	struct mbuf *m;
	meta_p	meta;
*/
};
#define NGQE_READER	0x00
#define NGQE_WRITER	0x01
#define NGQE_TYPE	0x01

struct ng_queue {
	u_long	flag;
	struct mtx mtx;
	struct ng_qelem *queue;
	struct ng_qelem **last;
};

/* The ordering here is important! Don't shuffle these.
 * If you add READ_PENDING to the word when it has READ_PENDING
 * already set, you generate a carry into the reader count;
 * thus you atomically add a reader and remove the pending reader flag!
 * Similarly for the pending writer flag: adding WRITE_PENDING generates
 * a carry and sets the WRITER_ACTIVE flag, while clearing WRITE_PENDING.
 * When 'SINGLE_THREAD_ONLY' is set, only WRITER operations are
 * permitted; reader operations will result in errors.
 * (An illustrative compile-time check of these identities appears
 * after the diagram below.)
 */
#define	SINGLE_THREAD_ONLY 0x00000001	/* if set, even reads single thread */
#define WRITE_PENDING	0x00000002
#define	WRITER_ACTIVE	0x00000004
#define READ_PENDING	0x00000008
#define	READER_INCREMENT 0x00000010
#define	READER_MASK	0xfffffff0	/* Not valid if WRITER_ACTIVE is set */
#define SAFETY_BARRIER	0x00100000	/* 64K items queued should be enough */

/*
 Safety Barrier--------+ (adjustable to suit taste)
		       |
                       V                      
+-------+-------+-------+-------+-------+-------+-------+-------+
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
|A|c|t|i|v|e| |R|e|a|d|e|r| |C|o|u|n|t| | | | | | | | | |R|A|W|S|
| | | | | | | | | | | | | | | | | | | | | | | | | | | | |P|W|P|T|
+-------+-------+-------+-------+-------+-------+-------+-------+
\_________________________ ____________________________/ | | | |
                          V                              | | | |
                [active reader count]                    | | | |
                                                         | | | |
        Read Pending ------------------------------------+ | | |
                                                           | | |
        Active Writer -------------------------------------+ | |
                                                             | |
        Write Pending ---------------------------------------+ |
                                                               |
        Single Threading Only ---------------------------------+
*/
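/*
 * Illustrative compile-time sanity check (a sketch added here, not in
 * the patch itself): the carry trick above depends on these two
 * identities.  The negative array size makes the build fail if the
 * flag layout is ever shuffled.
 */
typedef char ng_carry_check_read[
	(READ_PENDING + READ_PENDING == READER_INCREMENT) ? 1 : -1];
typedef char ng_carry_check_write[
	(WRITE_PENDING + WRITE_PENDING == WRITER_ACTIVE) ? 1 : -1];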

/* 
 * Taking into account the current state of the queue and node,
 * possibly take the next element off the queue and return it.
 * Return NULL if there was nothing we could return, either because
 * there really was nothing there, or because the node was in a state where
 * it cannot yet process the next item on the queue.
 *
 * This MUST MUST MUST be called with the mutex held.
 */
__inline
struct ng_qelem *
dequeue(struct ng_queue *ngq)
{
	struct ng_qelem *ngqe;
	u_int	add_arg;	

	/*
	 * if there is a writer, then the answer is "no". Everything else
	 * stops when there is a WRITER.
	 */
	if (ngq->flag & WRITER_ACTIVE) {
		return (NULL);
	}

	/* Now take a look at what's on the queue and what's running */
	if ((ngq->flag & ~(READER_MASK|SINGLE_THREAD_ONLY)) == READ_PENDING) {
		/*
		 * It was a reader and we have no write active.
		 * We don't care how many readers are already active.
		 * Adjust the count for the item we are about to
		 * dequeue. Adding READ_PENDING to the existing
		 * READ_PENDING clears it and generates a carry into
		 * the reader count.
		 * [XXX HOLE: fasttrack could come in here and succeed,
		 * which would not be a disaster but would give out of
		 * order delivery.. maybe we should leave the pending
		 * flag set until later?]
		 */
		add_arg = READ_PENDING;
	} else if ((ngq->flag & ~SINGLE_THREAD_ONLY) == WRITE_PENDING) {
		/* 
		 * There is a pending write, no readers and no active writer.
		 * This means we can go ahead with the pending writer.
		 * Note the fact that we now have a writer, 
		 * ready for when we take it off the queue.
		 *
		 * We don't need to worry about a possible collision
		 * with the fasttrack reader.
		 *
		 * The fasttrack thread may take a long time
		 * to discover that we are running so we would have
		 * an inconsistent state in the flags for a while.
		 * Since we ignore the reader count
		 * entirely when the WRITER_ACTIVE flag is set, this
		 * should not matter (in fact it is defined that way).
		 * If it tests the flags before this operation, the
		 * WRITE_PENDING flag will make it fail, and if it tests them
		 * later, the WRITER_ACTIVE flag will do the same. If it is
		 * SO slow that we have actually completed the operation,
		 * and neither flag is set (nor READ_PENDING) by the time
		 * that it tests the flags, then it is actually OK for
		 * it to continue. If we have finished but READ_PENDING has
		 * been set by then, it still fails.
		 * 
		 * So we can just ignore it,  as long as we can ensure that the 
		 * transition from WRITE_PENDING state to the WRITER_ACTIVE
		 * state is atomic.
		 *
		 * After failing, first it will be held back by the
		 * mutex, then when it can proceed, it will queue
		 * its request, and then it will arrive at this
		 * function. Usually it will have to leave empty-handed
		 * because the WRITER_ACTIVE bit will be set.
		 */
		/*
		 * Adjust the flags for the item we are about to
		 * dequeue. Adding WRITE_PENDING to the existing
		 * WRITE_PENDING clears it and generates a carry into
		 * the WRITER_ACTIVE flag, all atomically.
		 */
		add_arg = WRITE_PENDING;
		/*
		 * We want the state to read "active writer, no readers".
		 * The atomic add below makes it so. There may still be a
		 * number in the reader count, but we know it is not valid
		 * and it will be fixed soon. We will fix the flags for the
		 * next pending element in a moment.
		 */
	} else {
		/*
		 * We can't dequeue anything.. return and say so.
		 * Probably we have a write pending and the readers
		 * count is non zero. If we got here because a reader
		 * hit us just at the wrong moment with the fasttrack
		 * code, and put us in a strange state, then it will be
		 * through in just a moment, (as soon as we release the mutex)
		 * and keep things moving.
		 */
		return (NULL);
	}
	/*
	 * Now we dequeue the request (whatever it may be) and
	 * correct the pending flags and the next and last pointers.
	 */
	ngqe = ngq->queue;
	ngq->queue = ngqe->next;
	if (ngq->last == &(ngqe->next)) {
		/*
		 * That was the last entry in the queue,
		 * so set the 'last' pointer up correctly
		 * and make sure the pending flags are clear.
		 */
		ngq->last = &(ngq->queue);
		/*
		 * Whatever flag was set is cleared and the carry sets
		 * the correct new active state/count. So we don't
		 * need to change add_arg.
		 */
	} else {
		if ((ngq->queue->flags & NGQE_TYPE) == NGQE_READER) {
			/*
			 * If we had a READ_PENDING and have another one,
			 * we just want to add READ_PENDING twice
			 * (the same as adding READER_INCREMENT).
			 * If we had WRITE_PENDING, we want to add
			 * READ_PENDING + WRITE_PENDING to clear the
			 * old WRITE_PENDING, set WRITER_ACTIVE, and
			 * set READ_PENDING. Either way we just add
			 * READ_PENDING to whatever we already had.
			 */
			add_arg += READ_PENDING;
		} else {
			/*
			 * If we had a WRITE_PENDING and have another one,
			 * we just want to add WRITE_PENDING twice
			 * (the same as adding WRITER_ACTIVE).
			 * If we had READ_PENDING, we want to add
			 * READ_PENDING + WRITE_PENDING to clear the
			 * old READ_PENDING, increment the readers, and
			 * set WRITE_PENDING. Either way we just add
			 * WRITE_PENDING to whatever we already had.
			 */
			add_arg += WRITE_PENDING;
		}
	}
	atomic_add_long(&ngq->flag, add_arg);
	/*
	 * We have successfully cleared the old pending flag,
	 * set the new one if it is needed, and incremented the appropriate
	 * active field. (all in one atomic addition.. wow)
	 */
	return (ngqe);
}
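
/*
 * Worked example (illustrative, not part of the patch): suppose flag is
 * 0x28, i.e. two active readers (2 * READER_INCREMENT) plus READ_PENDING
 * for a queued reader, and the element behind that reader in the queue
 * is a writer.  dequeue() picks add_arg = READ_PENDING, then adds
 * WRITE_PENDING for the new head of queue, so it atomically adds 0x0a:
 *
 *	0x28 + 0x0a = 0x32 = 3 * READER_INCREMENT + WRITE_PENDING
 *
 * The queued reader became active (the carry bumped the count) and the
 * writer behind it is now marked pending, all in one addition.
 */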




/*
 * This function 'cheats' in that it first tries to 'grab'
 * the use of the node without going through the mutex.
 * We can do this because of the semantics of the lock.
 * The semantics include a clause that says that the value of the
 * reader count is invalid if the WRITER_ACTIVE flag is set.
 * It also says that the WRITER_ACTIVE flag cannot be set if
 * the reader count is not zero. Note that this only governs when
 * it is valid to SET the WRITER_ACTIVE flag, because from the moment
 * it is set, the value of the reader count is immaterial and
 * not valid. The two 'pending' flags have a similar effect, in that
 * they are orthogonal to the two active fields in how they are set,
 * but if either is set, the attempted 'grab' needs to be backed out,
 * because there is earlier work and we maintain ordering in the queue.
 * The result of this is that a reader request can try to obtain
 * use of the node with only a single atomic addition, and without
 * any of the mutex overhead. If this fails, the operation
 * degenerates to the same path as the other cases.
 */
__inline struct ng_qelem *
aquire_read(struct ng_queue *ngq, struct ng_qelem *ngqe)
{

/*######### Hack alert #########*/
	atomic_add_long(&ngq->flag, READER_INCREMENT);
	if ((ngq->flag & (~READER_MASK)) == 0) {
		/* Successfully grabbed node */
		return (ngqe);
	}
	/* undo the damage if we didn't succeed */
	atomic_subtract_long(&ngq->flag, READER_INCREMENT);

/*######### End Hack alert #########*/
	mtx_enter((&ngq->mtx), MTX_SPIN);
	/*
	 * Try again. Another processor (or interrupt for that matter)
	 * may have removed the last queued item that was stopping us
	 * from running, between the previous test, and the moment
	 * that we took the mutex. (Or maybe a writer completed.)
	 */
	if ((ngq->flag & (~READER_MASK)) == 0) {
		atomic_add_long(&ngq->flag, READER_INCREMENT);
		mtx_exit((&ngq->mtx), MTX_SPIN);
		return (ngqe);
	}

	/*
	 * queue the reader
	 */
	if (ngq->flag & SINGLE_THREAD_ONLY) {
		panic("Calling single threaded queue incorrectly");
	}
	ngqe->next = NULL; /* maybe not needed */
	*ngq->last = ngqe;
	/*
	 * If it was the first item in the queue then we also need to
	 * set the pending type flag; either way, advance the 'last'
	 * pointer to the new tail.
	 */
	if (ngq->last == &(ngq->queue)) {
		atomic_add_long(&ngq->flag, READ_PENDING);
	}
	ngq->last = &(ngqe->next);

	/*
	 * Ok, so that's the item successfully queued for later.
	 * So now we see if we can dequeue something to run instead.
	 */
	ngqe = dequeue(ngq);
	mtx_exit(&(ngq->mtx), MTX_SPIN);
	return (ngqe);
}	

__inline struct ng_qelem *
aquire_write(struct ng_queue *ngq, struct ng_qelem *ngqe)
{
	mtx_enter(&(ngq->mtx), MTX_SPIN);
	/*
	 * If there are no readers, no writer, and no pending packets,
	 * then we can just go ahead. In all other situations
	 * we need to queue the request
	 */
	if ((ngq->flag & (~SINGLE_THREAD_ONLY)) == 0) {
		atomic_add_long(&ngq->flag, WRITER_ACTIVE);
		mtx_exit((&ngq->mtx), MTX_SPIN);
		return (ngqe);
	}

	/*
	 * queue the writer
	 */
	ngqe->next = NULL; /* maybe not needed */
	*ngq->last = ngqe;
	/*
	 * If it was the first item in the queue then we also need to
	 * set the pending type flag; either way, advance the 'last'
	 * pointer to the new tail.
	 */
	if (ngq->last == &(ngq->queue)) {
		atomic_add_long(&ngq->flag, WRITE_PENDING);
	}
	ngq->last = &(ngqe->next);

	/*
	 * Ok, so that's the item successfully queued for later.
	 * So now we see if we can dequeue something to run instead.
	 */
	ngqe = dequeue(ngq);
	mtx_exit(&(ngq->mtx), MTX_SPIN);
	return (ngqe);
}


__inline void
leave_read(struct ng_queue *ngq)
{
	atomic_subtract_long(&ngq->flag, READER_INCREMENT);
}

__inline void
leave_write(struct ng_queue *ngq)
{
	atomic_subtract_long(&ngq->flag, WRITER_ACTIVE);
}
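
/*
 * A minimal usage sketch (illustrative only; drain_queue is just a name
 * invented for this example): one way a caller might drain whatever
 * dequeue() is willing to hand out, processing each element outside the
 * mutex and then dropping the read or write claim it carried.  The
 * "process" step is a placeholder; real netgraph code would deliver the
 * queued mbuf/meta to the node there.
 */
__inline void
drain_queue(struct ng_queue *ngq)
{
	struct ng_qelem *ngqe;

	mtx_enter(&(ngq->mtx), MTX_SPIN);
	while ((ngqe = dequeue(ngq)) != NULL) {
		mtx_exit(&(ngq->mtx), MTX_SPIN);
		/* ... process ngqe here, without holding the mutex ... */
		if ((ngqe->flags & NGQE_TYPE) == NGQE_READER)
			leave_read(ngq);
		else
			leave_write(ngq);
		mtx_enter(&(ngq->mtx), MTX_SPIN);
	}
	mtx_exit(&(ngq->mtx), MTX_SPIN);
}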

void
init_queue(struct ng_queue *ngq)
{
	mtx_init(&ngq->mtx, "junk mutex", 0);
	ngq->queue = NULL;
	ngq->last = &ngq->queue;
	ngq->flag = 0;
}

struct mtx  mtx1;

struct ng_qelem elem;
struct ng_queue q1;

/*
 * Simple compile/inspection harness; the markN() calls are assumed to be
 * external marker functions that delimit each primitive in the generated
 * code.
 */
int
main(void)
{
	struct ng_qelem *el = &elem;
	struct ng_queue *ngq = &q1;
	mark0();
	init_queue(ngq);
	mark1();
	el = aquire_read(ngq,el);
	mark2();
	el = aquire_write(ngq,el);
	mark3();
	mtx_enter(&(ngq->mtx), MTX_SPIN);
	mark4();
	el = dequeue(ngq);
	mark5();
	mtx_exit(&(ngq->mtx), MTX_SPIN);
	mark6();
	leave_read(ngq);
	mark7();
	leave_write(ngq);
	mark_end();
	return (0);
}



