Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 12 Jul 1999 11:06:42 -0700 (PDT)
From:      Matthew Dillon <dillon@apollo.backplane.com>
To:        Poul-Henning Kamp <phk@critter.freebsd.dk>
Cc:        Luoqi Chen <luoqi@watermarkgroup.com>, dfr@nlsystems.com, jeremyp@gsmx07.alcatel.com.au, freebsd-current@FreeBSD.ORG, mike@ducky.net
Subject:   Re: "objtrm" problem probably found (was Re: Stuck in "objtrm") 
Message-ID:  <199907121806.LAA70634@apollo.backplane.com>
References:   <20136.931798322@critter.freebsd.dk>

next in thread | previous in thread | raw e-mail | index | archive | help
Here we are:

    Empty loop
	mode 0    9.21 ns/loop nproc=1 lcks=EMPTY

    Tight loop, 1 and 2 processes, with and without lock prefix

	mode 1   16.48 ns/loop nproc=1 lcks=no
	mode 2   23.65 ns/loop nproc=2 lcks=no
	mode 3   93.02 ns/loop nproc=1 lcks=yes
	mode 4  160.82 ns/loop nproc=2 lcks=yes

    Spread loop, 1 and 2 processes, with and without lock prefix.  Other
    memory operations included in loop to mimic more typical situations.

	mode 5   37.64 ns/loop nproc=1 lcks=no
	mode 6   89.28 ns/loop nproc=2 lcks=no
	mode 7   88.32 ns/loop nproc=1 lcks=yes
	mode 8  161.08 ns/loop nproc=2 lcks=yes


    As you can see, the lock prefix creates a stall condition on the locked
    memory, but does *NOT* stall other memory.    The overhead is the same
    with and without the other assembly ops when the lock prefix is used.

    Therefore I believe the impact will be unnoticeable.  On a dual 
    450MHz P-III we are talking 37 ns vs 88 ns - an overhead of 50 ns
    for the one processor case, and an overhead of 72 ns for the two processor
    case.  Still, I suppose if we wanted to optimize the UP case we could
    do away with the lock prefix on non-SMP machines.  I don't know if the 
    SMP variable is accessible from within the i386/include/atomic.h header
    file, though.

					-Matt


/*
 * Compile -O2.  Appropriate variables have been volatilized to avoid 
 * inappropriate optimizations.
 */

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/wait.h>

#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Number of iterations each process runs in every timed loop. */
#define LOOPS	10000000

/*
 * Emit one x86 read-modify-write instruction on the memory operand *p.
 *
 * Both macros must be expanded inside a function that has a pointer named
 * `p' (the target) and a value named `v' (the source operand) in scope.
 * `op' is the instruction template: %0 is the memory operand, %1 is `v'
 * (immediate or register).
 *
 * The target is declared with the read-write constraint "+m" so the
 * compiler knows the location is both read and written; the "cc" clobber
 * records that the instruction changes the flags.
 *
 * ATOMIC_ASM prefixes the instruction with `lock'; ATOMIC_ASM_NOLOCK emits
 * the bare instruction and serves as the unlocked baseline of the benchmark.
 */
#define ATOMIC_ASM(type,op)     \
    __asm __volatile ("lock; " op : "+m" (*(type *)p) : "ir" (v) : "cc")

#define ATOMIC_ASM_NOLOCK(type,op)     \
    __asm __volatile (op : "+m" (*(type *)p) : "ir" (v) : "cc")

/*
 * Add v to the int at p using a lock-prefixed addl.
 */
static __inline void
atomic_add_int(void *p, u_int v)
{
        ATOMIC_ASM(int, "addl %1,%0");
}

/*
 * Add v to the int at p using a plain addl (no lock prefix).
 */
static __inline void
atomic_add_int_nolock(void *p, u_int v)
{
        ATOMIC_ASM_NOLOCK(int, "addl %1,%0");
}

/*
 * Scratch words stored to inside the "spread" loops so that each iteration
 * performs other memory writes besides the add.  Declared volatile so the
 * stores are not optimized away.
 */
volatile int GX[8];	/* note: not shared between processes */

/*
 * Create the second benchmark process, terminating the program if fork()
 * fails so that a one-process run is never reported as nproc=2.
 * Returns 0 in the child and the child's pid in the parent.
 */
static pid_t
fork_or_die(void)
{
    pid_t pid = fork();

    if (pid < 0) {
	perror("fork");
	exit(1);
    }
    return(pid);
}

/*
 * Time LOOPS iterations of an add on a word in a MAP_SHARED page, for each
 * of nine modes, and print the cost per loop in nanoseconds:
 *
 *	mode 0		empty loop
 *	mode 1/2	tight loop, no lock prefix, 1/2 processes
 *	mode 3/4	tight loop, lock prefix, 1/2 processes
 *	mode 5/6	add plus eight stores to GX, no lock prefix, 1/2 processes
 *	mode 7/8	add plus eight stores to GX, lock prefix, 1/2 processes
 *
 * In the two-process modes the child runs the same loop and exits; the
 * parent waits for it before taking the end timestamp, and the elapsed
 * time is divided by the number of processes.
 *
 * Returns 0 on success; exits with status 1 if the scratch mapping cannot
 * be set up or fork() fails.
 */
int
main(int ac, char **av)
{
    int fd;
    int *ptr;
    int pgsize = getpagesize();
    volatile int i;		/* volatile: keeps the loops from being optimized out */
    int m;
    long usec;
    struct timeval tv1;
    struct timeval tv2;

    (void)ac;
    (void)av;

    fd = open("test.dat", O_RDWR|O_CREAT|O_TRUNC, 0666);
    if (fd < 0) {
	perror("open test.dat");
	exit(1);
    }
    /* The file is only backing store for the mapping; don't leave it behind. */
    unlink("test.dat");
    if (ftruncate(fd, pgsize) < 0) {
	perror("ftruncate");
	exit(1);
    }
    ptr = mmap(NULL, pgsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (ptr == MAP_FAILED) {
	perror("mmap");
	exit(1);
    }
    /* The mapping stays valid after the descriptor is closed. */
    close(fd);

    for (m = 0; m <= 8; ++m) {
	pid_t pid = -1;		/* -1: no child was forked for this mode */
	int nproc = 1;
	const char *lcks = "EMPTY";

	gettimeofday(&tv1, NULL);
	switch(m) {
	case 8:
	    pid = fork_or_die();
	    nproc = 2;
	    /* fall through */
	case 7:
	    for (i = 0; i < LOOPS; ++i) {
		atomic_add_int(ptr, 1);
		GX[0] = 1;
		GX[1] = 1;
		GX[2] = 1;
		GX[3] = 1;
		GX[4] = 1;
		GX[5] = 1;
		GX[6] = 1;
		GX[7] = 1;
	    }
	    lcks = "yes";
	    break;
	case 6:
	    pid = fork_or_die();
	    nproc = 2;
	    /* fall through */
	case 5:
	    for (i = 0; i < LOOPS; ++i) {
		atomic_add_int_nolock(ptr, 1);
		GX[0] = 1;
		GX[1] = 1;
		GX[2] = 1;
		GX[3] = 1;
		GX[4] = 1;
		GX[5] = 1;
		GX[6] = 1;
		GX[7] = 1;
	    }
	    lcks = "no";
	    break;
	case 4:
	    pid = fork_or_die();
	    nproc = 2;
	    /* fall through */
	case 3:
	    for (i = 0; i < LOOPS; ++i) {
		atomic_add_int(ptr, 1);
	    }
	    lcks = "yes";
	    break;
	case 2:
	    pid = fork_or_die();
	    nproc = 2;
	    /* fall through */
	case 1:
	    for (i = 0; i < LOOPS; ++i) {
		atomic_add_int_nolock(ptr, 1);
	    }
	    lcks = "no";
	    break;
	case 0:
	    for (i = 0; i < LOOPS; ++i) {
		    ;
	    }
	    break;
	default:
	    printf("huh?\n");
	    exit(1);
	}
	/* The child has done its share of the work; _exit() avoids flushing stdio twice. */
	if (pid == 0)
	    _exit(0);
	/* Parent: the measurement ends only when the child (if any) has finished. */
	while (wait(NULL) > 0)
	    ;
	gettimeofday(&tv2, NULL);

	usec = (long)(tv2.tv_sec - tv1.tv_sec) * 1000000L +
	    ((long)tv2.tv_usec - (long)tv1.tv_usec);

	printf("mode %d\t%6.2f ns/loop nproc=%d lcks=%s\n", m, (double)usec * 1000.0 / (double)LOOPS / (double)nproc, nproc, lcks);
    }
    munmap(ptr, pgsize);
    return(0);
}



To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-current" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199907121806.LAA70634>