From owner-freebsd-current Mon Jul 12 10:41:57 1999
Delivered-To: freebsd-current@freebsd.org
Received: from apollo.backplane.com (apollo.backplane.com [209.157.86.2])
	by hub.freebsd.org (Postfix) with ESMTP id 8568315135
	for ; Mon, 12 Jul 1999 10:41:49 -0700 (PDT)
	(envelope-from dillon@apollo.backplane.com)
Received: (from dillon@localhost)
	by apollo.backplane.com (8.9.3/8.9.1) id KAA70496;
	Mon, 12 Jul 1999 10:41:10 -0700 (PDT)
	(envelope-from dillon)
Date: Mon, 12 Jul 1999 10:41:10 -0700 (PDT)
From: Matthew Dillon
Message-Id: <199907121741.KAA70496@apollo.backplane.com>
To: Matthew Dillon
Cc: Poul-Henning Kamp , Luoqi Chen , dfr@nlsystems.com, jeremyp@gsmx07.alcatel.com.au, freebsd-current@FreeBSD.ORG, mike@ducky.net
Subject: lock prefix overhead in SMP system - results
Sender: owner-freebsd-current@FreeBSD.ORG
Precedence: bulk
X-Loop: FreeBSD.ORG

Ok, here we are. The lock instruction overhead is significant relative to the execution time of the instruction, though on a 450 MHz P-III it is still not a huge problem. Basically 12 vs 89 ns for the non-competitive case, and 26 vs 161 ns for the competitive case.

	empty loop		  9.33 ns/loop	mode 0
	1proc no locks		 12.86 ns/loop	mode 9
	2proc no locks		 26.16 ns/loop	mode 10
	1proc w/ locks		 89.87 ns/loop	mode 1
	2proc w/ locks		161.09 ns/loop	mode 2

If you modify the source code and add three nop() calls inside the loop for cases 9 and 1 (which also covers 10 and 2), which avoids some of the synchronization overhead, you get numbers closer to what I expect in the real world.
( recompile the code -DREAL_WORLD ) empty loop 9.33 ns/loop mode 0 1proc no locks 55.89 ns/loop 2proc no locks 129.96 ns/loop 1proc w/ locks 98.35 ns/loop 2proc w/ locks 160.96 ns/loop /* * Compile -O2 */ #include #include #include #include #include #include #include #include #define LOOPS 10000000 #define ATOMIC_ASM(type,op) \ __asm __volatile ("lock; " op : "=m" (*(type *)p) : "ir" (v), "0" (*(type *)p)) #define ATOMIC_ASM_NOLOCK(type,op) \ __asm __volatile (op : "=m" (*(type *)p) : "ir" (v), "0" (*(type *)p)) static __inline void atomic_add_int(void *p, u_int v) { ATOMIC_ASM(int, "addl %1,%0"); } static __inline void atomic_add_int_nolock(void *p, u_int v) { ATOMIC_ASM_NOLOCK(int, "addl %1,%0"); } void nop(void) { } int main(int ac, char **av) { int fd; char *ptr; char *wlocks; int pgsize = getpagesize(); volatile int i; int m; int usec; pid_t pid = -1; struct timeval tv1; struct timeval tv2; if (ac == 1) { printf("%s {0,1,2}\n", av[0]); printf(" 0 - empty loop\n"); printf(" 1 - one process\n"); printf(" 2 - two processes\n"); printf(" 8 - empty loop - no lock\n"); printf(" 9 - one process - no lock\n"); printf(" 10- two processes - no lock\n"); exit(1); } m = strtol(av[1], NULL, 0); fd = open("test.dat", O_RDWR|O_CREAT|O_TRUNC, 0666); ftruncate(fd, pgsize); ptr = mmap(NULL, pgsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (m & 8) wlocks = ""; else wlocks = " w/locks"; gettimeofday(&tv1, NULL); switch(m) { case 10: pid = fork(); /* fall through */ case 9: for (i = 0; i < LOOPS; ++i) { atomic_add_int_nolock(ptr, 1); #ifdef REAL_WORLD nop(); nop(); nop(); #endif } break; case 8: for (i = 0; i < LOOPS; ++i) { ; } break; case 2: pid = fork(); /* fall through */ case 1: for (i = 0; i < LOOPS; ++i) { atomic_add_int(ptr, 1); #ifdef REAL_WORLD nop(); nop(); nop(); #endif } break; case 0: for (i = 0; i < LOOPS; ++i) { ; } break; default: printf("huh?\n"); exit(1); } if (pid == 0) _exit(0); while (wait(NULL) > 0) ; gettimeofday(&tv2, NULL); usec = tv2.tv_usec + 
1000000 - tv1.tv_usec + (tv2.tv_sec - tv1.tv_sec - 1) * 1000000; switch(m) { case 0: case 8: printf("empty%s %4.2f ns/loop\n", wlocks, (double)usec * 1000.0 / (double)LOOPS); break; case 1: case 9: printf("1proc%s %4.2f ns/loop\n", wlocks, (double)usec * 1000.0 / (double)LOOPS); break; case 2: case 10: printf("2proc%s %4.2f ns/loop\n", wlocks, (double)usec * 1000.0 / (double)LOOPS / 2.0); break; } return(0); } To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-current" in the body of the message