Date: Thu, 17 May 2001 01:31:06 +0200 From: Tor.Egge@fast.no To: dillon@earth.backplane.com Cc: arch@FreeBSD.ORG Subject: Re: on load control / process swapping Message-ID: <200105162331.BAA04708@midten.fast.no> In-Reply-To: Your message of "Wed, 16 May 2001 15:22:51 -0700 (PDT)" References: <200105162222.f4GMMpC81247@earth.backplane.com>
next in thread | previous in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
> I'd have to see your test code. Doing a direct-read into a user buffer
> has no cache impact at all (DMA does not go through the cpu cache).
> If you are doing seek/read()s but not actually looking at the data that
> is returned, your test results are going to be seriously skewed.
The test code does not look at the data. I sent a copy of it to you
on January 7th, 2000 (along with a previous version of the O_DIRECT
patch).
I agree that the 95% reduction in CPU usage is seriously skewed. The
performance improvement for most real applications will be very small
or even negative. For some specialized applications it is a
significant performance improvement, giving nearly the same
performance as when bypassing the kernel file system and using the raw
device directly.
- Tor Egge
[-- Attachment #2 --]
# Builds three variants of the random-read benchmark from aiotest.c.
# Recipe lines must begin with a TAB character.
.PHONY: all clean

all: aiotest_lt_raw aiotest_lt aiotest_ut

clean:
	rm -f aiotest_lt_raw aiotest_lt aiotest_ut

# LinuxThreads build with -DRAWREAD: reads go through rawread(), or through
# pread() on an O_DIRECT descriptor when the system defines O_DIRECT.
aiotest_lt_raw: aiotest.c
	cc -static -D_THREAD_SAFE -D_PTHREADS -DLINUXTHREADS -DRAWREAD -O2 -I/usr/local/include/pthread/linuxthreads -o aiotest_lt_raw aiotest.c -L/usr/local/lib -llthread -llgcc_r

# LinuxThreads build using ordinary pread().
aiotest_lt: aiotest.c
	cc -D_THREAD_SAFE -D_PTHREADS -DLINUXTHREADS -I/usr/local/include/pthread/linuxthreads -O2 -o aiotest_lt aiotest.c -L/usr/local/lib -llthread -llgcc_r

# Build without -DLINUXTHREADS: reads go through aio_pread().
aiotest_ut: aiotest.c
	cc -static -pthread -D_THREAD_SAFE -D_PTHREADS -O2 -o aiotest_ut aiotest.c
[-- Attachment #3 --]
#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
#include <time.h>
#include <string.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <fcntl.h>
#include <stdlib.h>
#include <assert.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/resource.h>
#include <signal.h>
#include <arpa/inet.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdint.h>
#include <sys/wait.h>
#include <sys/utsname.h>
#include <rpc/types.h>
#include <pthread.h>
#ifndef __linux__
#include <sys/filio.h>
#endif
#ifndef LINUXTHREADS
#include <sys/aio.h>
/*
 * Bookkeeping for one asynchronous read issued by aio_pread().
 *
 * A record is either linked on the active list (doubly linked through
 * next/prev) while its request is outstanding, or on the free list
 * (linked through next only) while it waits to be reused.
 */
struct myaio {
    struct aiocb cb;            /* control block passed to aio_read() */
    struct {
        int busy;               /* 1 from submission until completion is reaped */
        pthread_mutex_t mutex;  /* held while busy is tested or changed */
        pthread_cond_t cond;    /* signalled when busy is reset to 0 */
    } cond;
    struct myaio *next;
    struct myaio *prev;
    ssize_t retval;             /* value returned by aio_return() */
    int reterrno;               /* value returned by aio_error(); an int, like errno */
    time_t started;             /* time() when the request was submitted */
    int errwritten;             /* non-zero once the "used more than" message was printed */
};
/* Head of the doubly linked list of requests submitted but not yet reaped. */
static struct myaio *activeaios;
/* Head of the list (linked through next) of idle records available for reuse. */
static struct myaio *freeaios;
/* Number of records currently on freeaios. */
static int freecnt;
/* Held by aio_pread() and processusr1() while they touch the lists and counters. */
static pthread_mutex_t aiomutex = PTHREAD_MUTEX_INITIALIZER;
/* Broadcast by the aio thread once it has set aiothread_running. */
static pthread_cond_t aiocond = PTHREAD_COND_INITIALIZER;
/* Count of requests submitted by aio_pread(). */
static int aiostartcnt;
/* Count of requests whose completion processusr1() has collected. */
static int aioendcnt;
/* Set to 1 by the SIGUSR1 handler; cleared by the aio thread before it scans. */
static volatile sig_atomic_t gotusr1;
/* Largest delay observed between gotusr1time and the aio thread reacting; printed by main(). */
struct timeval maxlat;
/* Time at which gotusr1 last changed from 0 to 1. */
static struct timeval gotusr1time;
/* Ensures runaiothread() is invoked only once. */
static pthread_once_t aiothread_once = PTHREAD_ONCE_INIT;
/* Handle of the thread running aiothreadmeat(). */
static pthread_t aiothread;
/* Set to 1 (under aiomutex) once the aio thread has installed its signal handler. */
static int aiothread_running;
#endif
int xreadlen; /* size of each read in bytes; set by main(), default 1024 */
int xreadoff; /* (xreadlen / 512) - 1: count of trailing 512-byte units excluded
               * from the random offset range so a read never extends past EOF */
/* Starts the aio completion thread; run through pthread_once() by aio_pread(). */
static void runaiothread(void);
#ifndef __linux__
#ifndef O_DIRECT
ssize_t
rawread(int fd, void *buf, size_t nbytes, off_t offset)
{
struct rawread rr;
ssize_t ret;
rr.udata = buf;
rr.len = nbytes;
rr.offset = offset;
ret = ioctl(fd, FIORAWREAD, &rr);
if (ret < 0 && errno == ENOTTY)
ret = pread(fd, buf, nbytes, offset);
return ret;
}
#endif
#endif
#ifndef LINUXTHREADS
/*
 * Blocking positional read built on aio_read().
 *
 * A request record is taken from the free list; when the list is empty the
 * completion thread is started (once) and a new record is allocated.  The
 * record is put at the head of the active list, the read is submitted with
 * SIGUSR1 completion notification, and the caller then sleeps on the
 * record's condition variable until processusr1() clears cond.busy.
 * Finally the record is pushed back onto the free list.
 *
 * Returns the value processusr1() stored from aio_return() and sets errno
 * to the value it stored from aio_error().  Returns -1 with errno set to
 * ENOMEM when a new request record cannot be allocated.
 */
ssize_t aio_pread(const int fd,
                  void *buf,
                  const size_t buflen,
                  const off_t off)
{
    struct myaio *aio;
    int ret;
    ssize_t retval;     /* same type as aio->retval and the return value */
    int reterrno;

    pthread_mutex_lock(&aiomutex);
    if (freeaios != NULL) {
        assert(freecnt > 0);
        freecnt--;
        aio = freeaios;
        freeaios = aio->next;
        aio->next = NULL;
        aio->prev = NULL;
    } else {
        assert(freecnt == 0);
        pthread_once(&aiothread_once, runaiothread);
        /* Do not submit anything before the completion thread is ready. */
        while (aiothread_running == 0)
            pthread_cond_wait(&aiocond, &aiomutex);
        /* Allocate and initialise the new record without holding aiomutex. */
        pthread_mutex_unlock(&aiomutex);
        aio = malloc(sizeof *aio);
        if (aio == NULL) {
            /* aiomutex is not held here, so a plain return is safe. */
            errno = ENOMEM;
            return -1;
        }
        memset(aio, 0, sizeof *aio);
        pthread_mutex_init(&aio->cond.mutex, NULL);
        pthread_cond_init(&aio->cond.cond, NULL);
        aio->next = NULL;
        aio->prev = NULL;
        pthread_mutex_lock(&aiomutex);
    }

    assert(aio->cond.busy == 0);
    aio->cond.busy = 1;
    aio->cb.aio_fildes = fd;
    aio->cb.aio_offset = off;
    aio->cb.aio_buf = buf;
    aio->cb.aio_nbytes = buflen;
    aio->cb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
    aio->cb.aio_sigevent.sigev_signo = SIGUSR1;
    aio->cb.aio_sigevent.sigev_value.sigval_ptr = &aio->cb;
    aio->cb.aio_lio_opcode = 0;
    aio->cb.aio_reqprio = 0;
    aio->retval = 0;
    aio->started = time(0);
    aio->errwritten = 0;

    /* Insert at the head of the active list. */
    aio->prev = NULL;
    aio->next = activeaios;
    if (activeaios != NULL)
        activeaios->prev = aio;
    activeaios = aio;
    aiostartcnt++;
    ret = aio_read(&aio->cb);
    pthread_mutex_unlock(&aiomutex);
    assert(ret == 0);

    /* Wait until processusr1() has collected the result. */
    pthread_mutex_lock(&aio->cond.mutex);
    while (aio->cond.busy != 0) {
        pthread_cond_wait(&aio->cond.cond, &aio->cond.mutex);
    }
    pthread_mutex_unlock(&aio->cond.mutex);
    retval = aio->retval;
    reterrno = aio->reterrno;
#if 0
    assert((size_t) aio->retval == buflen);
#endif

    /* processusr1() already unlinked the record; recycle it. */
    pthread_mutex_lock(&aiomutex);
    assert(aio->next == NULL);
    assert(aio->prev == NULL);
    assert(aio != activeaios);
    assert(aio != freeaios);
    aio->next = freeaios;
    aio->prev = NULL;
    freeaios = aio;
    freecnt++;
    pthread_mutex_unlock(&aiomutex);

    errno = reterrno;
    return retval;
}
/*
 * SIGUSR1 handler.  Records the time of the first notification that has
 * not been acted on yet (gotusr1 still 0) and raises the gotusr1 flag for
 * the polling loop in aiothreadmeat().
 *
 * errno is saved and restored because gettimeofday() may modify it while
 * the interrupted code is between a failing call and its errno check.
 */
static void
usr1handler(int sig)
{
    int saved_errno = errno;

    (void) sig;
    if (gotusr1 == 0)
        gettimeofday(&gotusr1time, NULL);
    gotusr1 = 1;
    errno = saved_errno;
}
/*
 * Scan the active request list and reap every request that has finished.
 *
 * For each record, aio_error() is queried:
 *   - EINPROGRESS: the request stays on the list;
 *   - negative:    the call itself failed; only asserted on;
 *   - otherwise:   the result is fetched with aio_return(), the record is
 *                  unlinked from the active list and the thread sleeping in
 *                  aio_pread() is woken through the record's condition
 *                  variable.
 * A diagnostic is printed for requests older than 15 seconds: once while
 * still in progress, and again when they eventually complete.
 *
 * Runs with aiomutex held for the whole scan.
 */
void
processusr1(void)
{
    struct myaio *aio, *naio;
    int reterrno;
    time_t now;         /* time_t, so the value is not truncated to int */
    int qpos;

    pthread_mutex_lock(&aiomutex);
    now = time(0);
    qpos = 0;
    for (aio = activeaios; aio != NULL; aio = naio, qpos++) {
        /* Remember the successor now: the record may be unlinked below. */
        naio = aio->next;
        reterrno = aio_error(&aio->cb);
        if (now - aio->started > 15 &&
            (reterrno != EINPROGRESS || aio->errwritten == 0)) {
            /* aio_nbytes is a size_t; cast it to match the conversion. */
            printf("ERROR: aio used more than %d seconds: cb=%p, buflen=%lu"
                   ", qpos=%d %s, aiocnt=%d,%d\n",
                   (int) (now - aio->started - 1),
                   (void *) &aio->cb, (unsigned long) aio->cb.aio_nbytes, qpos,
                   aio->next == NULL ? "" : "(more elements)",
                   aiostartcnt, aioendcnt);
            aio->errwritten = 1;
        }
        if (reterrno == EINPROGRESS)
            continue;
        else if (reterrno < 0) {
            assert(errno == EINVAL);
            assert(now - aio->started < 10);
        } else {
            aioendcnt++;
            assert(aio->prev != NULL || aio == activeaios);
            aio->retval = aio_return(&aio->cb);
            aio->reterrno = reterrno;
            /* Unlink from the active list. */
            if (aio->next != NULL)
                aio->next->prev = aio->prev;
            if (aio->prev != NULL)
                aio->prev->next = aio->next;
            if (aio == activeaios)
                activeaios = aio->next;
            aio->prev = NULL;
            aio->next = NULL;
            /* Wake the thread blocked in aio_pread() on this record. */
            pthread_mutex_lock(&aio->cond.mutex);
            aio->cond.busy = 0;
            pthread_cond_signal(&aio->cond.cond);
            pthread_mutex_unlock(&aio->cond.mutex);
        }
    }
    pthread_mutex_unlock(&aiomutex);
}
void *aiothreadmeat(void *dummy)
{
sigset_t sigs_to_block;
struct sigaction act;
struct timeval now, lat;
struct sched_param schedparam;
int policy;
if (pthread_getschedparam(pthread_self(), &policy, &schedparam) == 0) {
printf("Initial Aiothread priority was %d\n", schedparam.sched_priority);
schedparam.sched_priority += 4;
if (pthread_setschedparam(pthread_self(), policy, &schedparam) == 0) {
if (pthread_getschedparam(pthread_self(), &policy, &schedparam) == 0)
printf("Bumped priority of Aiothread to %d\n",
schedparam.sched_priority);
else
printf("Failed rereading Aiothread priority\n");
} else
printf("Failed Bumping Aiothread priority\n");
} else
printf("Failed reading initial Aiothread priority\n");
act.sa_handler=usr1handler;
sigemptyset(&act.sa_mask);
act.sa_flags=0;
sigaction(SIGUSR1,&act,NULL);
sigemptyset(&sigs_to_block);
sigaddset(&sigs_to_block, SIGUSR1);
pthread_sigmask(SIG_UNBLOCK, &sigs_to_block, NULL);
pthread_mutex_lock(&aiomutex);
aiothread_running = 1;
pthread_cond_broadcast(&aiocond);
pthread_mutex_unlock(&aiomutex);
gettimeofday(&gotusr1time, NULL);
gotusr1 = 1;
while (1) {
if (gotusr1 != 0) {
gettimeofday(&now, NULL);
if (now.tv_usec >= gotusr1time.tv_usec) {
lat.tv_usec = now.tv_usec - gotusr1time.tv_usec;
lat.tv_sec = now.tv_sec - gotusr1time.tv_sec;
} else {
lat.tv_usec = now.tv_usec + 1000000 - gotusr1time.tv_usec;
lat.tv_sec = now.tv_sec - 1 - gotusr1time.tv_sec;
}
if (lat.tv_sec > maxlat.tv_sec ||
(lat.tv_sec == maxlat.tv_sec &&
lat.tv_usec >= maxlat.tv_usec))
maxlat = lat;
gotusr1 = 0;
processusr1();
}
sleep(1);
}
abort();
}
/*
 * Start the aio completion thread (aiothreadmeat).  Invoked through
 * pthread_once() from aio_pread().
 *
 * If the thread cannot be created the program is aborted: aio_pread()
 * waits for aiothread_running to become non-zero, which only the new
 * thread does, so continuing would block every reader forever.
 */
static void runaiothread(void)
{
    int err;

    err = pthread_create(&aiothread, NULL, aiothreadmeat, NULL);
    if (err != 0) {
        /* pthread_create() returns the error number instead of setting errno. */
        fprintf(stderr, "pthread_create(aiothread): %s\n", strerror(err));
        abort();
    }
}
#endif
/*
 * Size of the test file, counted in 1 MB write buffers as used by
 * writefile(): a 10000 MB test file.
 */
#define FILESIZE 10000
/* Size of the test file in bytes; assigned by writefile(). */
static off_t filesize;
/*
 * Open (creating if necessary) the test file "largefile" in the current
 * directory and make sure it holds at least FILESIZE megabytes.
 *
 * When the existing file is smaller than that, FILESIZE buffers of 1 MB are
 * written starting at the current file position (offset 0 after open()).
 * The buffer contents are not initialised; only the file size matters here.
 * When built with RAWREAD on a system that defines O_DIRECT, O_DIRECT is
 * then added to the descriptor's flags; a failure to do so is reported on
 * stderr but is not fatal.
 *
 * Sets the file-scope variable filesize and returns the open descriptor.
 * Failures of malloc(), open(), fstat() or write() trip an assert().
 */
int writefile(void)
{
    char *buf;
    size_t buflen;
    int fd;
    int count;
    int statret;
    ssize_t wgot;
    struct stat stbuf;

    buflen = 1024 * 1024;
    buf = malloc(buflen);
    assert(buf != NULL);
    filesize = (off_t) FILESIZE * (off_t) buflen;
    fd = open("largefile", O_RDWR | O_CREAT, 0666);
    assert(fd >= 0);
#if 1
    /* Without a valid stbuf the size comparison below would be meaningless. */
    statret = fstat(fd, &stbuf);
    assert(statret == 0);
    if (stbuf.st_size < filesize) {
        for (count = 0; count < FILESIZE; count++) {
            wgot = write(fd, buf, buflen);
            assert(wgot == (ssize_t) buflen);
        }
    }
#endif
    /* The buffer is only needed for populating the file. */
    free(buf);
#ifdef RAWREAD
#ifdef O_DIRECT
    {
        int flags;

        /* Never feed a failed F_GETFL result (-1) back into F_SETFL. */
        flags = fcntl(fd, F_GETFL, 0);
        if (flags == -1 || fcntl(fd, F_SETFL, flags | O_DIRECT) == -1)
            perror("fcntl(O_DIRECT)");
    }
#endif
#endif
    return fd;
}
/* Held by the reader threads while they update the counters below. */
static pthread_mutex_t cntmutex = PTHREAD_MUTEX_INITIALIZER;
/* Number of reads started by all reader threads. */
static int startreadcnt;
/* Number of reads that completed with the full length. */
static int donereadcnt;
/* Total number of bytes delivered by completed reads. */
static off_t donereadbytes;
void *readthread(void *data)
{
int fd;
size_t buflen;
char *buf;
ssize_t rgot;
off_t loc;
sigset_t sigs_to_block;
fd = (int) data;
buflen = xreadlen;
buf = (char *) malloc(buflen);
assert(buf != NULL);
sigemptyset(&sigs_to_block);
sigaddset(&sigs_to_block, SIGUSR1);
pthread_sigmask(SIG_BLOCK, &sigs_to_block, NULL);
sleep(1);
while (1) {
loc = (off_t) (random() % (FILESIZE * 2048 - xreadoff)) * (off_t) 512;
#if 0
loc &= ~ 32767LL;
#endif
pthread_mutex_lock(&cntmutex);
startreadcnt++;
pthread_mutex_unlock(&cntmutex);
#ifdef LINUXTHREADS
#if defined(RAWREAD) && !defined(O_DIRECT)
rgot = rawread(fd, buf, buflen, loc);
#else
rgot = pread(fd, buf, buflen, loc);
#endif
#else
rgot = aio_pread(fd, buf, buflen, loc);
#endif
if (rgot != buflen) {
printf("rgot=%d, buflen=%d, loc=%qd, startreadcnt=%d,%d\n",
rgot, buflen, loc, startreadcnt, donereadcnt);
}
assert(rgot == buflen);
pthread_mutex_lock(&cntmutex);
donereadcnt++;
donereadbytes += buflen;
pthread_mutex_unlock(&cntmutex);
}
return NULL;
}
/*
 * Random-read benchmark driver.
 *
 * Usage: prog [readlen [nthreads]]
 *   readlen   bytes per read; default 1024.  Values outside 1..2097152 fall
 *             back to 1024; the value is rounded up to a multiple of 512.
 *   nthreads  number of reader threads; default 250.  Values outside
 *             1..1000 fall back to 1.
 *
 * Prepares the test file with writefile(), blocks SIGUSR1 in the main
 * thread (threads created afterwards inherit the mask), starts the reader
 * threads and then prints cumulative operation and throughput figures once
 * per second, forever.  In non-LINUXTHREADS builds the largest observed
 * signal-to-scan latency (maxlat) is printed as well.
 */
int main(int argc, char **argv)
{
    int fd;
    int cnt;
    int err;
    pthread_t curthread;
    int startcntcopy, donecntcopy;
    off_t donebytescopy;
    sigset_t sigs_to_block;
    struct timeval stime;
    struct timeval now;
    struct timeval report;
    struct timeval delta;
    double fdelta;
    double rate;
    double mbrate;
    struct timeval tvsel;
    int nthreads;

    xreadlen = 1024;
    if (argc >= 2) {
        xreadlen = atoi(argv[1]);
        /* A length of 0 (or unparsable input) would produce empty reads. */
        if (xreadlen < 1 || xreadlen > 2097152)
            xreadlen = 1024;
        xreadlen = (xreadlen + 511) & ~511;
    }
    xreadoff = (xreadlen / 512) - 1;
    nthreads = 250;
    if (argc >= 3) {
        nthreads = atoi(argv[2]);
        if (nthreads < 1 || nthreads > 1000)
            nthreads = 1;
    }
    fd = writefile();
#if 1
    sigemptyset(&sigs_to_block);
    sigaddset(&sigs_to_block, SIGUSR1);
    pthread_sigmask(SIG_BLOCK, &sigs_to_block, NULL);
#endif
    srandom(time(NULL));
    gettimeofday(&stime, NULL);
    report = stime;
    report.tv_sec++;
    for (cnt = 0; cnt < nthreads; cnt++) {
        /* The descriptor travels inside the pointer argument. */
        err = pthread_create(&curthread, NULL, readthread,
                             (void *) (intptr_t) fd);
        if (err != 0) {
            fprintf(stderr, "pthread_create: %s (started %d of %d readers)\n",
                    strerror(err), cnt, nthreads);
            break;
        }
    }
    while (1) {
#if 0
        sleep(1); /* XXX: Does not work */
#else
        /* Sleep with select() until the next one-second report is due. */
        gettimeofday(&now, NULL);
        if (now.tv_sec < report.tv_sec ||
            (now.tv_sec == report.tv_sec &&
             now.tv_usec < report.tv_usec)) {
            if (report.tv_usec >= now.tv_usec) {
                tvsel.tv_sec = report.tv_sec - now.tv_sec;
                tvsel.tv_usec = report.tv_usec - now.tv_usec;
            } else {
                tvsel.tv_sec = report.tv_sec - now.tv_sec - 1;
                tvsel.tv_usec = report.tv_usec + 1000000 - now.tv_usec;
            }
            select(1, NULL, NULL, NULL, &tvsel);
            continue;
        }
        report.tv_sec++;
#endif
        /* Elapsed time since the start of the run. */
        gettimeofday(&now, NULL);
        if (now.tv_usec >= stime.tv_usec) {
            delta.tv_sec = now.tv_sec - stime.tv_sec;
            delta.tv_usec = now.tv_usec - stime.tv_usec;
        } else {
            delta.tv_sec = now.tv_sec - stime.tv_sec - 1;
            delta.tv_usec = now.tv_usec + 1000000 - stime.tv_usec;
        }
        fdelta = delta.tv_sec + ((double) delta.tv_usec) / 1000000.0;

        /* Snapshot all three shared counters under the same lock. */
        pthread_mutex_lock(&cntmutex);
        startcntcopy = startreadcnt;
        donecntcopy = donereadcnt;
        donebytescopy = donereadbytes;
        pthread_mutex_unlock(&cntmutex);

        rate = (double) donecntcopy / (double) fdelta;
        mbrate = (double) donebytescopy / ((double) (fdelta) * 1048576.0);
        printf("%d(+%d) read operations time=%6.3f, rate=%6.3f tps/s, %6.3f MB/s\n",
               donecntcopy, startcntcopy - donecntcopy,
               fdelta, rate, mbrate);
#ifndef LINUXTHREADS
        /* tv_sec/tv_usec are not int on every platform; cast to match. */
        printf("lat=%ld.%06ld\n", (long) maxlat.tv_sec, (long) maxlat.tv_usec);
#endif
        fflush(stdout);
    }
}
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200105162331.BAA04708>
