Date: Tue, 19 Apr 2016 15:30:05 +0000 From: Howard Su <howard0su@gmail.com> To: Allan Jude <allanjude@freebsd.org>, src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: Re: svn commit: r298230 - in head: lib/libstand sys/boot/common sys/boot/efi/libefi sys/boot/efi/loader sys/boot/i386/libfirewire sys/boot/i386/libi386 sys/boot/i386/loader sys/boot/mips/beri/loader sy... Message-ID: <CAAvnz_rQ3KLqt2gd5JiZWxZNtoXxJrqfjbrGNTyS6pP-NAf%2B8Q@mail.gmail.com> In-Reply-To: <201604182309.u3IN9MC6047480@repo.freebsd.org> References: <201604182309.u3IN9MC6047480@repo.freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
This is a wonderful change. It also helps a lot when loading many .ko files from a local disk. In my case, loading 5 different .ko files went from 5 seconds down to 1 second. Great job! -Howard On Tue, Apr 19, 2016 at 7:09 AM Allan Jude <allanjude@freebsd.org> wrote: > Author: allanjude > Date: Mon Apr 18 23:09:22 2016 > New Revision: 298230 > URL: https://svnweb.freebsd.org/changeset/base/298230 > > Log: > A new implementation of the loader block cache > > The block cache implementation in loader has proven to be almost > useless, and in worst case even slowing down the disk reads due to > insufficient cache size and extra memory copy. > Also the current cache implementation does not cache reads from CDs, or > work with zfs built on top of multiple disks. > Instead of an LRU, this code uses a simple hash (O(1) read from cache), > and instead of a single global cache, a separate cache per block device. > The cache also implements limited read-ahead to increase performance. > To simplify read ahead management, the read ahead will not wrap over > bcache end, so in worst case, single block physical read will be performed > to fill the last block in bcache. > > Booting from a virtual CD over IPMI: > 0ms latency, before: 27 second, after: 7 seconds > 60ms latency, before: over 12 minutes, after: under 5 minutes. 
> > Submitted by: Toomas Soome <tsoome@me.com> > Reviewed by: delphij (previous version), emaste (previous version) > Relnotes: yes > Differential Revision: https://reviews.freebsd.org/D4713 > > Modified: > head/lib/libstand/cd9660.c > head/lib/libstand/dosfs.c > head/lib/libstand/dosfs.h > head/lib/libstand/ext2fs.c > head/lib/libstand/read.c > head/lib/libstand/stand.h > head/lib/libstand/ufs.c > head/lib/libstand/write.c > head/sys/boot/common/bcache.c > head/sys/boot/common/bootstrap.h > head/sys/boot/common/disk.c > head/sys/boot/common/md.c > head/sys/boot/common/module.c > head/sys/boot/efi/libefi/efipart.c > head/sys/boot/efi/libefi/libefi.c > head/sys/boot/efi/loader/main.c > head/sys/boot/i386/libfirewire/firewire.c > head/sys/boot/i386/libi386/bioscd.c > head/sys/boot/i386/libi386/biosdisk.c > head/sys/boot/i386/libi386/biosmem.c > head/sys/boot/i386/libi386/pxe.c > head/sys/boot/i386/loader/main.c > head/sys/boot/mips/beri/loader/beri_disk_cfi.c > head/sys/boot/mips/beri/loader/beri_disk_sdcard.c > head/sys/boot/ofw/libofw/ofw_disk.c > head/sys/boot/pc98/libpc98/bioscd.c > head/sys/boot/pc98/libpc98/biosdisk.c > head/sys/boot/pc98/libpc98/biosmem.c > head/sys/boot/pc98/loader/main.c > head/sys/boot/powerpc/kboot/hostdisk.c > head/sys/boot/powerpc/ps3/ps3cdrom.c > head/sys/boot/powerpc/ps3/ps3disk.c > head/sys/boot/uboot/lib/disk.c > head/sys/boot/usb/storage/umass_loader.c > head/sys/boot/userboot/userboot/host.c > head/sys/boot/userboot/userboot/main.c > head/sys/boot/userboot/userboot/userboot_disk.c > head/sys/boot/zfs/zfs.c > > Modified: head/lib/libstand/cd9660.c > > ============================================================================== > --- head/lib/libstand/cd9660.c Mon Apr 18 22:00:26 2016 (r298229) > +++ head/lib/libstand/cd9660.c Mon Apr 18 23:09:22 2016 (r298230) > @@ -143,7 +143,7 @@ susp_lookup_record(struct open_file *f, > if (bcmp(sh->type, SUSP_CONTINUATION, 2) == 0) { > shc = (ISO_RRIP_CONT *)sh; > error = 
f->f_dev->dv_strategy(f->f_devdata, F_READ, > - cdb2devb(isonum_733(shc->location)), > + cdb2devb(isonum_733(shc->location)), 0, > ISO_DEFAULT_BLOCK_SIZE, susp_buffer, &read); > > /* Bail if it fails. */ > @@ -288,7 +288,7 @@ cd9660_open(const char *path, struct ope > for (bno = 16;; bno++) { > twiddle(1); > rc = f->f_dev->dv_strategy(f->f_devdata, F_READ, > cdb2devb(bno), > - ISO_DEFAULT_BLOCK_SIZE, buf, > &read); > + 0, ISO_DEFAULT_BLOCK_SIZE, buf, > &read); > if (rc) > goto out; > if (read != ISO_DEFAULT_BLOCK_SIZE) { > @@ -322,7 +322,7 @@ cd9660_open(const char *path, struct ope > twiddle(1); > rc = f->f_dev->dv_strategy > (f->f_devdata, F_READ, > - cdb2devb(bno + boff), > + cdb2devb(bno + boff), 0, > ISO_DEFAULT_BLOCK_SIZE, > buf, &read); > if (rc) > @@ -381,7 +381,7 @@ cd9660_open(const char *path, struct ope > bno = isonum_733(rec.extent) + > isonum_711(rec.ext_attr_length); > twiddle(1); > rc = f->f_dev->dv_strategy(f->f_devdata, F_READ, > cdb2devb(bno), > - ISO_DEFAULT_BLOCK_SIZE, buf, &read); > + 0, ISO_DEFAULT_BLOCK_SIZE, buf, &read); > if (rc) > goto out; > if (read != ISO_DEFAULT_BLOCK_SIZE) { > @@ -438,7 +438,8 @@ buf_read_file(struct open_file *f, char > > twiddle(16); > rc = f->f_dev->dv_strategy(f->f_devdata, F_READ, > - cdb2devb(blkno), ISO_DEFAULT_BLOCK_SIZE, fp->f_buf, > &read); > + cdb2devb(blkno), 0, ISO_DEFAULT_BLOCK_SIZE, > + fp->f_buf, &read); > if (rc) > return (rc); > if (read != ISO_DEFAULT_BLOCK_SIZE) > > Modified: head/lib/libstand/dosfs.c > > ============================================================================== > --- head/lib/libstand/dosfs.c Mon Apr 18 22:00:26 2016 (r298229) > +++ head/lib/libstand/dosfs.c Mon Apr 18 23:09:22 2016 (r298230) > @@ -131,7 +131,18 @@ static DOS_DE dot[2] = { > #define stclus(sz, de) ((sz) != 32 ? 
cv2((de)->clus) : \ > ((u_int)cv2((de)->dex.h_clus) << 16) | \ > cv2((de)->clus)) > - > + > +/* > + * fat cache metadata > + */ > +struct fatcache { > + int unit; /* disk unit number */ > + int size; /* buffer (and fat) size in sectors */ > + u_char *buf; > +}; > + > +static struct fatcache fat; > + > static int dosunmount(DOS_FS *); > static int parsebs(DOS_FS *, DOS_BS *); > static int namede(DOS_FS *, const char *, DOS_DE **); > @@ -143,8 +154,36 @@ static int fatcnt(DOS_FS *, u_int); > static int fatget(DOS_FS *, u_int *); > static int fatend(u_int, u_int); > static int ioread(DOS_FS *, u_int, void *, u_int); > -static int iobuf(DOS_FS *, u_int); > -static int ioget(struct open_file *, u_int, void *, u_int); > +static int ioget(struct open_file *, daddr_t, size_t, void *, u_int); > + > +static void > +dos_read_fat(DOS_FS *fs, struct open_file *fd) > +{ > + struct devdesc *dd = fd->f_devdata; > + > + if (fat.buf != NULL) { /* can we reuse old buffer? */ > + if (fat.size != fs->spf) { > + free(fat.buf); /* no, free old buffer */ > + fat.buf = NULL; > + } > + } > + > + if (fat.buf == NULL) > + fat.buf = malloc(secbyt(fs->spf)); > + > + if (fat.buf != NULL) { > + if (ioget(fd, fs->lsnfat, 0, fat.buf, secbyt(fs->spf)) == 0) { > + fat.size = fs->spf; > + fat.unit = dd->d_unit; > + return; > + } > + } > + if (fat.buf != NULL) /* got IO error */ > + free(fat.buf); > + fat.buf = NULL; > + fat.unit = -1; /* impossible unit */ > + fat.size = 0; > +} > > /* > * Mount DOS filesystem > @@ -153,15 +192,25 @@ static int > dos_mount(DOS_FS *fs, struct open_file *fd) > { > int err; > + struct devdesc *dd = fd->f_devdata; > + u_char *buf; > > bzero(fs, sizeof(DOS_FS)); > fs->fd = fd; > - if ((err = !(fs->buf = malloc(SECSIZ)) ? errno : 0) || > - (err = ioget(fs->fd, 0, fs->buf, 1)) || > - (err = parsebs(fs, (DOS_BS *)fs->buf))) { > + > + if ((err = !(buf = malloc(secbyt(1))) ? 
errno : 0) || > + (err = ioget(fs->fd, 0, 0, buf, secbyt(1))) || > + (err = parsebs(fs, (DOS_BS *)buf))) { > + if (buf != NULL) > + free(buf); > (void)dosunmount(fs); > return(err); > } > + free(buf); > + > + if (fat.buf == NULL || fat.unit != dd->d_unit) > + dos_read_fat(fs, fd); > + > fs->root = dot[0]; > fs->root.name[0] = ' '; > if (fs->fatsz == 32) { > @@ -194,8 +243,6 @@ dos_unmount(DOS_FS *fs) > static int > dosunmount(DOS_FS *fs) > { > - if (fs->buf) > - free(fs->buf); > free(fs); > return(0); > } > @@ -252,42 +299,47 @@ dos_read(struct open_file *fd, void *buf > DOS_FILE *f = (DOS_FILE *)fd->f_fsdata; > int err = 0; > > + /* > + * as ioget() can be called *a lot*, use twiddle here. > + * also 4 seems to be good value not to slow loading down too much: > + * with 270MB file (~540k ioget() calls, twiddle can easily waste > 4-5sec. > + */ > + twiddle(4); > nb = (u_int)nbyte; > if ((size = fsize(f->fs, &f->de)) == -1) > return EINVAL; > if (nb > (n = size - f->offset)) > - nb = n; > + nb = n; > off = f->offset; > if ((clus = stclus(f->fs->fatsz, &f->de))) > - off &= f->fs->bsize - 1; > + off &= f->fs->bsize - 1; > c = f->c; > cnt = nb; > while (cnt) { > - n = 0; > - if (!c) { > - if ((c = clus)) > - n = bytblk(f->fs, f->offset); > - } else if (!off) > - n++; > - while (n--) { > - if ((err = fatget(f->fs, &c))) > + n = 0; > + if (!c) { > + if ((c = clus)) > + n = bytblk(f->fs, f->offset); > + } else if (!off) > + n++; > + while (n--) { > + if ((err = fatget(f->fs, &c))) > goto out; > - if (!okclus(f->fs, c)) { > + if (!okclus(f->fs, c)) { > err = EINVAL; > goto out; > } > - } > - if (!clus || (n = f->fs->bsize - off) > cnt) > - n = cnt; > - if ((err = ioread(f->fs, (c ? blkoff(f->fs, c) : > - secbyt(f->fs->lsndir)) + off, > - buf, n))) > + } > + if (!clus || (n = f->fs->bsize - off) > cnt) > + n = cnt; > + if ((err = ioread(f->fs, (c ? 
blkoff(f->fs, c) : > + secbyt(f->fs->lsndir)) + off, buf, > n))) > goto out; > - f->offset += n; > - f->c = c; > - off = 0; > - buf = (char *)buf + n; > - cnt -= n; > + f->offset += n; > + f->c = c; > + off = 0; > + buf = (char *)buf + n; > + cnt -= n; > } > out: > if (resid) > @@ -364,6 +416,23 @@ dos_stat(struct open_file *fd, struct st > } > > static int > +dos_checksum(char *name, char *ext) > +{ > + int x, i; > + char buf[11]; > + > + bcopy(name, buf, 8); > + bcopy(ext, buf+8, 3); > + x = 0; > + for (i = 0; i < 11; i++) { > + x = ((x & 1) << 7) | (x >> 1); > + x += buf[i]; > + x &= 0xff; > + } > + return (x); > +} > + > +static int > dos_readdir(struct open_file *fd, struct dirent *d) > { > /* DOS_FILE *f = (DOS_FILE *)fd->f_fsdata; */ > @@ -417,12 +486,7 @@ dos_readdir(struct open_file *fd, struct > } > } else { > if (xdn == 1) { > - x = 0; > - for (i = 0; i < 11; i++) { > - x = ((x & 1) << 7) | (x >> 1); > - x += dd.de.name[i]; > - x &= 0xff; > - } > + x = dos_checksum(dd.de.name, dd.de.ext); > if (x == chk) > break; > } else { > @@ -555,7 +619,7 @@ lookup(DOS_FS *fs, u_int clus, const cha > else > return EINVAL; > for (sec = 0; sec < nsec; sec++) { > - if ((err = ioget(fs->fd, lsec + sec, dir, 1))) > + if ((err = ioget(fs->fd, lsec + sec, 0, dir, secbyt(1)))) > return err; > for (ent = 0; ent < DEPSEC; ent++) { > if (!*dir[ent].de.name) > @@ -577,9 +641,7 @@ lookup(DOS_FS *fs, u_int clus, const cha > } > } else if (!(dir[ent].de.attr & FA_LABEL)) { > if ((ok = xdn == 1)) { > - for (x = 0, i = 0; i < 11; i++) > - x = ((((x & 1) << 7) | (x >> 1)) + > - dir[ent].de.name[i]) & 0xff; > + x = dos_checksum(dir[ent].de.name, > dir[ent].de.ext); > ok = chk == x && > !strcasecmp(name, (const char *)lfn); > } > @@ -699,22 +761,52 @@ fatcnt(DOS_FS *fs, u_int c) > } > > /* > - * Get next cluster in cluster chain > + * Get next cluster in cluster chain. Use in core fat cache unless another > + * device replaced it. 
> */ > static int > fatget(DOS_FS *fs, u_int *c) > { > u_char buf[4]; > - u_int x; > - int err; > + u_char *s; > + u_int x, offset, off, n, nbyte, lsec; > + struct devdesc *dd = fs->fd->f_devdata; > + int err = 0; > + > + if (fat.unit != dd->d_unit) { > + /* fat cache was changed to another device, dont use it */ > + err = ioread(fs, secbyt(fs->lsnfat) + fatoff(fs->fatsz, *c), buf, > + fs->fatsz != 32 ? 2 : 4); > + if (err) > + return err; > + } else { > + offset = fatoff(fs->fatsz, *c); > + nbyte = fs->fatsz != 32 ? 2 : 4; > + > + s = buf; > + if ((off = offset & (SECSIZ - 1))) { > + offset -= off; > + lsec = bytsec(offset); > + offset += SECSIZ; > + if ((n = SECSIZ - off) > nbyte) > + n = nbyte; > + memcpy(s, fat.buf + secbyt(lsec) + off, n); > + s += n; > + nbyte -= n; > + } > + n = nbyte & (SECSIZ - 1); > + if (nbyte -= n) { > + memcpy(s, fat.buf + secbyt(bytsec(offset)), nbyte); > + offset += nbyte; > + s += nbyte; > + } > + if (n) > + memcpy(s, fat.buf + secbyt(bytsec(offset)), n); > + } > > - err = ioread(fs, secbyt(fs->lsnfat) + fatoff(fs->fatsz, *c), buf, > - fs->fatsz != 32 ? 2 : 4); > - if (err) > - return err; > x = fs->fatsz != 32 ? cv2(buf) : cv4(buf); > *c = fs->fatsz == 12 ? *c & 1 ? 
x >> 4 : x & 0xfff : x; > - return 0; > + return (0); > } > > /* > @@ -739,42 +831,24 @@ ioread(DOS_FS *fs, u_int offset, void *b > s = buf; > if ((off = offset & (SECSIZ - 1))) { > offset -= off; > - if ((err = iobuf(fs, bytsec(offset)))) > - return err; > - offset += SECSIZ; > if ((n = SECSIZ - off) > nbyte) > n = nbyte; > - memcpy(s, fs->buf + off, n); > + if ((err = ioget(fs->fd, bytsec(offset), off, s, n))) > + return err; > + offset += SECSIZ; > s += n; > nbyte -= n; > } > n = nbyte & (SECSIZ - 1); > if (nbyte -= n) { > - if ((err = ioget(fs->fd, bytsec(offset), s, bytsec(nbyte)))) > + if ((err = ioget(fs->fd, bytsec(offset), 0, s, nbyte))) > return err; > offset += nbyte; > s += nbyte; > } > if (n) { > - if ((err = iobuf(fs, bytsec(offset)))) > - return err; > - memcpy(s, fs->buf, n); > - } > - return 0; > -} > - > -/* > - * Buffered sector-based I/O primitive > - */ > -static int > -iobuf(DOS_FS *fs, u_int lsec) > -{ > - int err; > - > - if (fs->bufsec != lsec) { > - if ((err = ioget(fs->fd, lsec, fs->buf, 1))) > + if ((err = ioget(fs->fd, bytsec(offset), 0, s, n))) > return err; > - fs->bufsec = lsec; > } > return 0; > } > @@ -783,13 +857,8 @@ iobuf(DOS_FS *fs, u_int lsec) > * Sector-based I/O primitive > */ > static int > -ioget(struct open_file *fd, u_int lsec, void *buf, u_int nsec) > +ioget(struct open_file *fd, daddr_t lsec, size_t offset, void *buf, u_int > size) > { > - int err; > - > - twiddle(1); > - if ((err = (fd->f_dev->dv_strategy)(fd->f_devdata, F_READ, lsec, > - secbyt(nsec), buf, NULL))) > - return(err); > - return(0); > + return ((fd->f_dev->dv_strategy)(fd->f_devdata, F_READ, lsec, offset, > + size, buf, NULL)); > } > > Modified: head/lib/libstand/dosfs.h > > ============================================================================== > --- head/lib/libstand/dosfs.h Mon Apr 18 22:00:26 2016 (r298229) > +++ head/lib/libstand/dosfs.h Mon Apr 18 23:09:22 2016 (r298230) > @@ -96,8 +96,6 @@ typedef union { > > typedef struct { > struct 
open_file *fd; /* file descriptor */ > - u_char *buf; /* buffer */ > - u_int bufsec; /* buffered sector */ > u_int links; /* active links to structure */ > u_int spc; /* sectors per cluster */ > u_int bsize; /* cluster size in bytes */ > > Modified: head/lib/libstand/ext2fs.c > > ============================================================================== > --- head/lib/libstand/ext2fs.c Mon Apr 18 22:00:26 2016 (r298229) > +++ head/lib/libstand/ext2fs.c Mon Apr 18 23:09:22 2016 (r298230) > @@ -355,7 +355,7 @@ ext2fs_open(const char *upath, struct op > fp->f_fs = fs; > twiddle(1); > error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, > - EXT2_SBLOCK, EXT2_SBSIZE, (char *)fs, &buf_size); > + EXT2_SBLOCK, 0, EXT2_SBSIZE, (char *)fs, &buf_size); > if (error) > goto out; > > @@ -397,7 +397,7 @@ ext2fs_open(const char *upath, struct op > fp->f_bg = malloc(len); > twiddle(1); > error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, > - EXT2_SBLOCK + EXT2_SBSIZE / DEV_BSIZE, len, > + EXT2_SBLOCK + EXT2_SBSIZE / DEV_BSIZE, 0, len, > (char *)fp->f_bg, &buf_size); > if (error) > goto out; > @@ -509,7 +509,7 @@ ext2fs_open(const char *upath, struct op > > twiddle(1); > error = > (f->f_dev->dv_strategy)(f->f_devdata, > - F_READ, fsb_to_db(fs, disk_block), > + F_READ, fsb_to_db(fs, disk_block), 0, > fs->fs_bsize, buf, &buf_size); > if (error) > goto out; > @@ -570,7 +570,7 @@ read_inode(ino_t inumber, struct open_fi > buf = malloc(fs->fs_bsize); > twiddle(1); > error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, > - ino_to_db(fs, fp->f_bg, inumber), fs->fs_bsize, buf, &rsize); > + ino_to_db(fs, fp->f_bg, inumber), 0, fs->fs_bsize, buf, > &rsize); > if (error) > goto out; > if (rsize != fs->fs_bsize) { > @@ -667,7 +667,7 @@ block_map(struct open_file *f, daddr_t f > malloc(fs->fs_bsize); > twiddle(1); > error = (f->f_dev->dv_strategy)(f->f_devdata, > F_READ, > - fsb_to_db(fp->f_fs, ind_block_num), > fs->fs_bsize, > + fsb_to_db(fp->f_fs, ind_block_num), 0, > fs->fs_bsize, > 
fp->f_blk[level], &fp->f_blksize[level]); > if (error) > return (error); > @@ -725,7 +725,7 @@ buf_read_file(struct open_file *f, char > } else { > twiddle(4); > error = (f->f_dev->dv_strategy)(f->f_devdata, > F_READ, > - fsb_to_db(fs, disk_block), block_size, > + fsb_to_db(fs, disk_block), 0, block_size, > fp->f_buf, &fp->f_buf_size); > if (error) > goto done; > > Modified: head/lib/libstand/read.c > > ============================================================================== > --- head/lib/libstand/read.c Mon Apr 18 22:00:26 2016 (r298229) > +++ head/lib/libstand/read.c Mon Apr 18 23:09:22 2016 (r298230) > @@ -79,7 +79,7 @@ read(int fd, void *dest, size_t bcount) > if (f->f_flags & F_RAW) { > twiddle(4); > errno = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, > - btodb(f->f_offset), bcount, dest, > &resid); > + btodb(f->f_offset), 0, bcount, dest, > &resid); > if (errno) > return (-1); > f->f_offset += resid; > > Modified: head/lib/libstand/stand.h > > ============================================================================== > --- head/lib/libstand/stand.h Mon Apr 18 22:00:26 2016 (r298229) > +++ head/lib/libstand/stand.h Mon Apr 18 23:09:22 2016 (r298230) > @@ -138,8 +138,8 @@ struct devsw { > const char dv_name[8]; > int dv_type; /* opaque type constant, > arch-dependant */ > int (*dv_init)(void); /* early probe call */ > - int (*dv_strategy)(void *devdata, int rw, daddr_t blk, > size_t size, > - char *buf, size_t *rsize); > + int (*dv_strategy)(void *devdata, int rw, daddr_t blk, > + size_t offset, size_t size, char *buf, size_t > *rsize); > int (*dv_open)(struct open_file *f, ...); > int (*dv_close)(struct open_file *f); > int (*dv_ioctl)(struct open_file *f, u_long cmd, void > *data); > @@ -154,6 +154,24 @@ extern struct devsw netdev; > > extern int errno; > > +/* > + * Generic device specifier; architecture-dependent > + * versions may be larger, but should be allowed to > + * overlap. 
> + */ > +struct devdesc > +{ > + struct devsw *d_dev; > + int d_type; > +#define DEVT_NONE 0 > +#define DEVT_DISK 1 > +#define DEVT_NET 2 > +#define DEVT_CD 3 > +#define DEVT_ZFS 4 > + int d_unit; > + void *d_opendata; > +}; > + > struct open_file { > int f_flags; /* see F_* below */ > struct devsw *f_dev; /* pointer to device operations */ > > Modified: head/lib/libstand/ufs.c > > ============================================================================== > --- head/lib/libstand/ufs.c Mon Apr 18 22:00:26 2016 (r298229) > +++ head/lib/libstand/ufs.c Mon Apr 18 23:09:22 2016 (r298230) > @@ -157,7 +157,7 @@ read_inode(inumber, f) > buf = malloc(fs->fs_bsize); > twiddle(1); > rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, > - fsbtodb(fs, ino_to_fsba(fs, inumber)), fs->fs_bsize, > + fsbtodb(fs, ino_to_fsba(fs, inumber)), 0, fs->fs_bsize, > buf, &rsize); > if (rc) > goto out; > @@ -267,7 +267,7 @@ block_map(f, file_block, disk_block_p) > malloc(fs->fs_bsize); > twiddle(1); > rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, > - fsbtodb(fp->f_fs, ind_block_num), > + fsbtodb(fp->f_fs, ind_block_num), 0, > fs->fs_bsize, > fp->f_blk[level], > &fp->f_blksize[level]); > @@ -348,7 +348,7 @@ buf_write_file(f, buf_p, size_p) > > twiddle(4); > rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, > - fsbtodb(fs, disk_block), > + fsbtodb(fs, disk_block), 0, > block_size, fp->f_buf, &fp->f_buf_size); > if (rc) > return (rc); > @@ -367,7 +367,7 @@ buf_write_file(f, buf_p, size_p) > > twiddle(4); > rc = (f->f_dev->dv_strategy)(f->f_devdata, F_WRITE, > - fsbtodb(fs, disk_block), > + fsbtodb(fs, disk_block), 0, > block_size, fp->f_buf, &fp->f_buf_size); > return (rc); > } > @@ -408,7 +408,7 @@ buf_read_file(f, buf_p, size_p) > } else { > twiddle(4); > rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, > - fsbtodb(fs, disk_block), > + fsbtodb(fs, disk_block), 0, > block_size, fp->f_buf, &fp->f_buf_size); > if (rc) > return (rc); > @@ -521,7 +521,7 @@ ufs_open(upath, f) > */ > for 
(i = 0; sblock_try[i] != -1; i++) { > rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, > - sblock_try[i] / DEV_BSIZE, SBLOCKSIZE, > + sblock_try[i] / DEV_BSIZE, 0, SBLOCKSIZE, > (char *)fs, &buf_size); > if (rc) > goto out; > @@ -651,7 +651,7 @@ ufs_open(upath, f) > > twiddle(1); > rc = (f->f_dev->dv_strategy)(f->f_devdata, > - F_READ, fsbtodb(fs, disk_block), > + F_READ, fsbtodb(fs, disk_block), 0, > fs->fs_bsize, buf, &buf_size); > if (rc) > goto out; > > Modified: head/lib/libstand/write.c > > ============================================================================== > --- head/lib/libstand/write.c Mon Apr 18 22:00:26 2016 (r298229) > +++ head/lib/libstand/write.c Mon Apr 18 23:09:22 2016 (r298230) > @@ -82,7 +82,7 @@ write(fd, dest, bcount) > if (f->f_flags & F_RAW) { > twiddle(4); > errno = (f->f_dev->dv_strategy)(f->f_devdata, F_WRITE, > - btodb(f->f_offset), bcount, dest, &resid); > + btodb(f->f_offset), 0, bcount, dest, &resid); > if (errno) > return (-1); > f->f_offset += resid; > > Modified: head/sys/boot/common/bcache.c > > ============================================================================== > --- head/sys/boot/common/bcache.c Mon Apr 18 22:00:26 2016 > (r298229) > +++ head/sys/boot/common/bcache.c Mon Apr 18 23:09:22 2016 > (r298230) > @@ -1,5 +1,6 @@ > /*- > * Copyright (c) 1998 Michael Smith <msmith@freebsd.org> > + * Copyright 2015 Toomas Soome <tsoome@me.com> > * All rights reserved. > * > * Redistribution and use in source and binary forms, with or without > @@ -25,99 +26,155 @@ > */ > > #include <sys/cdefs.h> > +#include <sys/param.h> > __FBSDID("$FreeBSD$"); > > /* > - * Simple LRU block cache > + * Simple hashed block cache > */ > > #include <sys/stdint.h> > > #include <stand.h> > #include <string.h> > -#include <bitstring.h> > +#include <strings.h> > > #include "bootstrap.h" > > /* #define BCACHE_DEBUG */ > > #ifdef BCACHE_DEBUG > -#define BCACHE_TIMEOUT 10 > # define DEBUG(fmt, args...) 
printf("%s: " fmt "\n" , __func__ , ## > args) > #else > -#define BCACHE_TIMEOUT 2 > # define DEBUG(fmt, args...) > #endif > > - > struct bcachectl > { > daddr_t bc_blkno; > - time_t bc_stamp; > int bc_count; > }; > > -static struct bcachectl *bcache_ctl; > -static caddr_t bcache_data; > -static bitstr_t *bcache_miss; > -static u_int bcache_nblks; > -static u_int bcache_blksize; > -static u_int bcache_hits, bcache_misses, bcache_ops, > bcache_bypasses; > -static u_int bcache_flushes; > -static u_int bcache_bcount; > - > -static void bcache_invalidate(daddr_t blkno); > -static void bcache_insert(caddr_t buf, daddr_t blkno); > -static int bcache_lookup(caddr_t buf, daddr_t blkno); > +/* > + * bcache per device node. cache is allocated on device first open and > freed > + * on last close, to save memory. The issue there is the size; biosdisk > + * supports up to 31 (0x1f) devices. Classic setup would use single disk > + * to boot from, but this has changed with zfs. > + */ > +struct bcache { > + struct bcachectl *bcache_ctl; > + caddr_t bcache_data; > + u_int bcache_nblks; > + size_t ra; > +}; > + > +static u_int bcache_total_nblks; /* set by bcache_init */ > +static u_int bcache_blksize; /* set by bcache_init */ > +static u_int bcache_numdev; /* set by bcache_add_dev */ > +/* statistics */ > +static u_int bcache_units; /* number of devices with cache */ > +static u_int bcache_unit_nblks; /* nblocks per unit */ > +static u_int bcache_hits; > +static u_int bcache_misses; > +static u_int bcache_ops; > +static u_int bcache_bypasses; > +static u_int bcache_bcount; > +static u_int bcache_rablks; > + > +#define BHASH(bc, blkno) ((blkno) & ((bc)->bcache_nblks - > 1)) > +#define BCACHE_LOOKUP(bc, blkno) \ > + ((bc)->bcache_ctl[BHASH((bc), (blkno))].bc_blkno != (blkno)) > +#define BCACHE_READAHEAD 256 > +#define BCACHE_MINREADAHEAD 32 > + > +static void bcache_invalidate(struct bcache *bc, daddr_t blkno); > +static void bcache_insert(struct bcache *bc, daddr_t blkno); > 
+static void bcache_free_instance(struct bcache *bc); > > /* > * Initialise the cache for (nblks) of (bsize). > */ > -int > +void > bcache_init(u_int nblks, size_t bsize) > { > - /* discard any old contents */ > - if (bcache_data != NULL) { > - free(bcache_data); > - bcache_data = NULL; > - free(bcache_ctl); > - } > - > - /* Allocate control structures */ > - bcache_nblks = nblks; > + /* set up control data */ > + bcache_total_nblks = nblks; > bcache_blksize = bsize; > - bcache_data = malloc(bcache_nblks * bcache_blksize); > - bcache_ctl = (struct bcachectl *)malloc(bcache_nblks * sizeof(struct > bcachectl)); > - bcache_miss = bit_alloc((bcache_nblks + 1) / 2); > - if ((bcache_data == NULL) || (bcache_ctl == NULL) || (bcache_miss == > NULL)) { > - if (bcache_miss) > - free(bcache_miss); > - if (bcache_ctl) > - free(bcache_ctl); > - if (bcache_data) > - free(bcache_data); > - bcache_data = NULL; > - return(ENOMEM); > - } > - > - return(0); > } > > /* > - * Flush the cache > + * add number of devices to bcache. we have to divide cache space > + * between the devices, so bcache_add_dev() can be used to set up the > + * number. The issue is, we need to get the number before actual > allocations. > + * bcache_add_dev() is supposed to be called from device init() call, so > the > + * assumption is, devsw dv_init is called for plain devices first, and > + * for zfs, last. 
> */ > void > -bcache_flush(void) > +bcache_add_dev(int devices) > { > - u_int i; > + bcache_numdev += devices; > +} > > - bcache_flushes++; > +void * > +bcache_allocate(void) > +{ > + u_int i; > + struct bcache *bc = malloc(sizeof (struct bcache)); > + int disks = bcache_numdev; > + > + if (disks == 0) > + disks = 1; /* safe guard */ > + > + if (bc == NULL) { > + errno = ENOMEM; > + return (bc); > + } > + > + /* > + * the bcache block count must be power of 2 for hash function > + */ > + i = fls(disks) - 1; /* highbit - 1 */ > + if (disks > (1 << i)) /* next power of 2 */ > + i++; > + > + bc->bcache_nblks = bcache_total_nblks >> i; > + bcache_unit_nblks = bc->bcache_nblks; > + bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize); > + if (bc->bcache_data == NULL) { > + /* dont error out yet. fall back to 32 blocks and try again */ > + bc->bcache_nblks = 32; > + bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize); > + } > + > + bc->bcache_ctl = malloc(bc->bcache_nblks * sizeof(struct bcachectl)); > + > + if ((bc->bcache_data == NULL) || (bc->bcache_ctl == NULL)) { > + bcache_free_instance(bc); > + errno = ENOMEM; > + return(NULL); > + } > > /* Flush the cache */ > - for (i = 0; i < bcache_nblks; i++) { > - bcache_ctl[i].bc_count = -1; > - bcache_ctl[i].bc_blkno = -1; > - } > + for (i = 0; i < bc->bcache_nblks; i++) { > + bc->bcache_ctl[i].bc_count = -1; > + bc->bcache_ctl[i].bc_blkno = -1; > + } > + bcache_units++; > + bc->ra = BCACHE_READAHEAD; /* optimistic read ahead */ > + return (bc); > +} > + > +void > +bcache_free(void *cache) > +{ > + struct bcache *bc = cache; > + > + if (bc == NULL) > + return; > + > + bcache_free_instance(bc); > + bcache_units--; > } > > /* > @@ -125,31 +182,22 @@ bcache_flush(void) > * cache with the new values. 
> */ > static int > -write_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size, > - char *buf, size_t *rsize) > +write_strategy(void *devdata, int rw, daddr_t blk, size_t offset, > + size_t size, char *buf, size_t *rsize) > { > struct bcache_devdata *dd = (struct bcache_devdata *)devdata; > + struct bcache *bc = dd->dv_cache; > daddr_t i, nblk; > - int err; > > nblk = size / bcache_blksize; > > /* Invalidate the blocks being written */ > for (i = 0; i < nblk; i++) { > - bcache_invalidate(blk + i); > + bcache_invalidate(bc, blk + i); > } > > /* Write the blocks */ > - err = dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize); > - > - /* Populate the block cache with the new data */ > - if (err == 0) { > - for (i = 0; i < nblk; i++) { > - bcache_insert(buf + (i * bcache_blksize),blk + i); > - } > - } > - > - return err; > + return (dd->dv_strategy(dd->dv_devdata, rw, blk, offset, size, buf, > rsize)); > } > > /* > @@ -158,61 +206,87 @@ write_strategy(void *devdata, int unit, > * device I/O and then use the I/O results to populate the cache. 
> */ > static int > -read_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size, > - char *buf, size_t *rsize) > +read_strategy(void *devdata, int rw, daddr_t blk, size_t offset, > + size_t size, char *buf, size_t *rsize) > { > struct bcache_devdata *dd = (struct bcache_devdata *)devdata; > - int p_size, result; > - daddr_t p_blk, i, j, nblk; > + struct bcache *bc = dd->dv_cache; > + size_t i, nblk, p_size, r_size, complete, ra; > + int result; > + daddr_t p_blk; > caddr_t p_buf; > > + if (bc == NULL) { > + errno = ENODEV; > + return (-1); > + } > + > + if (rsize != NULL) > + *rsize = 0; > + > nblk = size / bcache_blksize; > + if ((nblk == 0 && size != 0) || offset != 0) > + nblk++; > result = 0; > + complete = 1; > > - /* Satisfy any cache hits up front */ > + /* Satisfy any cache hits up front, break on first miss */ > for (i = 0; i < nblk; i++) { > - if (bcache_lookup(buf + (bcache_blksize * i), blk + i)) { > - bit_set(bcache_miss, i); /* cache miss */ > - bcache_misses++; > + if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i))) { > + bcache_misses += (nblk - i); > + complete = 0; > + if (nblk - i > BCACHE_MINREADAHEAD && bc->ra > > BCACHE_MINREADAHEAD) > + bc->ra >>= 1; /* reduce read ahead */ > + break; > } else { > - bit_clear(bcache_miss, i); /* cache hit */ > bcache_hits++; > } > } > > - /* Go back and fill in any misses XXX optimise */ > - p_blk = -1; > - p_buf = NULL; > - p_size = 0; > - for (i = 0; i < nblk; i++) { > - if (bit_test(bcache_miss, i)) { > - /* miss, add to pending transfer */ > - if (p_blk == -1) { > - p_blk = blk + i; > - p_buf = buf + (bcache_blksize * i); > - p_size = 1; > - } else { > - p_size++; > - } > - } else if (p_blk != -1) { > - /* hit, complete pending transfer */ > - result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, p_size * > bcache_blksize, p_buf, NULL); > - if (result != 0) > - goto done; > - for (j = 0; j < p_size; j++) > - bcache_insert(p_buf + (j * bcache_blksize), p_blk + j); > - p_blk = -1; > - } > - } > - if 
(p_blk != -1) { > - /* pending transfer left */ > - result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, p_size * > bcache_blksize, p_buf, NULL); > - if (result != 0) > - goto done; > - for (j = 0; j < p_size; j++) > - bcache_insert(p_buf + (j * bcache_blksize), p_blk + j); > - } > - > + if (complete) { /* whole set was in cache, return it */ > + if (bc->ra < BCACHE_READAHEAD) > + bc->ra <<= 1; /* increase read ahead */ > + bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset, > + buf, size); > + goto done; > + } > + > + /* > + * Fill in any misses. From check we have i pointing to first missing > + * block, read in all remaining blocks + readahead. > + * We have space at least for nblk - i before bcache wraps. > + */ > + p_blk = blk + i; > + p_buf = bc->bcache_data + (bcache_blksize * BHASH(bc, p_blk)); > + r_size = bc->bcache_nblks - BHASH(bc, p_blk); /* remaining blocks */ > + > + p_size = MIN(r_size, nblk - i); /* read at least those blocks */ > + > + ra = bc->bcache_nblks - BHASH(bc, p_blk + p_size); > + if (ra != bc->bcache_nblks) { /* do we have RA space? */ > + ra = MIN(bc->ra, ra); > + p_size += ra; > + } > + > + /* invalidate bcache */ > + for (i = 0; i < p_size; i++) { > + bcache_invalidate(bc, p_blk + i); > + } > + r_size = 0; > + result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, 0, > + p_size * bcache_blksize, p_buf, &r_size); > + > + if (result) > + goto done; > + > + r_size /= bcache_blksize; > + for (i = 0; i < r_size; i++) > + bcache_insert(bc, p_blk + i); > + > + bcache_rablks += ra; > + bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset, > buf, > + size); > + > done: > if ((result == 0) && (rsize != NULL)) > *rsize = size; > @@ -220,130 +294,144 @@ read_strategy(void *devdata, int unit, i > } > > /* > - * Requests larger than 1/2 the cache size will be bypassed and go > + * Requests larger than 1/2 cache size will be bypassed and go > * directly to the disk. XXX tune this. 
> */ > int > -bcache_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size, > - char *buf, size_t *rsize) > +bcache_strategy(void *devdata, int rw, daddr_t blk, size_t offset, > + size_t size, char *buf, size_t *rsize) > { > - static int bcache_unit = -1; > struct bcache_devdata *dd = (struct bcache_devdata *)devdata; > + struct bcache *bc = dd->dv_cache; > + u_int bcache_nblks = 0; > + int nblk, cblk, ret; > + size_t csize, isize, total; > > bcache_ops++; > > - if(bcache_unit != unit) { > > *** DIFF OUTPUT TRUNCATED AT 1000 LINES *** > _______________________________________________ > svn-src-head@freebsd.org mailing list > https://lists.freebsd.org/mailman/listinfo/svn-src-head > To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org" > -- -Howard
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAAvnz_rQ3KLqt2gd5JiZWxZNtoXxJrqfjbrGNTyS6pP-NAf%2B8Q>