Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 18 Apr 2016 16:36:10 -0700
From:      Adrian Chadd <adrian.chadd@gmail.com>
To:        Allan Jude <allanjude@freebsd.org>
Cc:        "src-committers@freebsd.org" <src-committers@freebsd.org>,  "svn-src-all@freebsd.org" <svn-src-all@freebsd.org>,  "svn-src-head@freebsd.org" <svn-src-head@freebsd.org>
Subject:   Re: svn commit: r298230 - in head: lib/libstand sys/boot/common sys/boot/efi/libefi sys/boot/efi/loader sys/boot/i386/libfirewire sys/boot/i386/libi386 sys/boot/i386/loader sys/boot/mips/beri/loader sy...
Message-ID:  <CAJ-Vmo=WX8xwnuvbPJMib3BMuVKjQMkKt1qXkx=TPH5wZYPM1A@mail.gmail.com>
In-Reply-To: <201604182309.u3IN9MC6047480@repo.freebsd.org>
References:  <201604182309.u3IN9MC6047480@repo.freebsd.org>

next in thread | previous in thread | raw e-mail | index | archive | help
Someone pointed out that this bloats the memory requirement of the loader.

Did anyone check that?



-adrian


On 18 April 2016 at 16:09, Allan Jude <allanjude@freebsd.org> wrote:
> Author: allanjude
> Date: Mon Apr 18 23:09:22 2016
> New Revision: 298230
> URL: https://svnweb.freebsd.org/changeset/base/298230
>
> Log:
>   A new implementation of the loader block cache
>
>   The block cache implementation in the loader has proven to be almost useless, and in the worst case it even slows down disk reads due to insufficient cache size and an extra memory copy.
>   Also the current cache implementation does not cache reads from CDs, or work with zfs built on top of multiple disks.
>   Instead of an LRU, this code uses a simple hash (O(1) read from cache), and instead of a single global cache, a separate cache per block device.
>   The cache also implements limited read-ahead to increase performance.
>   To simplify read-ahead management, the read-ahead will not wrap past the end of the bcache, so in the worst case a single-block physical read will be performed to fill the last block in the bcache.
>
>   Booting from a virtual CD over IPMI:
>   0ms latency, before: 27 seconds, after: 7 seconds
>   60ms latency, before: over 12 minutes, after: under 5 minutes.
>
>   Submitted by: Toomas Soome <tsoome@me.com>
>   Reviewed by:  delphij (previous version), emaste (previous version)
>   Relnotes:     yes
>   Differential Revision:        https://reviews.freebsd.org/D4713
>
> Modified:
>   head/lib/libstand/cd9660.c
>   head/lib/libstand/dosfs.c
>   head/lib/libstand/dosfs.h
>   head/lib/libstand/ext2fs.c
>   head/lib/libstand/read.c
>   head/lib/libstand/stand.h
>   head/lib/libstand/ufs.c
>   head/lib/libstand/write.c
>   head/sys/boot/common/bcache.c
>   head/sys/boot/common/bootstrap.h
>   head/sys/boot/common/disk.c
>   head/sys/boot/common/md.c
>   head/sys/boot/common/module.c
>   head/sys/boot/efi/libefi/efipart.c
>   head/sys/boot/efi/libefi/libefi.c
>   head/sys/boot/efi/loader/main.c
>   head/sys/boot/i386/libfirewire/firewire.c
>   head/sys/boot/i386/libi386/bioscd.c
>   head/sys/boot/i386/libi386/biosdisk.c
>   head/sys/boot/i386/libi386/biosmem.c
>   head/sys/boot/i386/libi386/pxe.c
>   head/sys/boot/i386/loader/main.c
>   head/sys/boot/mips/beri/loader/beri_disk_cfi.c
>   head/sys/boot/mips/beri/loader/beri_disk_sdcard.c
>   head/sys/boot/ofw/libofw/ofw_disk.c
>   head/sys/boot/pc98/libpc98/bioscd.c
>   head/sys/boot/pc98/libpc98/biosdisk.c
>   head/sys/boot/pc98/libpc98/biosmem.c
>   head/sys/boot/pc98/loader/main.c
>   head/sys/boot/powerpc/kboot/hostdisk.c
>   head/sys/boot/powerpc/ps3/ps3cdrom.c
>   head/sys/boot/powerpc/ps3/ps3disk.c
>   head/sys/boot/uboot/lib/disk.c
>   head/sys/boot/usb/storage/umass_loader.c
>   head/sys/boot/userboot/userboot/host.c
>   head/sys/boot/userboot/userboot/main.c
>   head/sys/boot/userboot/userboot/userboot_disk.c
>   head/sys/boot/zfs/zfs.c
>
> Modified: head/lib/libstand/cd9660.c
> ==============================================================================
> --- head/lib/libstand/cd9660.c  Mon Apr 18 22:00:26 2016        (r298229)
> +++ head/lib/libstand/cd9660.c  Mon Apr 18 23:09:22 2016        (r298230)
> @@ -143,7 +143,7 @@ susp_lookup_record(struct open_file *f,
>                 if (bcmp(sh->type, SUSP_CONTINUATION, 2) == 0) {
>                         shc = (ISO_RRIP_CONT *)sh;
>                         error = f->f_dev->dv_strategy(f->f_devdata, F_READ,
> -                           cdb2devb(isonum_733(shc->location)),
> +                           cdb2devb(isonum_733(shc->location)), 0,
>                             ISO_DEFAULT_BLOCK_SIZE, susp_buffer, &read);
>
>                         /* Bail if it fails. */
> @@ -288,7 +288,7 @@ cd9660_open(const char *path, struct ope
>         for (bno = 16;; bno++) {
>                 twiddle(1);
>                 rc = f->f_dev->dv_strategy(f->f_devdata, F_READ, cdb2devb(bno),
> -                                          ISO_DEFAULT_BLOCK_SIZE, buf, &read);
> +                                       0, ISO_DEFAULT_BLOCK_SIZE, buf, &read);
>                 if (rc)
>                         goto out;
>                 if (read != ISO_DEFAULT_BLOCK_SIZE) {
> @@ -322,7 +322,7 @@ cd9660_open(const char *path, struct ope
>                                 twiddle(1);
>                                 rc = f->f_dev->dv_strategy
>                                         (f->f_devdata, F_READ,
> -                                        cdb2devb(bno + boff),
> +                                        cdb2devb(bno + boff), 0,
>                                          ISO_DEFAULT_BLOCK_SIZE,
>                                          buf, &read);
>                                 if (rc)
> @@ -381,7 +381,7 @@ cd9660_open(const char *path, struct ope
>                 bno = isonum_733(rec.extent) + isonum_711(rec.ext_attr_length);
>                 twiddle(1);
>                 rc = f->f_dev->dv_strategy(f->f_devdata, F_READ, cdb2devb(bno),
> -                   ISO_DEFAULT_BLOCK_SIZE, buf, &read);
> +                   0, ISO_DEFAULT_BLOCK_SIZE, buf, &read);
>                 if (rc)
>                         goto out;
>                 if (read != ISO_DEFAULT_BLOCK_SIZE) {
> @@ -438,7 +438,8 @@ buf_read_file(struct open_file *f, char
>
>                 twiddle(16);
>                 rc = f->f_dev->dv_strategy(f->f_devdata, F_READ,
> -                   cdb2devb(blkno), ISO_DEFAULT_BLOCK_SIZE, fp->f_buf, &read);
> +                   cdb2devb(blkno), 0, ISO_DEFAULT_BLOCK_SIZE,
> +                   fp->f_buf, &read);
>                 if (rc)
>                         return (rc);
>                 if (read != ISO_DEFAULT_BLOCK_SIZE)
>
> Modified: head/lib/libstand/dosfs.c
> ==============================================================================
> --- head/lib/libstand/dosfs.c   Mon Apr 18 22:00:26 2016        (r298229)
> +++ head/lib/libstand/dosfs.c   Mon Apr 18 23:09:22 2016        (r298230)
> @@ -131,7 +131,18 @@ static DOS_DE dot[2] = {
>  #define stclus(sz, de)  ((sz) != 32 ? cv2((de)->clus) :          \
>                           ((u_int)cv2((de)->dex.h_clus) << 16) |  \
>                          cv2((de)->clus))
> -
> +
> +/*
> + * fat cache metadata
> + */
> +struct fatcache {
> +       int unit;       /* disk unit number */
> +       int size;       /* buffer (and fat) size in sectors */
> +       u_char *buf;
> +};
> +
> +static struct fatcache fat;
> +
>  static int dosunmount(DOS_FS *);
>  static int parsebs(DOS_FS *, DOS_BS *);
>  static int namede(DOS_FS *, const char *, DOS_DE **);
> @@ -143,8 +154,36 @@ static int fatcnt(DOS_FS *, u_int);
>  static int fatget(DOS_FS *, u_int *);
>  static int fatend(u_int, u_int);
>  static int ioread(DOS_FS *, u_int, void *, u_int);
> -static int iobuf(DOS_FS *, u_int);
> -static int ioget(struct open_file *, u_int, void *, u_int);
> +static int ioget(struct open_file *, daddr_t, size_t, void *, u_int);
> +
> +static void
> +dos_read_fat(DOS_FS *fs, struct open_file *fd)
> +{
> +    struct devdesc *dd = fd->f_devdata;
> +
> +    if (fat.buf != NULL) {             /* can we reuse old buffer? */
> +       if (fat.size != fs->spf) {
> +           free(fat.buf);              /* no, free old buffer */
> +           fat.buf = NULL;
> +       }
> +    }
> +
> +    if (fat.buf == NULL)
> +       fat.buf = malloc(secbyt(fs->spf));
> +
> +    if (fat.buf != NULL) {
> +       if (ioget(fd, fs->lsnfat, 0, fat.buf, secbyt(fs->spf)) == 0) {
> +           fat.size = fs->spf;
> +           fat.unit = dd->d_unit;
> +           return;
> +       }
> +    }
> +    if (fat.buf != NULL)       /* got IO error */
> +       free(fat.buf);
> +    fat.buf = NULL;
> +    fat.unit = -1;     /* impossible unit */
> +    fat.size = 0;
> +}
>
>  /*
>   * Mount DOS filesystem
> @@ -153,15 +192,25 @@ static int
>  dos_mount(DOS_FS *fs, struct open_file *fd)
>  {
>      int err;
> +    struct devdesc *dd = fd->f_devdata;
> +    u_char *buf;
>
>      bzero(fs, sizeof(DOS_FS));
>      fs->fd = fd;
> -    if ((err = !(fs->buf = malloc(SECSIZ)) ? errno : 0) ||
> -        (err = ioget(fs->fd, 0, fs->buf, 1)) ||
> -        (err = parsebs(fs, (DOS_BS *)fs->buf))) {
> +
> +    if ((err = !(buf = malloc(secbyt(1))) ? errno : 0) ||
> +        (err = ioget(fs->fd, 0, 0, buf, secbyt(1))) ||
> +        (err = parsebs(fs, (DOS_BS *)buf))) {
> +       if (buf != NULL)
> +           free(buf);
>          (void)dosunmount(fs);
>          return(err);
>      }
> +    free(buf);
> +
> +    if (fat.buf == NULL || fat.unit != dd->d_unit)
> +       dos_read_fat(fs, fd);
> +
>      fs->root = dot[0];
>      fs->root.name[0] = ' ';
>      if (fs->fatsz == 32) {
> @@ -194,8 +243,6 @@ dos_unmount(DOS_FS *fs)
>  static int
>  dosunmount(DOS_FS *fs)
>  {
> -    if (fs->buf)
> -        free(fs->buf);
>      free(fs);
>      return(0);
>  }
> @@ -252,42 +299,47 @@ dos_read(struct open_file *fd, void *buf
>      DOS_FILE *f = (DOS_FILE *)fd->f_fsdata;
>      int err = 0;
>
> +    /*
> +     * as ioget() can be called *a lot*, use twiddle here.
> +     * also 4 seems to be good value not to slow loading down too much:
> +     * with 270MB file (~540k ioget() calls, twiddle can easily waste 4-5sec.
> +     */
> +    twiddle(4);
>      nb = (u_int)nbyte;
>      if ((size = fsize(f->fs, &f->de)) == -1)
>         return EINVAL;
>      if (nb > (n = size - f->offset))
> -        nb = n;
> +       nb = n;
>      off = f->offset;
>      if ((clus = stclus(f->fs->fatsz, &f->de)))
> -        off &= f->fs->bsize - 1;
> +       off &= f->fs->bsize - 1;
>      c = f->c;
>      cnt = nb;
>      while (cnt) {
> -        n = 0;
> -        if (!c) {
> -            if ((c = clus))
> -                n = bytblk(f->fs, f->offset);
> -        } else if (!off)
> -            n++;
> -        while (n--) {
> -            if ((err = fatget(f->fs, &c)))
> +       n = 0;
> +       if (!c) {
> +           if ((c = clus))
> +               n = bytblk(f->fs, f->offset);
> +       } else if (!off)
> +           n++;
> +       while (n--) {
> +           if ((err = fatget(f->fs, &c)))
>                 goto out;
> -            if (!okclus(f->fs, c)) {
> +           if (!okclus(f->fs, c)) {
>                 err = EINVAL;
>                 goto out;
>             }
> -        }
> -        if (!clus || (n = f->fs->bsize - off) > cnt)
> -            n = cnt;
> -        if ((err = ioread(f->fs, (c ? blkoff(f->fs, c) :
> -                                     secbyt(f->fs->lsndir)) + off,
> -                         buf, n)))
> +       }
> +       if (!clus || (n = f->fs->bsize - off) > cnt)
> +           n = cnt;
> +       if ((err = ioread(f->fs, (c ? blkoff(f->fs, c) :
> +                                     secbyt(f->fs->lsndir)) + off, buf, n)))
>             goto out;
> -        f->offset += n;
> -        f->c = c;
> -        off = 0;
> -        buf = (char *)buf + n;
> -        cnt -= n;
> +       f->offset += n;
> +       f->c = c;
> +       off = 0;
> +       buf = (char *)buf + n;
> +       cnt -= n;
>      }
>   out:
>      if (resid)
> @@ -364,6 +416,23 @@ dos_stat(struct open_file *fd, struct st
>  }
>
>  static int
> +dos_checksum(char *name, char *ext)
> +{
> +    int x, i;
> +    char buf[11];
> +
> +    bcopy(name, buf, 8);
> +    bcopy(ext, buf+8, 3);
> +    x = 0;
> +    for (i = 0; i < 11; i++) {
> +       x = ((x & 1) << 7) | (x >> 1);
> +       x += buf[i];
> +       x &= 0xff;
> +    }
> +    return (x);
> +}
> +
> +static int
>  dos_readdir(struct open_file *fd, struct dirent *d)
>  {
>      /* DOS_FILE *f = (DOS_FILE *)fd->f_fsdata; */
> @@ -417,12 +486,7 @@ dos_readdir(struct open_file *fd, struct
>             }
>         } else {
>             if (xdn == 1) {
> -               x = 0;
> -               for (i = 0; i < 11; i++) {
> -                   x = ((x & 1) << 7) | (x >> 1);
> -                   x += dd.de.name[i];
> -                   x &= 0xff;
> -               }
> +               x = dos_checksum(dd.de.name, dd.de.ext);
>                 if (x == chk)
>                     break;
>             } else {
> @@ -555,7 +619,7 @@ lookup(DOS_FS *fs, u_int clus, const cha
>          else
>              return EINVAL;
>          for (sec = 0; sec < nsec; sec++) {
> -            if ((err = ioget(fs->fd, lsec + sec, dir, 1)))
> +            if ((err = ioget(fs->fd, lsec + sec, 0, dir, secbyt(1))))
>                  return err;
>              for (ent = 0; ent < DEPSEC; ent++) {
>                  if (!*dir[ent].de.name)
> @@ -577,9 +641,7 @@ lookup(DOS_FS *fs, u_int clus, const cha
>                          }
>                      } else if (!(dir[ent].de.attr & FA_LABEL)) {
>                          if ((ok = xdn == 1)) {
> -                            for (x = 0, i = 0; i < 11; i++)
> -                                x = ((((x & 1) << 7) | (x >> 1)) +
> -                                     dir[ent].de.name[i]) & 0xff;
> +                           x = dos_checksum(dir[ent].de.name, dir[ent].de.ext);
>                              ok = chk == x &&
>                                  !strcasecmp(name, (const char *)lfn);
>                          }
> @@ -699,22 +761,52 @@ fatcnt(DOS_FS *fs, u_int c)
>  }
>
>  /*
> - * Get next cluster in cluster chain
> + * Get next cluster in cluster chain. Use in core fat cache unless another
> + * device replaced it.
>   */
>  static int
>  fatget(DOS_FS *fs, u_int *c)
>  {
>      u_char buf[4];
> -    u_int x;
> -    int err;
> +    u_char *s;
> +    u_int x, offset, off, n, nbyte, lsec;
> +    struct devdesc *dd = fs->fd->f_devdata;
> +    int err = 0;
> +
> +    if (fat.unit != dd->d_unit) {
> +       /* fat cache was changed to another device, dont use it */
> +       err = ioread(fs, secbyt(fs->lsnfat) + fatoff(fs->fatsz, *c), buf,
> +           fs->fatsz != 32 ? 2 : 4);
> +       if (err)
> +           return err;
> +    } else {
> +       offset = fatoff(fs->fatsz, *c);
> +       nbyte = fs->fatsz != 32 ? 2 : 4;
> +
> +       s = buf;
> +       if ((off = offset & (SECSIZ - 1))) {
> +           offset -= off;
> +           lsec = bytsec(offset);
> +           offset += SECSIZ;
> +           if ((n = SECSIZ - off) > nbyte)
> +               n = nbyte;
> +           memcpy(s, fat.buf + secbyt(lsec) + off, n);
> +           s += n;
> +           nbyte -= n;
> +       }
> +       n = nbyte & (SECSIZ - 1);
> +       if (nbyte -= n) {
> +           memcpy(s, fat.buf + secbyt(bytsec(offset)), nbyte);
> +           offset += nbyte;
> +           s += nbyte;
> +       }
> +       if (n)
> +           memcpy(s, fat.buf + secbyt(bytsec(offset)), n);
> +    }
>
> -    err = ioread(fs, secbyt(fs->lsnfat) + fatoff(fs->fatsz, *c), buf,
> -                 fs->fatsz != 32 ? 2 : 4);
> -    if (err)
> -        return err;
>      x = fs->fatsz != 32 ? cv2(buf) : cv4(buf);
>      *c = fs->fatsz == 12 ? *c & 1 ? x >> 4 : x & 0xfff : x;
> -    return 0;
> +    return (0);
>  }
>
>  /*
> @@ -739,42 +831,24 @@ ioread(DOS_FS *fs, u_int offset, void *b
>      s = buf;
>      if ((off = offset & (SECSIZ - 1))) {
>          offset -= off;
> -        if ((err = iobuf(fs, bytsec(offset))))
> -            return err;
> -        offset += SECSIZ;
>          if ((n = SECSIZ - off) > nbyte)
>              n = nbyte;
> -        memcpy(s, fs->buf + off, n);
> +        if ((err = ioget(fs->fd, bytsec(offset), off, s, n)))
> +            return err;
> +        offset += SECSIZ;
>          s += n;
>          nbyte -= n;
>      }
>      n = nbyte & (SECSIZ - 1);
>      if (nbyte -= n) {
> -        if ((err = ioget(fs->fd, bytsec(offset), s, bytsec(nbyte))))
> +        if ((err = ioget(fs->fd, bytsec(offset), 0, s, nbyte)))
>              return err;
>          offset += nbyte;
>          s += nbyte;
>      }
>      if (n) {
> -        if ((err = iobuf(fs, bytsec(offset))))
> -            return err;
> -        memcpy(s, fs->buf, n);
> -    }
> -    return 0;
> -}
> -
> -/*
> - * Buffered sector-based I/O primitive
> - */
> -static int
> -iobuf(DOS_FS *fs, u_int lsec)
> -{
> -    int err;
> -
> -    if (fs->bufsec != lsec) {
> -        if ((err = ioget(fs->fd, lsec, fs->buf, 1)))
> +        if ((err = ioget(fs->fd, bytsec(offset), 0, s, n)))
>              return err;
> -        fs->bufsec = lsec;
>      }
>      return 0;
>  }
> @@ -783,13 +857,8 @@ iobuf(DOS_FS *fs, u_int lsec)
>   * Sector-based I/O primitive
>   */
>  static int
> -ioget(struct open_file *fd, u_int lsec, void *buf, u_int nsec)
> +ioget(struct open_file *fd, daddr_t lsec, size_t offset, void *buf, u_int size)
>  {
> -    int        err;
> -
> -    twiddle(1);
> -    if ((err = (fd->f_dev->dv_strategy)(fd->f_devdata, F_READ, lsec,
> -                                       secbyt(nsec), buf, NULL)))
> -       return(err);
> -    return(0);
> +    return ((fd->f_dev->dv_strategy)(fd->f_devdata, F_READ, lsec, offset,
> +       size, buf, NULL));
>  }
>
> Modified: head/lib/libstand/dosfs.h
> ==============================================================================
> --- head/lib/libstand/dosfs.h   Mon Apr 18 22:00:26 2016        (r298229)
> +++ head/lib/libstand/dosfs.h   Mon Apr 18 23:09:22 2016        (r298230)
> @@ -96,8 +96,6 @@ typedef union {
>
>  typedef struct {
>      struct open_file *fd;       /* file descriptor */
> -    u_char *buf;                /* buffer */
> -    u_int bufsec;               /* buffered sector */
>      u_int links;                /* active links to structure */
>      u_int spc;                  /* sectors per cluster */
>      u_int bsize;                /* cluster size in bytes */
>
> Modified: head/lib/libstand/ext2fs.c
> ==============================================================================
> --- head/lib/libstand/ext2fs.c  Mon Apr 18 22:00:26 2016        (r298229)
> +++ head/lib/libstand/ext2fs.c  Mon Apr 18 23:09:22 2016        (r298230)
> @@ -355,7 +355,7 @@ ext2fs_open(const char *upath, struct op
>         fp->f_fs = fs;
>         twiddle(1);
>         error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -           EXT2_SBLOCK, EXT2_SBSIZE, (char *)fs, &buf_size);
> +           EXT2_SBLOCK, 0, EXT2_SBSIZE, (char *)fs, &buf_size);
>         if (error)
>                 goto out;
>
> @@ -397,7 +397,7 @@ ext2fs_open(const char *upath, struct op
>         fp->f_bg = malloc(len);
>         twiddle(1);
>         error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -           EXT2_SBLOCK + EXT2_SBSIZE / DEV_BSIZE, len,
> +           EXT2_SBLOCK + EXT2_SBSIZE / DEV_BSIZE, 0, len,
>             (char *)fp->f_bg, &buf_size);
>         if (error)
>                 goto out;
> @@ -509,7 +509,7 @@ ext2fs_open(const char *upath, struct op
>
>                                 twiddle(1);
>                                 error = (f->f_dev->dv_strategy)(f->f_devdata,
> -                                   F_READ, fsb_to_db(fs, disk_block),
> +                                   F_READ, fsb_to_db(fs, disk_block), 0,
>                                     fs->fs_bsize, buf, &buf_size);
>                                 if (error)
>                                         goto out;
> @@ -570,7 +570,7 @@ read_inode(ino_t inumber, struct open_fi
>         buf = malloc(fs->fs_bsize);
>         twiddle(1);
>         error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -           ino_to_db(fs, fp->f_bg, inumber), fs->fs_bsize, buf, &rsize);
> +           ino_to_db(fs, fp->f_bg, inumber), 0, fs->fs_bsize, buf, &rsize);
>         if (error)
>                 goto out;
>         if (rsize != fs->fs_bsize) {
> @@ -667,7 +667,7 @@ block_map(struct open_file *f, daddr_t f
>                                         malloc(fs->fs_bsize);
>                         twiddle(1);
>                         error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -                           fsb_to_db(fp->f_fs, ind_block_num), fs->fs_bsize,
> +                           fsb_to_db(fp->f_fs, ind_block_num), 0, fs->fs_bsize,
>                             fp->f_blk[level], &fp->f_blksize[level]);
>                         if (error)
>                                 return (error);
> @@ -725,7 +725,7 @@ buf_read_file(struct open_file *f, char
>                 } else {
>                         twiddle(4);
>                         error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -                           fsb_to_db(fs, disk_block), block_size,
> +                           fsb_to_db(fs, disk_block), 0, block_size,
>                             fp->f_buf, &fp->f_buf_size);
>                         if (error)
>                                 goto done;
>
> Modified: head/lib/libstand/read.c
> ==============================================================================
> --- head/lib/libstand/read.c    Mon Apr 18 22:00:26 2016        (r298229)
> +++ head/lib/libstand/read.c    Mon Apr 18 23:09:22 2016        (r298230)
> @@ -79,7 +79,7 @@ read(int fd, void *dest, size_t bcount)
>      if (f->f_flags & F_RAW) {
>         twiddle(4);
>         errno = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -                                       btodb(f->f_offset), bcount, dest, &resid);
> +                               btodb(f->f_offset), 0, bcount, dest, &resid);
>         if (errno)
>             return (-1);
>         f->f_offset += resid;
>
> Modified: head/lib/libstand/stand.h
> ==============================================================================
> --- head/lib/libstand/stand.h   Mon Apr 18 22:00:26 2016        (r298229)
> +++ head/lib/libstand/stand.h   Mon Apr 18 23:09:22 2016        (r298230)
> @@ -138,8 +138,8 @@ struct devsw {
>      const char dv_name[8];
>      int                dv_type;                /* opaque type constant, arch-dependant */
>      int                (*dv_init)(void);       /* early probe call */
> -    int                (*dv_strategy)(void *devdata, int rw, daddr_t blk, size_t size,
> -                              char *buf, size_t *rsize);
> +    int                (*dv_strategy)(void *devdata, int rw, daddr_t blk,
> +                       size_t offset, size_t size, char *buf, size_t *rsize);
>      int                (*dv_open)(struct open_file *f, ...);
>      int                (*dv_close)(struct open_file *f);
>      int                (*dv_ioctl)(struct open_file *f, u_long cmd, void *data);
> @@ -154,6 +154,24 @@ extern struct devsw netdev;
>
>  extern int errno;
>
> +/*
> + * Generic device specifier; architecture-dependent
> + * versions may be larger, but should be allowed to
> + * overlap.
> + */
> +struct devdesc
> +{
> +    struct devsw       *d_dev;
> +    int                        d_type;
> +#define DEVT_NONE      0
> +#define DEVT_DISK      1
> +#define DEVT_NET       2
> +#define DEVT_CD                3
> +#define DEVT_ZFS       4
> +    int                        d_unit;
> +    void               *d_opendata;
> +};
> +
>  struct open_file {
>      int                        f_flags;        /* see F_* below */
>      struct devsw       *f_dev;         /* pointer to device operations */
>
> Modified: head/lib/libstand/ufs.c
> ==============================================================================
> --- head/lib/libstand/ufs.c     Mon Apr 18 22:00:26 2016        (r298229)
> +++ head/lib/libstand/ufs.c     Mon Apr 18 23:09:22 2016        (r298230)
> @@ -157,7 +157,7 @@ read_inode(inumber, f)
>         buf = malloc(fs->fs_bsize);
>         twiddle(1);
>         rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -               fsbtodb(fs, ino_to_fsba(fs, inumber)), fs->fs_bsize,
> +               fsbtodb(fs, ino_to_fsba(fs, inumber)), 0, fs->fs_bsize,
>                 buf, &rsize);
>         if (rc)
>                 goto out;
> @@ -267,7 +267,7 @@ block_map(f, file_block, disk_block_p)
>                                         malloc(fs->fs_bsize);
>                         twiddle(1);
>                         rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -                               fsbtodb(fp->f_fs, ind_block_num),
> +                               fsbtodb(fp->f_fs, ind_block_num), 0,
>                                 fs->fs_bsize,
>                                 fp->f_blk[level],
>                                 &fp->f_blksize[level]);
> @@ -348,7 +348,7 @@ buf_write_file(f, buf_p, size_p)
>
>                 twiddle(4);
>                 rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -                       fsbtodb(fs, disk_block),
> +                       fsbtodb(fs, disk_block), 0,
>                         block_size, fp->f_buf, &fp->f_buf_size);
>                 if (rc)
>                         return (rc);
> @@ -367,7 +367,7 @@ buf_write_file(f, buf_p, size_p)
>
>         twiddle(4);
>         rc = (f->f_dev->dv_strategy)(f->f_devdata, F_WRITE,
> -               fsbtodb(fs, disk_block),
> +               fsbtodb(fs, disk_block), 0,
>                 block_size, fp->f_buf, &fp->f_buf_size);
>         return (rc);
>  }
> @@ -408,7 +408,7 @@ buf_read_file(f, buf_p, size_p)
>                 } else {
>                         twiddle(4);
>                         rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -                               fsbtodb(fs, disk_block),
> +                               fsbtodb(fs, disk_block), 0,
>                                 block_size, fp->f_buf, &fp->f_buf_size);
>                         if (rc)
>                                 return (rc);
> @@ -521,7 +521,7 @@ ufs_open(upath, f)
>          */
>         for (i = 0; sblock_try[i] != -1; i++) {
>                 rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ,
> -                   sblock_try[i] / DEV_BSIZE, SBLOCKSIZE,
> +                   sblock_try[i] / DEV_BSIZE, 0, SBLOCKSIZE,
>                     (char *)fs, &buf_size);
>                 if (rc)
>                         goto out;
> @@ -651,7 +651,7 @@ ufs_open(upath, f)
>
>                                 twiddle(1);
>                                 rc = (f->f_dev->dv_strategy)(f->f_devdata,
> -                                       F_READ, fsbtodb(fs, disk_block),
> +                                       F_READ, fsbtodb(fs, disk_block), 0,
>                                         fs->fs_bsize, buf, &buf_size);
>                                 if (rc)
>                                         goto out;
>
> Modified: head/lib/libstand/write.c
> ==============================================================================
> --- head/lib/libstand/write.c   Mon Apr 18 22:00:26 2016        (r298229)
> +++ head/lib/libstand/write.c   Mon Apr 18 23:09:22 2016        (r298230)
> @@ -82,7 +82,7 @@ write(fd, dest, bcount)
>         if (f->f_flags & F_RAW) {
>                 twiddle(4);
>                 errno = (f->f_dev->dv_strategy)(f->f_devdata, F_WRITE,
> -                       btodb(f->f_offset), bcount, dest, &resid);
> +                       btodb(f->f_offset), 0, bcount, dest, &resid);
>                 if (errno)
>                         return (-1);
>                 f->f_offset += resid;
>
> Modified: head/sys/boot/common/bcache.c
> ==============================================================================
> --- head/sys/boot/common/bcache.c       Mon Apr 18 22:00:26 2016        (r298229)
> +++ head/sys/boot/common/bcache.c       Mon Apr 18 23:09:22 2016        (r298230)
> @@ -1,5 +1,6 @@
>  /*-
>   * Copyright (c) 1998 Michael Smith <msmith@freebsd.org>
> + * Copyright 2015 Toomas Soome <tsoome@me.com>
>   * All rights reserved.
>   *
>   * Redistribution and use in source and binary forms, with or without
> @@ -25,99 +26,155 @@
>   */
>
>  #include <sys/cdefs.h>
> +#include <sys/param.h>
>  __FBSDID("$FreeBSD$");
>
>  /*
> - * Simple LRU block cache
> + * Simple hashed block cache
>   */
>
>  #include <sys/stdint.h>
>
>  #include <stand.h>
>  #include <string.h>
> -#include <bitstring.h>
> +#include <strings.h>
>
>  #include "bootstrap.h"
>
>  /* #define BCACHE_DEBUG */
>
>  #ifdef BCACHE_DEBUG
> -#define BCACHE_TIMEOUT 10
>  # define DEBUG(fmt, args...)   printf("%s: " fmt "\n" , __func__ , ## args)
>  #else
> -#define BCACHE_TIMEOUT 2
>  # define DEBUG(fmt, args...)
>  #endif
>
> -
>  struct bcachectl
>  {
>      daddr_t    bc_blkno;
> -    time_t     bc_stamp;
>      int                bc_count;
>  };
>
> -static struct bcachectl        *bcache_ctl;
> -static caddr_t         bcache_data;
> -static bitstr_t                *bcache_miss;
> -static u_int           bcache_nblks;
> -static u_int           bcache_blksize;
> -static u_int           bcache_hits, bcache_misses, bcache_ops, bcache_bypasses;
> -static u_int           bcache_flushes;
> -static u_int           bcache_bcount;
> -
> -static void    bcache_invalidate(daddr_t blkno);
> -static void    bcache_insert(caddr_t buf, daddr_t blkno);
> -static int     bcache_lookup(caddr_t buf, daddr_t blkno);
> +/*
> + * bcache per device node. cache is allocated on device first open and freed
> + * on last close, to save memory. The issue there is the size; biosdisk
> + * supports up to 31 (0x1f) devices. Classic setup would use single disk
> + * to boot from, but this has changed with zfs.
> + */
> +struct bcache {
> +    struct bcachectl   *bcache_ctl;
> +    caddr_t            bcache_data;
> +    u_int              bcache_nblks;
> +    size_t             ra;
> +};
> +
> +static u_int bcache_total_nblks;       /* set by bcache_init */
> +static u_int bcache_blksize;           /* set by bcache_init */
> +static u_int bcache_numdev;            /* set by bcache_add_dev */
> +/* statistics */
> +static u_int bcache_units;     /* number of devices with cache */
> +static u_int bcache_unit_nblks;        /* nblocks per unit */
> +static u_int bcache_hits;
> +static u_int bcache_misses;
> +static u_int bcache_ops;
> +static u_int bcache_bypasses;
> +static u_int bcache_bcount;
> +static u_int bcache_rablks;
> +
> +#define        BHASH(bc, blkno)        ((blkno) & ((bc)->bcache_nblks - 1))
> +#define        BCACHE_LOOKUP(bc, blkno)        \
> +       ((bc)->bcache_ctl[BHASH((bc), (blkno))].bc_blkno != (blkno))
> +#define        BCACHE_READAHEAD        256
> +#define        BCACHE_MINREADAHEAD     32
> +
> +static void    bcache_invalidate(struct bcache *bc, daddr_t blkno);
> +static void    bcache_insert(struct bcache *bc, daddr_t blkno);
> +static void    bcache_free_instance(struct bcache *bc);
>
>  /*
>   * Initialise the cache for (nblks) of (bsize).
>   */
> -int
> +void
>  bcache_init(u_int nblks, size_t bsize)
>  {
> -    /* discard any old contents */
> -    if (bcache_data != NULL) {
> -       free(bcache_data);
> -       bcache_data = NULL;
> -       free(bcache_ctl);
> -    }
> -
> -    /* Allocate control structures */
> -    bcache_nblks = nblks;
> +    /* set up control data */
> +    bcache_total_nblks = nblks;
>      bcache_blksize = bsize;
> -    bcache_data = malloc(bcache_nblks * bcache_blksize);
> -    bcache_ctl = (struct bcachectl *)malloc(bcache_nblks * sizeof(struct bcachectl));
> -    bcache_miss = bit_alloc((bcache_nblks + 1) / 2);
> -    if ((bcache_data == NULL) || (bcache_ctl == NULL) || (bcache_miss == NULL)) {
> -       if (bcache_miss)
> -           free(bcache_miss);
> -       if (bcache_ctl)
> -           free(bcache_ctl);
> -       if (bcache_data)
> -           free(bcache_data);
> -       bcache_data = NULL;
> -       return(ENOMEM);
> -    }
> -
> -    return(0);
>  }
>
>  /*
> - * Flush the cache
> + * Add a number of devices to bcache. The cache space has to be divided
> + * between the devices, so bcache_add_dev() is used to set up the device
> + * count. The catch is that the count is needed before any allocation is done.
> + * bcache_add_dev() is supposed to be called from the device init() call, so
> + * the assumption is that devsw dv_init is called for plain devices first,
> + * and for zfs last.
>   */
>  void
> -bcache_flush(void)
> +bcache_add_dev(int devices)
>  {
> -    u_int      i;
> +    bcache_numdev += devices;
> +}
>
> -    bcache_flushes++;
> +void *
> +bcache_allocate(void)
> +{
> +    u_int i;
> +    struct bcache *bc = malloc(sizeof (struct bcache));
> +    int disks = bcache_numdev;
> +
> +    if (disks == 0)
> +       disks = 1;      /* safe guard */
> +
> +    if (bc == NULL) {
> +       errno = ENOMEM;
> +       return (bc);
> +    }
> +
> +    /*
> +     * The bcache block count must be a power of 2 for the hash function.
> +     */
> +    i = fls(disks) - 1;                /* highbit - 1 */
> +    if (disks > (1 << i))      /* next power of 2 */
> +       i++;
> +
> +    bc->bcache_nblks = bcache_total_nblks >> i;
> +    bcache_unit_nblks = bc->bcache_nblks;
> +    bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize);
> +    if (bc->bcache_data == NULL) {
> +       /* Don't error out yet; fall back to 32 blocks and try again. */
> +       bc->bcache_nblks = 32;
> +       bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize);
> +    }
> +
> +    bc->bcache_ctl = malloc(bc->bcache_nblks * sizeof(struct bcachectl));
> +
> +    if ((bc->bcache_data == NULL) || (bc->bcache_ctl == NULL)) {
> +       bcache_free_instance(bc);
> +       errno = ENOMEM;
> +       return(NULL);
> +    }
>
>      /* Flush the cache */
> -    for (i = 0; i < bcache_nblks; i++) {
> -       bcache_ctl[i].bc_count = -1;
> -       bcache_ctl[i].bc_blkno = -1;
> -    }
> +    for (i = 0; i < bc->bcache_nblks; i++) {
> +       bc->bcache_ctl[i].bc_count = -1;
> +       bc->bcache_ctl[i].bc_blkno = -1;
> +    }
> +    bcache_units++;
> +    bc->ra = BCACHE_READAHEAD; /* optimistic read ahead */
> +    return (bc);
> +}
> +
> +void
> +bcache_free(void *cache)
> +{
> +    struct bcache *bc = cache;
> +
> +    if (bc == NULL)
> +       return;
> +
> +    bcache_free_instance(bc);
> +    bcache_units--;
>  }
>
>  /*
> @@ -125,31 +182,22 @@ bcache_flush(void)
>   * cache with the new values.
>   */
>  static int
> -write_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size,
> -               char *buf, size_t *rsize)
> +write_strategy(void *devdata, int rw, daddr_t blk, size_t offset,
> +    size_t size, char *buf, size_t *rsize)
>  {
>      struct bcache_devdata      *dd = (struct bcache_devdata *)devdata;
> +    struct bcache              *bc = dd->dv_cache;
>      daddr_t                    i, nblk;
> -    int                                err;
>
>      nblk = size / bcache_blksize;
>
>      /* Invalidate the blocks being written */
>      for (i = 0; i < nblk; i++) {
> -       bcache_invalidate(blk + i);
> +       bcache_invalidate(bc, blk + i);
>      }
>
>      /* Write the blocks */
> -    err = dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize);
> -
> -    /* Populate the block cache with the new data */
> -    if (err == 0) {
> -       for (i = 0; i < nblk; i++) {
> -           bcache_insert(buf + (i * bcache_blksize),blk + i);
> -       }
> -    }
> -
> -    return err;
> +    return (dd->dv_strategy(dd->dv_devdata, rw, blk, offset, size, buf, rsize));
>  }
>
>  /*
> @@ -158,61 +206,87 @@ write_strategy(void *devdata, int unit,
>   * device I/O and then use the I/O results to populate the cache.
>   */
>  static int
> -read_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size,
> -               char *buf, size_t *rsize)
> +read_strategy(void *devdata, int rw, daddr_t blk, size_t offset,
> +    size_t size, char *buf, size_t *rsize)
>  {
>      struct bcache_devdata      *dd = (struct bcache_devdata *)devdata;
> -    int                                p_size, result;
> -    daddr_t                    p_blk, i, j, nblk;
> +    struct bcache              *bc = dd->dv_cache;
> +    size_t                     i, nblk, p_size, r_size, complete, ra;
> +    int                                result;
> +    daddr_t                    p_blk;
>      caddr_t                    p_buf;
>
> +    if (bc == NULL) {
> +       errno = ENODEV;
> +       return (-1);
> +    }
> +
> +    if (rsize != NULL)
> +       *rsize = 0;
> +
>      nblk = size / bcache_blksize;
> +    if ((nblk == 0 && size != 0) || offset != 0)
> +       nblk++;
>      result = 0;
> +    complete = 1;
>
> -    /* Satisfy any cache hits up front */
> +    /* Satisfy any cache hits up front, break on first miss */
>      for (i = 0; i < nblk; i++) {
> -       if (bcache_lookup(buf + (bcache_blksize * i), blk + i)) {
> -           bit_set(bcache_miss, i);    /* cache miss */
> -           bcache_misses++;
> +       if (BCACHE_LOOKUP(bc, (daddr_t)(blk + i))) {
> +           bcache_misses += (nblk - i);
> +           complete = 0;
> +           if (nblk - i > BCACHE_MINREADAHEAD && bc->ra > BCACHE_MINREADAHEAD)
> +               bc->ra >>= 1;   /* reduce read ahead */
> +           break;
>         } else {
> -           bit_clear(bcache_miss, i);  /* cache hit */
>             bcache_hits++;
>         }
>      }
>
> -    /* Go back and fill in any misses  XXX optimise */
> -    p_blk = -1;
> -    p_buf = NULL;
> -    p_size = 0;
> -    for (i = 0; i < nblk; i++) {
> -       if (bit_test(bcache_miss, i)) {
> -           /* miss, add to pending transfer */
> -           if (p_blk == -1) {
> -               p_blk = blk + i;
> -               p_buf = buf + (bcache_blksize * i);
> -               p_size = 1;
> -           } else {
> -               p_size++;
> -           }
> -       } else if (p_blk != -1) {
> -           /* hit, complete pending transfer */
> -           result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, p_size * bcache_blksize, p_buf, NULL);
> -           if (result != 0)
> -               goto done;
> -           for (j = 0; j < p_size; j++)
> -               bcache_insert(p_buf + (j * bcache_blksize), p_blk + j);
> -           p_blk = -1;
> -       }
> -    }
> -    if (p_blk != -1) {
> -       /* pending transfer left */
> -       result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, p_size * bcache_blksize, p_buf, NULL);
> -       if (result != 0)
> -           goto done;
> -       for (j = 0; j < p_size; j++)
> -           bcache_insert(p_buf + (j * bcache_blksize), p_blk + j);
> -    }
> -
> +   if (complete) {     /* whole set was in cache, return it */
> +       if (bc->ra < BCACHE_READAHEAD)
> +               bc->ra <<= 1;   /* increase read ahead */
> +       bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset,
> +           buf, size);
> +       goto done;
> +   }
> +
> +    /*
> +     * Fill in any misses. After the lookup loop above, i points to the first
> +     * missing block; read in all remaining blocks plus read-ahead.
> +     * We have space for at least nblk - i blocks before bcache wraps.
> +     */
> +    p_blk = blk + i;
> +    p_buf = bc->bcache_data + (bcache_blksize * BHASH(bc, p_blk));
> +    r_size = bc->bcache_nblks - BHASH(bc, p_blk); /* remaining blocks */
> +
> +    p_size = MIN(r_size, nblk - i);    /* read at least those blocks */
> +
> +    ra = bc->bcache_nblks - BHASH(bc, p_blk + p_size);
> +    if (ra != bc->bcache_nblks) { /* do we have RA space? */
> +       ra = MIN(bc->ra, ra);
> +       p_size += ra;
> +    }
> +
> +    /* invalidate bcache */
> +    for (i = 0; i < p_size; i++) {
> +       bcache_invalidate(bc, p_blk + i);
> +    }
> +    r_size = 0;
> +    result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, 0,
> +       p_size * bcache_blksize, p_buf, &r_size);
> +
> +    if (result)
> +       goto done;
> +
> +    r_size /= bcache_blksize;
> +    for (i = 0; i < r_size; i++)
> +       bcache_insert(bc, p_blk + i);
> +
> +    bcache_rablks += ra;
> +    bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset, buf,
> +       size);
> +
>   done:
>      if ((result == 0) && (rsize != NULL))
>         *rsize = size;
> @@ -220,130 +294,144 @@ read_strategy(void *devdata, int unit, i
>  }
>
>  /*
> - * Requests larger than 1/2 the cache size will be bypassed and go
> + * Requests larger than 1/2 cache size will be bypassed and go
>   * directly to the disk.  XXX tune this.
>   */
>  int
> -bcache_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size,
> -               char *buf, size_t *rsize)
> +bcache_strategy(void *devdata, int rw, daddr_t blk, size_t offset,
> +    size_t size, char *buf, size_t *rsize)
>  {
> -    static int                 bcache_unit = -1;
>      struct bcache_devdata      *dd = (struct bcache_devdata *)devdata;
> +    struct bcache              *bc = dd->dv_cache;
> +    u_int bcache_nblks = 0;
> +    int nblk, cblk, ret;
> +    size_t csize, isize, total;
>
>      bcache_ops++;
>
> -    if(bcache_unit != unit) {
>
> *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
>



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAJ-Vmo=WX8xwnuvbPJMib3BMuVKjQMkKt1qXkx=TPH5wZYPM1A>