Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 6 Feb 2017 08:58:40 +0000 (UTC)
From:      Toomas Soome <tsoome@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r313332 - head/sys/boot/common
Message-ID:  <201702060858.v168weso068556@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: tsoome
Date: Mon Feb  6 08:58:40 2017
New Revision: 313332
URL: https://svnweb.freebsd.org/changeset/base/313332

Log:
  loader: bcache read ahead block count should take account the large sectors
  
  The loader bcache is implementing simple read-ahead to boost the cache.
  The bcache is built based on 512B block sizes, and the read ahead is attempting
  to read number of cache blocks, based on amount of the free bcache space.
  
  However, there are devices using larger sector sizes than 512B, most obviously
  the CD media is based on 2k sectors. This means the read-ahead can not be just
  random number of blocks, but we should use value suitable also for use with
  larger sectors, as for example, with CD devices, we should read multiple of 2KB.
  Since the sector size from disk interface is not too reliable, i guess we can
  just use "good enough" value, so the implementation is rounding down the read
  ahead block count to be multiple of 16.
  
  This means we have covered sector sizes to 8k.
  
  In addition, the update does implement the end of cache marker, to help to
  detect the possible memory corruption - I have not seen it happening so far,
  but it does not hurt to have the detection mechanism in place.
  
  Reviewed by:	allanjude
  Approved by:	allanjude (mentor)
  Differential Revision:	https://reviews.freebsd.org/D9179

Modified:
  head/sys/boot/common/bcache.c
  head/sys/boot/common/bootstrap.h

Modified: head/sys/boot/common/bcache.c
==============================================================================
--- head/sys/boot/common/bcache.c	Mon Feb  6 08:56:04 2017	(r313331)
+++ head/sys/boot/common/bcache.c	Mon Feb  6 08:58:40 2017	(r313332)
@@ -64,7 +64,7 @@ struct bcachectl
 struct bcache {
     struct bcachectl	*bcache_ctl;
     caddr_t		bcache_data;
-    u_int		bcache_nblks;
+    size_t		bcache_nblks;
     size_t		ra;
 };
 
@@ -86,6 +86,7 @@ static u_int bcache_rablks;
 	((bc)->bcache_ctl[BHASH((bc), (blkno))].bc_blkno != (blkno))
 #define	BCACHE_READAHEAD	256
 #define	BCACHE_MINREADAHEAD	32
+#define	BCACHE_MARKER		0xdeadbeef
 
 static void	bcache_invalidate(struct bcache *bc, daddr_t blkno);
 static void	bcache_insert(struct bcache *bc, daddr_t blkno);
@@ -95,7 +96,7 @@ static void	bcache_free_instance(struct 
  * Initialise the cache for (nblks) of (bsize).
  */
 void
-bcache_init(u_int nblks, size_t bsize)
+bcache_init(size_t nblks, size_t bsize)
 {
     /* set up control data */
     bcache_total_nblks = nblks;
@@ -122,6 +123,7 @@ bcache_allocate(void)
     u_int i;
     struct bcache *bc = malloc(sizeof (struct bcache));
     int disks = bcache_numdev;
+    uint32_t *marker;
 
     if (disks == 0)
 	disks = 1;	/* safe guard */
@@ -140,11 +142,13 @@ bcache_allocate(void)
 
     bc->bcache_nblks = bcache_total_nblks >> i;
     bcache_unit_nblks = bc->bcache_nblks;
-    bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize);
+    bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize +
+	sizeof(uint32_t));
     if (bc->bcache_data == NULL) {
 	/* dont error out yet. fall back to 32 blocks and try again */
 	bc->bcache_nblks = 32;
-	bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize);
+	bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize +
+	sizeof(uint32_t));
     }
 
     bc->bcache_ctl = malloc(bc->bcache_nblks * sizeof(struct bcachectl));
@@ -152,8 +156,11 @@ bcache_allocate(void)
     if ((bc->bcache_data == NULL) || (bc->bcache_ctl == NULL)) {
 	bcache_free_instance(bc);
 	errno = ENOMEM;
-	return(NULL);
+	return (NULL);
     }
+    /* Insert cache end marker. */
+    marker = (uint32_t *)(bc->bcache_data + bc->bcache_nblks * bcache_blksize);
+    *marker = BCACHE_MARKER;
 
     /* Flush the cache */
     for (i = 0; i < bc->bcache_nblks; i++) {
@@ -215,6 +222,9 @@ read_strategy(void *devdata, int rw, dad
     int				result;
     daddr_t			p_blk;
     caddr_t			p_buf;
+    uint32_t			*marker;
+
+    marker = (uint32_t *)(bc->bcache_data + bc->bcache_nblks * bcache_blksize);
 
     if (bc == NULL) {
 	errno = ENODEV;
@@ -261,9 +271,35 @@ read_strategy(void *devdata, int rw, dad
 
     p_size = MIN(r_size, nblk - i);	/* read at least those blocks */
 
+    /*
+     * The read ahead size setup.
+     * While the read ahead can save us IO, it also can complicate things:
+     * 1. We do not want to read ahead by wrapping around the
+     * bcache end - this would complicate the cache management.
+     * 2. We are using bc->ra as dynamic hint for read ahead size,
+     * detected cache hits will increase the read-ahead block count, and
+     * misses will decrease, see the code above.
+     * 3. The bcache is sized by 512B blocks, however, the underlying device
+     * may have a larger sector size, and we should perform the IO by
+     * taking into account these larger sector sizes. We could solve this by
+     * passing the sector size to bcache_allocate(), or by using ioctl(), but
+     * in this version we are using the constant, 16 blocks, and are rounding
+     * read ahead block count down to multiple of 16.
+     * Using the constant has two reasons, we are not entirely sure if the
+     * BIOS disk interface is providing the correct value for sector size.
+     * And secondly, this way we get the most conservative setup for the ra.
+     *
+     * The selection of multiple of 16 blocks (8KB) is quite arbitrary, however,
+     * we want to have the CD (2K) and the 4K disks to be covered.
+     * Also, as we have 32 blocks to be allocated as the fallback value in the
+     * bcache_allocate(), the 16 ra blocks will allow the read ahead
+     * to be used even with bcache this small.
+     */
+
     ra = bc->bcache_nblks - BHASH(bc, p_blk + p_size);
-    if (ra != bc->bcache_nblks) { /* do we have RA space? */
-	ra = MIN(bc->ra, ra);
+    if (ra != 0 && ra != bc->bcache_nblks) { /* do we have RA space? */
+	ra = MIN(bc->ra, ra - 1);
+	ra = rounddown(ra, 16);		/* multiple of 16 blocks */
 	p_size += ra;
     }
 
@@ -310,6 +346,12 @@ read_strategy(void *devdata, int rw, dad
 	result = 0;
     }
 
+    if (*marker != BCACHE_MARKER) {
+	printf("BUG: bcache corruption detected: nblks: %zu p_blk: %lu, "
+	    "p_size: %zu, ra: %zu\n", bc->bcache_nblks,
+	    (long unsigned)BHASH(bc, p_blk), p_size, ra);
+    }
+
  done:
     if ((result == 0) && (rsize != NULL))
 	*rsize = size;
@@ -338,7 +380,7 @@ bcache_strategy(void *devdata, int rw, d
     /* bypass large requests, or when the cache is inactive */
     if (bc == NULL ||
 	((size * 2 / bcache_blksize) > bcache_nblks)) {
-	DEBUG("bypass %d from %d", size / bcache_blksize, blk);
+	DEBUG("bypass %zu from %qu", size / bcache_blksize, blk);
 	bcache_bypasses++;
 	return (dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize));
     }

Modified: head/sys/boot/common/bootstrap.h
==============================================================================
--- head/sys/boot/common/bootstrap.h	Mon Feb  6 08:56:04 2017	(r313331)
+++ head/sys/boot/common/bootstrap.h	Mon Feb  6 08:58:40 2017	(r313332)
@@ -73,7 +73,7 @@ int	kern_pread(int fd, vm_offset_t dest,
 void	*alloc_pread(int fd, off_t off, size_t len);
 
 /* bcache.c */
-void	bcache_init(u_int nblks, size_t bsize);
+void	bcache_init(size_t nblks, size_t bsize);
 void	bcache_add_dev(int);
 void	*bcache_allocate(void);
 void	bcache_free(void *);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201702060858.v168weso068556>