Skip site navigation (1) | Skip section navigation (2)
Date:      Wed, 25 May 2016 23:06:52 +0000 (UTC)
From:      Jung-uk Kim <jkim@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r300700 - head/sys/amd64/amd64
Message-ID:  <201605252306.u4PN6qAE033020@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jkim
Date: Wed May 25 23:06:52 2016
New Revision: 300700
URL: https://svnweb.freebsd.org/changeset/base/300700

Log:
  Neither Clang nor GCC can generate efficient code for reserve_pv_entries().
  
  http://docs.freebsd.org/cgi/mid.cgi?552BFEB2.8040407
  
  Re-implement it entirely in inline assembly so that compilers do not spill
  needlessly to memory.  For the non-POPCNT case, use the newly added
  bit_count(3).
  
  Reported by:	alc
  Reviewed by:	alc, kib
  Differential Revision:	https://reviews.freebsd.org/D6541

Modified:
  head/sys/amd64/amd64/pmap.c

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Wed May 25 22:16:11 2016	(r300699)
+++ head/sys/amd64/amd64/pmap.c	Wed May 25 23:06:52 2016	(r300700)
@@ -104,6 +104,7 @@ __FBSDID("$FreeBSD$");
 #include "opt_vm.h"
 
 #include <sys/param.h>
+#include <sys/bitstring.h>
 #include <sys/bus.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
@@ -585,7 +586,7 @@ static caddr_t crashdumpmap;
 static void	free_pv_chunk(struct pv_chunk *pc);
 static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
 static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
-static int	popcnt_pc_map_elem_pq(uint64_t elem);
+static int	popcnt_pc_map_pq(uint64_t *map);
 static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
 static void	reserve_pv_entries(pmap_t pmap, int needed,
 		    struct rwlock **lockp);
@@ -3126,7 +3127,7 @@ retry:
 }
 
 /*
- * Returns the number of one bits within the given PV chunk map element.
+ * Returns the number of one bits within the given PV chunk map.
  *
  * The erratas for Intel processors state that "POPCNT Instruction May
  * Take Longer to Execute Than Expected".  It is believed that the
@@ -3142,12 +3143,15 @@ retry:
  * 6th Gen Core: SKL029
  */
 static int
-popcnt_pc_map_elem_pq(uint64_t elem)
+popcnt_pc_map_pq(uint64_t *map)
 {
-	u_long result;
+	u_long result, tmp;
 
-	__asm __volatile("xorl %k0,%k0;popcntq %1,%0"
-	    : "=&r" (result) : "rm" (elem));
+	__asm __volatile("xorl %k0,%k0;popcntq %2,%0;"
+	    "xorl %k1,%k1;popcntq %3,%1;addl %k1,%k0;"
+	    "xorl %k1,%k1;popcntq %4,%1;addl %k1,%k0"
+	    : "=&r" (result), "=&r" (tmp)
+	    : "m" (map[0]), "m" (map[1]), "m" (map[2]));
 	return (result);
 }
 
@@ -3179,17 +3183,12 @@ retry:
 	avail = 0;
 	TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
 #ifndef __POPCNT__
-		if ((cpu_feature2 & CPUID2_POPCNT) == 0) {
-			free = bitcount64(pc->pc_map[0]);
-			free += bitcount64(pc->pc_map[1]);
-			free += bitcount64(pc->pc_map[2]);
-		} else
+		if ((cpu_feature2 & CPUID2_POPCNT) == 0)
+			bit_count((bitstr_t *)pc->pc_map, 0,
+			    sizeof(pc->pc_map) * NBBY, &free);
+		else
 #endif
-		{
-			free = popcnt_pc_map_elem_pq(pc->pc_map[0]);
-			free += popcnt_pc_map_elem_pq(pc->pc_map[1]);
-			free += popcnt_pc_map_elem_pq(pc->pc_map[2]);
-		}
+		free = popcnt_pc_map_pq(pc->pc_map);
 		if (free == 0)
 			break;
 		avail += free;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201605252306.u4PN6qAE033020>