From owner-svn-src-all@FreeBSD.ORG Tue Apr 14 20:05:29 2015 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [8.8.178.115]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 4EEB6EE7; Tue, 14 Apr 2015 20:05:29 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 3957B952; Tue, 14 Apr 2015 20:05:29 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.9/8.14.9) with ESMTP id t3EK5TId073977; Tue, 14 Apr 2015 20:05:29 GMT (envelope-from jhb@FreeBSD.org) Received: (from jhb@localhost) by svn.freebsd.org (8.14.9/8.14.9/Submit) id t3EK5RnA073966; Tue, 14 Apr 2015 20:05:27 GMT (envelope-from jhb@FreeBSD.org) Message-Id: <201504142005.t3EK5RnA073966@svn.freebsd.org> X-Authentication-Warning: svn.freebsd.org: jhb set sender to jhb@FreeBSD.org using -f From: John Baldwin Date: Tue, 14 Apr 2015 20:05:27 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org Subject: svn commit: r281538 - in stable: 10/sys/sys 9/sys/sys X-SVN-Group: stable-10 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 14 Apr 2015 20:05:29 -0000 Author: jhb Date: Tue Apr 14 20:05:26 2015 New Revision: 281538 URL: https://svnweb.freebsd.org/changeset/base/281538 Log: MFC 278474,278476,280279: Expand the bitcount*() API and use it to implement CPU_COUNT for cpusets. 278474: Use __builtin_popcnt() to implement a BIT_COUNT() operation for bitsets and use this to implement CPU_COUNT() to count the number of CPUs in a cpuset. 278476: Use __builtin_popcountl() instead of __builtin_popcount(). 280279: Expand the bitcount* API to support 64-bit integers, plain ints and longs and create a "hidden" API that can be used in other system headers without adding namespace pollution. - If the POPCNT instruction is enabled at compile time, use __builtin_popcount*() to implement __bitcount*(), otherwise fall back to software implementations. - Use the existing bitcount16() and bitcount32() from to implement the non-POPCNT __bitcount16() and __bitcount32() in . - For the non-POPCNT __bitcount64(), use a similar SWAR method on 64-bit systems. For 32-bit systems, use two __bitcount32() operations on the two halves. - Use __bitcount32() to provide a __bitcount() that operates on plain ints. - Use either __bitcount32() or __bitcount64() to provide a __bitcountl() that operates on longs. - Add public bitcount*() wrappers for __bitcount*() for use in the kernel in . - Use __bitcountl() instead of __builtin_popcountl() in BIT_COUNT(). Modified: stable/10/sys/sys/bitset.h stable/10/sys/sys/cpuset.h stable/10/sys/sys/libkern.h stable/10/sys/sys/systm.h stable/10/sys/sys/types.h Directory Properties: stable/10/ (props changed) Changes in other areas also in this revision: Modified: stable/9/sys/sys/bitset.h stable/9/sys/sys/cpuset.h stable/9/sys/sys/libkern.h stable/9/sys/sys/systm.h stable/9/sys/sys/types.h Directory Properties: stable/9/sys/ (props changed) stable/9/sys/sys/ (props changed) Modified: stable/10/sys/sys/bitset.h ============================================================================== --- stable/10/sys/sys/bitset.h Tue Apr 14 19:18:34 2015 (r281537) +++ stable/10/sys/sys/bitset.h Tue Apr 14 20:05:26 2015 (r281538) @@ -176,4 +176,14 @@ __bit; \ }) +#define BIT_COUNT(_s, p) __extension__ ({ \ + __size_t __i; \ + int __count; \ + \ + __count = 0; \ + for (__i = 0; __i < __bitset_words((_s)); __i++) \ + __count += __bitcountl((p)->__bits[__i]); \ + __count; \ +}) + #endif /* !_SYS_BITSET_H_ */ Modified: stable/10/sys/sys/cpuset.h ============================================================================== --- stable/10/sys/sys/cpuset.h Tue Apr 14 19:18:34 2015 (r281537) +++ stable/10/sys/sys/cpuset.h Tue Apr 14 20:05:26 2015 (r281538) @@ -60,6 +60,7 @@ #define CPU_OR_ATOMIC(d, s) BIT_OR_ATOMIC(CPU_SETSIZE, d, s) #define CPU_COPY_STORE_REL(f, t) BIT_COPY_STORE_REL(CPU_SETSIZE, f, t) #define CPU_FFS(p) BIT_FFS(CPU_SETSIZE, p) +#define CPU_COUNT(p) BIT_COUNT(CPU_SETSIZE, p) /* * Valid cpulevel_t values. Modified: stable/10/sys/sys/libkern.h ============================================================================== --- stable/10/sys/sys/libkern.h Tue Apr 14 19:18:34 2015 (r281537) +++ stable/10/sys/sys/libkern.h Tue Apr 14 20:05:26 2015 (r281538) @@ -97,6 +97,11 @@ int flsl(long); #ifndef HAVE_INLINE_FLSLL int flsll(long long); #endif +#define bitcount64(x) __bitcount64((uint64_t)(x)) +#define bitcount32(x) __bitcount32((uint32_t)(x)) +#define bitcount16(x) __bitcount16((uint16_t)(x)) +#define bitcountl(x) __bitcountl((u_long)(x)) +#define bitcount(x) __bitcount((u_int)(x)) int fnmatch(const char *, const char *, int); int locc(int, char *, u_int); Modified: stable/10/sys/sys/systm.h ============================================================================== --- stable/10/sys/sys/systm.h Tue Apr 14 19:18:34 2015 (r281537) +++ stable/10/sys/sys/systm.h Tue Apr 14 20:05:26 2015 (r281538) @@ -424,33 +424,6 @@ int alloc_unr_specific(struct unrhdr *uh int alloc_unrl(struct unrhdr *uh); void free_unr(struct unrhdr *uh, u_int item); -/* - * Population count algorithm using SWAR approach - * - "SIMD Within A Register". - */ -static __inline uint32_t -bitcount32(uint32_t x) -{ - - x = (x & 0x55555555) + ((x & 0xaaaaaaaa) >> 1); - x = (x & 0x33333333) + ((x & 0xcccccccc) >> 2); - x = (x + (x >> 4)) & 0x0f0f0f0f; - x = (x + (x >> 8)); - x = (x + (x >> 16)) & 0x000000ff; - return (x); -} - -static __inline uint16_t -bitcount16(uint32_t x) -{ - - x = (x & 0x5555) + ((x & 0xaaaa) >> 1); - x = (x & 0x3333) + ((x & 0xcccc) >> 2); - x = (x + (x >> 4)) & 0x0f0f; - x = (x + (x >> 8)) & 0x00ff; - return (x); -} - void intr_prof_stack_use(struct thread *td, struct trapframe *frame); #endif /* !_SYS_SYSTM_H_ */ Modified: stable/10/sys/sys/types.h ============================================================================== --- stable/10/sys/sys/types.h Tue Apr 14 19:18:34 2015 (r281537) +++ stable/10/sys/sys/types.h Tue Apr 14 20:05:26 2015 (r281538) @@ -289,6 +289,68 @@ typedef _Bool bool; #include +#ifdef __POPCNT__ +#define __bitcount64(x) __builtin_popcountll((__uint64_t)(x)) +#define __bitcount32(x) __builtin_popcount((__uint32_t)(x)) +#define __bitcount16(x) __builtin_popcount((__uint16_t)(x)) +#define __bitcountl(x) __builtin_popcountl((unsigned long)(x)) +#define __bitcount(x) __builtin_popcount((unsigned int)(x)) +#else +/* + * Population count algorithm using SWAR approach + * - "SIMD Within A Register". + */ +static __inline __uint16_t +__bitcount16(__uint16_t _x) +{ + + _x = (_x & 0x5555) + ((_x & 0xaaaa) >> 1); + _x = (_x & 0x3333) + ((_x & 0xcccc) >> 2); + _x = (_x + (_x >> 4)) & 0x0f0f; + _x = (_x + (_x >> 8)) & 0x00ff; + return (_x); +} + +static __inline __uint32_t +__bitcount32(__uint32_t _x) +{ + + _x = (_x & 0x55555555) + ((_x & 0xaaaaaaaa) >> 1); + _x = (_x & 0x33333333) + ((_x & 0xcccccccc) >> 2); + _x = (_x + (_x >> 4)) & 0x0f0f0f0f; + _x = (_x + (_x >> 8)); + _x = (_x + (_x >> 16)) & 0x000000ff; + return (_x); +} + +#ifdef __LP64__ +static __inline __uint64_t +__bitcount64(__uint64_t _x) +{ + + _x = (_x & 0x5555555555555555) + ((_x & 0xaaaaaaaaaaaaaaaa) >> 1); + _x = (_x & 0x3333333333333333) + ((_x & 0xcccccccccccccccc) >> 2); + _x = (_x + (_x >> 4)) & 0x0f0f0f0f0f0f0f0f; + _x = (_x + (_x >> 8)); + _x = (_x + (_x >> 16)); + _x = (_x + (_x >> 32)) & 0x000000ff; + return (_x); +} + +#define __bitcountl(x) __bitcount64((unsigned long)(x)) +#else +static __inline __uint64_t +__bitcount64(__uint64_t _x) +{ + + return (__bitcount32(_x >> 32) + __bitcount32(_x)); +} + +#define __bitcountl(x) __bitcount32((unsigned long)(x)) +#endif +#define __bitcount(x) __bitcount32((unsigned int)(x)) +#endif + /* * minor() gives a cookie instead of an index since we don't want to * change the meanings of bits 0-15 or waste time and space shifting