Date:      Fri, 11 May 2018 00:32:32 +0000 (UTC)
From:      Jason Evans <jasone@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r333477 - in head: contrib/jemalloc contrib/jemalloc/doc contrib/jemalloc/include/jemalloc contrib/jemalloc/include/jemalloc/internal contrib/jemalloc/src lib/libc/stdlib/jemalloc
Message-ID:  <201805110032.w4B0WWbf036006@repo.freebsd.org>

Author: jasone
Date: Fri May 11 00:32:31 2018
New Revision: 333477
URL: https://svnweb.freebsd.org/changeset/base/333477

Log:
  Update jemalloc to version 5.1.0.

Added:
  head/contrib/jemalloc/include/jemalloc/internal/arena_stats.h   (contents, props changed)
  head/contrib/jemalloc/include/jemalloc/internal/bin.h   (contents, props changed)
  head/contrib/jemalloc/include/jemalloc/internal/bin_stats.h   (contents, props changed)
  head/contrib/jemalloc/include/jemalloc/internal/cache_bin.h   (contents, props changed)
  head/contrib/jemalloc/include/jemalloc/internal/div.h   (contents, props changed)
  head/contrib/jemalloc/include/jemalloc/internal/emitter.h   (contents, props changed)
  head/contrib/jemalloc/include/jemalloc/internal/log.h   (contents, props changed)
  head/contrib/jemalloc/src/bin.c   (contents, props changed)
  head/contrib/jemalloc/src/div.c   (contents, props changed)
  head/contrib/jemalloc/src/log.c   (contents, props changed)
Deleted:
  head/contrib/jemalloc/include/jemalloc/internal/stats_tsd.h
Modified:
  head/contrib/jemalloc/COPYING
  head/contrib/jemalloc/ChangeLog
  head/contrib/jemalloc/FREEBSD-Xlist
  head/contrib/jemalloc/FREEBSD-diffs
  head/contrib/jemalloc/VERSION
  head/contrib/jemalloc/doc/jemalloc.3
  head/contrib/jemalloc/include/jemalloc/internal/arena_externs.h
  head/contrib/jemalloc/include/jemalloc/internal/arena_inlines_a.h
  head/contrib/jemalloc/include/jemalloc/internal/arena_inlines_b.h
  head/contrib/jemalloc/include/jemalloc/internal/arena_structs_b.h
  head/contrib/jemalloc/include/jemalloc/internal/arena_types.h
  head/contrib/jemalloc/include/jemalloc/internal/background_thread_externs.h
  head/contrib/jemalloc/include/jemalloc/internal/background_thread_structs.h
  head/contrib/jemalloc/include/jemalloc/internal/base_externs.h
  head/contrib/jemalloc/include/jemalloc/internal/base_inlines.h
  head/contrib/jemalloc/include/jemalloc/internal/base_structs.h
  head/contrib/jemalloc/include/jemalloc/internal/base_types.h
  head/contrib/jemalloc/include/jemalloc/internal/ctl.h
  head/contrib/jemalloc/include/jemalloc/internal/extent_externs.h
  head/contrib/jemalloc/include/jemalloc/internal/extent_inlines.h
  head/contrib/jemalloc/include/jemalloc/internal/extent_structs.h
  head/contrib/jemalloc/include/jemalloc/internal/extent_types.h
  head/contrib/jemalloc/include/jemalloc/internal/hash.h
  head/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h
  head/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h
  head/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h
  head/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h
  head/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h
  head/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h
  head/contrib/jemalloc/include/jemalloc/internal/jemalloc_preamble.h
  head/contrib/jemalloc/include/jemalloc/internal/malloc_io.h
  head/contrib/jemalloc/include/jemalloc/internal/mutex_prof.h
  head/contrib/jemalloc/include/jemalloc/internal/pages.h
  head/contrib/jemalloc/include/jemalloc/internal/private_namespace.h
  head/contrib/jemalloc/include/jemalloc/internal/prof_inlines_a.h
  head/contrib/jemalloc/include/jemalloc/internal/prof_inlines_b.h
  head/contrib/jemalloc/include/jemalloc/internal/rtree.h
  head/contrib/jemalloc/include/jemalloc/internal/rtree_tsd.h
  head/contrib/jemalloc/include/jemalloc/internal/spin.h
  head/contrib/jemalloc/include/jemalloc/internal/stats.h
  head/contrib/jemalloc/include/jemalloc/internal/sz.h
  head/contrib/jemalloc/include/jemalloc/internal/tcache_externs.h
  head/contrib/jemalloc/include/jemalloc/internal/tcache_inlines.h
  head/contrib/jemalloc/include/jemalloc/internal/tcache_structs.h
  head/contrib/jemalloc/include/jemalloc/internal/tcache_types.h
  head/contrib/jemalloc/include/jemalloc/internal/ticker.h
  head/contrib/jemalloc/include/jemalloc/internal/tsd.h
  head/contrib/jemalloc/include/jemalloc/internal/tsd_tls.h
  head/contrib/jemalloc/include/jemalloc/internal/witness.h
  head/contrib/jemalloc/include/jemalloc/jemalloc.h
  head/contrib/jemalloc/src/arena.c
  head/contrib/jemalloc/src/background_thread.c
  head/contrib/jemalloc/src/base.c
  head/contrib/jemalloc/src/ctl.c
  head/contrib/jemalloc/src/extent.c
  head/contrib/jemalloc/src/extent_dss.c
  head/contrib/jemalloc/src/jemalloc.c
  head/contrib/jemalloc/src/malloc_io.c
  head/contrib/jemalloc/src/mutex.c
  head/contrib/jemalloc/src/pages.c
  head/contrib/jemalloc/src/prof.c
  head/contrib/jemalloc/src/stats.c
  head/contrib/jemalloc/src/sz.c
  head/contrib/jemalloc/src/tcache.c
  head/contrib/jemalloc/src/tsd.c
  head/lib/libc/stdlib/jemalloc/Makefile.inc

Modified: head/contrib/jemalloc/COPYING
==============================================================================
--- head/contrib/jemalloc/COPYING	Fri May 11 00:19:49 2018	(r333476)
+++ head/contrib/jemalloc/COPYING	Fri May 11 00:32:31 2018	(r333477)
@@ -1,10 +1,10 @@
 Unless otherwise specified, files in the jemalloc source distribution are
 subject to the following license:
 --------------------------------------------------------------------------------
-Copyright (C) 2002-2017 Jason Evans <jasone@canonware.com>.
+Copyright (C) 2002-2018 Jason Evans <jasone@canonware.com>.
 All rights reserved.
 Copyright (C) 2007-2012 Mozilla Foundation.  All rights reserved.
-Copyright (C) 2009-2017 Facebook, Inc.  All rights reserved.
+Copyright (C) 2009-2018 Facebook, Inc.  All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

Modified: head/contrib/jemalloc/ChangeLog
==============================================================================
--- head/contrib/jemalloc/ChangeLog	Fri May 11 00:19:49 2018	(r333476)
+++ head/contrib/jemalloc/ChangeLog	Fri May 11 00:32:31 2018	(r333477)
@@ -4,6 +4,123 @@ brevity.  Much more detail can be found in the git rev
 
     https://github.com/jemalloc/jemalloc
 
+* 5.1.0 (May 4th, 2018)
+
+  This release is primarily about fine-tuning, ranging from several new features
+  to numerous notable performance and portability enhancements.  The release and
+  prior dev versions have been running in multiple large scale applications for
+  months, and the cumulative improvements are substantial in many cases.
+
+  Given the long and successful production runs, this release is likely a good
+  candidate for applications to upgrade to, from both jemalloc 5.0 and earlier
+  versions.  For performance-critical applications, the newly added TUNING.md
+  provides guidelines on jemalloc tuning.
+
+  New features:
+  - Implement transparent huge page support for internal metadata.  (@interwq)
+  - Add opt.thp to allow enabling / disabling transparent huge pages for all
+    mappings.  (@interwq)
+  - Add maximum background thread count option.  (@djwatson)
+  - Allow prof_active to control opt.lg_prof_interval and prof.gdump.
+    (@interwq)
+  - Allow arena index lookup based on allocation addresses via mallctl.
+    (@lionkov)
+  - Allow disabling initial-exec TLS model.  (@davidtgoldblatt, @KenMacD)
+  - Add opt.lg_extent_max_active_fit to set the max ratio between the size of
+    the active extent selected (to split off from) and the size of the requested
+    allocation.  (@interwq, @davidtgoldblatt)
+  - Add retain_grow_limit to set the max size when growing virtual address
+    space.  (@interwq)
+  - Add mallctl interfaces:
+    + arena.<i>.retain_grow_limit  (@interwq)
+    + arenas.lookup  (@lionkov)
+    + max_background_threads  (@djwatson)
+    + opt.lg_extent_max_active_fit  (@interwq)
+    + opt.max_background_threads  (@djwatson)
+    + opt.metadata_thp  (@interwq)
+    + opt.thp  (@interwq)
+    + stats.metadata_thp  (@interwq)
+
+  Portability improvements:
+  - Support GNU/kFreeBSD configuration.  (@paravoid)
+  - Support m68k, nios2 and SH3 architectures.  (@paravoid)
+  - Fall back to FD_CLOEXEC when O_CLOEXEC is unavailable.  (@zonyitoo)
+  - Fix symbol listing for cross-compiling.  (@tamird)
+  - Fix high bits computation on ARM.  (@davidtgoldblatt, @paravoid)
+  - Disable the CPU_SPINWAIT macro for Power.  (@davidtgoldblatt, @marxin)
+  - Fix MSVC 2015 & 2017 builds.  (@rustyx)
+  - Improve RISC-V support.  (@EdSchouten)
+  - Set name mangling script in strict mode.  (@nicolov)
+  - Avoid MADV_HUGEPAGE on ARM.  (@marxin)
+  - Modify configure to determine return value of strerror_r.
+    (@davidtgoldblatt, @cferris1000)
+  - Make sure CXXFLAGS is tested with CPP compiler.  (@nehaljwani)
+  - Fix 32-bit build on MSVC.  (@rustyx)
+  - Fix external symbol on MSVC.  (@maksqwe)
+  - Avoid a printf format specifier warning.  (@jasone)
+  - Add configure option --disable-initial-exec-tls which can allow jemalloc to
+    be dynamically loaded after program startup.  (@davidtgoldblatt, @KenMacD)
+  - AArch64: Add ILP32 support.  (@cmuellner)
+  - Add --with-lg-vaddr configure option to support cross compiling.
+    (@cmuellner, @davidtgoldblatt)
+
+  Optimizations and refactors:
+  - Improve active extent fit with extent_max_active_fit.  This considerably
+    reduces fragmentation over time and improves virtual memory and metadata
+    usage.  (@davidtgoldblatt, @interwq)
+  - Eagerly coalesce large extents to reduce fragmentation.  (@interwq)
+  - sdallocx: only read size info when page aligned (i.e. possibly sampled),
+    which speeds up the sized deallocation path significantly.  (@interwq)
+  - Avoid attempting new mappings for in place expansion with retain, since
+    it rarely succeeds in practice and causes high overhead.  (@interwq)
+  - Refactor OOM handling in newImpl.  (@wqfish)
+  - Add internal fine-grained logging functionality for debugging use.
+    (@davidtgoldblatt)
+  - Refactor arena / tcache interactions.  (@davidtgoldblatt)
+  - Refactor extent management with dumpable flag.  (@davidtgoldblatt)
+  - Add runtime detection of lazy purging.  (@interwq)
+  - Use pairing heap instead of red-black tree for extents_avail.  (@djwatson)
+  - Use sysctl on startup in FreeBSD.  (@trasz)
+  - Use thread local prng state instead of atomic.  (@djwatson)
+  - Make decay always purge one more extent than before, because in practice
+    large extents are usually the ones that cross the decay threshold.
+    Purging the additional extent helps save memory as well as reduce VM
+    fragmentation.  (@interwq)
+  - Fast division by dynamic values.  (@davidtgoldblatt)
+  - Improve the fit for aligned allocation.  (@interwq, @edwinsmith)
+  - Refactor extent_t bitpacking.  (@rkmisra)
+  - Optimize the generated assembly for ticker operations.  (@davidtgoldblatt)
+  - Convert stats printing to use a structured text emitter.  (@davidtgoldblatt)
+  - Remove preserve_lru feature for extents management.  (@djwatson)
+  - Consolidate two memory loads into one on the fast deallocation path.
+    (@davidtgoldblatt, @interwq)
+
+  Bug fixes (most of the issues are only relevant to jemalloc 5.0):
+  - Fix deadlock with multithreaded fork in OS X.  (@davidtgoldblatt)
+  - Validate returned file descriptor before use.  (@zonyitoo)
+  - Fix a few background thread initialization and shutdown issues.  (@interwq)
+  - Fix an extent coalesce + decay race by taking both coalescing extents off
+    the LRU list.  (@interwq)
+  - Fix a potentially unbounded increase during decay, caused by one thread
+    continually stashing memory to purge while other threads generate new
+    pages.  The number of pages to purge is now checked to prevent this.
+    (@interwq)
+  - Fix a FreeBSD bootstrap assertion.  (@strejda, @interwq)
+  - Handle 32 bit mutex counters.  (@rkmisra)
+  - Fix an indexing bug when creating background threads.  (@davidtgoldblatt,
+    @binliu19)
+  - Fix arguments passed to extent_init.  (@yuleniwo, @interwq)
+  - Fix addresses used for ordering mutexes.  (@rkmisra)
+  - Fix abort_conf processing during bootstrap.  (@interwq)
+  - Fix include path order for out-of-tree builds.  (@cmuellner)
+
+  Incompatible changes:
+  - Remove --disable-thp.  (@interwq)
+  - Remove mallctl interfaces:
+    + config.thp  (@interwq)
+
+  Documentation:
+  - Add TUNING.md.  (@interwq, @davidtgoldblatt, @djwatson)
+
 * 5.0.1 (July 1, 2017)
 
   This bugfix release fixes several issues, most of which are obscure enough
@@ -515,7 +632,7 @@ brevity.  Much more detail can be found in the git rev
   these fixes, xallocx() now tries harder to partially fulfill requests for
   optional extra space.  Note that a couple of minor heap profiling
   optimizations are included, but these are better thought of as performance
-  fixes that were integral to disovering most of the other bugs.
+  fixes that were integral to discovering most of the other bugs.
 
   Optimizations:
   - Avoid a chunk metadata read in arena_prof_tctx_set(), since it is in the
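A minimal sketch of how two of the mallctl interfaces added in 5.1.0 might be
exercised.  This assumes a FreeBSD system where mallctl() is declared in
<malloc_np.h>; error handling is elided and the output strings are
illustrative:

#include <stdio.h>
#include <stdlib.h>
#include <malloc_np.h>

int
main(void) {
	/* arenas.lookup: map an allocation address to its arena index. */
	void *p = malloc(64);
	unsigned arena_ind;
	size_t sz = sizeof(arena_ind);
	if (mallctl("arenas.lookup", &arena_ind, &sz, &p, sizeof(p)) == 0) {
		printf("allocation %p belongs to arena %u\n", p, arena_ind);
	}

	/* opt.max_background_threads: read the background thread cap. */
	size_t max_bg;
	sz = sizeof(max_bg);
	if (mallctl("opt.max_background_threads", &max_bg, &sz, NULL, 0) == 0) {
		printf("opt.max_background_threads: %zu\n", max_bg);
	}

	free(p);
	return 0;
}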

Modified: head/contrib/jemalloc/FREEBSD-Xlist
==============================================================================
--- head/contrib/jemalloc/FREEBSD-Xlist	Fri May 11 00:19:49 2018	(r333476)
+++ head/contrib/jemalloc/FREEBSD-Xlist	Fri May 11 00:32:31 2018	(r333477)
@@ -7,6 +7,7 @@ FREEBSD-*
 INSTALL.md
 Makefile*
 README
+TUNING.md
 autogen.sh
 autom4te.cache/
 bin/

Modified: head/contrib/jemalloc/FREEBSD-diffs
==============================================================================
--- head/contrib/jemalloc/FREEBSD-diffs	Fri May 11 00:19:49 2018	(r333476)
+++ head/contrib/jemalloc/FREEBSD-diffs	Fri May 11 00:32:31 2018	(r333477)
@@ -1,5 +1,5 @@
 diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
-index 21e401ac..c26f9f4a 100644
+index 1e12fd3a..c42a7e10 100644
 --- a/doc/jemalloc.xml.in
 +++ b/doc/jemalloc.xml.in
 @@ -53,11 +53,22 @@
@@ -26,7 +26,7 @@ index 21e401ac..c26f9f4a 100644
        <refsect2>
          <title>Standard API</title>
          <funcprototype>
-@@ -3252,4 +3263,18 @@ malloc_conf = "narenas:1";]]></programlisting></para>
+@@ -3376,4 +3387,18 @@ malloc_conf = "narenas:1";]]></programlisting></para>
      <para>The <function>posix_memalign()</function> function conforms
      to IEEE Std 1003.1-2001 (<quote>POSIX.1</quote>).</para>
    </refsect1>
@@ -64,7 +64,7 @@ index cd49afcb..85e2a991 100644
  #define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook)
  
 diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h
-index 1efdb56b..12a7e5a8 100644
+index be70df51..84cd70da 100644
 --- a/include/jemalloc/internal/jemalloc_internal_decls.h
 +++ b/include/jemalloc/internal/jemalloc_internal_decls.h
 @@ -1,6 +1,9 @@
@@ -78,7 +78,7 @@ index 1efdb56b..12a7e5a8 100644
  #ifdef _WIN32
  #  include <windows.h>
 diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in
-index 18539a09..c8af8683 100644
+index e621fbc8..dbdd5d6b 100644
 --- a/include/jemalloc/internal/jemalloc_preamble.h.in
 +++ b/include/jemalloc/internal/jemalloc_preamble.h.in
 @@ -8,6 +8,9 @@
@@ -91,7 +91,7 @@ index 18539a09..c8af8683 100644
  #define JEMALLOC_NO_DEMANGLE
  #ifdef JEMALLOC_JET
  #  undef JEMALLOC_IS_MALLOC
-@@ -68,13 +71,7 @@ static const bool config_fill =
+@@ -79,13 +82,7 @@ static const bool config_fill =
      false
  #endif
      ;
@@ -128,9 +128,23 @@ index 6520c251..0013cbe9 100644
  bool malloc_mutex_boot(void);
  void malloc_mutex_prof_data_reset(tsdn_t *tsdn, malloc_mutex_t *mutex);
  
+diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h
+index 0b9841aa..f03eee61 100644
+--- a/include/jemalloc/internal/tsd.h
++++ b/include/jemalloc/internal/tsd.h
+@@ -122,7 +122,8 @@ struct tsd_s {
+ 	t use_a_getter_or_setter_instead_##n;
+ MALLOC_TSD
+ #undef O
+-};
++/* AddressSanitizer requires TLS data to be aligned to at least 8 bytes. */
++} JEMALLOC_ALIGNED(16);
+ 
+ /*
+  * Wrapper around tsd_t that makes it possible to avoid implicit conversion
 diff --git a/include/jemalloc/jemalloc_FreeBSD.h b/include/jemalloc/jemalloc_FreeBSD.h
 new file mode 100644
-index 00000000..355b565c
+index 00000000..b752b0e7
 --- /dev/null
 +++ b/include/jemalloc/jemalloc_FreeBSD.h
 @@ -0,0 +1,185 @@
@@ -203,7 +217,7 @@ index 00000000..355b565c
 +#  define LG_VADDR		32
 +#  define LG_SIZEOF_PTR		2
 +#endif
-+#ifdef __riscv__
++#ifdef __riscv
 +#  define LG_VADDR		64
 +#  define LG_SIZEOF_PTR		3
 +#endif
@@ -331,10 +345,10 @@ index f9438912..47d032c1 100755
 +#include "jemalloc_FreeBSD.h"
  EOF
 diff --git a/src/jemalloc.c b/src/jemalloc.c
-index 52c86aa6..868c9e86 100644
+index f93c16fa..e0ad297b 100644
 --- a/src/jemalloc.c
 +++ b/src/jemalloc.c
-@@ -20,6 +20,10 @@
+@@ -21,6 +21,10 @@
  /******************************************************************************/
  /* Data. */
  
@@ -345,7 +359,7 @@ index 52c86aa6..868c9e86 100644
  /* Runtime configuration options. */
  const char	*je_malloc_conf
  #ifndef _WIN32
-@@ -2981,6 +2985,103 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) {
+@@ -3160,6 +3164,103 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) {
   */
  /******************************************************************************/
  /*
@@ -449,7 +463,7 @@ index 52c86aa6..868c9e86 100644
   * The following functions are used by threading libraries for protection of
   * malloc during fork().
   */
-@@ -3141,4 +3242,11 @@ jemalloc_postfork_child(void) {
+@@ -3323,4 +3424,11 @@ jemalloc_postfork_child(void) {
  	ctl_postfork_child(tsd_tsdn(tsd));
  }
  
@@ -462,10 +476,10 @@ index 52c86aa6..868c9e86 100644
 +
  /******************************************************************************/
 diff --git a/src/malloc_io.c b/src/malloc_io.c
-index 6b99afcd..4363cb83 100644
+index 7bdc13f9..c8802c70 100644
 --- a/src/malloc_io.c
 +++ b/src/malloc_io.c
-@@ -88,6 +88,20 @@ wrtmessage(void *cbopaque, const char *s) {
+@@ -75,6 +75,20 @@ wrtmessage(void *cbopaque, const char *s) {
  
  JEMALLOC_EXPORT void	(*je_malloc_message)(void *, const char *s);
  
@@ -487,10 +501,10 @@ index 6b99afcd..4363cb83 100644
   * Wrapper around malloc_message() that avoids the need for
   * je_malloc_message(...) throughout the code.
 diff --git a/src/mutex.c b/src/mutex.c
-index a528ef0c..820af613 100644
+index 30222b3e..b2c36283 100644
 --- a/src/mutex.c
 +++ b/src/mutex.c
-@@ -40,6 +40,17 @@ pthread_create(pthread_t *__restrict thread,
+@@ -41,6 +41,17 @@ pthread_create(pthread_t *__restrict thread,
  #ifdef JEMALLOC_MUTEX_INIT_CB
  JEMALLOC_EXPORT int	_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
      void *(calloc_cb)(size_t, size_t));
@@ -508,7 +522,7 @@ index a528ef0c..820af613 100644
  #endif
  
  void
-@@ -130,6 +141,16 @@ mutex_addr_comp(const witness_t *witness1, void *mutex1,
+@@ -131,6 +142,16 @@ mutex_addr_comp(const witness_t *witness1, void *mutex1,
  }
  
  bool
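The tsd.h hunk above over-aligns the TLS tsd_t structure because
AddressSanitizer requires sufficiently aligned TLS data.  A stand-alone
illustration of the same technique follows; JEMALLOC_ALIGNED wraps a
compiler-specific attribute, and the GCC/Clang spelling shown here is an
assumption about its expansion:

/* Over-align a thread-local structure so instrumented TLS accesses are safe. */
struct tsd_like_s {
	long state;
	/* ... per-thread allocator state ... */
} __attribute__((aligned(16)));

static __thread struct tsd_like_s tsd_like;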

Modified: head/contrib/jemalloc/VERSION
==============================================================================
--- head/contrib/jemalloc/VERSION	Fri May 11 00:19:49 2018	(r333476)
+++ head/contrib/jemalloc/VERSION	Fri May 11 00:32:31 2018	(r333477)
@@ -1 +1 @@
-5.0.1-0-g896ed3a8b3f41998d4fb4d625d30ac63ef2d51fb
+5.1.0-0-g61efbda7098de6fe64c362d309824864308c36d4

Modified: head/contrib/jemalloc/doc/jemalloc.3
==============================================================================
--- head/contrib/jemalloc/doc/jemalloc.3	Fri May 11 00:19:49 2018	(r333476)
+++ head/contrib/jemalloc/doc/jemalloc.3	Fri May 11 00:32:31 2018	(r333477)
@@ -2,12 +2,12 @@
 .\"     Title: JEMALLOC
 .\"    Author: Jason Evans
 .\" Generator: DocBook XSL Stylesheets v1.76.1 <http://docbook.sf.net/>;
-.\"      Date: 07/01/2017
+.\"      Date: 05/08/2018
 .\"    Manual: User Manual
-.\"    Source: jemalloc 5.0.1-0-g896ed3a8b3f41998d4fb4d625d30ac63ef2d51fb
+.\"    Source: jemalloc 5.1.0-0-g61efbda7098de6fe64c362d309824864308c36d4
 .\"  Language: English
 .\"
-.TH "JEMALLOC" "3" "07/01/2017" "jemalloc 5.0.1-0-g896ed3a8b3f4" "User Manual"
+.TH "JEMALLOC" "3" "05/08/2018" "jemalloc 5.1.0-0-g61efbda7098d" "User Manual"
 .\" -----------------------------------------------------------------
 .\" * Define some portability stuff
 .\" -----------------------------------------------------------------
@@ -31,7 +31,7 @@
 jemalloc \- general purpose memory allocation functions
 .SH "LIBRARY"
 .PP
-This manual describes jemalloc 5\&.0\&.1\-0\-g896ed3a8b3f41998d4fb4d625d30ac63ef2d51fb\&. More information can be found at the
+This manual describes jemalloc 5\&.1\&.0\-0\-g61efbda7098de6fe64c362d309824864308c36d4\&. More information can be found at the
 \m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&.
 .PP
 The following configuration options are enabled in libc\*(Aqs built\-in jemalloc:
@@ -741,6 +741,13 @@ opt\&.background_thread
 can be used to set the default option\&. This option is only available on selected pthread\-based platforms\&.
 .RE
 .PP
+max_background_threads (\fBsize_t\fR) rw
+.RS 4
+Maximum number of background worker threads that will be created\&. This value is capped at
+opt\&.max_background_threads
+at startup\&.
+.RE
+.PP
 config\&.cache_oblivious (\fBbool\fR) r\-
 .RS 4
 \fB\-\-enable\-cache\-oblivious\fR
@@ -796,12 +803,6 @@ config\&.stats (\fBbool\fR) r\-
 was specified during build configuration\&.
 .RE
 .PP
-config\&.thp (\fBbool\fR) r\-
-.RS 4
-\fB\-\-disable\-thp\fR
-was not specified during build configuration, and the system supports transparent huge page manipulation\&.
-.RE
-.PP
 config\&.utrace (\fBbool\fR) r\-
 .RS 4
 \fB\-\-enable\-utrace\fR
@@ -834,6 +835,17 @@ in these cases\&. This option is disabled by default u
 is specified during configuration, in which case it is enabled by default\&.
 .RE
 .PP
+opt\&.metadata_thp (\fBconst char *\fR) r\-
+.RS 4
+Controls whether to allow jemalloc to use transparent huge page (THP) for internal metadata (see
+stats\&.metadata)\&.
+\(lqalways\(rq
+allows such usage\&.
+\(lqauto\(rq
+uses no THP initially, but may begin to do so when metadata usage reaches a certain level\&. The default is
+\(lqdisabled\(rq\&.
+.RE
+.PP
 opt\&.retain (\fBbool\fR) r\-
 .RS 4
 If true, retain unused virtual memory for later reuse rather than discarding it by calling
@@ -883,11 +895,18 @@ setting uses one arena per physical CPU, which means t
 .PP
 opt\&.background_thread (\fBconst bool\fR) r\-
 .RS 4
-Internal background worker threads enabled/disabled\&. See
+Internal background worker threads enabled/disabled\&. Because of potential circular dependencies, enabling background threads using this option may cause a crash or deadlock during initialization\&. For a reliable way to use this feature, see
 background_thread
 for dynamic control options and details\&. This option is disabled by default\&.
 .RE
 .PP
+opt\&.max_background_threads (\fBconst size_t\fR) r\-
+.RS 4
+Maximum number of background threads that will be created if
+background_thread
+is set\&. Defaults to the number of CPUs\&.
+.RE
+.PP
 opt\&.dirty_decay_ms (\fBssize_t\fR) r\-
 .RS 4
 Approximate time in milliseconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged (i\&.e\&. converted to muzzy via e\&.g\&.
@@ -895,7 +914,7 @@ madvise(\fI\&.\&.\&.\fR\fI\fBMADV_FREE\fR\fR)
 if supported by the operating system, or converted to clean otherwise) and/or reused\&. Dirty pages are defined as previously having been potentially written to by the application, and therefore consuming physical memory, yet having no current use\&. The pages are incrementally purged according to a sigmoidal decay curve that starts and ends with zero purge rate\&. A decay time of 0 causes all unused dirty pages to be purged immediately upon creation\&. A decay time of \-1 disables purging\&. The default decay time is 10 seconds\&. See
 arenas\&.dirty_decay_ms
 and
-arena\&.<i>\&.muzzy_decay_ms
+arena\&.<i>\&.dirty_decay_ms
 for related dynamic control options\&. See
 opt\&.muzzy_decay_ms
 for a description of muzzy pages\&.
@@ -911,6 +930,11 @@ arena\&.<i>\&.muzzy_decay_ms
 for related dynamic control options\&.
 .RE
 .PP
+opt\&.lg_extent_max_active_fit (\fBsize_t\fR) r\-
+.RS 4
+When reusing dirty extents, this determines the (log base 2 of the) maximum ratio between the size of the active extent selected (to split off from) and the size of the requested allocation\&. This prevents the splitting of large active extents for smaller allocations, which can reduce fragmentation over the long run (especially for non\-active extents)\&. A lower value may reduce fragmentation, at the cost of extra active extents\&. The default value is 6, which gives a maximum ratio of 64 (2^6)\&.
+.RE
+.PP
 opt\&.stats_print (\fBbool\fR) r\-
 .RS 4
 Enable/disable statistics printing at exit\&. If enabled, the
@@ -1008,6 +1032,15 @@ opt\&.lg_tcache_max (\fBsize_t\fR) r\-
 Maximum size class (log base 2) to cache in the thread\-specific cache (tcache)\&. At a minimum, all small size classes are cached, and at a maximum all large size classes are cached\&. The default maximum is 32 KiB (2^15)\&.
 .RE
 .PP
+opt\&.thp (\fBconst char *\fR) r\-
+.RS 4
+Transparent hugepage (THP) mode\&. Settings "always", "never" and "default" are available if THP is supported by the operating system\&. The "always" setting enables transparent hugepage for all user memory mappings with
+\fI\fBMADV_HUGEPAGE\fR\fR; "never" ensures no transparent hugepage with
+\fI\fBMADV_NOHUGEPAGE\fR\fR; the default setting "default" makes no changes\&. Note that this option does not affect THP for jemalloc internal metadata (see
+opt\&.metadata_thp); in addition, for arenas with customized
+extent_hooks, this option is bypassed as it is implemented as part of the default extent hooks\&.
+.RE
+.PP
 opt\&.prof (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
 .RS 4
 Memory profiling enabled/disabled\&. If enabled, profile memory allocation activity\&. See the
@@ -1248,6 +1281,14 @@ opt\&.muzzy_decay_ms
 for additional information\&.
 .RE
 .PP
+arena\&.<i>\&.retain_grow_limit (\fBsize_t\fR) rw
+.RS 4
+Maximum size to grow retained region (only relevant when
+opt\&.retain
+is enabled)\&. This controls the maximum increment to expand virtual memory, or allocation through
+arena\&.<i>\&.extent_hooks\&. In particular, if customized extent hooks reserve physical memory (e\&.g\&. 1G huge pages), this is useful to control the allocation hook\*(Aqs input size\&. The default is no limit\&.
+.RE
+.PP
 arena\&.<i>\&.extent_hooks (\fBextent_hooks_t *\fR) rw
 .RS 4
 Get or set the extent management hook functions for arena <i>\&. The functions must be capable of operating on all extant extents associated with arena <i>, usually by passing unknown extents to the replaced functions\&. In practice, it is feasible to control allocation for arenas explicitly created via
@@ -1278,7 +1319,7 @@ struct extent_hooks_s {
 The
 \fBextent_hooks_t\fR
 structure comprises function pointers which are described individually below\&. jemalloc uses these functions to manage extent lifetime, which starts off with allocation of mapped committed memory, in the simplest case followed by deallocation\&. However, there are performance and platform reasons to retain extents for later reuse\&. Cleanup attempts cascade from deallocation to decommit to forced purging to lazy purging, which gives the extent management functions opportunities to reject the most permanent cleanup operations in favor of less permanent (and often less costly) operations\&. All operations except allocation can be universally opted out of by setting the hook pointers to
-\fBNULL\fR, or selectively opted out of by returning failure\&.
+\fBNULL\fR, or selectively opted out of by returning failure\&. Note that once the extent hook is set, the structure is accessed directly by the associated arenas, so it must remain valid for the entire lifetime of the arenas\&.
 .HP \w'typedef\ void\ *(extent_alloc_t)('u
 .BI "typedef void *(extent_alloc_t)(extent_hooks_t\ *" "extent_hooks" ", void\ *" "new_addr" ", size_t\ " "size" ", size_t\ " "alignment" ", bool\ *" "zero" ", bool\ *" "commit" ", unsigned\ " "arena_ind" ");"
 .sp
@@ -1572,6 +1613,11 @@ arenas\&.create (\fBunsigned\fR, \fBextent_hooks_t *\f
 Explicitly create a new arena outside the range of automatically managed arenas, with optionally specified extent hooks, and return the new arena index\&.
 .RE
 .PP
+arenas\&.lookup (\fBunsigned\fR, \fBvoid*\fR) rw
+.RS 4
+Index of the arena to which an allocation belongs\&.
+.RE
+.PP
 prof\&.thread_active_init (\fBbool\fR) rw [\fB\-\-enable\-prof\fR]
 .RS 4
 Control the initial setting for
@@ -1648,9 +1694,18 @@ stats\&.metadata (\fBsize_t\fR) r\- [\fB\-\-enable\-st
 .RS 4
 Total number of bytes dedicated to metadata, which comprise base allocations used for bootstrap\-sensitive allocator metadata structures (see
 stats\&.arenas\&.<i>\&.base) and internal allocations (see
-stats\&.arenas\&.<i>\&.internal)\&.
+stats\&.arenas\&.<i>\&.internal)\&. Transparent huge page (enabled with
+opt\&.metadata_thp) usage is not considered\&.
 .RE
 .PP
+stats\&.metadata_thp (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
+.RS 4
+Number of transparent huge pages (THP) used for metadata\&. See
+stats\&.metadata
+and
+opt\&.metadata_thp
+for details\&.
+.RE
+.PP
 stats\&.resident (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
 .RS 4
 Maximum number of bytes in physically resident data pages mapped by the allocator, comprising all pages dedicated to allocator metadata, pages backing active allocations, and unused dirty pages\&. This is a maximum rather than precise because pages may not actually be physically resident if they correspond to demand\-zeroed virtual memory that has not yet been touched\&. This is a multiple of the page size, and is larger than
@@ -1829,6 +1884,13 @@ Number of bytes dedicated to bootstrap\-sensitive allo
 stats\&.arenas\&.<i>\&.internal (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
 .RS 4
 Number of bytes dedicated to internal allocations\&. Internal allocations differ from application\-originated allocations in that they are for internal use, and that they are omitted from heap profiles\&.
+.RE
+.PP
+stats\&.arenas\&.<i>\&.metadata_thp (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
+.RS 4
+Number of transparent huge pages (THP) used for metadata\&. See
+opt\&.metadata_thp
+for details\&.
 .RE
 .PP
 stats\&.arenas\&.<i>\&.resident (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
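To make the opt.lg_extent_max_active_fit arithmetic above concrete, here is a
sketch of the implied fit check (names and structure are illustrative, not
jemalloc's actual internals):

/*
 * An active extent is an acceptable split candidate only when it is at
 * most 2^lg_max_fit times larger than the request.  With the default
 * lg_max_fit of 6 (ratio 64), a 64 KiB request may split an extent of
 * up to 4 MiB, but not a larger one.
 */
static bool
extent_fit_ok(size_t extent_size, size_t request_size, unsigned lg_max_fit) {
	return (extent_size >> lg_max_fit) <= request_size;
}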

Modified: head/contrib/jemalloc/include/jemalloc/internal/arena_externs.h
==============================================================================
--- head/contrib/jemalloc/include/jemalloc/internal/arena_externs.h	Fri May 11 00:19:49 2018	(r333476)
+++ head/contrib/jemalloc/include/jemalloc/internal/arena_externs.h	Fri May 11 00:32:31 2018	(r333477)
@@ -1,6 +1,7 @@
 #ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H
 #define JEMALLOC_INTERNAL_ARENA_EXTERNS_H
 
+#include "jemalloc/internal/bin.h"
 #include "jemalloc/internal/extent_dss.h"
 #include "jemalloc/internal/pages.h"
 #include "jemalloc/internal/size_classes.h"
@@ -9,25 +10,19 @@
 extern ssize_t opt_dirty_decay_ms;
 extern ssize_t opt_muzzy_decay_ms;
 
-extern const arena_bin_info_t arena_bin_info[NBINS];
-
 extern percpu_arena_mode_t opt_percpu_arena;
 extern const char *percpu_arena_mode_names[];
 
 extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS];
 extern malloc_mutex_t arenas_lock;
 
-void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
-    szind_t szind, uint64_t nrequests);
-void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
-    size_t size);
 void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena,
     unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms,
     ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy);
 void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
     const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
     size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
-    malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats);
+    bin_stats_t *bstats, arena_stats_large_t *lstats);
 void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena,
     extent_hooks_t **r_extent_hooks, extent_t *extent);
 #ifdef JEMALLOC_JET
@@ -50,11 +45,11 @@ void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is
 void arena_reset(tsd_t *tsd, arena_t *arena);
 void arena_destroy(tsd_t *tsd, arena_t *arena);
 void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
-    tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
-void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info,
+    cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
+void arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info,
     bool zero);
 
-typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *);
+typedef void (arena_dalloc_junk_small_t)(void *, const bin_info_t *);
 extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small;
 
 void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size,
@@ -77,6 +72,8 @@ ssize_t arena_dirty_decay_ms_default_get(void);
 bool arena_dirty_decay_ms_default_set(ssize_t decay_ms);
 ssize_t arena_muzzy_decay_ms_default_get(void);
 bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms);
+bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena,
+    size_t *old_limit, size_t *new_limit);
 unsigned arena_nthreads_get(arena_t *arena, bool internal);
 void arena_nthreads_inc(arena_t *arena, bool internal);
 void arena_nthreads_dec(arena_t *arena, bool internal);

Modified: head/contrib/jemalloc/include/jemalloc/internal/arena_inlines_a.h
==============================================================================
--- head/contrib/jemalloc/include/jemalloc/internal/arena_inlines_a.h	Fri May 11 00:19:49 2018	(r333476)
+++ head/contrib/jemalloc/include/jemalloc/internal/arena_inlines_a.h	Fri May 11 00:32:31 2018	(r333477)
@@ -25,7 +25,7 @@ static inline bool
 arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) {
 	cassert(config_prof);
 
-	if (likely(prof_interval == 0)) {
+	if (likely(prof_interval == 0 || !prof_active_get_unlocked())) {
 		return false;
 	}
 

Modified: head/contrib/jemalloc/include/jemalloc/internal/arena_inlines_b.h
==============================================================================
--- head/contrib/jemalloc/include/jemalloc/internal/arena_inlines_b.h	Fri May 11 00:19:49 2018	(r333476)
+++ head/contrib/jemalloc/include/jemalloc/internal/arena_inlines_b.h	Fri May 11 00:32:31 2018	(r333477)
@@ -8,13 +8,6 @@
 #include "jemalloc/internal/sz.h"
 #include "jemalloc/internal/ticker.h"
 
-static inline szind_t
-arena_bin_index(arena_t *arena, arena_bin_t *bin) {
-	szind_t binind = (szind_t)(bin - arena->bins);
-	assert(binind < NBINS);
-	return binind;
-}
-
 JEMALLOC_ALWAYS_INLINE prof_tctx_t *
 arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) {
 	cassert(config_prof);
@@ -35,7 +28,7 @@ arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, all
 }
 
 JEMALLOC_ALWAYS_INLINE void
-arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
+arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, UNUSED size_t usize,
     alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) {
 	cassert(config_prof);
 	assert(ptr != NULL);
@@ -54,7 +47,7 @@ arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, siz
 }
 
 static inline void
-arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) {
+arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, UNUSED prof_tctx_t *tctx) {
 	cassert(config_prof);
 	assert(ptr != NULL);
 

Added: head/contrib/jemalloc/include/jemalloc/internal/arena_stats.h
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/contrib/jemalloc/include/jemalloc/internal/arena_stats.h	Fri May 11 00:32:31 2018	(r333477)
@@ -0,0 +1,237 @@
+#ifndef JEMALLOC_INTERNAL_ARENA_STATS_H
+#define JEMALLOC_INTERNAL_ARENA_STATS_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mutex_prof.h"
+#include "jemalloc/internal/size_classes.h"
+
+/*
+ * In those architectures that support 64-bit atomics, we use atomic updates for
+ * our 64-bit values.  Otherwise, we use a plain uint64_t and synchronize
+ * externally.
+ */
+#ifdef JEMALLOC_ATOMIC_U64
+typedef atomic_u64_t arena_stats_u64_t;
+#else
+/* Must hold the arena stats mutex while reading atomically. */
+typedef uint64_t arena_stats_u64_t;
+#endif
+
+typedef struct arena_stats_large_s arena_stats_large_t;
+struct arena_stats_large_s {
+	/*
+	 * Total number of allocation/deallocation requests served directly by
+	 * the arena.
+	 */
+	arena_stats_u64_t	nmalloc;
+	arena_stats_u64_t	ndalloc;
+
+	/*
+	 * Number of allocation requests that correspond to this size class.
+	 * This includes requests served by tcache, though tcache only
+	 * periodically merges into this counter.
+	 */
+	arena_stats_u64_t	nrequests; /* Partially derived. */
+
+	/* Current number of allocations of this size class. */
+	size_t		curlextents; /* Derived. */
+};
+
+typedef struct arena_stats_decay_s arena_stats_decay_t;
+struct arena_stats_decay_s {
+	/* Total number of purge sweeps. */
+	arena_stats_u64_t	npurge;
+	/* Total number of madvise calls made. */
+	arena_stats_u64_t	nmadvise;
+	/* Total number of pages purged. */
+	arena_stats_u64_t	purged;
+};
+
+/*
+ * Arena stats.  Note that fields marked "derived" are not directly maintained
+ * within the arena code; rather their values are derived during stats merge
+ * requests.
+ */
+typedef struct arena_stats_s arena_stats_t;
+struct arena_stats_s {
+#ifndef JEMALLOC_ATOMIC_U64
+	malloc_mutex_t		mtx;
+#endif
+
+	/* Number of bytes currently mapped, excluding retained memory. */
+	atomic_zu_t		mapped; /* Partially derived. */
+
+	/*
+	 * Number of unused virtual memory bytes currently retained.  Retained
+	 * bytes are technically mapped (though always decommitted or purged),
+	 * but they are excluded from the mapped statistic (above).
+	 */
+	atomic_zu_t		retained; /* Derived. */
+
+	arena_stats_decay_t	decay_dirty;
+	arena_stats_decay_t	decay_muzzy;
+
+	atomic_zu_t		base; /* Derived. */
+	atomic_zu_t		internal;
+	atomic_zu_t		resident; /* Derived. */
+	atomic_zu_t		metadata_thp;
+
+	atomic_zu_t		allocated_large; /* Derived. */
+	arena_stats_u64_t	nmalloc_large; /* Derived. */
+	arena_stats_u64_t	ndalloc_large; /* Derived. */
+	arena_stats_u64_t	nrequests_large; /* Derived. */
+
+	/* Number of bytes cached in tcache associated with this arena. */
+	atomic_zu_t		tcache_bytes; /* Derived. */
+
+	mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes];
+
+	/* One element for each large size class. */
+	arena_stats_large_t	lstats[NSIZES - NBINS];
+
+	/* Arena uptime. */
+	nstime_t		uptime;
+};
+
+static inline bool
+arena_stats_init(UNUSED tsdn_t *tsdn, arena_stats_t *arena_stats) {
+	if (config_debug) {
+		for (size_t i = 0; i < sizeof(arena_stats_t); i++) {
+			assert(((char *)arena_stats)[i] == 0);
+		}
+	}
+#ifndef JEMALLOC_ATOMIC_U64
+	if (malloc_mutex_init(&arena_stats->mtx, "arena_stats",
+	    WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) {
+		return true;
+	}
+#endif
+	/* Memory is zeroed, so there is no need to clear stats. */
+	return false;
+}
+
+static inline void
+arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) {
+#ifndef JEMALLOC_ATOMIC_U64
+	malloc_mutex_lock(tsdn, &arena_stats->mtx);
+#endif
+}
+
+static inline void
+arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) {
+#ifndef JEMALLOC_ATOMIC_U64
+	malloc_mutex_unlock(tsdn, &arena_stats->mtx);
+#endif
+}
+
+static inline uint64_t
+arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
+    arena_stats_u64_t *p) {
+#ifdef JEMALLOC_ATOMIC_U64
+	return atomic_load_u64(p, ATOMIC_RELAXED);
+#else
+	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+	return *p;
+#endif
+}
+
+static inline void
+arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
+    arena_stats_u64_t *p, uint64_t x) {
+#ifdef JEMALLOC_ATOMIC_U64
+	atomic_fetch_add_u64(p, x, ATOMIC_RELAXED);
+#else
+	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+	*p += x;
+#endif
+}
+
+UNUSED static inline void
+arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
+    arena_stats_u64_t *p, uint64_t x) {
+#ifdef JEMALLOC_ATOMIC_U64
+	UNUSED uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED);
+	assert(r - x <= r);
+#else
+	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+	*p -= x;
+	assert(*p + x >= *p);
+#endif
+}
+
+/*
+ * Non-atomically sets *dst += src.  *dst needs external synchronization.
+ * This lets us avoid the cost of a fetch_add when it's unnecessary (note that
+ * the types here are atomic).
+ */
+static inline void
+arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) {
+#ifdef JEMALLOC_ATOMIC_U64
+	uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED);
+	atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED);
+#else
+	*dst += src;
+#endif
+}
+
+static inline size_t
+arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) {
+#ifdef JEMALLOC_ATOMIC_U64
+	return atomic_load_zu(p, ATOMIC_RELAXED);
+#else
+	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+	return atomic_load_zu(p, ATOMIC_RELAXED);
+#endif
+}
+
+static inline void
+arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p,
+    size_t x) {
+#ifdef JEMALLOC_ATOMIC_U64
+	atomic_fetch_add_zu(p, x, ATOMIC_RELAXED);
+#else
+	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+	size_t cur = atomic_load_zu(p, ATOMIC_RELAXED);
+	atomic_store_zu(p, cur + x, ATOMIC_RELAXED);
+#endif
+}
+
+static inline void
+arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p,
+    size_t x) {
+#ifdef JEMALLOC_ATOMIC_U64
+	UNUSED size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED);
+	assert(r - x <= r);
+#else
+	malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
+	size_t cur = atomic_load_zu(p, ATOMIC_RELAXED);
+	atomic_store_zu(p, cur - x, ATOMIC_RELAXED);
+#endif
+}
+
+/* Like the _u64 variant, needs an externally synchronized *dst. */
+static inline void
+arena_stats_accum_zu(atomic_zu_t *dst, size_t src) {
+	size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED);
+	atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED);
+}
+
+static inline void
+arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
+    szind_t szind, uint64_t nrequests) {
+	arena_stats_lock(tsdn, arena_stats);
+	arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind -
+	    NBINS].nrequests, nrequests);
+	arena_stats_unlock(tsdn, arena_stats);
+}
+
+static inline void
+arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) {
+	arena_stats_lock(tsdn, arena_stats);
+	arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size);
+	arena_stats_unlock(tsdn, arena_stats);
+}
+
+
+#endif /* JEMALLOC_INTERNAL_ARENA_STATS_H */
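A sketch of how a hypothetical caller would consume the counters above.  On
builds without 64-bit atomics, arena_stats_lock() acquires arena_stats->mtx
(whose ownership arena_stats_read_u64() asserts); on builds with them, the
lock and unlock calls compile to nothing and the read is a relaxed atomic
load:

/* Hypothetical helper; not part of jemalloc. */
static uint64_t
read_nmalloc_large(tsdn_t *tsdn, arena_stats_t *stats) {
	uint64_t v;

	arena_stats_lock(tsdn, stats);
	v = arena_stats_read_u64(tsdn, stats, &stats->nmalloc_large);
	arena_stats_unlock(tsdn, stats);
	return v;
}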

Modified: head/contrib/jemalloc/include/jemalloc/internal/arena_structs_b.h
==============================================================================
--- head/contrib/jemalloc/include/jemalloc/internal/arena_structs_b.h	Fri May 11 00:19:49 2018	(r333476)
+++ head/contrib/jemalloc/include/jemalloc/internal/arena_structs_b.h	Fri May 11 00:32:31 2018	(r333477)
@@ -1,7 +1,9 @@
 #ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H
 #define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H
 
+#include "jemalloc/internal/arena_stats.h"
 #include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/bin.h"
 #include "jemalloc/internal/bitmap.h"
 #include "jemalloc/internal/extent_dss.h"
 #include "jemalloc/internal/jemalloc_internal_types.h"
@@ -10,45 +12,8 @@
 #include "jemalloc/internal/ql.h"
 #include "jemalloc/internal/size_classes.h"
 #include "jemalloc/internal/smoothstep.h"
-#include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ticker.h"
 
-/*
- * Read-only information associated with each element of arena_t's bins array
- * is stored separately, partly to reduce memory usage (only one copy, rather
- * than one per arena), but mainly to avoid false cacheline sharing.
- *
- * Each slab has the following layout:
- *
- *   /--------------------\
- *   | region 0           |
- *   |--------------------|
- *   | region 1           |
- *   |--------------------|
- *   | ...                |
- *   | ...                |
- *   | ...                |
- *   |--------------------|
- *   | region nregs-1     |
- *   \--------------------/
- */
-struct arena_bin_info_s {
-	/* Size of regions in a slab for this bin's size class. */
-	size_t			reg_size;
-
-	/* Total size of a slab for this bin's size class. */
-	size_t			slab_size;
-
-	/* Total number of regions in a slab for this bin's size class. */
-	uint32_t		nregs;
-
-	/*
-	 * Metadata used to manipulate bitmaps for slabs associated with this
-	 * bin.
-	 */
-	bitmap_info_t		bitmap_info;
-};
-
 struct arena_decay_s {
 	/* Synchronizes all non-atomic fields. */
 	malloc_mutex_t		mtx;
@@ -104,37 +69,11 @@ struct arena_decay_s {
 	 * arena and ctl code.
 	 *
 	 * Synchronization: Same as associated arena's stats field. */
-	decay_stats_t		*stats;
+	arena_stats_decay_t	*stats;
 	/* Peak number of pages in associated extents.  Used for debug only. */
 	uint64_t		ceil_npages;
 };
 
-struct arena_bin_s {
-	/* All operations on arena_bin_t fields require lock ownership. */
-	malloc_mutex_t		lock;
-
-	/*
-	 * Current slab being used to service allocations of this bin's size
-	 * class.  slabcur is independent of slabs_{nonfull,full}; whenever
-	 * slabcur is reassigned, the previous slab must be deallocated or
-	 * inserted into slabs_{nonfull,full}.
-	 */
-	extent_t		*slabcur;
-
-	/*
-	 * Heap of non-full slabs.  This heap is used to assure that new
-	 * allocations come from the non-full slab that is oldest/lowest in
-	 * memory.
-	 */
-	extent_heap_t		slabs_nonfull;
-
-	/* List used to track full slabs. */
-	extent_list_t		slabs_full;
-
-	/* Bin statistics. */
-	malloc_bin_stats_t	stats;
-};
-
 struct arena_s {
 	/*
 	 * Number of threads currently assigned to this arena.  Each thread has
@@ -162,14 +101,15 @@ struct arena_s {
 	arena_stats_t		stats;
 
 	/*
-	 * List of tcaches for extant threads associated with this arena.
-	 * Stats from these are merged incrementally, and at exit if
-	 * opt_stats_print is enabled.
+	 * Lists of tcaches and cache_bin_array_descriptors for extant threads
+	 * associated with this arena.  Stats from these are merged
+	 * incrementally, and at exit if opt_stats_print is enabled.
 	 *
 	 * Synchronization: tcache_ql_mtx.
 	 */
-	ql_head(tcache_t)	tcache_ql;
-	malloc_mutex_t		tcache_ql_mtx;
+	ql_head(tcache_t)			tcache_ql;
+	ql_head(cache_bin_array_descriptor_t)	cache_bin_array_descriptor_ql;
+	malloc_mutex_t				tcache_ql_mtx;
 
 	/* Synchronization: internal. */
 	prof_accum_t		prof_accum;
@@ -239,9 +179,14 @@ struct arena_s {
 	 * be effective even if multiple arenas' extent allocation requests are
 	 * highly interleaved.
 	 *
+	 * retain_grow_limit is the max allowed size ind to expand (unless the
+	 * required size is greater).  Default is no limit, and controlled
+	 * through mallctl only.
+	 *
 	 * Synchronization: extent_grow_mtx
 	 */
 	pszind_t		extent_grow_next;
+	pszind_t		retain_grow_limit;
 	malloc_mutex_t		extent_grow_mtx;
 
 	/*
@@ -258,7 +203,7 @@ struct arena_s {
 	 *
 	 * Synchronization: internal.
 	 */
-	arena_bin_t		bins[NBINS];
+	bin_t			bins[NBINS];
 
 	/*
 	 * Base allocator, from which arena metadata are allocated.

Modified: head/contrib/jemalloc/include/jemalloc/internal/arena_types.h
==============================================================================
--- head/contrib/jemalloc/include/jemalloc/internal/arena_types.h	Fri May 11 00:19:49 2018	(r333476)
+++ head/contrib/jemalloc/include/jemalloc/internal/arena_types.h	Fri May 11 00:32:31 2018	(r333477)
@@ -12,9 +12,7 @@

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


