Date:      Mon, 29 Feb 2016 19:10:33 +0000 (UTC)
From:      Jason Evans <jasone@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r296221 - in head: contrib/jemalloc contrib/jemalloc/doc contrib/jemalloc/include/jemalloc contrib/jemalloc/include/jemalloc/internal contrib/jemalloc/src include lib/libc/stdlib/jemalloc
Message-ID:  <201602291910.u1TJAX7t057795@repo.freebsd.org>

Author: jasone
Date: Mon Feb 29 19:10:32 2016
New Revision: 296221
URL: https://svnweb.freebsd.org/changeset/base/296221

Log:
  Update jemalloc to 4.1.0.
  
  Add missing Symbol.map entry for __aligned_alloc.
  
  Add weak-->strong symbol bindings for
  {malloc_stats_print,mallctl,mallctlnametomib,mallctlbymib} -->
  {__malloc_stats_print,__mallctl,__mallctlnametomib,__mallctlbymib}.  These
  bindings complete the set necessary to allow applications to replace all
  malloc-related symbols.
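  
  With these bindings in place, a program can interpose on any public
  allocator entry point: libc defines the public names only as weak
  references, so an application-supplied definition takes precedence and
  can still reach the original implementation through the
  double-underscore (strong) alias.  A minimal sketch (the wrapper body
  is illustrative only):
  
	#include <stddef.h>
	#include <unistd.h>
  
	void	*__malloc(size_t size);	/* strong symbol exported by libc */
  
	void *
	malloc(size_t size)		/* overrides libc's weak malloc */
	{
		static const char msg[] = "malloc intercepted\n";
  
		/* write(2), since stdio may itself call malloc(). */
		(void)write(STDERR_FILENO, msg, sizeof(msg) - 1);
		return (__malloc(size));
	}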

Added:
  head/contrib/jemalloc/include/jemalloc/internal/assert.h   (contents, props changed)
  head/contrib/jemalloc/include/jemalloc/internal/nstime.h   (contents, props changed)
  head/contrib/jemalloc/include/jemalloc/internal/smoothstep.h   (contents, props changed)
  head/contrib/jemalloc/include/jemalloc/internal/ticker.h   (contents, props changed)
  head/contrib/jemalloc/src/nstime.c   (contents, props changed)
  head/contrib/jemalloc/src/prng.c   (contents, props changed)
  head/contrib/jemalloc/src/ticker.c   (contents, props changed)
Modified:
  head/contrib/jemalloc/COPYING
  head/contrib/jemalloc/ChangeLog
  head/contrib/jemalloc/FREEBSD-Xlist
  head/contrib/jemalloc/FREEBSD-diffs
  head/contrib/jemalloc/VERSION
  head/contrib/jemalloc/doc/jemalloc.3
  head/contrib/jemalloc/include/jemalloc/internal/arena.h
  head/contrib/jemalloc/include/jemalloc/internal/atomic.h
  head/contrib/jemalloc/include/jemalloc/internal/bitmap.h
  head/contrib/jemalloc/include/jemalloc/internal/chunk_mmap.h
  head/contrib/jemalloc/include/jemalloc/internal/ckh.h
  head/contrib/jemalloc/include/jemalloc/internal/ctl.h
  head/contrib/jemalloc/include/jemalloc/internal/hash.h
  head/contrib/jemalloc/include/jemalloc/internal/huge.h
  head/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal.h
  head/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h
  head/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h
  head/contrib/jemalloc/include/jemalloc/internal/private_namespace.h
  head/contrib/jemalloc/include/jemalloc/internal/prng.h
  head/contrib/jemalloc/include/jemalloc/internal/prof.h
  head/contrib/jemalloc/include/jemalloc/internal/rb.h
  head/contrib/jemalloc/include/jemalloc/internal/size_classes.h
  head/contrib/jemalloc/include/jemalloc/internal/stats.h
  head/contrib/jemalloc/include/jemalloc/internal/tcache.h
  head/contrib/jemalloc/include/jemalloc/internal/tsd.h
  head/contrib/jemalloc/include/jemalloc/internal/util.h
  head/contrib/jemalloc/include/jemalloc/jemalloc.h
  head/contrib/jemalloc/include/jemalloc/jemalloc_FreeBSD.h
  head/contrib/jemalloc/src/arena.c
  head/contrib/jemalloc/src/bitmap.c
  head/contrib/jemalloc/src/chunk.c
  head/contrib/jemalloc/src/chunk_mmap.c
  head/contrib/jemalloc/src/ckh.c
  head/contrib/jemalloc/src/ctl.c
  head/contrib/jemalloc/src/extent.c
  head/contrib/jemalloc/src/huge.c
  head/contrib/jemalloc/src/jemalloc.c
  head/contrib/jemalloc/src/prof.c
  head/contrib/jemalloc/src/quarantine.c
  head/contrib/jemalloc/src/stats.c
  head/contrib/jemalloc/src/tcache.c
  head/contrib/jemalloc/src/tsd.c
  head/contrib/jemalloc/src/util.c
  head/include/malloc_np.h
  head/lib/libc/stdlib/jemalloc/Makefile.inc
  head/lib/libc/stdlib/jemalloc/Symbol.map

Modified: head/contrib/jemalloc/COPYING
==============================================================================
--- head/contrib/jemalloc/COPYING	Mon Feb 29 18:58:26 2016	(r296220)
+++ head/contrib/jemalloc/COPYING	Mon Feb 29 19:10:32 2016	(r296221)
@@ -1,10 +1,10 @@
 Unless otherwise specified, files in the jemalloc source distribution are
 subject to the following license:
 --------------------------------------------------------------------------------
-Copyright (C) 2002-2015 Jason Evans <jasone@canonware.com>.
+Copyright (C) 2002-2016 Jason Evans <jasone@canonware.com>.
 All rights reserved.
 Copyright (C) 2007-2012 Mozilla Foundation.  All rights reserved.
-Copyright (C) 2009-2015 Facebook, Inc.  All rights reserved.
+Copyright (C) 2009-2016 Facebook, Inc.  All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

Modified: head/contrib/jemalloc/ChangeLog
==============================================================================
--- head/contrib/jemalloc/ChangeLog	Mon Feb 29 18:58:26 2016	(r296220)
+++ head/contrib/jemalloc/ChangeLog	Mon Feb 29 19:10:32 2016	(r296221)
@@ -4,6 +4,79 @@ brevity.  Much more detail can be found 
 
     https://github.com/jemalloc/jemalloc
 
+* 4.1.0 (February 28, 2016)
+
+  This release is primarily about optimizations, but it also incorporates a lot
+  of portability-motivated refactoring and enhancements.  Many people worked on
+  this release, to an extent that even with the omission here of minor changes
+  (see git revision history), and of the people who reported and diagnosed
+  issues, so much of the work was contributed that starting with this release,
+  changes are annotated with author credits to help reflect the collaborative
+  effort involved.
+
+  New features:
+  - Implement decay-based unused dirty page purging, a major optimization with
+    mallctl API impact.  This is an alternative to the existing ratio-based
+    unused dirty page purging, and is intended to eventually become the sole
+    purging mechanism.  New mallctls:
+    + opt.purge
+    + opt.decay_time
+    + arena.<i>.decay
+    + arena.<i>.decay_time
+    + arenas.decay_time
+    + stats.arenas.<i>.decay_time
+    (@jasone, @cevans87)
+  - Add --with-malloc-conf, which makes it possible to embed a default
+    options string during configuration.  This was motivated by the desire to
+    specify --with-malloc-conf=purge:decay , since the default must remain
+    purge:ratio until the 5.0.0 release.  (@jasone)
+  - Add MS Visual Studio 2015 support.  (@rustyx, @yuslepukhin)
+  - Make *allocx() size class overflow behavior defined.  The maximum
+    size class is now less than PTRDIFF_MAX to protect applications against
+    numerical overflow, and all allocation functions are guaranteed to indicate
+    errors rather than potentially crashing if the request size exceeds the
+    maximum size class.  (@jasone)
+  - jeprof:
+    + Add raw heap profile support.  (@jasone)
+    + Add --retain and --exclude for backtrace symbol filtering.  (@jasone)
+
+  Optimizations:
+  - Optimize the fast path to combine various bootstrapping and configuration
+    checks and execute more streamlined code in the common case.  (@interwq)
+  - Use linear scan for small bitmaps (used for small object tracking).  In
+    addition to speeding up bitmap operations on 64-bit systems, this reduces
+    allocator metadata overhead by approximately 0.2%.  (@djwatson)
+  - Separate arena_avail trees, which substantially speeds up run tree
+    operations.  (@djwatson)
+  - Use memoization (boot-time-computed table) for run quantization.  Separate
+    arena_avail trees reduced the importance of this optimization.  (@jasone)
+  - Attempt mmap-based in-place huge reallocation.  This can dramatically speed
+    up incremental huge reallocation.  (@jasone)
+
+  Incompatible changes:
+  - Make opt.narenas unsigned rather than size_t.  (@jasone)
+
+  Bug fixes:
+  - Fix stats.cactive accounting regression.  (@rustyx, @jasone)
+  - Handle unaligned keys in hash().  This caused problems for some ARM systems.
+    (@jasone, Christopher Ferris)
+  - Refactor arenas array.  In addition to fixing a fork-related deadlock, this
+    makes arena lookups faster and simpler.  (@jasone)
+  - Move retained memory allocation out of the default chunk allocation
+    function, to a location that gets executed even if the application installs
+    a custom chunk allocation function.  This resolves a virtual memory leak.
+    (@buchgr)
+  - Fix a potential tsd cleanup leak.  (Christopher Ferris, @jasone)
+  - Fix run quantization.  In practice this bug had no impact unless
+    applications requested memory with alignment exceeding one page.
+    (@jasone, @djwatson)
+  - Fix LinuxThreads-specific bootstrapping deadlock.  (Cosmin Paraschiv)
+  - jeprof:
+    + Don't discard curl options if timeout is not defined.  (@djwatson)
+    + Detect failed profile fetches.  (@djwatson)
+  - Fix stats.arenas.<i>.{dss,lg_dirty_mult,decay_time,pactive,pdirty} for
+    --disable-stats case.  (@jasone)
+
 * 4.0.4 (October 24, 2015)
 
   This bugfix release fixes another xallocx() regression.  No other regressions

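The decay-related mallctls introduced in the 4.1.0 ChangeLog entry above
are driven through the usual mallctl() interface.  A minimal sketch that
reads the purge mode and raises the default decay time (error handling is
abbreviated and the chosen value is illustrative):

	#include <sys/types.h>
	#include <malloc_np.h>
	#include <stdio.h>

	int
	main(void)
	{
		const char *purge;
		size_t sz = sizeof(purge);
		ssize_t decay_time = 30;

		/* Purge mode: "ratio" (default) or "decay". */
		if (mallctl("opt.purge", &purge, &sz, NULL, 0) == 0)
			printf("opt.purge: %s\n", purge);

		/* Default decay time, in seconds, for new arenas. */
		if (mallctl("arenas.decay_time", NULL, NULL, &decay_time,
		    sizeof(decay_time)) != 0)
			fprintf(stderr, "arenas.decay_time unavailable\n");

		return (0);
	}
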
Modified: head/contrib/jemalloc/FREEBSD-Xlist
==============================================================================
--- head/contrib/jemalloc/FREEBSD-Xlist	Mon Feb 29 18:58:26 2016	(r296220)
+++ head/contrib/jemalloc/FREEBSD-Xlist	Mon Feb 29 19:10:32 2016	(r296221)
@@ -8,6 +8,7 @@ README
 autogen.sh
 autom4te.cache/
 bin/
+build-aux/
 config.*
 configure*
 coverage.sh
@@ -26,6 +27,7 @@ include/jemalloc/internal/public_symbols
 include/jemalloc/internal/public_unnamespace.h
 include/jemalloc/internal/public_unnamespace.sh
 include/jemalloc/internal/size_classes.sh
+include/jemalloc/internal/smoothstep.sh
 include/jemalloc/jemalloc.h.in
 include/jemalloc/jemalloc.sh
 include/jemalloc/jemalloc_defs.h
@@ -44,6 +46,7 @@ include/jemalloc/jemalloc_typedefs.h.in
 include/msvc_compat/
 install-sh
 jemalloc.pc*
+msvc/
 src/valgrind.c
 src/zone.c
 test/

Modified: head/contrib/jemalloc/FREEBSD-diffs
==============================================================================
--- head/contrib/jemalloc/FREEBSD-diffs	Mon Feb 29 18:58:26 2016	(r296220)
+++ head/contrib/jemalloc/FREEBSD-diffs	Mon Feb 29 19:10:32 2016	(r296221)
@@ -1,5 +1,5 @@
 diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
-index 26a5e14..2a801b7 100644
+index bc5dbd1..ba182da 100644
 --- a/doc/jemalloc.xml.in
 +++ b/doc/jemalloc.xml.in
 @@ -53,11 +53,23 @@
@@ -27,7 +27,7 @@ index 26a5e14..2a801b7 100644
        <refsect2>
          <title>Standard API</title>
          <funcprototype>
-@@ -2759,4 +2771,18 @@ malloc_conf = "lg_chunk:24";]]></programlisting></para>
+@@ -2905,4 +2917,18 @@ malloc_conf = "lg_chunk:24";]]></programlisting></para>
      <para>The <function>posix_memalign<parameter/></function> function conforms
      to IEEE Std 1003.1-2001 (&ldquo;POSIX.1&rdquo;).</para>
    </refsect1>
@@ -47,7 +47,7 @@ index 26a5e14..2a801b7 100644
 +  </refsect1>
  </refentry>
 diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
-index 654cd08..ad5382d 100644
+index 3f54391..d240256 100644
 --- a/include/jemalloc/internal/jemalloc_internal.h.in
 +++ b/include/jemalloc/internal/jemalloc_internal.h.in
 @@ -8,6 +8,9 @@
@@ -72,11 +72,11 @@ index 654cd08..ad5382d 100644
 -#endif
 -    ;
 +static const bool config_lazy_lock = true;
+ static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF;
  static const bool config_prof =
  #ifdef JEMALLOC_PROF
-     true
 diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h
-index a601d6e..e7094b2 100644
+index 2b8ca5d..42d97f2 100644
 --- a/include/jemalloc/internal/jemalloc_internal_decls.h
 +++ b/include/jemalloc/internal/jemalloc_internal_decls.h
 @@ -1,6 +1,9 @@
@@ -111,10 +111,10 @@ index f051f29..561378f 100644
  
  #endif /* JEMALLOC_H_EXTERNS */
 diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
-index a90021a..34904bf 100644
+index 5880996..6e94e03 100644
 --- a/include/jemalloc/internal/private_symbols.txt
 +++ b/include/jemalloc/internal/private_symbols.txt
-@@ -280,7 +280,6 @@ iralloct_realign
+@@ -296,7 +296,6 @@ iralloct_realign
  isalloc
  isdalloct
  isqalloc
@@ -124,10 +124,10 @@ index a90021a..34904bf 100644
  jemalloc_postfork_child
 diff --git a/include/jemalloc/jemalloc_FreeBSD.h b/include/jemalloc/jemalloc_FreeBSD.h
 new file mode 100644
-index 0000000..737542e
+index 0000000..433dab5
 --- /dev/null
 +++ b/include/jemalloc/jemalloc_FreeBSD.h
-@@ -0,0 +1,142 @@
+@@ -0,0 +1,160 @@
 +/*
 + * Override settings that were generated in jemalloc_defs.h as necessary.
 + */
@@ -182,6 +182,9 @@ index 0000000..737542e
 +#elif defined(__powerpc__)
 +#  define LG_SIZEOF_PTR		2
 +#endif
++#ifdef __riscv__
++#  define LG_SIZEOF_PTR		3
++#endif
 +
 +#ifndef JEMALLOC_TLS_MODEL
 +#  define JEMALLOC_TLS_MODEL	/* Default. */
@@ -205,17 +208,22 @@ index 0000000..737542e
 +/* Mangle. */
 +#undef je_malloc
 +#undef je_calloc
-+#undef je_realloc
-+#undef je_free
 +#undef je_posix_memalign
 +#undef je_aligned_alloc
++#undef je_realloc
++#undef je_free
 +#undef je_malloc_usable_size
 +#undef je_mallocx
 +#undef je_rallocx
 +#undef je_xallocx
 +#undef je_sallocx
 +#undef je_dallocx
++#undef je_sdallocx
 +#undef je_nallocx
++#undef je_mallctl
++#undef je_mallctlnametomib
++#undef je_mallctlbymib
++#undef je_malloc_stats_print
 +#undef je_allocm
 +#undef je_rallocm
 +#undef je_sallocm
@@ -223,17 +231,22 @@ index 0000000..737542e
 +#undef je_nallocm
 +#define	je_malloc		__malloc
 +#define	je_calloc		__calloc
-+#define	je_realloc		__realloc
-+#define	je_free			__free
 +#define	je_posix_memalign	__posix_memalign
 +#define	je_aligned_alloc	__aligned_alloc
++#define	je_realloc		__realloc
++#define	je_free			__free
 +#define	je_malloc_usable_size	__malloc_usable_size
 +#define	je_mallocx		__mallocx
 +#define	je_rallocx		__rallocx
 +#define	je_xallocx		__xallocx
 +#define	je_sallocx		__sallocx
 +#define	je_dallocx		__dallocx
++#define	je_sdallocx		__sdallocx
 +#define	je_nallocx		__nallocx
++#define	je_mallctl		__mallctl
++#define	je_mallctlnametomib	__mallctlnametomib
++#define	je_mallctlbymib		__mallctlbymib
++#define	je_malloc_stats_print	__malloc_stats_print
 +#define	je_allocm		__allocm
 +#define	je_rallocm		__rallocm
 +#define	je_sallocm		__sallocm
@@ -253,17 +266,22 @@ index 0000000..737542e
 + */
 +__weak_reference(__malloc, malloc);
 +__weak_reference(__calloc, calloc);
-+__weak_reference(__realloc, realloc);
-+__weak_reference(__free, free);
 +__weak_reference(__posix_memalign, posix_memalign);
 +__weak_reference(__aligned_alloc, aligned_alloc);
++__weak_reference(__realloc, realloc);
++__weak_reference(__free, free);
 +__weak_reference(__malloc_usable_size, malloc_usable_size);
 +__weak_reference(__mallocx, mallocx);
 +__weak_reference(__rallocx, rallocx);
 +__weak_reference(__xallocx, xallocx);
 +__weak_reference(__sallocx, sallocx);
 +__weak_reference(__dallocx, dallocx);
++__weak_reference(__sdallocx, sdallocx);
 +__weak_reference(__nallocx, nallocx);
++__weak_reference(__mallctl, mallctl);
++__weak_reference(__mallctlnametomib, mallctlnametomib);
++__weak_reference(__mallctlbymib, mallctlbymib);
++__weak_reference(__malloc_stats_print, malloc_stats_print);
 +__weak_reference(__allocm, allocm);
 +__weak_reference(__rallocm, rallocm);
 +__weak_reference(__sallocm, sallocm);
@@ -282,7 +300,7 @@ index f943891..47d032c 100755
 +#include "jemalloc_FreeBSD.h"
  EOF
 diff --git a/src/jemalloc.c b/src/jemalloc.c
-index 5a2d324..b6cbb79 100644
+index 0735376..a34b85c 100644
 --- a/src/jemalloc.c
 +++ b/src/jemalloc.c
 @@ -4,6 +4,10 @@
@@ -296,7 +314,7 @@ index 5a2d324..b6cbb79 100644
  /* Runtime configuration options. */
  const char	*je_malloc_conf JEMALLOC_ATTR(weak);
  bool	opt_abort =
-@@ -2490,6 +2494,107 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr)
+@@ -2611,6 +2615,107 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr)
   */
  /******************************************************************************/
  /*
@@ -404,7 +422,7 @@ index 5a2d324..b6cbb79 100644
   * The following functions are used by threading libraries for protection of
   * malloc during fork().
   */
-@@ -2590,4 +2695,11 @@ jemalloc_postfork_child(void)
+@@ -2717,4 +2822,11 @@ jemalloc_postfork_child(void)
  	ctl_postfork_child();
  }
  
@@ -463,10 +481,10 @@ index 2d47af9..934d5aa 100644
 +#endif
 +}
 diff --git a/src/util.c b/src/util.c
-index 4cb0d6c..25b61c2 100644
+index 02673c7..116e981 100644
 --- a/src/util.c
 +++ b/src/util.c
-@@ -58,6 +58,22 @@ wrtmessage(void *cbopaque, const char *s)
+@@ -66,6 +66,22 @@ wrtmessage(void *cbopaque, const char *s)
  
  JEMALLOC_EXPORT void	(*je_malloc_message)(void *, const char *s);
  

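Among the entry points newly added to the FreeBSD mangling and
weak-reference set above is sdallocx(), the sized-deallocation variant of
free() from jemalloc's non-standard API.  Passing the known size back
lets the allocator skip the size lookup on deallocation.  A minimal usage
sketch (assuming malloc_np.h declares mallocx() and sdallocx(), as on
FreeBSD head after this import):

	#include <stddef.h>
	#include <malloc_np.h>
	#include <string.h>

	void
	copy_and_discard(const char *src, size_t len)
	{
		char *buf;

		/* A size of 0 is undefined behavior for mallocx(). */
		if (len == 0)
			return;
		buf = mallocx(len, 0);
		if (buf == NULL)
			return;
		memcpy(buf, src, len);
		sdallocx(buf, len, 0);	/* same size as allocated */
	}
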
Modified: head/contrib/jemalloc/VERSION
==============================================================================
--- head/contrib/jemalloc/VERSION	Mon Feb 29 18:58:26 2016	(r296220)
+++ head/contrib/jemalloc/VERSION	Mon Feb 29 19:10:32 2016	(r296221)
@@ -1 +1 @@
-4.0.4-0-g91010a9e2ebfc84b1ac1ed7fdde3bfed4f65f180
+4.1.0-1-g994da4232621dd1210fcf39bdf0d6454cefda473

Modified: head/contrib/jemalloc/doc/jemalloc.3
==============================================================================
--- head/contrib/jemalloc/doc/jemalloc.3	Mon Feb 29 18:58:26 2016	(r296220)
+++ head/contrib/jemalloc/doc/jemalloc.3	Mon Feb 29 19:10:32 2016	(r296221)
@@ -2,12 +2,12 @@
 .\"     Title: JEMALLOC
 .\"    Author: Jason Evans
 .\" Generator: DocBook XSL Stylesheets v1.76.1 <http://docbook.sf.net/>;
-.\"      Date: 10/24/2015
+.\"      Date: 02/28/2016
 .\"    Manual: User Manual
-.\"    Source: jemalloc 4.0.4-0-g91010a9e2ebfc84b1ac1ed7fdde3bfed4f65f180
+.\"    Source: jemalloc 4.1.0-1-g994da4232621dd1210fcf39bdf0d6454cefda473
 .\"  Language: English
 .\"
-.TH "JEMALLOC" "3" "10/24/2015" "jemalloc 4.0.4-0-g91010a9e2ebf" "User Manual"
+.TH "JEMALLOC" "3" "02/28/2016" "jemalloc 4.1.0-1-g994da4232621" "User Manual"
 .\" -----------------------------------------------------------------
 .\" * Define some portability stuff
 .\" -----------------------------------------------------------------
@@ -31,7 +31,7 @@
 jemalloc \- general purpose memory allocation functions
 .SH "LIBRARY"
 .PP
-This manual describes jemalloc 4\&.0\&.4\-0\-g91010a9e2ebfc84b1ac1ed7fdde3bfed4f65f180\&. More information can be found at the
+This manual describes jemalloc 4\&.1\&.0\-1\-g994da4232621dd1210fcf39bdf0d6454cefda473\&. More information can be found at the
 \m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&.
 .PP
 The following configuration options are enabled in libc\*(Aqs built\-in jemalloc:
@@ -244,7 +244,7 @@ function allocates at least
 bytes of memory, and returns a pointer to the base address of the allocation\&. Behavior is undefined if
 \fIsize\fR
 is
-\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&.
+\fB0\fR\&.
 .PP
 The
 \fBrallocx\fR\fB\fR
@@ -255,7 +255,7 @@ to be at least
 bytes, and returns a pointer to the base address of the resulting allocation, which may or may not have moved from its original location\&. Behavior is undefined if
 \fIsize\fR
 is
-\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&.
+\fB0\fR\&.
 .PP
 The
 \fBxallocx\fR\fB\fR
@@ -301,10 +301,12 @@ function allocates no memory, but it per
 \fBmallocx\fR\fB\fR
 function, and returns the real size of the allocation that would result from the equivalent
 \fBmallocx\fR\fB\fR
-function call\&. Behavior is undefined if
+function call, or
+\fB0\fR
+if the inputs exceed the maximum supported size class and/or alignment\&. Behavior is undefined if
 \fIsize\fR
 is
-\fB0\fR, or if request size overflows due to size class and/or alignment constraints\&.
+\fB0\fR\&.
 .PP
 The
 \fBmallctl\fR\fB\fR
@@ -404,7 +406,8 @@ should not be depended on, since such be
 .PP
 Once, when the first call is made to one of the memory allocation routines, the allocator initializes its internals based in part on various options that can be specified at compile\- or run\-time\&.
 .PP
-The string pointed to by the global variable
+The string specified via
+\fB\-\-with\-malloc\-conf\fR, the string pointed to by the global variable
 \fImalloc_conf\fR, the \(lqname\(rq of the file referenced by the symbolic link named
 /etc/malloc\&.conf, and the value of the environment variable
 \fBMALLOC_CONF\fR, will be interpreted, in that order, from left to right as options\&. Note that
@@ -414,8 +417,10 @@ may be read before
 is entered, so the declaration of
 \fImalloc_conf\fR
 should specify an initializer that contains the final value to be read by jemalloc\&.
+\fB\-\-with\-malloc\-conf\fR
+and
 \fImalloc_conf\fR
-is a compile\-time setting, whereas
+are compile\-time mechanisms, whereas
 /etc/malloc\&.conf
 and
 \fBMALLOC_CONF\fR
@@ -451,11 +456,7 @@ In addition to multiple arenas, unless
 \fB\-\-disable\-tcache\fR
 is specified during configuration, this allocator supports thread\-specific caching for small and large objects, in order to make it possible to completely avoid synchronization for most allocation requests\&. Such caching allows very fast allocation in the common case, but it increases memory usage and fragmentation, since a bounded number of objects can remain allocated in each thread cache\&.
 .PP
-Memory is conceptually broken into equal\-sized chunks, where the chunk size is a power of two that is greater than the page size\&. Chunks are always aligned to multiples of the chunk size\&. This alignment makes it possible to find metadata for user objects very quickly\&.
-.PP
-User objects are broken into three categories according to size: small, large, and huge\&. Small and large objects are managed entirely by arenas; huge objects are additionally aggregated in a single data structure that is shared by all threads\&. Huge objects are typically used by applications infrequently enough that this single data structure is not a scalability issue\&.
-.PP
-Each chunk that is managed by an arena tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one large object)\&. The combination of chunk alignment and chunk page maps makes it possible to determine all metadata regarding small and large allocations in constant time\&.
+Memory is conceptually broken into equal\-sized chunks, where the chunk size is a power of two that is greater than the page size\&. Chunks are always aligned to multiples of the chunk size\&. This alignment makes it possible to find metadata for user objects very quickly\&. User objects are broken into three categories according to size: small, large, and huge\&. Multiple small and large objects can reside within a single chunk, whereas huge objects each have one or more chunks backing them\&. Each chunk that contains small and/or large objects tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one large object)\&. The combination of chunk alignment and chunk page maps makes it possible to determine all metadata regarding small and large allocations in constant time\&.
 .PP
 Small objects are managed in groups by page runs\&. Each run maintains a bitmap to track which regions are in use\&. Allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least
 sizeof(\fBdouble\fR)\&. All other object size classes are multiples of the quantum, spaced such that there are four size classes for each doubling in size, which limits internal fragmentation to approximately 20% for all but the smallest size classes\&. Small size classes are smaller than four times the page size, large size classes are smaller than the chunk size (see the
@@ -703,6 +704,13 @@ was specified during build configuration
 was specified during build configuration\&.
 .RE
 .PP
+"config\&.malloc_conf" (\fBconst char *\fR) r\-
+.RS 4
+Embedded configure\-time\-specified run\-time options string, empty unless
+\fB\-\-with\-malloc\-conf\fR
+was specified during build configuration\&.
+.RE
+.PP
 "config\&.munmap" (\fBbool\fR) r\-
 .RS 4
 \fB\-\-enable\-munmap\fR
@@ -788,11 +796,20 @@ is supported by the operating system; \(
 Virtual memory chunk size (log base 2)\&. If a chunk size outside the supported size range is specified, the size is silently clipped to the minimum/maximum supported size\&. The default chunk size is 2 MiB (2^21)\&.
 .RE
 .PP
-"opt\&.narenas" (\fBsize_t\fR) r\-
+"opt\&.narenas" (\fBunsigned\fR) r\-
 .RS 4
 Maximum number of arenas to use for automatic multiplexing of threads and arenas\&. The default is four times the number of CPUs, or one if there is a single CPU\&.
 .RE
 .PP
+"opt\&.purge" (\fBconst char *\fR) r\-
+.RS 4
+Purge mode is \(lqratio\(rq (default) or \(lqdecay\(rq\&. See
+"opt\&.lg_dirty_mult"
+for details of the ratio mode\&. See
+"opt\&.decay_time"
+for details of the decay mode\&.
+.RE
+.PP
 "opt\&.lg_dirty_mult" (\fBssize_t\fR) r\-
 .RS 4
 Per\-arena minimum ratio (log base 2) of active to dirty pages\&. Some dirty unused pages may be allowed to accumulate, within the limit set by the ratio (or one chunk worth of dirty pages, whichever is greater), before informing the kernel about some of those pages via
@@ -804,6 +821,15 @@ and
 for related dynamic control options\&.
 .RE
 .PP
+"opt\&.decay_time" (\fBssize_t\fR) r\-
+.RS 4
+Approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused\&. The pages are incrementally purged according to a sigmoidal decay curve that starts and ends with zero purge rate\&. A decay time of 0 causes all unused dirty pages to be purged immediately upon creation\&. A decay time of \-1 disables purging\&. The default decay time is 10 seconds\&. See
+"arenas\&.decay_time"
+and
+"arena\&.<i>\&.decay_time"
+for related dynamic control options\&.
+.RE
+.PP
 "opt\&.stats_print" (\fBbool\fR) r\-
 .RS 4
 Enable/disable statistics printing at exit\&. If enabled, the
@@ -914,7 +940,9 @@ option for final profile dumping\&. Prof
 command, which is based on the
 \fBpprof\fR
 that is developed as part of the
-\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[3]\d\s+2\&.
+\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[3]\d\s+2\&. See
+HEAP PROFILE FORMAT
+for heap profile format documentation\&.
 .RE
 .PP
 "opt\&.prof_prefix" (\fBconst char *\fR) r\- [\fB\-\-enable\-prof\fR]
@@ -1063,7 +1091,7 @@ macro to explicitly use the specified ca
 "tcache\&.flush" (\fBunsigned\fR) \-w [\fB\-\-enable\-tcache\fR]
 .RS 4
 Flush the specified thread\-specific cache (tcache)\&. The same considerations apply to this interface as to
-"thread\&.tcache\&.flush", except that the tcache will never be automatically be discarded\&.
+"thread\&.tcache\&.flush", except that the tcache will never be automatically discarded\&.
 .RE
 .PP
 "tcache\&.destroy" (\fBunsigned\fR) \-w [\fB\-\-enable\-tcache\fR]
@@ -1073,10 +1101,18 @@ Flush the specified thread\-specific cac
 .PP
 "arena\&.<i>\&.purge" (\fBvoid\fR) \-\-
 .RS 4
-Purge unused dirty pages for arena <i>, or for all arenas if <i> equals
+Purge all unused dirty pages for arena <i>, or for all arenas if <i> equals
 "arenas\&.narenas"\&.
 .RE
 .PP
+"arena\&.<i>\&.decay" (\fBvoid\fR) \-\-
+.RS 4
+Trigger decay\-based purging of unused dirty pages for arena <i>, or for all arenas if <i> equals
+"arenas\&.narenas"\&. The proportion of unused dirty pages to be purged depends on the current time; see
+"opt\&.decay_time"
+for details\&.
+.RE
+.PP
 "arena\&.<i>\&.dss" (\fBconst char *\fR) rw
 .RS 4
 Set the precedence of dss allocation as related to mmap allocation for arena <i>, or for all arenas if <i> equals
@@ -1092,6 +1128,13 @@ Current per\-arena minimum ratio (log ba
 for additional information\&.
 .RE
 .PP
+"arena\&.<i>\&.decay_time" (\fBssize_t\fR) rw
+.RS 4
+Current per\-arena approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused\&. Each time this interface is set, all currently unused dirty pages are considered to have fully decayed, which causes immediate purging of all unused dirty pages unless the decay time is set to \-1 (i\&.e\&. purging disabled)\&. See
+"opt\&.decay_time"
+for additional information\&.
+.RE
+.PP
 "arena\&.<i>\&.chunk_hooks" (\fBchunk_hooks_t\fR) rw
 .RS 4
 Get or set the chunk management hook functions for arena <i>\&. The functions must be capable of operating on all extant chunks associated with arena <i>, usually by passing unknown chunks to the replaced functions\&. In practice, it is feasible to control allocation for arenas created via
@@ -1332,6 +1375,15 @@ during arena creation\&. See
 for additional information\&.
 .RE
 .PP
+"arenas\&.decay_time" (\fBssize_t\fR) rw
+.RS 4
+Current default per\-arena approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused, used to initialize
+"arena\&.<i>\&.decay_time"
+during arena creation\&. See
+"opt\&.decay_time"
+for additional information\&.
+.RE
+.PP
 "arenas\&.quantum" (\fBsize_t\fR) r\-
 .RS 4
 Quantum size\&.
@@ -1511,6 +1563,13 @@ Minimum ratio (log base 2) of active to 
 for details\&.
 .RE
 .PP
+"stats\&.arenas\&.<i>\&.decay_time" (\fBssize_t\fR) r\-
+.RS 4
+Approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused\&. See
+"opt\&.decay_time"
+for details\&.
+.RE
+.PP
 "stats\&.arenas\&.<i>\&.nthreads" (\fBunsigned\fR) r\-
 .RS 4
 Number of threads currently assigned to arena\&.
@@ -1712,6 +1771,71 @@ Cumulative number of allocation requests
 .RS 4
 Current number of huge allocations for this size class\&.
 .RE
+.SH "HEAP PROFILE FORMAT"
+.PP
+Although the heap profiling functionality was originally designed to be compatible with the
+\fBpprof\fR
+command that is developed as part of the
+\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[3]\d\s+2, the addition of per thread heap profiling functionality required a different heap profile format\&. The
+\fBjeprof\fR
+command is derived from
+\fBpprof\fR, with enhancements to support the heap profile format described here\&.
+.PP
+In the following hypothetical heap profile,
+\fB[\&.\&.\&.]\fR
+indicates elision for the sake of compactness\&.
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+heap_v2/524288
+  t*: 28106: 56637512 [0: 0]
+  [\&.\&.\&.]
+  t3: 352: 16777344 [0: 0]
+  [\&.\&.\&.]
+  t99: 17754: 29341640 [0: 0]
+  [\&.\&.\&.]
+@ 0x5f86da8 0x5f5a1dc [\&.\&.\&.] 0x29e4d4e 0xa200316 0xabb2988 [\&.\&.\&.]
+  t*: 13: 6688 [0: 0]
+  t3: 12: 6496 [0: 0]
+  t99: 1: 192 [0: 0]
+[\&.\&.\&.]
+
+MAPPED_LIBRARIES:
+[\&.\&.\&.]
+.fi
+.if n \{\
+.RE
+.\}
+.sp
+The following matches the above heap profile, but most tokens are replaced with
+\fB<description>\fR
+to indicate descriptions of the corresponding fields\&.
+.sp
+.if n \{\
+.RS 4
+.\}
+.nf
+<heap_profile_format_version>/<mean_sample_interval>
+  <aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+  [\&.\&.\&.]
+  <thread_3_aggregate>: <curobjs>: <curbytes>[<cumobjs>: <cumbytes>]
+  [\&.\&.\&.]
+  <thread_99_aggregate>: <curobjs>: <curbytes>[<cumobjs>: <cumbytes>]
+  [\&.\&.\&.]
+@ <top_frame> <frame> [\&.\&.\&.] <frame> <frame> <frame> [\&.\&.\&.]
+  <backtrace_aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+  <backtrace_thread_3>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+  <backtrace_thread_99>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
+[\&.\&.\&.]
+
+MAPPED_LIBRARIES:
+</proc/<pid>/maps>
+.fi
+.if n \{\
+.RE
+.\}
 .SH "DEBUGGING MALLOC PROBLEMS"
 .PP
 When debugging, it is a good idea to configure/build jemalloc with the

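The manual page changes above distinguish four option channels:
--with-malloc-conf and the malloc_conf global variable (both compile
time), plus /etc/malloc.conf and MALLOC_CONF (both run time).  A minimal
sketch of the global-variable channel, selecting the new decay purge mode
(the option string itself is illustrative):

	#include <stdlib.h>

	/*
	 * Read by jemalloc during bootstrap, possibly before main() is
	 * entered, so it must be a constant initializer rather than a
	 * value assigned at run time.
	 */
	const char *malloc_conf = "purge:decay,decay_time:30";

	int
	main(void)
	{
		void *p = malloc(1);

		free(p);
		return (0);
	}
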
Modified: head/contrib/jemalloc/include/jemalloc/internal/arena.h
==============================================================================
--- head/contrib/jemalloc/include/jemalloc/internal/arena.h	Mon Feb 29 18:58:26 2016	(r296220)
+++ head/contrib/jemalloc/include/jemalloc/internal/arena.h	Mon Feb 29 19:10:32 2016	(r296221)
@@ -23,6 +23,18 @@
  */
 #define	LG_DIRTY_MULT_DEFAULT	3
 
+typedef enum {
+	purge_mode_ratio = 0,
+	purge_mode_decay = 1,
+
+	purge_mode_limit = 2
+} purge_mode_t;
+#define	PURGE_DEFAULT		purge_mode_ratio
+/* Default decay time in seconds. */
+#define	DECAY_TIME_DEFAULT	10
+/* Number of event ticks between time checks. */
+#define	DECAY_NTICKS_PER_UPDATE	1000
+
 typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t;
 typedef struct arena_run_s arena_run_t;
 typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t;
@@ -31,6 +43,7 @@ typedef struct arena_chunk_s arena_chunk
 typedef struct arena_bin_info_s arena_bin_info_t;
 typedef struct arena_bin_s arena_bin_t;
 typedef struct arena_s arena_t;
+typedef struct arena_tdata_s arena_tdata_t;
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
@@ -154,15 +167,14 @@ struct arena_chunk_map_misc_s {
 
 		/* Profile counters, used for large object runs. */
 		union {
-			void				*prof_tctx_pun;
-			prof_tctx_t			*prof_tctx;
+			void			*prof_tctx_pun;
+			prof_tctx_t		*prof_tctx;
 		};
 
 		/* Small region run metadata. */
 		arena_run_t			run;
 	};
 };
-typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t;
 typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t;
 #endif /* JEMALLOC_ARENA_STRUCTS_A */
 
@@ -220,28 +232,28 @@ struct arena_chunk_s {
  */
 struct arena_bin_info_s {
 	/* Size of regions in a run for this bin's size class. */
-	size_t		reg_size;
+	size_t			reg_size;
 
 	/* Redzone size. */
-	size_t		redzone_size;
+	size_t			redzone_size;
 
 	/* Interval between regions (reg_size + (redzone_size << 1)). */
-	size_t		reg_interval;
+	size_t			reg_interval;
 
 	/* Total size of a run for this bin's size class. */
-	size_t		run_size;
+	size_t			run_size;
 
 	/* Total number of regions in a run for this bin's size class. */
-	uint32_t	nregs;
+	uint32_t		nregs;
 
 	/*
 	 * Metadata used to manipulate bitmaps for runs associated with this
 	 * bin.
 	 */
-	bitmap_info_t	bitmap_info;
+	bitmap_info_t		bitmap_info;
 
 	/* Offset of first region in a run for this bin's size class. */
-	uint32_t	reg0_offset;
+	uint32_t		reg0_offset;
 };
 
 struct arena_bin_s {
@@ -251,13 +263,13 @@ struct arena_bin_s {
 	 * which may be acquired while holding one or more bin locks, but not
 	 * vise versa.
 	 */
-	malloc_mutex_t	lock;
+	malloc_mutex_t		lock;
 
 	/*
 	 * Current run being used to service allocations of this bin's size
 	 * class.
 	 */
-	arena_run_t	*runcur;
+	arena_run_t		*runcur;
 
 	/*
 	 * Tree of non-full runs.  This tree is used when looking for an
@@ -266,10 +278,10 @@ struct arena_bin_s {
 	 * objects packed well, and it can also help reduce the number of
 	 * almost-empty chunks.
 	 */
-	arena_run_tree_t runs;
+	arena_run_tree_t	runs;
 
 	/* Bin statistics. */
-	malloc_bin_stats_t stats;
+	malloc_bin_stats_t	stats;
 };
 
 struct arena_s {
@@ -278,14 +290,14 @@ struct arena_s {
 
 	/*
 	 * Number of threads currently assigned to this arena.  This field is
-	 * protected by arenas_lock.
+	 * synchronized via atomic operations.
 	 */
 	unsigned		nthreads;
 
 	/*
 	 * There are three classes of arena operations from a locking
 	 * perspective:
-	 * 1) Thread assignment (modifies nthreads) is protected by arenas_lock.
+	 * 1) Thread assignment (modifies nthreads) is synchronized via atomics.
 	 * 2) Bin-related operations are protected by bin locks.
 	 * 3) Chunk- and run-related operations are protected by this mutex.
 	 */
@@ -324,7 +336,7 @@ struct arena_s {
 	/* Minimum ratio (log base 2) of nactive:ndirty. */
 	ssize_t			lg_dirty_mult;
 
-	/* True if a thread is currently executing arena_purge(). */
+	/* True if a thread is currently executing arena_purge_to_limit(). */
 	bool			purging;
 
 	/* Number of pages in active runs and huge regions. */
@@ -339,12 +351,6 @@ struct arena_s {
 	size_t			ndirty;
 
 	/*
-	 * Size/address-ordered tree of this arena's available runs.  The tree
-	 * is used for first-best-fit run allocation.
-	 */
-	arena_avail_tree_t	runs_avail;
-
-	/*
 	 * Unused dirty memory this arena manages.  Dirty memory is conceptually
 	 * tracked as an arbitrarily interleaved LRU of dirty runs and cached
 	 * chunks, but the list linkage is actually semi-duplicated in order to
@@ -375,6 +381,53 @@ struct arena_s {
 	arena_runs_dirty_link_t	runs_dirty;
 	extent_node_t		chunks_cache;
 
+	/*
+	 * Approximate time in seconds from the creation of a set of unused
+	 * dirty pages until an equivalent set of unused dirty pages is purged
+	 * and/or reused.
+	 */
+	ssize_t			decay_time;
+	/* decay_time / SMOOTHSTEP_NSTEPS. */
+	nstime_t		decay_interval;
+	/*
+	 * Time at which the current decay interval logically started.  We do
+	 * not actually advance to a new epoch until sometime after it starts
+	 * because of scheduling and computation delays, and it is even possible
+	 * to completely skip epochs.  In all cases, during epoch advancement we
+	 * merge all relevant activity into the most recently recorded epoch.
+	 */
+	nstime_t		decay_epoch;
+	/* decay_deadline randomness generator. */
+	uint64_t		decay_jitter_state;
+	/*
+	 * Deadline for current epoch.  This is the sum of decay_interval and
+	 * per epoch jitter which is a uniform random variable in
+	 * [0..decay_interval).  Epochs always advance by precise multiples of
+	 * decay_interval, but we randomize the deadline to reduce the
+	 * likelihood of arenas purging in lockstep.
+	 */
+	nstime_t		decay_deadline;
+	/*
+	 * Number of dirty pages at beginning of current epoch.  During epoch
+	 * advancement we use the delta between decay_ndirty and ndirty to
+	 * determine how many dirty pages, if any, were generated, and record
+	 * the result in decay_backlog.
+	 */
+	size_t			decay_ndirty;
+	/*
+	 * Memoized result of arena_decay_backlog_npages_limit() corresponding
+	 * to the current contents of decay_backlog, i.e. the limit on how many
+	 * pages are allowed to exist for the decay epochs.
+	 */
+	size_t			decay_backlog_npages_limit;
+	/*
+	 * Trailing log of how many unused dirty pages were generated during
+	 * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last
+	 * element is the most recent epoch.  Corresponding epoch times are
+	 * relative to decay_epoch.
+	 */
+	size_t			decay_backlog[SMOOTHSTEP_NSTEPS];
+
 	/* Extant huge allocations. */
 	ql_head(extent_node_t)	huge;
 	/* Synchronizes all huge allocation/update/deallocation. */
@@ -402,6 +455,17 @@ struct arena_s {
 
 	/* bins is used to store trees of free regions. */
 	arena_bin_t		bins[NBINS];
+
+	/*
+	 * Quantized address-ordered trees of this arena's available runs.  The
+	 * trees are used for first-best-fit run allocation.
+	 */
+	arena_run_tree_t	runs_avail[1]; /* Dynamically sized. */
+};
+
+/* Used in conjunction with tsd for fast arena-related context lookup. */
+struct arena_tdata_s {
+	ticker_t		decay_ticker;
 };
 #endif /* JEMALLOC_ARENA_STRUCTS_B */
 
@@ -417,7 +481,10 @@ static const size_t	large_pad =
 #endif
     ;
 
+extern purge_mode_t	opt_purge;
+extern const char	*purge_mode_names[];
 extern ssize_t		opt_lg_dirty_mult;
+extern ssize_t		opt_decay_time;
 
 extern arena_bin_info_t	arena_bin_info[NBINS];
 
@@ -425,9 +492,15 @@ extern size_t		map_bias; /* Number of ar
 extern size_t		map_misc_offset;
 extern size_t		arena_maxrun; /* Max run size for arenas. */
 extern size_t		large_maxclass; /* Max large size class. */
+extern size_t		run_quantize_max; /* Max run_quantize_*() input. */
 extern unsigned		nlclasses; /* Number of large size classes. */
 extern unsigned		nhclasses; /* Number of huge size classes. */
 
+#ifdef JEMALLOC_JET
+typedef size_t (run_quantize_t)(size_t);
+extern run_quantize_t *run_quantize_floor;
+extern run_quantize_t *run_quantize_ceil;
+#endif
 void	arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node,
     bool cache);
 void	arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node,
@@ -445,9 +518,11 @@ bool	arena_chunk_ralloc_huge_expand(aren
     size_t oldsize, size_t usize, bool *zero);
 ssize_t	arena_lg_dirty_mult_get(arena_t *arena);
 bool	arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult);
+ssize_t	arena_decay_time_get(arena_t *arena);
+bool	arena_decay_time_set(arena_t *arena, ssize_t decay_time);
 void	arena_maybe_purge(arena_t *arena);
-void	arena_purge_all(arena_t *arena);
-void	arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
+void	arena_purge(arena_t *arena, bool all);
+void	arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin,
     szind_t binind, uint64_t prof_accumbytes);
 void	arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info,
     bool zero);
@@ -461,8 +536,9 @@ extern arena_dalloc_junk_small_t *arena_
 void	arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info);
 #endif
 void	arena_quarantine_junk_small(void *ptr, size_t usize);
-void	*arena_malloc_small(arena_t *arena, size_t size, bool zero);
-void	*arena_malloc_large(arena_t *arena, size_t size, bool zero);
+void	*arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t ind, bool zero);
+void	*arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
+    bool zero, tcache_t *tcache);
 void	*arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize,
     size_t alignment, bool zero, tcache_t *tcache);
 void	arena_prof_promoted(const void *ptr, size_t size);
@@ -470,8 +546,8 @@ void	arena_dalloc_bin_junked_locked(aren
     void *ptr, arena_chunk_map_bits_t *bitselm);
 void	arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
     size_t pageind, arena_chunk_map_bits_t *bitselm);
-void	arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
-    size_t pageind);
+void	arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk,
+    void *ptr, size_t pageind);
 #ifdef JEMALLOC_JET
 typedef void (arena_dalloc_junk_large_t)(void *, size_t);
 extern arena_dalloc_junk_large_t *arena_dalloc_junk_large;
@@ -480,12 +556,13 @@ void	arena_dalloc_junk_large(void *ptr, 
 #endif
 void	arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk,
     void *ptr);
-void	arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
+void	arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk,
+    void *ptr);
 #ifdef JEMALLOC_JET
 typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t);
 extern arena_ralloc_junk_large_t *arena_ralloc_junk_large;
 #endif
-bool	arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
+bool	arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size,
     size_t extra, bool zero);
 void	*arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize,
     size_t size, size_t alignment, bool zero, tcache_t *tcache);
@@ -493,10 +570,18 @@ dss_prec_t	arena_dss_prec_get(arena_t *a
 bool	arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
 ssize_t	arena_lg_dirty_mult_default_get(void);
 bool	arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult);
-void	arena_stats_merge(arena_t *arena, const char **dss,
-    ssize_t *lg_dirty_mult, size_t *nactive, size_t *ndirty,
-    arena_stats_t *astats, malloc_bin_stats_t *bstats,
+ssize_t	arena_decay_time_default_get(void);
+bool	arena_decay_time_default_set(ssize_t decay_time);
+void	arena_basic_stats_merge(arena_t *arena, unsigned *nthreads,
+    const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time,
+    size_t *nactive, size_t *ndirty);
+void	arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss,
+    ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive,
+    size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats,
     malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats);
+unsigned	arena_nthreads_get(arena_t *arena);
+void	arena_nthreads_inc(arena_t *arena);
+void	arena_nthreads_dec(arena_t *arena);
 arena_t	*arena_new(unsigned ind);
 bool	arena_boot(void);
 void	arena_prefork(arena_t *arena);
@@ -512,7 +597,7 @@ arena_chunk_map_bits_t	*arena_bitselm_ge
     size_t pageind);
 arena_chunk_map_misc_t	*arena_miscelm_get(arena_chunk_t *chunk,
     size_t pageind);
-size_t	arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm);
+size_t	arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm);
 void	*arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm);
 arena_chunk_map_misc_t	*arena_rd_to_miscelm(arena_runs_dirty_link_t *rd);
 arena_chunk_map_misc_t	*arena_run_to_miscelm(arena_run_t *run);
@@ -552,17 +637,19 @@ bool	arena_prof_accum_locked(arena_t *ar
 bool	arena_prof_accum(arena_t *arena, uint64_t accumbytes);
 szind_t	arena_ptr_small_binind_get(const void *ptr, size_t mapbits);
 szind_t	arena_bin_index(arena_t *arena, arena_bin_t *bin);
-unsigned	arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
+size_t	arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
     const void *ptr);
 prof_tctx_t	*arena_prof_tctx_get(const void *ptr);
 void	arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx);
 void	arena_prof_tctx_reset(const void *ptr, size_t usize,
     const void *old_ptr, prof_tctx_t *old_tctx);
-void	*arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero,
-    tcache_t *tcache);
+void	arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks);
+void	arena_decay_tick(tsd_t *tsd, arena_t *arena);
+void	*arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind,
+    bool zero, tcache_t *tcache, bool slow_path);
 arena_t	*arena_aalloc(const void *ptr);
 size_t	arena_salloc(const void *ptr, bool demote);
-void	arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache);
+void	arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path);
 void	arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache);
 #endif
 
@@ -590,7 +677,7 @@ arena_miscelm_get(arena_chunk_t *chunk, 
 }
 
 JEMALLOC_ALWAYS_INLINE size_t
-arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm)
+arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm)
 {
 	arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm);
 	size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk +
@@ -970,7 +1057,7 @@ arena_ptr_small_binind_get(const void *p
 		run = &miscelm->run;
 		run_binind = run->binind;
 		bin = &arena->bins[run_binind];
-		actual_binind = bin - arena->bins;
+		actual_binind = (szind_t)(bin - arena->bins);
 		assert(run_binind == actual_binind);
 		bin_info = &arena_bin_info[actual_binind];
 		rpages = arena_miscelm_to_rpages(miscelm);
@@ -987,16 +1074,15 @@ arena_ptr_small_binind_get(const void *p

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
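
The decay machinery described by the arena.h comments above keeps a
SMOOTHSTEP_NSTEPS-entry backlog of dirty-page generation and caps the
number of retained dirty pages with a smoothstep-weighted sum.  A hedged
sketch of that limit computation (the constants and the h_steps table
stand in for the generated smoothstep.h contents; they are assumptions
for illustration, not the actual implementation):

	#include <stddef.h>
	#include <stdint.h>

	#define	SMOOTHSTEP_NSTEPS	200	/* assumed; see smoothstep.h */
	#define	SMOOTHSTEP_BFP		24	/* fixed-point bits, assumed */

	/*
	 * backlog[i] holds the unused dirty pages generated during the
	 * i'th-oldest epoch (most recent last); h_steps[i] is a fixed-point
	 * smoothstep weight, so older epochs are allowed to retain
	 * progressively fewer pages.
	 */
	size_t
	decay_backlog_npages_limit(const size_t *backlog,
	    const uint64_t *h_steps)
	{
		uint64_t sum = 0;
		unsigned i;

		for (i = 0; i < SMOOTHSTEP_NSTEPS; i++)
			sum += (uint64_t)backlog[i] * h_steps[i];
		return ((size_t)(sum >> SMOOTHSTEP_BFP));
	}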