From: antirez <antirez@gmail.com>
Date: Tue, 15 May 2012 13:27:12 +0000 (+0200)
Subject: Jemalloc updated to 3.0.0.
X-Git-Url: https://git.saurik.com/redis.git/commitdiff_plain/ad4c0b4117ec15c0061b702f230caf1bc5eb4e06?ds=sidebyside

Jemalloc updated to 3.0.0.

Full changelog here:

http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git;a=blob_plain;f=ChangeLog;hb=master

Notable improvements from the point of view of Redis:

1) Bugfixing.
2) Support for Valgrind.
3) Support for OSX Lion, FreeBSD.
---

diff --git a/deps/jemalloc.orig/.gitignore b/deps/jemalloc.orig/.gitignore
new file mode 100644
index 00000000..32b4c424
--- /dev/null
+++ b/deps/jemalloc.orig/.gitignore
@@ -0,0 +1,23 @@
+/autom4te.cache/
+/config.stamp
+/config.log
+/config.status
+/configure
+/doc/html.xsl
+/doc/manpages.xsl
+/doc/jemalloc.xml
+/doc/jemalloc.html
+/doc/jemalloc.3
+/lib/
+/Makefile
+/include/jemalloc/internal/jemalloc_internal\.h
+/include/jemalloc/jemalloc\.h
+/include/jemalloc/jemalloc_defs\.h
+/test/jemalloc_test\.h
+/src/*.[od]
+/test/*.[od]
+/test/*.out
+/test/[a-z]*
+!test/*.c
+!test/*.exp
+/VERSION
diff --git a/deps/jemalloc.orig/COPYING b/deps/jemalloc.orig/COPYING
new file mode 100644
index 00000000..10ade120
--- /dev/null
+++ b/deps/jemalloc.orig/COPYING
@@ -0,0 +1,51 @@
+Unless otherwise specified, files in the jemalloc source distribution are
+subject to the following licenses:
+--------------------------------------------------------------------------------
+Copyright (C) 2002-2010 Jason Evans <jasone@canonware.com>.
+All rights reserved.
+Copyright (C) 2007-2010 Mozilla Foundation.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice(s),
+   this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice(s),
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
+EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+--------------------------------------------------------------------------------
+Copyright (C) 2009-2010 Facebook, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimer in the documentation and/or
+  other materials provided with the distribution.
+* Neither the name of Facebook, Inc. nor the names of its contributors may be
+  used to endorse or promote products derived from this software without
+  specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+--------------------------------------------------------------------------------
diff --git a/deps/jemalloc.orig/ChangeLog b/deps/jemalloc.orig/ChangeLog
new file mode 100644
index 00000000..326ee7a9
--- /dev/null
+++ b/deps/jemalloc.orig/ChangeLog
@@ -0,0 +1,250 @@
+Following are change highlights associated with official releases.  Important
+bug fixes are all mentioned, but internal enhancements are omitted here for
+brevity (even though they are more fun to write about).  Much more detail can be
+found in the git revision history:
+
+    http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
+    git://canonware.com/jemalloc.git
+
+* 2.2.5 (November 14, 2011)
+
+  Bug fixes:
+  - Fix huge_ralloc() race when using mremap(2).  This is a serious bug that
+    could cause memory corruption and/or crashes.
+  - Fix huge_ralloc() to maintain chunk statistics.
+  - Fix malloc_stats_print(..., "a") output.
+
+* 2.2.4 (November 5, 2011)
+
+  Bug fixes:
+  - Initialize arenas_tsd before using it.  This bug existed for 2.2.[0-3], as
+    well as for --disable-tls builds in earlier releases.
+  - Do not assume a 4 KiB page size in test/rallocm.c.
+
+* 2.2.3 (August 31, 2011)
+
+  This version fixes numerous bugs related to heap profiling.
+
+  Bug fixes:
+  - Fix a prof-related race condition.  This bug could cause memory corruption,
+    but only occurred in non-default configurations (prof_accum:false).
+  - Fix off-by-one backtracing issues (make sure that prof_alloc_prep() is
+    excluded from backtraces).
+  - Fix a prof-related bug in realloc() (only triggered by OOM errors).
+  - Fix prof-related bugs in allocm() and rallocm().
+  - Fix prof_tdata_cleanup() for --disable-tls builds.
+  - Fix a relative include path, to fix objdir builds.
+
+* 2.2.2 (July 30, 2011)
+
+  Bug fixes:
+  - Fix a build error for --disable-tcache.
+  - Fix assertions in arena_purge() (for real this time).
+  - Add the --with-private-namespace option.  This is a workaround for symbol
+    conflicts that can inadvertently arise when using static libraries.
+
+* 2.2.1 (March 30, 2011)
+
+  Bug fixes:
+  - Implement atomic operations for x86/x64.  This fixes compilation failures
+    for versions of gcc that are still in wide use.
+  - Fix an assertion in arena_purge().
+
+* 2.2.0 (March 22, 2011)
+
+  This version incorporates several improvements to algorithms and data
+  structures that tend to reduce fragmentation and increase speed.
+
+  New features:
+  - Add the "stats.cactive" mallctl.
+  - Update pprof (from google-perftools 1.7).
+  - Improve backtracing-related configuration logic, and add the
+    --disable-prof-libgcc option.
+
+  Bug fixes:
+  - Change default symbol visibility from "internal" to "hidden", which
+    decreases the overhead of library-internal function calls.
+  - Fix symbol visibility so that it is also set on OS X.
+  - Fix a build dependency regression caused by the introduction of the .pic.o
+    suffix for PIC object files.
+  - Add missing checks for mutex initialization failures.
+  - Don't use libgcc-based backtracing except on x64, where it is known to work.
+  - Fix deadlocks on OS X that were due to memory allocation in
+    pthread_mutex_lock().
+  - Heap profiling-specific fixes:
+    + Fix memory corruption due to integer overflow in small region index
+      computation, when using a small enough sample interval that profiling
+      context pointers are stored in small run headers.
+    + Fix a bootstrap ordering bug that only occurred with TLS disabled.
+    + Fix a rallocm() rsize bug.
+    + Fix error detection bugs for aligned memory allocation.
+
+* 2.1.3 (March 14, 2011)
+
+  Bug fixes:
+  - Fix a cpp logic regression (due to the "thread.{de,}allocatedp" mallctl fix
+    for OS X in 2.1.2).
+  - Fix a "thread.arena" mallctl bug.
+  - Fix a thread cache stats merging bug.
+
+* 2.1.2 (March 2, 2011)
+
+  Bug fixes:
+  - Fix "thread.{de,}allocatedp" mallctl for OS X.
+  - Add missing jemalloc.a to build system.
+
+* 2.1.1 (January 31, 2011)
+
+  Bug fixes:
+  - Fix aligned huge reallocation (affected allocm()).
+  - Fix the ALLOCM_LG_ALIGN macro definition.
+  - Fix a heap dumping deadlock.
+  - Fix a "thread.arena" mallctl bug.
+
+* 2.1.0 (December 3, 2010)
+
+  This version incorporates some optimizations that can't quite be considered
+  bug fixes.
+
+  New features:
+  - Use Linux's mremap(2) for huge object reallocation when possible.
+  - Avoid locking in mallctl*() when possible.
+  - Add the "thread.{de,}allocatedp" mallctls.
+  - Convert the manual page source from roff to DocBook, and generate both roff
+    and HTML manuals.
+
+  Bug fixes:
+  - Fix a crash due to incorrect bootstrap ordering.  This only impacted
+    --enable-debug --enable-dss configurations.
+  - Fix a minor statistics bug for mallctl("swap.avail", ...).
+
+* 2.0.1 (October 29, 2010)
+
+  Bug fixes:
+  - Fix a race condition in heap profiling that could cause undefined behavior
+    if "opt.prof_accum" were disabled.
+  - Add missing mutex unlocks for some OOM error paths in the heap profiling
+    code.
+  - Fix a compilation error for non-C99 builds.
+
+* 2.0.0 (October 24, 2010)
+
+  This version focuses on the experimental *allocm() API, and on improved
+  run-time configuration/introspection.  Nonetheless, numerous performance
+  improvements are also included.
+
+  New features:
+  - Implement the experimental {,r,s,d}allocm() API, which provides a superset
+    of the functionality available via malloc(), calloc(), posix_memalign(),
+    realloc(), malloc_usable_size(), and free().  These functions can be used to
+    allocate/reallocate aligned zeroed memory, ask for optional extra memory
+    during reallocation, prevent object movement during reallocation, etc.
+  - Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is
+    more human-readable, and more flexible.  For example:
+      JEMALLOC_OPTIONS=AJP
+    is now:
+      MALLOC_CONF=abort:true,fill:true,stats_print:true
+  - Port to Apple OS X.  Sponsored by Mozilla.
+  - Make it possible for the application to control thread-->arena mappings via
+    the "thread.arena" mallctl.
+  - Add compile-time support for all TLS-related functionality via pthreads TSD.
+    This is mainly of interest for OS X, which does not support TLS, but has a
+    TSD implementation with similar performance.
+  - Override memalign() and valloc() if they are provided by the system.
+  - Add the "arenas.purge" mallctl, which can be used to synchronously purge all
+    dirty unused pages.
+  - Make cumulative heap profiling data optional, so that it is possible to
+    limit the amount of memory consumed by heap profiling data structures.
+  - Add per thread allocation counters that can be accessed via the
+    "thread.allocated" and "thread.deallocated" mallctls.
+
+  Incompatible changes:
+  - Remove JEMALLOC_OPTIONS and malloc_options (see MALLOC_CONF above).
+  - Increase default backtrace depth from 4 to 128 for heap profiling.
+  - Disable interval-based profile dumps by default.
+
+  Bug fixes:
+  - Remove bad assertions in fork handler functions.  These assertions could
+    cause aborts for some combinations of configure settings.
+  - Fix strerror_r() usage to deal with non-standard semantics in GNU libc.
+  - Fix leak context reporting.  This bug tended to cause the number of contexts
+    to be underreported (though the reported number of objects and bytes were
+    correct).
+  - Fix a realloc() bug for large in-place growing reallocation.  This bug could
+    cause memory corruption, but it was hard to trigger.
+  - Fix an allocation bug for small allocations that could be triggered if
+    multiple threads raced to create a new run of backing pages.
+  - Enhance the heap profiler to trigger samples based on usable size, rather
+    than request size.
+  - Fix a heap profiling bug due to sometimes losing track of requested object
+    size for sampled objects.
+
+* 1.0.3 (August 12, 2010)
+
+  Bug fixes:
+  - Fix the libunwind-based implementation of stack backtracing (used for heap
+    profiling).  This bug could cause zero-length backtraces to be reported.
+  - Add a missing mutex unlock in library initialization code.  If multiple
+    threads raced to initialize malloc, some of them could end up permanently
+    blocked.
+
+* 1.0.2 (May 11, 2010)
+
+  Bug fixes:
+  - Fix junk filling of large objects, which could cause memory corruption.
+  - Add MAP_NORESERVE support for chunk mapping, because otherwise virtual
+    memory limits could cause swap file configuration to fail.  Contributed by
+    Jordan DeLong.
+
+* 1.0.1 (April 14, 2010)
+
+  Bug fixes:
+  - Fix compilation when --enable-fill is specified.
+  - Fix threads-related profiling bugs that affected accuracy and caused memory
+    to be leaked during thread exit.
+  - Fix dirty page purging race conditions that could cause crashes.
+  - Fix crash in tcache flushing code during thread destruction.
+
+* 1.0.0 (April 11, 2010)
+
+  This release focuses on speed and run-time introspection.  Numerous
+  algorithmic improvements make this release substantially faster than its
+  predecessors.
+
+  New features:
+  - Implement autoconf-based configuration system.
+  - Add mallctl*(), for the purposes of introspection and run-time
+    configuration.
+  - Make it possible for the application to manually flush a thread's cache, via
+    the "tcache.flush" mallctl.
+  - Base maximum dirty page count on proportion of active memory.
+  - Compute various additional run-time statistics, including per size class
+    statistics for large objects.
+  - Expose malloc_stats_print(), which can be called repeatedly by the
+    application.
+  - Simplify the malloc_message() signature to only take one string argument,
+    and incorporate an opaque data pointer argument for use by the application
+    in combination with malloc_stats_print().
+  - Add support for allocation backed by one or more swap files, and allow the
+    application to disable over-commit if swap files are in use.
+  - Implement allocation profiling and leak checking.
+
+  Removed features:
+  - Remove the dynamic arena rebalancing code, since thread-specific caching
+    reduces its utility.
+
+  Bug fixes:
+  - Modify chunk allocation to work when address space layout randomization
+    (ASLR) is in use.
+  - Fix thread cleanup bugs related to TLS destruction.
+  - Handle 0-size allocation requests in posix_memalign().
+  - Fix a chunk leak.  The leaked chunks were never touched, so this impacted
+    virtual memory usage, but not physical memory usage.
+
+* linux_2008082[78]a (August 27/28, 2008)
+
+  These snapshot releases are the simple result of incorporating Linux-specific
+  support into the FreeBSD malloc sources.
+
+--------------------------------------------------------------------------------
+vim:filetype=text:textwidth=80
diff --git a/deps/jemalloc.orig/INSTALL b/deps/jemalloc.orig/INSTALL
new file mode 100644
index 00000000..2a1e469c
--- /dev/null
+++ b/deps/jemalloc.orig/INSTALL
@@ -0,0 +1,257 @@
+Building and installing jemalloc can be as simple as typing the following while
+in the root directory of the source tree:
+
+    ./configure
+    make
+    make install
+
+=== Advanced configuration =====================================================
+
+The 'configure' script supports numerous options that allow control of which
+functionality is enabled, where jemalloc is installed, etc.  Optionally, pass
+any of the following arguments (not a definitive list) to 'configure':
+
+--help
+    Print a definitive list of options.
+
+--prefix=<install-root-dir>
+    Set the base directory in which to install.  For example:
+
+        ./configure --prefix=/usr/local
+
+    will cause files to be installed into /usr/local/include, /usr/local/lib,
+    and /usr/local/man.
+
+--with-rpath=<colon-separated-rpath>
+    Embed one or more library paths, so that libjemalloc can find the libraries
+    it is linked to.  This works only on ELF-based systems.
+
+--with-jemalloc-prefix=<prefix>
+    Prefix all public APIs with <prefix>.  For example, if <prefix> is
+    "prefix_", API changes like the following occur:
+
+      malloc()         --> prefix_malloc()
+      malloc_conf      --> prefix_malloc_conf
+      /etc/malloc.conf --> /etc/prefix_malloc.conf
+      MALLOC_CONF      --> PREFIX_MALLOC_CONF
+
+    This makes it possible to use jemalloc at the same time as the system
+    allocator, or even to use multiple copies of jemalloc simultaneously.
+
+    By default, the prefix is "", except on OS X, where it is "je_".  On OS X,
+    jemalloc overlays the default malloc zone, but makes no attempt to actually
+    replace the "malloc", "calloc", etc. symbols.
+
+--with-private-namespace=<prefix>
+    Prefix all library-private APIs with <prefix>.  For shared libraries,
+    symbol visibility mechanisms prevent these symbols from being exported, but
+    for static libraries, naming collisions are a real possibility.  By
+    default, the prefix is "" (empty string).
+
+--with-install-suffix=<suffix>
+    Append <suffix> to the base name of all installed files, such that multiple
+    versions of jemalloc can coexist in the same installation directory.  For
+    example, libjemalloc.so.0 becomes libjemalloc<suffix>.so.0.
+
+--enable-cc-silence
+    Enable code that silences non-useful compiler warnings.  This is helpful
+    when trying to tell serious warnings from those due to compiler
+    limitations, but it potentially incurs a performance penalty.
+
+--enable-debug
+    Enable assertions and validation code.  This incurs a substantial
+    performance hit, but is very useful during application development.
+
+--enable-stats
+    Enable statistics gathering functionality.  See the "opt.stats_print"
+    option documentation for usage details.
+
+--enable-prof
+    Enable heap profiling and leak detection functionality.  See the "opt.prof"
+    option documentation for usage details.  When enabled, there are several
+    approaches to backtracing, and the configure script chooses the first one
+    in the following list that appears to function correctly:
+
+    + libunwind      (requires --enable-prof-libunwind)
+    + libgcc         (unless --disable-prof-libgcc)
+    + gcc intrinsics (unless --disable-prof-gcc)
+
+--enable-prof-libunwind
+    Use the libunwind library (http://www.nongnu.org/libunwind/) for stack
+    backtracing.
+
+--disable-prof-libgcc
+    Disable the use of libgcc's backtracing functionality.
+
+--disable-prof-gcc
+    Disable the use of gcc intrinsics for backtracing.
+
+--with-static-libunwind=<libunwind.a>
+    Statically link against the specified libunwind.a rather than dynamically
+    linking with -lunwind.
+
+--disable-tiny
+    Disable tiny (sub-quantum-sized) object support.  Technically it is not
+    legal for a malloc implementation to allocate objects with less than
+    quantum alignment (8 or 16 bytes, depending on architecture), but in
+    practice it never causes any problems if, for example, 4-byte allocations
+    are 4-byte-aligned.
+
+--disable-tcache
+    Disable thread-specific caches for small objects.  Objects are cached and
+    released in bulk, thus reducing the total number of mutex operations.  See
+    the "opt.tcache" option for usage details.
+
+--enable-swap
+    Enable mmap()ed swap file support.  When this feature is built in, it is
+    possible to specify one or more files that act as backing store.  This
+    effectively allows for per application swap files.
+
+--enable-dss
+    Enable support for page allocation/deallocation via sbrk(2), in addition to
+    mmap(2).
+
+--enable-fill
+    Enable support for junk/zero filling of memory.  See the "opt.junk"/
+    "opt.zero" option documentation for usage details.
+
+--enable-xmalloc
+    Enable support for optional immediate termination due to out-of-memory
+    errors, as is commonly implemented by an "xmalloc" wrapper function for
+    malloc.  See the "opt.xmalloc" option documentation for usage details.
+
+--enable-sysv
+    Enable support for System V semantics, wherein malloc(0) returns NULL
+    rather than a minimal allocation.  See the "opt.sysv" option documentation
+    for usage details.
+
+--enable-dynamic-page-shift
+    Under most conditions, the system page size never changes (usually 4KiB or
+    8KiB, depending on architecture and configuration), and unless this option
+    is enabled, jemalloc assumes that page size can safely be determined during
+    configuration and hard-coded.  Enabling dynamic page size determination has
+    a measurable impact on performance, since the compiler is forced to load
+    the page size from memory rather than embedding immediate values.
+
+--disable-lazy-lock
+    Disable code that wraps pthread_create() to detect when an application
+    switches from single-threaded to multi-threaded mode, so that it can avoid
+    mutex locking/unlocking operations while in single-threaded mode.  In
+    practice, this feature usually has little impact on performance unless
+    thread-specific caching is disabled.
+
+--disable-tls
+    Disable thread-local storage (TLS), which allows for fast access to
+    thread-local variables via the __thread keyword.  If TLS is available,
+    jemalloc uses it for several purposes.
+
+--with-xslroot=<path>
+    Specify where to find DocBook XSL stylesheets when building the
+    documentation.
+
+The following environment variables (not a definitive list) impact configure's
+behavior:
+
+CFLAGS="?"
+    Pass these flags to the compiler.  You probably shouldn't define this unless
+    you know what you are doing.  (Use EXTRA_CFLAGS instead.)
+
+EXTRA_CFLAGS="?"
+    Append these flags to CFLAGS.  This makes it possible to add flags such as
+    -Werror, while allowing the configure script to determine what other flags
+    are appropriate for the specified configuration.
+
+    The configure script specifically checks whether an optimization flag (-O*)
+    is specified in EXTRA_CFLAGS, and refrains from specifying an optimization
+    level if it finds that one has already been specified.
+
+CPPFLAGS="?"
+    Pass these flags to the C preprocessor.  Note that CFLAGS is not passed to
+    'cpp' when 'configure' is looking for include files, so you must use
+    CPPFLAGS instead if you need to help 'configure' find header files.
+
+LD_LIBRARY_PATH="?"
+    'ld' uses this colon-separated list to find libraries.
+
+LDFLAGS="?"
+    Pass these flags when linking.
+
+PATH="?"
+    'configure' uses this to find programs.
+
+=== Advanced compilation =======================================================
+
+To install only parts of jemalloc, use the following targets:
+
+    install_bin
+    install_include
+    install_lib
+    install_doc
+
+To clean up build results to varying degrees, use the following make targets:
+
+    clean
+    distclean
+    relclean
+
+=== Advanced installation ======================================================
+
+Optionally, define make variables when invoking make, including (not
+exclusively):
+
+INCLUDEDIR="?"
+    Use this as the installation prefix for header files.
+
+LIBDIR="?"
+    Use this as the installation prefix for libraries.
+
+MANDIR="?"
+    Use this as the installation prefix for man pages.
+
+DESTDIR="?"
+    Prepend DESTDIR to BINDIR, INCLUDEDIR, LIBDIR, DATADIR, and MANDIR.  This is
+    useful when installing to a different path than was specified via --prefix.
+
+CC="?"
+    Use this to invoke the C compiler.
+
+CFLAGS="?"
+    Pass these flags to the compiler.
+
+CPPFLAGS="?"
+    Pass these flags to the C preprocessor.
+
+LDFLAGS="?"
+    Pass these flags when linking.
+
+PATH="?"
+    Use this to search for programs used during configuration and building.
+
+=== Development ================================================================
+
+If you intend to make non-trivial changes to jemalloc, use the 'autogen.sh'
+script rather than 'configure'.  This re-generates 'configure', enables
+configuration dependency rules, and enables re-generation of automatically
+generated source files.
+
+The build system supports using an object directory separate from the source
+tree.  For example, you can create an 'obj' directory, and from within that
+directory, issue configuration and build commands:
+
+    autoconf
+    mkdir obj
+    cd obj
+    ../configure --enable-autogen
+    make
+
+=== Documentation ==============================================================
+
+The manual page is generated in both html and roff formats.  Any web browser
+can be used to view the html manual.  The roff manual page can be formatted
+prior to installation via any of the following commands:
+
+    nroff -man -t doc/jemalloc.3
+
+    groff -man -t -Tps doc/jemalloc.3 | ps2pdf - doc/jemalloc.3.pdf
+
+    (cd doc; groff -man -man-ext -t -Thtml jemalloc.3 > jemalloc.3.html)
diff --git a/deps/jemalloc.orig/Makefile.in b/deps/jemalloc.orig/Makefile.in
new file mode 100644
index 00000000..de7492f9
--- /dev/null
+++ b/deps/jemalloc.orig/Makefile.in
@@ -0,0 +1,259 @@
+# Clear out all vpaths, then set just one (default vpath) for the main build
+# directory.
+vpath
+vpath % .
+
+# Clear the default suffixes, so that built-in rules are not used.
+.SUFFIXES :
+
+SHELL := /bin/sh
+
+CC := @CC@
+
+# Configuration parameters.
+DESTDIR =
+BINDIR := $(DESTDIR)@BINDIR@
+INCLUDEDIR := $(DESTDIR)@INCLUDEDIR@
+LIBDIR := $(DESTDIR)@LIBDIR@
+DATADIR := $(DESTDIR)@DATADIR@
+MANDIR := $(DESTDIR)@MANDIR@
+
+# Build parameters.
+CPPFLAGS := @CPPFLAGS@ -I@srcroot@include -I@objroot@include
+CFLAGS := @CFLAGS@
+ifeq (macho, @abi@)
+CFLAGS += -dynamic
+endif
+LDFLAGS := @LDFLAGS@
+LIBS := @LIBS@
+RPATH_EXTRA := @RPATH_EXTRA@
+ifeq (macho, @abi@)
+SO := dylib
+WL_SONAME := dylib_install_name
+else
+SO := so
+WL_SONAME := soname
+endif
+REV := 1
+ifeq (macho, @abi@)
+TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=@objroot@lib
+else
+TEST_LIBRARY_PATH :=
+endif
+
+# Lists of files.
+BINS := @srcroot@bin/pprof
+CHDRS := @objroot@include/jemalloc/jemalloc@install_suffix@.h \
+	@objroot@include/jemalloc/jemalloc_defs@install_suffix@.h
+CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \
+	@srcroot@src/base.c @srcroot@src/bitmap.c @srcroot@src/chunk.c \
+	@srcroot@src/chunk_dss.c @srcroot@src/chunk_mmap.c \
+	@srcroot@src/chunk_swap.c @srcroot@src/ckh.c @srcroot@src/ctl.c \
+	@srcroot@src/extent.c @srcroot@src/hash.c @srcroot@src/huge.c \
+	@srcroot@src/mb.c @srcroot@src/mutex.c @srcroot@src/prof.c \
+	@srcroot@src/rtree.c @srcroot@src/stats.c @srcroot@src/tcache.c
+ifeq (macho, @abi@)
+CSRCS += @srcroot@src/zone.c
+endif
+STATIC_LIBS :=	@objroot@lib/libjemalloc@install_suffix@.a
+DSOS := @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) \
+	@objroot@lib/libjemalloc@install_suffix@.$(SO) \
+	@objroot@lib/libjemalloc@install_suffix@_pic.a
+MAN3 := @objroot@doc/jemalloc@install_suffix@.3
+DOCS_XML := @objroot@doc/jemalloc@install_suffix@.xml
+DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html)
+DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3)
+DOCS := $(DOCS_HTML) $(DOCS_MAN3)
+CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \
+	@srcroot@test/bitmap.c @srcroot@test/mremap.c \
+	@srcroot@test/posix_memalign.c @srcroot@test/rallocm.c \
+	@srcroot@test/thread_arena.c
+
+.PHONY: all dist doc_html doc_man doc
+.PHONY: install_bin install_include install_lib
+.PHONY: install_html install_man install_doc install
+.PHONY: tests check clean distclean relclean
+
+.SECONDARY : $(CTESTS:@srcroot@%.c=@objroot@%.o)
+
+# Default target.
+all: $(DSOS) $(STATIC_LIBS)
+
+dist: doc
+
+@srcroot@doc/%.html : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/html.xsl
+	@XSLTPROC@ -o $@ @objroot@doc/html.xsl $<
+
+@srcroot@doc/%.3 : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/manpages.xsl
+	@XSLTPROC@ -o $@ @objroot@doc/manpages.xsl $<
+
+doc_html: $(DOCS_HTML)
+doc_man: $(DOCS_MAN3)
+doc: $(DOCS)
+
+#
+# Include generated dependency files.
+#
+-include $(CSRCS:@srcroot@%.c=@objroot@%.d)
+-include $(CSRCS:@srcroot@%.c=@objroot@%.pic.d)
+-include $(CTESTS:@srcroot@%.c=@objroot@%.d)
+
+@objroot@src/%.o: @srcroot@src/%.c
+	@mkdir -p $(@D)
+	$(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $<
+	@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)"
+
+@objroot@src/%.pic.o: @srcroot@src/%.c
+	@mkdir -p $(@D)
+	$(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $<
+	@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $(basename $@))))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.pic.o \2/g\" > $(@:%.o=%.d)"
+
+%.$(SO) : %.$(SO).$(REV)
+	@mkdir -p $(@D)
+	ln -sf $(<F) $@
+
+@objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) : $(CSRCS:@srcroot@%.c=@objroot@%.pic.o)
+	@mkdir -p $(@D)
+	$(CC) -shared -Wl,-$(WL_SONAME),$(@F) $(RPATH_EXTRA:%=@RPATH@%) -o $@ $+ $(LDFLAGS) $(LIBS)
+
+@objroot@lib/libjemalloc@install_suffix@_pic.a : $(CSRCS:@srcroot@%.c=@objroot@%.pic.o)
+	@mkdir -p $(@D)
+	ar crus $@ $+
+
+@objroot@lib/libjemalloc@install_suffix@.a : $(CSRCS:@srcroot@%.c=@objroot@%.o)
+	@mkdir -p $(@D)
+	ar crus $@ $+
+
+@objroot@test/%.o: @srcroot@test/%.c
+	@mkdir -p $(@D)
+	$(CC) $(CFLAGS) -c $(CPPFLAGS) -I@objroot@test -o $@ $<
+	@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) -I@objroot@test $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)"
+
+# Automatic dependency generation misses #include "*.c".
+@objroot@test/bitmap.o : @objroot@src/bitmap.o
+
+@objroot@test/%: @objroot@test/%.o \
+		 @objroot@lib/libjemalloc@install_suffix@.$(SO)
+	@mkdir -p $(@D)
+ifneq (@RPATH@, )
+	$(CC) -o $@ $< @RPATH@@objroot@lib -L@objroot@lib -ljemalloc@install_suffix@ -lpthread
+else
+	$(CC) -o $@ $< -L@objroot@lib -ljemalloc@install_suffix@ -lpthread
+endif
+
+# Install the programs listed in $(BINS); abort on the first failed install.
+install_bin:
+	install -d $(BINDIR)
+	@for b in $(BINS); do \
+	echo "install -m 755 $$b $(BINDIR)"; \
+	install -m 755 $$b $(BINDIR) || exit 1; done
+
+# Install the public headers in $(CHDRS); abort on the first failed install.
+install_include:
+	install -d $(INCLUDEDIR)/jemalloc
+	@for h in $(CHDRS); do \
+	echo "install -m 644 $$h $(INCLUDEDIR)/jemalloc"; \
+	install -m 644 $$h $(INCLUDEDIR)/jemalloc || exit 1; done
+
+install_lib: $(DSOS) $(STATIC_LIBS)
+	install -d $(LIBDIR)
+	install -m 755 @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)
+	ln -sf libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)/libjemalloc@install_suffix@.$(SO)
+	install -m 755 @objroot@lib/libjemalloc@install_suffix@_pic.a $(LIBDIR)
+	install -m 755 @objroot@lib/libjemalloc@install_suffix@.a $(LIBDIR)
+
+# Install the files in $(DOCS_HTML); abort on the first failed install.
+install_html:
+	install -d $(DATADIR)/doc/jemalloc@install_suffix@
+	@for d in $(DOCS_HTML); do \
+	echo "install -m 644 $$d $(DATADIR)/doc/jemalloc@install_suffix@"; \
+	install -m 644 $$d $(DATADIR)/doc/jemalloc@install_suffix@ || exit 1; done
+
+# Install the files in $(DOCS_MAN3); abort on the first failed install.
+install_man:
+	install -d $(MANDIR)/man3
+	@for d in $(DOCS_MAN3); do \
+	echo "install -m 644 $$d $(MANDIR)/man3"; \
+	install -m 644 $$d $(MANDIR)/man3 || exit 1; done
+
+install_doc: install_html install_man
+
+install: install_bin install_include install_lib install_doc
+
+tests: $(CTESTS:@srcroot@%.c=@objroot@%)
+
+# Run each test and diff its output against the matching .exp file.  Any
+# nonzero diff status (mismatch or error) is a failure; failures fail the target.
+check: tests
+	@mkdir -p @objroot@test
+	@$(SHELL) -c 'total=0; failures=0; \
+		echo "========================================="; \
+		for t in $(CTESTS:@srcroot@%.c=@objroot@%); do \
+			total=`expr $$total + 1`; \
+			/bin/echo -n "$${t} ... "; \
+			$(TEST_LIBRARY_PATH) $${t} @abs_srcroot@ @abs_objroot@ \
+			  > @objroot@$${t}.out 2>&1; \
+			if test -e "@srcroot@$${t}.exp"; then \
+				if diff -u @srcroot@$${t}.exp \
+				  @objroot@$${t}.out >/dev/null 2>&1; then \
+					echo "pass"; \
+				else \
+					failures=`expr $${failures} + 1`; \
+					echo "*** FAIL ***"; \
+				fi; \
+			else \
+				echo "*** FAIL *** (.exp file is missing)"; \
+				failures=`expr $${failures} + 1`; \
+			fi; \
+		done; \
+		echo "========================================="; \
+		echo "Failures: $${failures}/$${total}"; \
+		test "$${failures}" -eq 0'
+
+clean:
+	rm -f $(CSRCS:@srcroot@%.c=@objroot@%.o)
+	rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.o)
+	rm -f $(CSRCS:@srcroot@%.c=@objroot@%.d)
+	rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.d)
+	rm -f $(CTESTS:@srcroot@%.c=@objroot@%)
+	rm -f $(CTESTS:@srcroot@%.c=@objroot@%.o)
+	rm -f $(CTESTS:@srcroot@%.c=@objroot@%.d)
+	rm -f $(CTESTS:@srcroot@%.c=@objroot@%.out)
+	rm -f $(DSOS) $(STATIC_LIBS)
+
+distclean: clean
+	rm -rf @objroot@autom4te.cache
+	rm -f @objroot@config.log
+	rm -f @objroot@config.status
+	rm -f @objroot@config.stamp
+	rm -f @cfghdrs_out@
+	rm -f @cfgoutputs_out@
+
+relclean: distclean
+	rm -f @objroot@configure
+	rm -f @srcroot@VERSION
+	rm -f $(DOCS_HTML)
+	rm -f $(DOCS_MAN3)
+
+#===============================================================================
+# Re-configuration rules.
+
+ifeq (@enable_autogen@, 1)
+@srcroot@configure : @srcroot@configure.ac
+	cd ./@srcroot@ && @AUTOCONF@
+
+@objroot@config.status : @srcroot@configure
+	./@objroot@config.status --recheck
+
+@srcroot@config.stamp.in : @srcroot@configure.ac
+	echo stamp > @srcroot@config.stamp.in
+
+@objroot@config.stamp : @cfgoutputs_in@ @cfghdrs_in@ @srcroot@configure
+	./@objroot@config.status
+	@touch $@
+
+# There must be some action in order for make to re-read Makefile when it is
+# out of date.
+@cfgoutputs_out@ @cfghdrs_out@ : @objroot@config.stamp
+	@true
+endif
diff --git a/deps/jemalloc.orig/README b/deps/jemalloc.orig/README
new file mode 100644
index 00000000..4d7b552b
--- /dev/null
+++ b/deps/jemalloc.orig/README
@@ -0,0 +1,16 @@
+jemalloc is a general-purpose scalable concurrent malloc(3) implementation.
+This distribution is a stand-alone "portable" implementation that currently
+targets Linux and Apple OS X.  jemalloc is included as the default allocator in
+the FreeBSD and NetBSD operating systems, and it is used by the Mozilla Firefox
+web browser on Microsoft Windows-related platforms.  Depending on your needs,
+one of the other divergent versions may suit your needs better than this
+distribution.
+
+The COPYING file contains copyright and licensing information.
+
+The INSTALL file contains information on how to configure, build, and install
+jemalloc.
+
+The ChangeLog file contains a brief summary of changes for each release.
+
+URL: http://www.canonware.com/jemalloc/
diff --git a/deps/jemalloc.orig/autogen.sh b/deps/jemalloc.orig/autogen.sh
new file mode 100755
index 00000000..75f32da6
--- /dev/null
+++ b/deps/jemalloc.orig/autogen.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+# Regenerate configure with autoconf, then run it with --enable-autogen.
+for i in autoconf; do
+    echo "$i"
+    $i
+    status=$?	# capture now: the test below would overwrite $?
+    if [ $status -ne 0 ]; then
+	echo "Error $status in $i"
+	exit 1
+    fi
+done
+# Pass "$@" quoted so arguments containing spaces reach configure intact.
+echo "./configure --enable-autogen $@"
+./configure --enable-autogen "$@" || {
+    echo "Error $? in ./configure"
+    exit 1
+}
diff --git a/deps/jemalloc.orig/bin/pprof b/deps/jemalloc.orig/bin/pprof
new file mode 100755
index 00000000..280ddcc8
--- /dev/null
+++ b/deps/jemalloc.orig/bin/pprof
@@ -0,0 +1,4893 @@
+#! /usr/bin/env perl
+
+# Copyright (c) 1998-2007, Google Inc.
+# All rights reserved.
+# 
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+# 
+#     * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+#     * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# ---
+# Program for printing the profile generated by common/profiler.cc,
+# or by the heap profiler (common/debugallocation.cc)
+#
+# The profile contains a sequence of entries of the form:
+#       <count> <stack trace>
+# This program parses the profile, and generates user-readable
+# output.
+#
+# Examples:
+#
+# % tools/pprof "program" "profile"
+#   Enters "interactive" mode
+#
+# % tools/pprof --text "program" "profile"
+#   Generates one line per procedure
+#
+# % tools/pprof --gv "program" "profile"
+#   Generates annotated call-graph and displays via "gv"
+#
+# % tools/pprof --gv --focus=Mutex "program" "profile"
+#   Restrict to code paths that involve an entry that matches "Mutex"
+#
+# % tools/pprof --gv --focus=Mutex --ignore=string "program" "profile"
+#   Restrict to code paths that involve an entry that matches "Mutex"
+#   and does not match "string"
+#
+# % tools/pprof --list=IBF_CheckDocid "program" "profile"
+#   Generates disassembly listing of all routines with at least one
+#   sample that match the --list=<regexp> pattern.  The listing is
+#   annotated with the flat and cumulative sample counts at each line.
+#
+# % tools/pprof --disasm=IBF_CheckDocid "program" "profile"
+#   Generates disassembly listing of all routines with at least one
+#   sample that match the --disasm=<regexp> pattern.  The listing is
+#   annotated with the flat and cumulative sample counts at each PC value.
+#
+# TODO: Use color to indicate files?
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $PPROF_VERSION = "1.7";
+
+# These are the object tools we use which can come from a
+# user-specified location using --tools, from the PPROF_TOOLS
+# environment variable, or from the environment.
+my %obj_tool_map = (
+  "objdump" => "objdump",
+  "nm" => "nm",
+  "addr2line" => "addr2line",
+  "c++filt" => "c++filt",
+  ## ConfigureObjTools may add architecture-specific entries:
+  #"nm_pdb" => "nm-pdb",       # for reading windows (PDB-format) executables
+  #"addr2line_pdb" => "addr2line-pdb",                                # ditto
+  #"otool" => "otool",         # equivalent of objdump on OS X
+);
+my $DOT = "dot";          # leave non-absolute, since it may be in /usr/local
+my $GV = "gv";
+my $EVINCE = "evince";    # could also be xpdf or perhaps acroread
+my $KCACHEGRIND = "kcachegrind";
+my $PS2PDF = "ps2pdf";
+# These are used for dynamic profiles
+my $URL_FETCHER = "curl -s";
+
+# These are the web pages that servers need to support for dynamic profiles
+my $HEAP_PAGE = "/pprof/heap";
+my $PROFILE_PAGE = "/pprof/profile";   # must support cgi-param "?seconds=#"
+my $PMUPROFILE_PAGE = "/pprof/pmuprofile(?:\\?.*)?"; # must support cgi-param
+                                                # ?seconds=#&event=x&period=n
+my $GROWTH_PAGE = "/pprof/growth";
+my $CONTENTION_PAGE = "/pprof/contention";
+my $WALL_PAGE = "/pprof/wall(?:\\?.*)?";  # accepts options like namefilter
+my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?";
+my $CENSUSPROFILE_PAGE = "/pprof/censusprofile";  # must support "?seconds=#"
+my $SYMBOL_PAGE = "/pprof/symbol";     # must support symbol lookup via POST
+my $PROGRAM_NAME_PAGE = "/pprof/cmdline";
+
+# These are the web pages that can be named on the command line.
+# All the alternatives must begin with /.
+my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" .
+               "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" .
+               "$FILTEREDPROFILE_PAGE|$CENSUSPROFILE_PAGE)";
+
+# default binary name
+my $UNKNOWN_BINARY = "(unknown)";
+
+# There is a pervasive dependency on the length (in hex characters,
+# i.e., nibbles) of an address, distinguishing between 32-bit and
+# 64-bit profiles.  To err on the safe side, default to 64-bit here:
+my $address_length = 16;
+
+# A list of paths to search for shared object files
+my @prefix_list = ();
+
+# Special routine name that should not have any symbols.
+# Used as separator to parse "addr2line -i" output.
+my $sep_symbol = '_fini';
+my $sep_address = undef;
+
+##### Argument parsing #####
+
+sub usage_string {  # Returns the full --help text; page names are interpolated.
+  return <<EOF;
+Usage:
+pprof [options] <program> <profiles>
+   <profiles> is a space separated list of profile names.
+pprof [options] <symbolized-profiles>
+   <symbolized-profiles> is a list of profile files where each file contains
+   the necessary symbol mappings  as well as profile data (likely generated
+   with --raw).
+pprof [options] <profile>
+   <profile> is a remote form.  Symbols are obtained from host:port$SYMBOL_PAGE
+
+   Each name can be:
+   /path/to/profile        - a path to a profile file
+   host:port[/<service>]   - a location of a service to get profile from
+
+   The /<service> can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile,
+                         $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall,
+                         $CENSUSPROFILE_PAGE, or /pprof/filteredprofile.
+   For instance: "pprof http://myserver.com:80$HEAP_PAGE".
+   If /<service> is omitted, the service defaults to $PROFILE_PAGE (cpu profiling).
+pprof --symbols <program>
+   Maps addresses to symbol names.  In this mode, stdin should be a
+   list of library mappings, in the same format as is found in the heap-
+   and cpu-profile files (this loosely matches that of /proc/self/maps
+   on linux), followed by a list of hex addresses to map, one per line.
+
+   For more help with querying remote servers, including how to add the
+   necessary server-side support code, see this filename (or one like it):
+
+   /usr/doc/google-perftools-$PPROF_VERSION/pprof_remote_servers.html
+
+Options:
+   --cum               Sort by cumulative data
+   --base=<base>       Subtract <base> from <profile> before display
+   --interactive       Run in interactive mode (interactive "help" gives help) [default]
+   --seconds=<n>       Length of time for dynamic profiles [default=30 secs]
+   --add_lib=<file>    Read additional symbols and line info from the given library
+   --lib_prefix=<dir>  Comma separated list of library path prefixes
+
+Reporting Granularity:
+   --addresses         Report at address level
+   --lines             Report at source line level
+   --functions         Report at function level [default]
+   --files             Report at source file level
+
+Output type:
+   --text              Generate text report
+   --callgrind         Generate callgrind format to stdout
+   --gv                Generate Postscript and display
+   --evince            Generate PDF and display
+   --web               Generate SVG and display
+   --list=<regexp>     Generate source listing of matching routines
+   --disasm=<regexp>   Generate disassembly of matching routines
+   --symbols           Print demangled symbol names found at given addresses
+   --dot               Generate DOT file to stdout
+   --ps                Generate Postscript to stdout
+   --pdf               Generate PDF to stdout
+   --svg               Generate SVG to stdout
+   --gif               Generate GIF to stdout
+   --raw               Generate symbolized pprof data (useful with remote fetch)
+
+Heap-Profile Options:
+   --inuse_space       Display in-use (mega)bytes [default]
+   --inuse_objects     Display in-use objects
+   --alloc_space       Display allocated (mega)bytes
+   --alloc_objects     Display allocated objects
+   --show_bytes        Display space in bytes
+   --drop_negative     Ignore negative differences
+
+Contention-profile options:
+   --total_delay       Display total delay at each region [default]
+   --contentions       Display number of delays at each region
+   --mean_delay        Display mean delay at each region
+
+Call-graph Options:
+   --nodecount=<n>     Show at most so many nodes [default=80]
+   --nodefraction=<f>  Hide nodes below <f>*total [default=.005]
+   --edgefraction=<f>  Hide edges below <f>*total [default=.001]
+   --maxdegree=<n>     Max incoming/outgoing edges per node [default=8]
+   --focus=<regexp>    Focus on nodes matching <regexp>
+   --ignore=<regexp>   Ignore nodes matching <regexp>
+   --scale=<n>         Set GV scaling [default=0]
+   --heapcheck         Make nodes with non-0 object counts
+                       (i.e. direct leak generators) more visible
+
+Miscellaneous:
+   --tools=<prefix or binary:fullpath>[,...]   \$PATH for object tool pathnames
+   --test              Run unit tests
+   --help              This message
+   --version           Version information
+
+Environment Variables:
+   PPROF_TMPDIR        Profiles directory. Defaults to \$HOME/pprof
+   PPROF_TOOLS         Prefix for object tools pathnames
+
+Examples:
+
+pprof /bin/ls ls.prof
+                       Enters "interactive" mode
+pprof --text /bin/ls ls.prof
+                       Outputs one line per procedure
+pprof --web /bin/ls ls.prof
+                       Displays annotated call-graph in web browser
+pprof --gv /bin/ls ls.prof
+                       Displays annotated call-graph via 'gv'
+pprof --gv --focus=Mutex /bin/ls ls.prof
+                       Restricts to code paths including a .*Mutex.* entry
+pprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof
+                       Code paths including Mutex but not string
+pprof --list=getdir /bin/ls ls.prof
+                       (Per-line) annotated source listing for getdir()
+pprof --disasm=getdir /bin/ls ls.prof
+                       (Per-PC) annotated disassembly for getdir()
+
+pprof http://localhost:1234/
+                       Enters "interactive" mode
+pprof --text localhost:1234
+                       Outputs one line per procedure for localhost:1234
+pprof --raw localhost:1234 > ./local.raw
+pprof --text ./local.raw
+                       Fetches a remote profile for later analysis and then
+                       analyzes it in text mode.
+EOF
+}
+
+sub version_string {  # Returns the banner text printed for --version.
+  return <<"EOF";
+pprof (part of google-perftools $PPROF_VERSION)
+
+Copyright 1998-2007 Google Inc.
+
+This is BSD licensed software; see the source for copying conditions
+and license information.
+There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE.
+EOF
+}
+
+sub usage {
+  my ($reason) = @_;
+  # Report the problem, then the full help text, then the problem once more
+  # as a reminder after the long usage listing.  Exits with status 1.
+  print STDERR "$reason\n\n", usage_string(), "\nFATAL ERROR: $reason\n";
+  exit(1);
+}
+
+sub Init() {
+  # Setup tmp-file name and handler to clean it up.
+  # We do this in the very beginning so that we can use
+  # error() and cleanup() function anytime here after.
+  $main::tmpfile_sym = "/tmp/pprof$$.sym";
+  $main::tmpfile_ps = "/tmp/pprof$$";
+  $main::next_tmpfile = 0;
+  $SIG{'INT'} = \&sighandler;
+
+  # Cache from filename/linenumber to source code
+  $main::source_cache = ();
+
+  $main::opt_help = 0;
+  $main::opt_version = 0;
+
+  $main::opt_cum = 0;
+  $main::opt_base = '';
+  $main::opt_addresses = 0;
+  $main::opt_lines = 0;
+  $main::opt_functions = 0;
+  $main::opt_files = 0;
+  $main::opt_lib_prefix = "";
+
+  $main::opt_text = 0;
+  $main::opt_callgrind = 0;
+  $main::opt_list = "";
+  $main::opt_disasm = "";
+  $main::opt_symbols = 0;
+  $main::opt_gv = 0;
+  $main::opt_evince = 0;
+  $main::opt_web = 0;
+  $main::opt_dot = 0;
+  $main::opt_ps = 0;
+  $main::opt_pdf = 0;
+  $main::opt_gif = 0;
+  $main::opt_svg = 0;
+  $main::opt_raw = 0;
+
+  $main::opt_nodecount = 80;
+  $main::opt_nodefraction = 0.005;
+  $main::opt_edgefraction = 0.001;
+  $main::opt_maxdegree = 8;
+  $main::opt_focus = '';
+  $main::opt_ignore = '';
+  $main::opt_scale = 0;
+  $main::opt_heapcheck = 0;
+  $main::opt_seconds = 30;
+  $main::opt_lib = "";
+
+  $main::opt_inuse_space   = 0;
+  $main::opt_inuse_objects = 0;
+  $main::opt_alloc_space   = 0;
+  $main::opt_alloc_objects = 0;
+  $main::opt_show_bytes    = 0;
+  $main::opt_drop_negative = 0;
+  $main::opt_interactive   = 0;
+
+  $main::opt_total_delay = 0;
+  $main::opt_contentions = 0;
+  $main::opt_mean_delay = 0;
+
+  $main::opt_tools   = "";
+  $main::opt_debug   = 0;
+  $main::opt_test    = 0;
+
+  # These are undocumented flags used only by unittests.
+  $main::opt_test_stride = 0;
+
+  # Are we using $SYMBOL_PAGE?
+  $main::use_symbol_page = 0;
+
+  # Files returned by TempName.
+  %main::tempnames = ();
+
+  # Type of profile we are dealing with
+  # Supported types:
+  #     cpu
+  #     heap
+  #     growth
+  #     contention
+  $main::profile_type = '';     # Empty type means "unknown"
+
+  GetOptions("help!"          => \$main::opt_help,
+             "version!"       => \$main::opt_version,
+             "cum!"           => \$main::opt_cum,
+             "base=s"         => \$main::opt_base,
+             "seconds=i"      => \$main::opt_seconds,
+             "add_lib=s"      => \$main::opt_lib,
+             "lib_prefix=s"   => \$main::opt_lib_prefix,
+             "functions!"     => \$main::opt_functions,
+             "lines!"         => \$main::opt_lines,
+             "addresses!"     => \$main::opt_addresses,
+             "files!"         => \$main::opt_files,
+             "text!"          => \$main::opt_text,
+             "callgrind!"     => \$main::opt_callgrind,
+             "list=s"         => \$main::opt_list,
+             "disasm=s"       => \$main::opt_disasm,
+             "symbols!"       => \$main::opt_symbols,
+             "gv!"            => \$main::opt_gv,
+             "evince!"        => \$main::opt_evince,
+             "web!"           => \$main::opt_web,
+             "dot!"           => \$main::opt_dot,
+             "ps!"            => \$main::opt_ps,
+             "pdf!"           => \$main::opt_pdf,
+             "svg!"           => \$main::opt_svg,
+             "gif!"           => \$main::opt_gif,
+             "raw!"           => \$main::opt_raw,
+             "interactive!"   => \$main::opt_interactive,
+             "nodecount=i"    => \$main::opt_nodecount,
+             "nodefraction=f" => \$main::opt_nodefraction,
+             "edgefraction=f" => \$main::opt_edgefraction,
+             "maxdegree=i"    => \$main::opt_maxdegree,
+             "focus=s"        => \$main::opt_focus,
+             "ignore=s"       => \$main::opt_ignore,
+             "scale=i"        => \$main::opt_scale,
+             "heapcheck"      => \$main::opt_heapcheck,
+             "inuse_space!"   => \$main::opt_inuse_space,
+             "inuse_objects!" => \$main::opt_inuse_objects,
+             "alloc_space!"   => \$main::opt_alloc_space,
+             "alloc_objects!" => \$main::opt_alloc_objects,
+             "show_bytes!"    => \$main::opt_show_bytes,
+             "drop_negative!" => \$main::opt_drop_negative,
+             "total_delay!"   => \$main::opt_total_delay,
+             "contentions!"   => \$main::opt_contentions,
+             "mean_delay!"    => \$main::opt_mean_delay,
+             "tools=s"        => \$main::opt_tools,
+             "test!"          => \$main::opt_test,
+             "debug!"         => \$main::opt_debug,
+             # Undocumented flags used only by unittests:
+             "test_stride=i"  => \$main::opt_test_stride,
+      ) || usage("Invalid option(s)");
+
+  # Deal with the standard --help and --version
+  if ($main::opt_help) {
+    print usage_string();
+    exit(0);
+  }
+
+  if ($main::opt_version) {
+    print version_string();
+    exit(0);
+  }
+
+  # Disassembly/listing/symbols mode requires address-level info
+  if ($main::opt_disasm || $main::opt_list || $main::opt_symbols) {
+    $main::opt_functions = 0;
+    $main::opt_lines = 0;
+    $main::opt_addresses = 1;
+    $main::opt_files = 0;
+  }
+
+  # Check heap-profiling flags
+  if ($main::opt_inuse_space +
+      $main::opt_inuse_objects +
+      $main::opt_alloc_space +
+      $main::opt_alloc_objects > 1) {
+    usage("Specify at most on of --inuse/--alloc options");
+  }
+
+  # Check output granularities
+  my $grains =
+      $main::opt_functions +
+      $main::opt_lines +
+      $main::opt_addresses +
+      $main::opt_files +
+      0;
+  if ($grains > 1) {
+    usage("Only specify one output granularity option");
+  }
+  if ($grains == 0) {
+    $main::opt_functions = 1;
+  }
+
+  # Check output modes
+  my $modes =
+      $main::opt_text +
+      $main::opt_callgrind +
+      ($main::opt_list eq '' ? 0 : 1) +
+      ($main::opt_disasm eq '' ? 0 : 1) +
+      ($main::opt_symbols == 0 ? 0 : 1) +
+      $main::opt_gv +
+      $main::opt_evince +
+      $main::opt_web +
+      $main::opt_dot +
+      $main::opt_ps +
+      $main::opt_pdf +
+      $main::opt_svg +
+      $main::opt_gif +
+      $main::opt_raw +
+      $main::opt_interactive +
+      0;
+  if ($modes > 1) {
+    usage("Only specify one output mode");
+  }
+  if ($modes == 0) {
+    if (-t STDOUT) {  # If STDOUT is a tty, activate interactive mode
+      $main::opt_interactive = 1;
+    } else {
+      $main::opt_text = 1;
+    }
+  }
+
+  if ($main::opt_test) {
+    RunUnitTests();
+    # Should not return
+    exit(1);
+  }
+
+  # Binary name and profile arguments list
+  $main::prog = "";
+  @main::pfile_args = ();
+
+  # Remote profiling without a binary (using $SYMBOL_PAGE instead)
+  if (IsProfileURL($ARGV[0])) {
+    $main::use_symbol_page = 1;
+  } elsif (IsSymbolizedProfileFile($ARGV[0])) {
+    $main::use_symbolized_profile = 1;
+    $main::prog = $UNKNOWN_BINARY;  # will be set later from the profile file
+  }
+
+  if ($main::use_symbol_page || $main::use_symbolized_profile) {
+    # We don't need a binary!
+    my %disabled = ('--lines' => $main::opt_lines,
+                    '--disasm' => $main::opt_disasm);
+    for my $option (keys %disabled) {
+      usage("$option cannot be used without a binary") if $disabled{$option};
+    }
+    # Set $main::prog later...
+    scalar(@ARGV) || usage("Did not specify profile file");
+  } elsif ($main::opt_symbols) {
+    # --symbols needs a binary-name (to run nm on, etc) but not profiles
+    $main::prog = shift(@ARGV) || usage("Did not specify program");
+  } else {
+    $main::prog = shift(@ARGV) || usage("Did not specify program");
+    scalar(@ARGV) || usage("Did not specify profile file");
+  }
+
+  # Parse profile file/location arguments
+  foreach my $farg (@ARGV) {
+    if ($farg =~ m/(.*)\@([0-9]+)(|\/.*)$/ ) {
+      my $machine = $1;
+      my $num_machines = $2;
+      my $path = $3;
+      for (my $i = 0; $i < $num_machines; $i++) {
+        unshift(@main::pfile_args, "$i.$machine$path");
+      }
+    } else {
+      unshift(@main::pfile_args, $farg);
+    }
+  }
+
+  if ($main::use_symbol_page) {
+    unless (IsProfileURL($main::pfile_args[0])) {
+      error("The first profile should be a remote form to use $SYMBOL_PAGE\n");
+    }
+    CheckSymbolPage();
+    $main::prog = FetchProgramName();
+  } elsif (!$main::use_symbolized_profile) {  # may not need objtools!
+    ConfigureObjTools($main::prog)
+  }
+
+  # Break the opt_lib_prefix into the prefix_list array
+  @prefix_list = split (',', $main::opt_lib_prefix);
+
+  # Remove trailing / from the prefixes, in the list to prevent
+  # searching things like /my/path//lib/mylib.so
+  foreach (@prefix_list) {
+    s|/+$||;
+  }
+}
+
+sub Main() {
+  Init();
+  $main::collected_profile = undef;
+  @main::profile_files = ();
+  $main::op_time = time();
+
+  # Printing symbols is special and requires a lot less info than most.
+  if ($main::opt_symbols) {
+    PrintSymbols(*STDIN);   # Get /proc/maps and symbols output from stdin
+    return;
+  }
+
+  # Fetch all profile data
+  FetchDynamicProfiles();
+
+  # this will hold symbols that we read from the profile files
+  my $symbol_map = {};
+
+  # Read one profile, pick the last item on the list
+  my $data = ReadProfile($main::prog, pop(@main::profile_files));
+  my $profile = $data->{profile};
+  my $pcs = $data->{pcs};
+  my $libs = $data->{libs};   # Info about main program and shared libraries
+  $symbol_map = MergeSymbols($symbol_map, $data->{symbols});
+
+  # Add additional profiles, if available.
+  if (scalar(@main::profile_files) > 0) {
+    foreach my $pname (@main::profile_files) {
+      my $data2 = ReadProfile($main::prog, $pname);
+      $profile = AddProfile($profile, $data2->{profile});
+      $pcs = AddPcs($pcs, $data2->{pcs});
+      $symbol_map = MergeSymbols($symbol_map, $data2->{symbols});
+    }
+  }
+
+  # Subtract base from profile, if specified
+  if ($main::opt_base ne '') {
+    my $base = ReadProfile($main::prog, $main::opt_base);
+    $profile = SubtractProfile($profile, $base->{profile});
+    $pcs = AddPcs($pcs, $base->{pcs});
+    $symbol_map = MergeSymbols($symbol_map, $base->{symbols});
+  }
+
+  # Get total data in profile
+  my $total = TotalProfile($profile);
+
+  # Collect symbols
+  my $symbols;
+  if ($main::use_symbolized_profile) {
+    $symbols = FetchSymbols($pcs, $symbol_map);
+  } elsif ($main::use_symbol_page) {
+    $symbols = FetchSymbols($pcs);
+  } else {
+    # TODO(csilvers): $libs uses the /proc/self/maps data from profile1,
+    # which may differ from the data from subsequent profiles, especially
+    # if they were run on different machines.  Use appropriate libs for
+    # each pc somehow.
+    $symbols = ExtractSymbols($libs, $pcs);
+  }
+
+  # Remove uninteresting stack items
+  $profile = RemoveUninterestingFrames($symbols, $profile);
+
+  # Focus?
+  if ($main::opt_focus ne '') {
+    $profile = FocusProfile($symbols, $profile, $main::opt_focus);
+  }
+
+  # Ignore?
+  if ($main::opt_ignore ne '') {
+    $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore);
+  }
+
+  my $calls = ExtractCalls($symbols, $profile);
+
+  # Reduce profiles to required output granularity, and also clean
+  # each stack trace so a given entry exists at most once.
+  my $reduced = ReduceProfile($symbols, $profile);
+
+  # Get derived profiles
+  my $flat = FlatProfile($reduced);
+  my $cumulative = CumulativeProfile($reduced);
+
+  # Print
+  if (!$main::opt_interactive) {
+    if ($main::opt_disasm) {
+      PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm, $total);
+    } elsif ($main::opt_list) {
+      PrintListing($libs, $flat, $cumulative, $main::opt_list);
+    } elsif ($main::opt_text) {
+      # Make sure the output is empty when have nothing to report
+      # (only matters when --heapcheck is given but we must be
+      # compatible with old branches that did not pass --heapcheck always):
+      if ($total != 0) {
+        printf("Total: %s %s\n", Unparse($total), Units());
+      }
+      PrintText($symbols, $flat, $cumulative, $total, -1);
+    } elsif ($main::opt_raw) {
+      PrintSymbolizedProfile($symbols, $profile, $main::prog);
+    } elsif ($main::opt_callgrind) {
+      PrintCallgrind($calls);
+    } else {
+      if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
+        if ($main::opt_gv) {
+          RunGV(TempName($main::next_tmpfile, "ps"), "");
+        } elsif ($main::opt_evince) {
+	  RunEvince(TempName($main::next_tmpfile, "pdf"), "");
+        } elsif ($main::opt_web) {
+          my $tmp = TempName($main::next_tmpfile, "svg");
+          RunWeb($tmp);
+          # The command we run might hand the file name off
+          # to an already running browser instance and then exit.
+          # Normally, we'd remove $tmp on exit (right now),
+          # but fork a child to remove $tmp a little later, so that the
+          # browser has time to load it first.
+          delete $main::tempnames{$tmp};
+          if (fork() == 0) {
+            sleep 5;
+            unlink($tmp);
+            exit(0);
+          }
+        }
+      } else {
+        cleanup();
+        exit(1);
+      }
+    }
+  } else {
+    InteractiveMode($profile, $symbols, $libs, $total);
+  }
+
+  cleanup();
+  exit(0);
+}
+
+##### Entry Point #####
+
+Main();
+
+# Temporary code to detect if we're running on a Goobuntu system.
+# These systems don't have the right stuff installed for the special
+# Readline libraries to work, so as a temporary workaround, we default
+# to using the normal stdio code, rather than the fancier readline-based
+# code
+sub ReadlineMightFail {
+  # Returns 0 when libtermcap exists (so readline should be okay),
+  # and 1 when it is missing.
+  my $termcap_lib = '/lib/libtermcap.so.2';
+  return 1 unless -e $termcap_lib;
+  return 0;
+}
+
+sub RunGV {
+  my ($ps_path, $bg_suffix) = @_;   # suffix: "" or " &" to run in background
+  # Probe for a gv that accepts double-dash options (probe exits with 0).
+  my $modern_gv = (system("$GV --version >/dev/null 2>&1") == 0);
+  unless ($modern_gv) {
+    # Old gv version - only supports options that use single dash.
+    print STDERR "$GV -scale $main::opt_scale\n";
+    return system("$GV -scale $main::opt_scale $ps_path$bg_suffix");
+  }
+  # Options using double dash are supported by this gv version.
+  # Also, turn on noantialias to better handle bug in gv for
+  # postscript files with large dimensions.
+  # TODO: Maybe we should not pass the --noantialias flag
+  # if the gv version is known to work properly without the flag.
+  return system("$GV --scale=$main::opt_scale --noantialias $ps_path$bg_suffix");
+}
+
+sub RunEvince {
+  # Args: the file to display, then "" or " &" (the latter backgrounds evince).
+  my ($doc_path, $bg_suffix) = @_;
+  return system("$EVINCE $doc_path$bg_suffix");
+}
+
+sub RunWeb {
+  my ($svg_path) = @_;
+  print STDERR "Loading web page file:///$svg_path\n";
+
+  # OS X: open will use standard preference for SVG files.
+  if (`uname` =~ /Darwin/) {
+    system("/usr/bin/open", $svg_path);
+    return;
+  }
+
+  # Some kind of Unix; try generic symlinks, then specific browsers.
+  # Works best if the browser is already running.
+  my @candidates = (
+    "/etc/alternatives/gnome-www-browser",
+    "/etc/alternatives/x-www-browser",
+    "google-chrome",
+    "firefox",
+  );
+  for my $browser (@candidates) {
+    # Stop at the first candidate whose command reports success.
+    my $status = system($browser, $svg_path);
+    return if $status == 0;
+  }
+
+  # None of the candidates reported success.
+  print STDERR "Could not load web browser.\n";
+}
+
+sub RunKcachegrind {
+  my ($callgrind_file, $bg_suffix) = @_;  # suffix: "" or " &" (background)
+  my $command = "$KCACHEGRIND $callgrind_file$bg_suffix";
+  print STDERR "Starting '$command'\n";   # echo the exact command being run
+  return system($command);
+}
+
+
+##### Interactive helper routines #####
+
+sub InteractiveMode {
+  my ($orig_profile, $symbols, $libs, $total) = @_;
+  $| = 1;  # Make output unbuffered for interactive mode
+
+  print STDERR "Welcome to pprof!  For help, type 'help'.\n";
+
+  # Use ReadLine if it's installed and input comes from a console.
+  my $have_readline = -t STDIN
+      && !ReadlineMightFail()
+      && defined(eval {require Term::ReadLine});
+  if ($have_readline) {
+    my $term = Term::ReadLine->new('pprof');
+    # NOTE(review): unlike the plain-stdin loop below, this loop does not
+    # save/restore $main::opt_lines around each command -- confirm intended.
+    while ( defined ($_ = $term->readline('(pprof) '))) {
+      $term->addhistory($_) if /\S/;
+      # A false return means we got an interactive command to quit.
+      last unless InteractiveCommand($orig_profile, $symbols, $libs, $total, $_);
+    }
+    return;
+  }
+
+  # Don't have readline: prompt on stderr and read raw lines from stdin.
+  while (1) {
+    print STDERR "(pprof) ";
+    $_ = <STDIN>;
+    last unless defined $_;
+    s/\r//g;         # turn windows-looking lines into unix-looking lines
+
+    # Save some flags that might be reset by InteractiveCommand()
+    my $save_opt_lines = $main::opt_lines;
+    my $keep_going = InteractiveCommand($orig_profile, $symbols, $libs, $total, $_);
+    last unless $keep_going;   # quit requested; flags are not restored
+    $main::opt_lines = $save_opt_lines;
+  }
+}
+
+# Takes five args: orig profile, symbols, libs, total, and command to run.
+# Returns 1 if we should keep going, or 0 if we were asked to quit
+sub InteractiveCommand {
+  my($orig_profile, $symbols, $libs, $total, $command) = @_;
+  $_ = $command;                # just to make future m//'s easier
+  if (!defined($_)) {
+    print STDERR "\n";
+    return 0;
+  }
+  if (m/^\s*quit/) {
+    return 0;
+  }
+  if (m/^\s*help/) {
+    InteractiveHelpMessage();
+    return 1;
+  }
+  # Clear all the mode options -- mode is controlled by "$command"
+  $main::opt_text = 0;
+  $main::opt_callgrind = 0;
+  $main::opt_disasm = 0;
+  $main::opt_list = 0;
+  $main::opt_gv = 0;
+  $main::opt_evince = 0;
+  $main::opt_cum = 0;
+
+  if (m/^\s*(text|top)(\d*)\s*(.*)/) {
+    $main::opt_text = 1;
+
+    my $line_limit = ($2 ne "") ? int($2) : 10;
+
+    my $routine;
+    my $ignore;
+    ($routine, $ignore) = ParseInteractiveArgs($3);
+
+    my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore);
+    my $reduced = ReduceProfile($symbols, $profile);
+
+    # Get derived profiles
+    my $flat = FlatProfile($reduced);
+    my $cumulative = CumulativeProfile($reduced);
+
+    PrintText($symbols, $flat, $cumulative, $total, $line_limit);
+    return 1;
+  }
+  if (m/^\s*callgrind\s*([^ \n]*)/) {
+    $main::opt_callgrind = 1;
+
+    # Get derived profiles
+    my $calls = ExtractCalls($symbols, $orig_profile);
+    my $filename = $1;
+    if ( $1 eq '' ) {
+      $filename = TempName($main::next_tmpfile, "callgrind");
+    }
+    PrintCallgrind($calls, $filename);
+    if ( $1 eq '' ) {
+      RunKcachegrind($filename, " & ");
+      $main::next_tmpfile++;
+    }
+
+    return 1;
+  }
+  if (m/^\s*list\s*(.+)/) {
+    $main::opt_list = 1;
+
+    my $routine;
+    my $ignore;
+    ($routine, $ignore) = ParseInteractiveArgs($1);
+
+    my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore);
+    my $reduced = ReduceProfile($symbols, $profile);
+
+    # Get derived profiles
+    my $flat = FlatProfile($reduced);
+    my $cumulative = CumulativeProfile($reduced);
+
+    PrintListing($libs, $flat, $cumulative, $routine);
+    return 1;
+  }
+  if (m/^\s*disasm\s*(.+)/) {
+    $main::opt_disasm = 1;
+
+    my $routine;
+    my $ignore;
+    ($routine, $ignore) = ParseInteractiveArgs($1);
+
+    # Process current profile to account for various settings
+    my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore);
+    my $reduced = ReduceProfile($symbols, $profile);
+
+    # Get derived profiles
+    my $flat = FlatProfile($reduced);
+    my $cumulative = CumulativeProfile($reduced);
+
+    PrintDisassembly($libs, $flat, $cumulative, $routine, $total);
+    return 1;
+  }
+  if (m/^\s*(gv|web|evince)\s*(.*)/) {
+    $main::opt_gv = 0;
+    $main::opt_evince = 0;
+    $main::opt_web = 0;
+    if ($1 eq "gv") {
+      $main::opt_gv = 1;
+    } elsif ($1 eq "evince") {
+      $main::opt_evince = 1;
+    } elsif ($1 eq "web") {
+      $main::opt_web = 1;
+    }
+
+    my $focus;
+    my $ignore;
+    ($focus, $ignore) = ParseInteractiveArgs($2);
+
+    # Process current profile to account for various settings
+    my $profile = ProcessProfile($orig_profile, $symbols, $focus, $ignore);
+    my $reduced = ReduceProfile($symbols, $profile);
+
+    # Get derived profiles
+    my $flat = FlatProfile($reduced);
+    my $cumulative = CumulativeProfile($reduced);
+
+    if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
+      if ($main::opt_gv) {
+        RunGV(TempName($main::next_tmpfile, "ps"), " &");
+      } elsif ($main::opt_evince) {
+        RunEvince(TempName($main::next_tmpfile, "pdf"), " &");
+      } elsif ($main::opt_web) {
+        RunWeb(TempName($main::next_tmpfile, "svg"));
+      }
+      $main::next_tmpfile++;
+    }
+    return 1;
+  }
+  if (m/^\s*$/) {
+    return 1;
+  }
+  print STDERR "Unknown command: try 'help'.\n";
+  return 1;
+}
+
+
+# Applies the optional focus and ignore filters to a profile and reports on
+# standard output how many samples remain after each step.
+#   $orig_profile  profile to filter
+#   $symbols       symbols passed through to FocusProfile()/IgnoreProfile()
+#   $focus         focus regexp, or '' to skip focusing
+#   $ignore        ignore regexp, or '' to skip ignoring
+# Returns the filtered profile ($orig_profile itself when both are '').
+sub ProcessProfile {
+  my $orig_profile = shift;
+  my $symbols = shift;
+  my $focus = shift;
+  my $ignore = shift;
+
+  # Process current profile to account for various settings
+  my $profile = $orig_profile;
+  my $total_count = TotalProfile($profile);
+  printf("Total: %s %s\n", Unparse($total_count), Units());
+
+  # Percentage of the unfiltered total.  An empty profile has a total of
+  # zero; report 0% for it instead of dying on a division by zero.
+  my $percent_of_total = sub {
+    my $count = shift;
+    return ($total_count != 0) ? ($count*100.0) / $total_count : 0;
+  };
+
+  if ($focus ne '') {
+    $profile = FocusProfile($symbols, $profile, $focus);
+    my $focus_count = TotalProfile($profile);
+    printf("After focusing on '%s': %s %s of %s (%0.1f%%)\n",
+           $focus,
+           Unparse($focus_count), Units(),
+           Unparse($total_count), $percent_of_total->($focus_count));
+  }
+  if ($ignore ne '') {
+    $profile = IgnoreProfile($symbols, $profile, $ignore);
+    my $ignore_count = TotalProfile($profile);
+    printf("After ignoring '%s': %s %s of %s (%0.1f%%)\n",
+           $ignore,
+           Unparse($ignore_count), Units(),
+           Unparse($total_count),
+           $percent_of_total->($ignore_count));
+  }
+
+  return $profile;
+}
+
+# Prints the help text for interactive mode on STDERR.  The text is an
+# interpolating here-document, so $PPROF_VERSION is expanded in it.
+# Every command accepted by InteractiveCommand() should be listed here.
+sub InteractiveHelpMessage {
+  print STDERR <<ENDOFHELP;
+Interactive pprof mode
+
+Commands:
+  gv
+  gv [focus] [-ignore1] [-ignore2]
+      Show graphical hierarchical display of current profile.  Without
+      any arguments, shows all samples in the profile.  With the optional
+      "focus" argument, restricts the samples shown to just those where
+      the "focus" regular expression matches a routine name on the stack
+      trace.
+
+  web
+  web [focus] [-ignore1] [-ignore2]
+      Like GV, but displays profile in your web browser instead of using
+      Ghostview. Works best if your web browser is already running.
+      To change the browser that gets used:
+      On Linux, set the /etc/alternatives/gnome-www-browser symlink.
+      On OS X, change the Finder association for SVG files.
+
+  evince
+  evince [focus] [-ignore1] [-ignore2]
+      Like GV, but displays profile as a PDF in evince instead of using
+      Ghostview.
+
+  list [routine_regexp] [-ignore1] [-ignore2]
+      Show source listing of routines whose names match "routine_regexp"
+
+  top [--cum] [-ignore1] [-ignore2]
+  top20 [--cum] [-ignore1] [-ignore2]
+  top37 [--cum] [-ignore1] [-ignore2]
+      Show top lines ordered by flat profile count, or cumulative count
+      if --cum is specified.  If a number is present after 'top', the
+      top K routines will be shown (defaults to showing the top 10)
+
+  disasm [routine_regexp] [-ignore1] [-ignore2]
+      Show disassembly of routines whose names match "routine_regexp",
+      annotated with sample counts.
+
+  callgrind
+  callgrind [filename]
+      Generates callgrind file. If no filename is given, kcachegrind is called.
+
+  help - This listing
+  quit or ^D - End pprof
+
+For commands that accept optional -ignore tags, samples where any routine in
+the stack trace matches the regular expression in any of the -ignore
+parameters will be ignored.
+
+Further pprof details are available at this location (or one similar):
+
+ /usr/doc/google-perftools-$PPROF_VERSION/cpu_profiler.html
+ /usr/doc/google-perftools-$PPROF_VERSION/heap_profiler.html
+
+ENDOFHELP
+}
+# Splits the argument string of an interactive command into words and
+# classifies each one:
+#   -lines / --lines  sets $main::opt_lines
+#   -cum / --cum      sets $main::opt_cum
+#   -<regexp>         adds <regexp> to the ignore pattern
+#   anything else     adds the word to the focus pattern
+# Several focus (or ignore) words are combined with "|".
+# Returns the list ($focus, $ignore); either may be the empty string.
+sub ParseInteractiveArgs {
+  my $args = shift;
+  my $focus = "";
+  my $ignore = "";
+
+  foreach my $word (split(/ +/, $args)) {
+    if ($word =~ m/^(--|-)lines$/) {
+      $main::opt_lines = 1;
+      next;
+    }
+    if ($word =~ m/^(--|-)cum$/) {
+      $main::opt_cum = 1;
+      next;
+    }
+    if ($word =~ m/^-(.*)/) {
+      # Any other dashed word is an ignore regexp (without the dash)
+      $ignore .= "|" if ($ignore ne "");
+      $ignore .= $1;
+      next;
+    }
+    $focus .= "|" if ($focus ne "");
+    $focus .= $word;
+  }
+
+  print STDERR "Ignoring samples in call stacks that match '$ignore'\n"
+      if ($ignore ne "");
+  return ($focus, $ignore);
+}
+
+##### Output code #####
+
+# Returns the name of temporary file number $fnum with extension $ext,
+# i.e. "<$main::tmpfile_ps>.<fnum>.<ext>", and records that name as a key
+# of %main::tempnames.
+sub TempName {
+  my ($fnum, $ext) = @_;
+  my $name = join(".", $main::tmpfile_ps, $fnum, $ext);
+  $main::tempnames{$name} = 1;
+  return $name;
+}
+
+# Print profile data in packed binary format (64-bit) to standard out.
+# $profile is a hash ref whose keys are stacks (addresses separated by
+# "\n") and whose values are the counts for those stacks.  Every 64-bit
+# value is written with pack('L*') as two 32-bit words, low word first.
+sub PrintProfileData {
+  my $profile = shift;
+
+  # print header (64-bit style)
+  # (zero) (header-size) (version) (sample-period) (zero)
+  print pack('L*', 0, 0, 3, 0, 0, 0, 1, 0, 0, 0);
+
+  foreach my $k (keys(%{$profile})) {
+    my $count = $profile->{$k};
+    my @addrs = split(/\n/, $k);
+    # Stacks without any address are not written at all.
+    if ($#addrs >= 0) {
+      my $depth = $#addrs + 1;
+      # int(foo / 2**32) is the only reliable way to get rid of bottom
+      # 32 bits on both 32- and 64-bit systems.
+      print pack('L*', $count & 0xFFFFFFFF, int($count / 2**32));
+      print pack('L*', $depth & 0xFFFFFFFF, int($depth / 2**32));
+
+      foreach my $full_addr (@addrs) {
+        my $addr = $full_addr;
+        $addr =~ s/0x0*//;  # strip off leading 0x, zeroes
+        # More than 16 hex digits cannot be a 64-bit address.
+        # NOTE(review): the depth written above already counted this
+        # address, so skipping it leaves the record one entry short.
+        if (length($addr) > 16) {
+          print STDERR "Invalid address in profile: $full_addr\n";
+          next;
+        }
+        my $low_addr = substr($addr, -8);       # get last 8 hex chars
+        my $high_addr = substr($addr, -16, 8);  # get up to 8 more hex chars
+        print pack('L*', hex('0x' . $low_addr), hex('0x' . $high_addr));
+      }
+    }
+  }
+}
+
+# Print symbols and profile data to standard out: a symbol section
+# followed by a profile section, each introduced by a "--- <marker>" line
+# whose marker is the last path component of $SYMBOL_PAGE / $PROFILE_PAGE.
+#   $symbols  hash ref mapping a pc to an array ref of names; elements
+#             2, 5, 8, ... of that array are printed
+#   $profile  profile dumped with PrintProfileData() when no collected
+#             profile file is available
+#   $prog     optional; printed as "binary=<prog>" when defined
+sub PrintSymbolizedProfile {
+  my $symbols = shift;
+  my $profile = shift;
+  my $prog = shift;
+
+  $SYMBOL_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
+  my $symbol_marker = $&;
+
+  print '--- ', $symbol_marker, "\n";
+  if (defined($prog)) {
+    print 'binary=', $prog, "\n";
+  }
+  while (my ($pc, $name) = each(%{$symbols})) {
+    my $sep = ' ';
+    print '0x', $pc;
+    # We have a list of function names, which include the inlined
+    # calls.  They are separated (and terminated) by --, which is
+    # illegal in function names.
+    for (my $j = 2; $j <= $#{$name}; $j += 3) {
+      print $sep, $name->[$j];
+      $sep = '--';
+    }
+    print "\n";
+  }
+  print '---', "\n";
+
+  $PROFILE_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
+  my $profile_marker = $&;
+  print '--- ', $profile_marker, "\n";
+  if (defined($main::collected_profile)) {
+    # if used with remote fetch, simply dump the collected profile to output.
+    # Fail loudly if the file cannot be read: continuing would emit a
+    # symbolized profile with an empty profile section.
+    open(SRC, "<$main::collected_profile") ||
+        error("$main::collected_profile: $!\n");
+    while (<SRC>) {
+      print $_;
+    }
+    close(SRC);
+  } else {
+    # dump a cpu-format profile to standard out
+    PrintProfileData($profile);
+  }
+}
+
+# Print text output: one line per key of $cumulative giving the flat
+# count, flat percentage, running flat percentage, cumulative count,
+# cumulative percentage and name.  Entries are ordered by decreasing
+# count (cumulative if $main::opt_cum is set, flat otherwise), ties being
+# broken by key.
+#   $symbols     hash ref; when it has an entry for a key, elements [0]
+#                and [1] of that entry are printed instead of the key
+#   $flat        flat profile
+#   $cumulative  cumulative profile
+#   $total       denominator for the percentages
+#   $line_limit  maximum number of entries to process; a negative value
+#                means no limit
+sub PrintText {
+  my $symbols = shift;
+  my $flat = shift;
+  my $cumulative = shift;
+  my $total = shift;
+  my $line_limit = shift;
+
+  # Which profile to sort by?
+  my $s = $main::opt_cum ? $cumulative : $flat;
+
+  my $running_sum = 0;
+  my $lines = 0;
+  foreach my $k (sort { GetEntry($s, $b) <=> GetEntry($s, $a) || $a cmp $b }
+                 keys(%{$cumulative})) {
+    # Stop as soon as $line_limit entries have been handled, so that
+    # "top10" shows 10 entries rather than 11.
+    last if ($line_limit >= 0 && $lines >= $line_limit);
+
+    my $f = GetEntry($flat, $k);
+    my $c = GetEntry($cumulative, $k);
+    $running_sum += $f;
+
+    my $sym = $k;
+    if (exists($symbols->{$k})) {
+      $sym = $symbols->{$k}->[0] . " " . $symbols->{$k}->[1];
+      if ($main::opt_addresses) {
+        $sym = $k . " " . $sym;
+      }
+    }
+
+    # Entries with neither flat nor cumulative samples are not printed,
+    # but they still count towards the limit.
+    if ($f != 0 || $c != 0) {
+      printf("%8s %6s %6s %8s %6s %s\n",
+             Unparse($f),
+             Percent($f, $total),
+             Percent($running_sum, $total),
+             Unparse($c),
+             Percent($c, $total),
+             $sym);
+    }
+    $lines++;
+  }
+}
+
+# Print the call graph in a way that's suitable for callgrind.
+#   $calls  hash ref mapping "file:line:function" (a location) or
+#           "file:line:function -> file:line:function" (caller -> callee)
+#           to a count
+# In interactive mode ($main::opt_interactive) a second argument names the
+# output file; otherwise the output goes to standard out.
+sub PrintCallgrind {
+  my $calls = shift;
+  my $filename;
+  if ($main::opt_interactive) {
+    $filename = shift;
+    print STDERR "Writing callgrind file to '$filename'.\n";
+  } else {
+    $filename = "&STDOUT";
+  }
+  open(CG, ">".$filename) || error("$filename: $!\n");
+  print CG ("events: Hits\n\n");
+  # Process the entries ordered by caller file, then by caller line.  The
+  # inner map pairs each key with its sort fields, the outer map strips
+  # them again.
+  foreach my $call ( map { $_->[0] }
+                     sort { $a->[1] cmp $b ->[1] ||
+                            $a->[2] <=> $b->[2] }
+                     map { /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/;
+                           [$_, $1, $2] }
+                     keys %$calls ) {
+    my $count = int($calls->{$call});
+    $call =~ /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/;
+    my ( $caller_file, $caller_line, $caller_function,
+         $callee_file, $callee_line, $callee_function ) =
+       ( $1, $2, $3, $5, $6, $7 );
+
+    # The text is written with print, not printf: file and function names
+    # are data and may contain '%', which printf would treat as the start
+    # of a conversion.
+    print CG ("fl=$caller_file\nfn=$caller_function\n");
+    if (defined $callee_line) {
+      print CG ("cfl=$callee_file\n");
+      print CG ("cfn=$callee_function\n");
+      print CG ("calls=$count $callee_line\n");
+    }
+    print CG ("$caller_line $count\n\n");
+  }
+  # Close the handle so everything is flushed before the caller hands the
+  # file to another program.
+  close(CG);
+}
+
+# Print disassembly for all routines that match $disasm_opts and that
+# have at least one sample in $cumulative.
+#   $libs         array ref of library descriptors (array refs); elements
+#                 [0], [1] and [3] of each descriptor are used
+#   $flat         flat profile
+#   $cumulative   cumulative profile
+#   $disasm_opts  routine selection passed to GetProcedureBoundaries()
+#   $total        total passed on to PrintDisassembledFunction()
+sub PrintDisassembly {
+  my ($libs, $flat, $cumulative, $disasm_opts, $total) = @_;
+
+  foreach my $lib (@{$libs}) {
+    my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts);
+    my $offset = AddressSub($lib->[1], $lib->[3]);
+    my @routines = sort ByName keys(%{$symbol_table});
+
+    foreach my $routine (@routines) {
+      my ($start_addr, $end_addr) = @{$symbol_table->{$routine}}[0, 1];
+      my $length = hex(AddressSub($end_addr, $start_addr));
+
+      # Scan the routine's addresses one by one; the first address that
+      # has a sample triggers the (single) printout of the routine.
+      my $addr = AddressAdd($start_addr, $offset);
+      my $scanned = 0;
+      until ($scanned >= $length) {
+        if (defined($cumulative->{$addr})) {
+          PrintDisassembledFunction($lib->[0], $offset,
+                                    $routine, $flat, $cumulative,
+                                    $start_addr, $end_addr, $total);
+          last;
+        }
+        $addr = AddressInc($addr);
+        $scanned++;
+      }
+    }
+  }
+}
+
+# Runs objdump on $prog for the address range [$start_addr, $end_addr)
+# and returns a list (not a reference) of tuples, one array ref per
+# instruction, of the form:
+#       [start_address, filename, linenumber, instruction, limit_address]
+# E.g.,
+#       ["0x806c43d", "/foo/bar.cc", 131, "ret", "0x806c440"]
+# start_address has $offset added to the address reported by objdump.
+# filename is "" and linenumber is -1 until objdump prints a location line.
+sub Disassemble {
+  my $prog = shift;
+  my $offset = shift;
+  my $start_addr = shift;
+  my $end_addr = shift;
+
+  my $objdump = $obj_tool_map{"objdump"};
+  # Plain concatenation, not sprintf: the command contains interpolated
+  # data ($prog in particular) in which a '%' must stay untouched.
+  my $cmd = "$objdump -C -d -l --no-show-raw-insn " .
+            "--start-address=0x$start_addr " .
+            "--stop-address=0x$end_addr $prog";
+  open(OBJDUMP, "$cmd |") || error("$objdump: $!\n");
+  my @result = ();
+  my $filename = "";
+  my $linenumber = -1;
+  my $last = ["", "", "", ""];
+  while (<OBJDUMP>) {
+    s/\r//g;         # turn windows-looking lines into unix-looking lines
+    chomp;           # unlike chop, leaves a final unterminated line intact
+    if (m|\s*([^:\s]+):(\d+)\s*$|) {
+      # Location line of the form:
+      #   <filename>:<linenumber>
+      $filename = $1;
+      $linenumber = $2;
+    } elsif (m/^ +([0-9a-f]+):\s*(.*)/) {
+      # Disassembly line -- zero-extend address to full length
+      my $addr = HexExtend($1);
+      my $k = AddressAdd($addr, $offset);
+      $last->[4] = $k;   # Store ending address for previous instruction
+      # The limit of the newest instruction is provisionally $end_addr; it
+      # is overwritten above when another instruction follows.
+      # NOTE(review): $end_addr is used as passed in, without $offset
+      # added, unlike every other address stored here -- confirm.
+      $last = [$k, $filename, $linenumber, $2, $end_addr];
+      push(@result, $last);
+    }
+  }
+  close(OBJDUMP);
+  return @result;
+}
+
+# Reads lines from the file handle $maps_and_symbols_file.  A line that
+# contains a hex address (like "0xDEADBEEF") contributes that address;
+# every other line is taken to be /proc/maps-like output (same format as
+# expected from the profiles).  After parsing the maps text, prints the
+# "short" symbol name of each address, one per line, in input order;
+# "??" is printed for an address without a name.
+sub PrintSymbols {
+  my $maps_and_symbols_file = shift;
+
+  # ParseLibraries expects pcs to be in a set, so keep both the ordered
+  # list and a set of the addresses.
+  my @ordered_pcs = ();
+  my %pc_set = ();
+  my $maps_text = "";
+  while (defined(my $input = <$maps_and_symbols_file>)) {
+    $input =~ s/\r//g;   # turn windows-looking lines into unix-looking lines
+    if ($input =~ /\b(0x[0-9a-f]+)\b/i) {
+      my $pc = HexExtend($1);
+      push(@ordered_pcs, $pc);
+      $pc_set{$pc} = 1;
+      next;
+    }
+    $maps_text .= $input;
+  }
+
+  my $libs = ParseLibraries($main::prog, $maps_text, \%pc_set);
+  my $symbols = ExtractSymbols($libs, \%pc_set);
+
+  for my $pc (@ordered_pcs) {
+    # ->[0] is the shortname, ->[2] is the full name
+    my $shortname = $symbols->{$pc}->[0] || "??";
+    print("$shortname\n");
+  }
+}
+
+
+# Comparison routine for sort (reads the sort variables $a and $b):
+# orders function names by their ShortFunctionName() form.
+sub ByName {
+  my $left = ShortFunctionName($a);
+  my $right = ShortFunctionName($b);
+  return $left cmp $right;
+}
+
+# Print source-listing for all routines that match $list_opts and that
+# have at least one sample in $cumulative.
+#   $libs        array ref of library descriptors (array refs); elements
+#                [0], [1] and [3] of each descriptor are used
+#   $flat        flat profile
+#   $cumulative  cumulative profile
+#   $list_opts   routine selection passed to GetProcedureBoundaries()
+sub PrintListing {
+  my ($libs, $flat, $cumulative, $list_opts) = @_;
+
+  foreach my $lib (@{$libs}) {
+    my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts);
+    my $offset = AddressSub($lib->[1], $lib->[3]);
+    my @routines = sort ByName keys(%{$symbol_table});
+
+    foreach my $routine (@routines) {
+      my ($start_addr, $end_addr) = @{$symbol_table->{$routine}}[0, 1];
+      my $length = hex(AddressSub($end_addr, $start_addr));
+
+      # Print if there are any samples in this routine: look at each of
+      # its addresses in turn and stop at the first one with a sample.
+      my $addr = AddressAdd($start_addr, $offset);
+      my $scanned = 0;
+      until ($scanned >= $length) {
+        if (defined($cumulative->{$addr})) {
+          PrintSource($lib->[0], $offset,
+                      $routine, $flat, $cumulative,
+                      $start_addr, $end_addr);
+          last;
+        }
+        $addr = AddressInc($addr);
+        $scanned++;
+      }
+    }
+  }
+}
+
+# Returns the indentation of the line (the number of leading whitespace
+# characters), if it has any non-whitespace characters.  Otherwise,
+# returns -1.
+sub Indentation {
+  my $line = shift;
+  # Match the argument itself rather than whatever $_ currently holds.
+  if ($line =~ m/^(\s*)\S/) {
+    return length($1);
+  } else {
+    return -1;
+  }
+}
+
+# Print source-listing for one routine, annotated with flat and
+# cumulative sample counts per source line.
+#   $prog        binary handed to Disassemble()
+#   $offset      offset handed to Disassemble()
+#   $routine     routine name (printed via ShortFunctionName())
+#   $flat        flat profile
+#   $cumulative  cumulative profile
+#   $start_addr, $end_addr  address range of the routine
+# Returns early (after a message on STDERR) if no source file can be
+# determined or the file cannot be opened.
+sub PrintSource {
+  my $prog = shift;
+  my $offset = shift;
+  my $routine = shift;
+  my $flat = shift;
+  my $cumulative = shift;
+  my $start_addr = shift;
+  my $end_addr = shift;
+
+  # Disassemble all instructions (just to get line numbers)
+  my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr);
+
+  # Hack 1: assume that the first source file encountered in the
+  # disassembly contains the routine
+  my $filename = undef;
+  for (my $i = 0; $i <= $#instructions; $i++) {
+    if ($instructions[$i]->[2] >= 0) {
+      $filename = $instructions[$i]->[1];
+      last;
+    }
+  }
+  if (!defined($filename)) {
+    print STDERR "no filename found in $routine\n";
+    return;
+  }
+
+  # Hack 2: assume that the largest line number from $filename is the
+  # end of the procedure.  This is typically safe since if P1 contains
+  # an inlined call to P2, then P2 usually occurs earlier in the
+  # source file.  If this does not work, we might have to compute a
+  # density profile or just print all regions we find.
+  my $lastline = 0;
+  for (my $i = 0; $i <= $#instructions; $i++) {
+    my $f = $instructions[$i]->[1];
+    my $l = $instructions[$i]->[2];
+    if (($f eq $filename) && ($l > $lastline)) {
+      $lastline = $l;
+    }
+  }
+
+  # Hack 3: assume the first source location from "filename" is the start of
+  # the source code.
+  my $firstline = 1;
+  for (my $i = 0; $i <= $#instructions; $i++) {
+    if ($instructions[$i]->[1] eq $filename) {
+      $firstline = $instructions[$i]->[2];
+      last;
+    }
+  }
+
+  # Hack 4: Extend last line forward until its indentation is less than
+  # the indentation we saw on $firstline
+  my $oldlastline = $lastline;
+  {
+    if (!open(FILE, "<$filename")) {
+      print STDERR "$filename: $!\n";
+      return;
+    }
+    my $l = 0;
+    my $first_indentation = -1;
+    while (<FILE>) {
+      s/\r//g;         # turn windows-looking lines into unix-looking lines
+      $l++;
+      my $indent = Indentation($_);
+      if ($l >= $firstline) {
+        if ($first_indentation < 0 && $indent >= 0) {
+          $first_indentation = $indent;
+          # With an unindented first line the scan stops here, so
+          # $lastline is not extended at all.
+          last if ($first_indentation == 0);
+        }
+      }
+      if ($l >= $lastline && $indent >= 0) {
+        if ($indent >= $first_indentation) {
+          $lastline = $l+1;
+        } else {
+          last;
+        }
+      }
+    }
+    close(FILE);
+  }
+
+  # Assign all samples to the range $firstline,$lastline,
+  # Hack 5: If an instruction does not occur in the range, its samples
+  # are moved to the next instruction that occurs in the range.
+  my $samples1 = {};
+  my $samples2 = {};
+  my $running1 = 0;     # Unassigned flat counts
+  my $running2 = 0;     # Unassigned cumulative counts
+  my $total1 = 0;       # Total flat counts
+  my $total2 = 0;       # Total cumulative counts
+  foreach my $e (@instructions) {
+    # Add up counts for all address that fall inside this instruction
+    my $c1 = 0;
+    my $c2 = 0;
+    for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) {
+      $c1 += GetEntry($flat, $a);
+      $c2 += GetEntry($cumulative, $a);
+    }
+    $running1 += $c1;
+    $running2 += $c2;
+    $total1 += $c1;
+    $total2 += $c2;
+    my $file = $e->[1];
+    my $line = $e->[2];
+    if (($file eq $filename) &&
+        ($line >= $firstline) &&
+        ($line <= $lastline)) {
+      # Assign all accumulated samples to this line
+      AddEntry($samples1, $line, $running1);
+      AddEntry($samples2, $line, $running2);
+      $running1 = 0;
+      $running2 = 0;
+    }
+  }
+
+  # Assign any leftover samples to $lastline
+  AddEntry($samples1, $lastline, $running1);
+  AddEntry($samples2, $lastline, $running2);
+
+  printf("ROUTINE ====================== %s in %s\n" .
+         "%6s %6s Total %s (flat / cumulative)\n",
+         ShortFunctionName($routine),
+         $filename,
+         Units(),
+         Unparse($total1),
+         Unparse($total2));
+  if (!open(FILE, "<$filename")) {
+    print STDERR "$filename: $!\n";
+    return;
+  }
+  # Print the routine's lines plus up to 5 lines of context on each side;
+  # "---" lines mark $firstline and $lastline.
+  my $l = 0;
+  while (<FILE>) {
+    s/\r//g;         # turn windows-looking lines into unix-looking lines
+    $l++;
+    if ($l >= $firstline - 5 &&
+        (($l <= $oldlastline + 5) || ($l <= $lastline))) {
+      # chomp, not chop: a final line without a newline must keep its
+      # last character.
+      chomp;
+      my $text = $_;
+      if ($l == $firstline) { printf("---\n"); }
+      printf("%6s %6s %4d: %s\n",
+             UnparseAlt(GetEntry($samples1, $l)),
+             UnparseAlt(GetEntry($samples2, $l)),
+             $l,
+             $text);
+      if ($l == $lastline)  { printf("---\n"); }
+    };
+  }
+  close(FILE);
+}
+
+# Return the source line for the specified file/linenumber.
+# Returns undef if not found.
+# File contents are cached in %main::source_cache (file name => ref to an
+# array of lines; index 0 holds a placeholder so that line numbers can be
+# used as indices).  A file that cannot be opened is cached as an empty
+# array.  The whole cache is dropped once it holds more than 100 files.
+sub SourceLine {
+  my $file = shift;
+  my $line = shift;
+
+  # Look in cache
+  if (!defined($main::source_cache{$file})) {
+    if (100 < scalar keys(%main::source_cache)) {
+      # Clear the cache when it gets too big.  This has to empty the hash
+      # %main::source_cache; the scalar $main::source_cache is a different
+      # variable and assigning to it would leave the cache untouched.
+      %main::source_cache = ();
+    }
+
+    # Read all lines from the file
+    if (!open(FILE, "<$file")) {
+      print STDERR "$file: $!\n";
+      $main::source_cache{$file} = [];  # Cache the negative result
+      return undef;
+    }
+    my $lines = [];
+    push(@{$lines}, "");        # So we can use 1-based line numbers as indices
+    while (<FILE>) {
+      push(@{$lines}, $_);
+    }
+    close(FILE);
+
+    # Save the lines in the cache
+    $main::source_cache{$file} = $lines;
+  }
+
+  my $lines = $main::source_cache{$file};
+  if (($line < 0) || ($line > $#{$lines})) {
+    return undef;
+  } else {
+    return $lines->[$line];
+  }
+}
+
+# Print disassembly for one routine with interspersed source if available
+#   $prog        binary handed to Disassemble()
+#   $offset      offset handed to Disassemble(); subtracted again from the
+#                instruction addresses before they are printed
+#   $routine     routine name (printed via ShortFunctionName())
+#   $flat        flat profile
+#   $cumulative  cumulative profile
+#   $start_addr, $end_addr  address range of the routine
+#   $total       denominator of the "% of total" figure in the header
+sub PrintDisassembledFunction {
+  my $prog = shift;
+  my $offset = shift;
+  my $routine = shift;
+  my $flat = shift;
+  my $cumulative = shift;
+  my $start_addr = shift;
+  my $end_addr = shift;
+  my $total = shift;
+
+  # Disassemble all instructions
+  my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr);
+
+  # Make array of counts per instruction
+  my @flat_count = ();
+  my @cum_count = ();
+  my $flat_total = 0;
+  my $cum_total = 0;
+  foreach my $e (@instructions) {
+    # Add up counts for all address that fall inside this instruction
+    # ($e->[0] is its first address, $e->[4] the limit address)
+    my $c1 = 0;
+    my $c2 = 0;
+    for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) {
+      $c1 += GetEntry($flat, $a);
+      $c2 += GetEntry($cumulative, $a);
+    }
+    push(@flat_count, $c1);
+    push(@cum_count, $c2);
+    $flat_total += $c1;
+    $cum_total += $c2;
+  }
+
+  # Print header with total counts
+  printf("ROUTINE ====================== %s\n" .
+         "%6s %6s %s (flat, cumulative) %.1f%% of total\n",
+         ShortFunctionName($routine),
+         Unparse($flat_total),
+         Unparse($cum_total),
+         Units(),
+         ($cum_total * 100.0) / $total);
+
+  # Process instructions in order
+  # ($i is advanced by the inner while loop below, one run of
+  # instructions per iteration of this loop.)
+  my $current_file = "";
+  for (my $i = 0; $i <= $#instructions; ) {
+    my $e = $instructions[$i];
+
+    # Print the new file name whenever we switch files
+    if ($e->[1] ne $current_file) {
+      $current_file = $e->[1];
+      my $fname = $current_file;
+      $fname =~ s|^\./||;   # Trim leading "./"
+
+      # Shorten long file names
+      if (length($fname) >= 58) {
+        $fname = "..." . substr($fname, -55);
+      }
+      printf("-------------------- %s\n", $fname);
+    }
+
+    # TODO: Compute range of lines to print together to deal with
+    # small reorderings.
+    # For now the range is the single source line of instruction $i.
+    my $first_line = $e->[2];
+    my $last_line = $first_line;
+    my %flat_sum = ();
+    my %cum_sum = ();
+    for (my $l = $first_line; $l <= $last_line; $l++) {
+      $flat_sum{$l} = 0;
+      $cum_sum{$l} = 0;
+    }
+
+    # Find run of instructions for this range of source lines
+    my $first_inst = $i;
+    while (($i <= $#instructions) &&
+           ($instructions[$i]->[2] >= $first_line) &&
+           ($instructions[$i]->[2] <= $last_line)) {
+      $e = $instructions[$i];
+      $flat_sum{$e->[2]} += $flat_count[$i];
+      $cum_sum{$e->[2]} += $cum_count[$i];
+      $i++;
+    }
+    my $last_inst = $i - 1;
+
+    # Print source lines
+    for (my $l = $first_line; $l <= $last_line; $l++) {
+      my $line = SourceLine($current_file, $l);
+      if (!defined($line)) {
+        # NOTE(review): the "?\n" placeholder is never printed because
+        # "next" skips the printf below -- confirm which one is intended.
+        $line = "?\n";
+        next;
+      } else {
+        $line =~ s/^\s+//;
+      }
+      printf("%6s %6s %5d: %s",
+             UnparseAlt($flat_sum{$l}),
+             UnparseAlt($cum_sum{$l}),
+             $l,
+             $line);
+    }
+
+    # Print disassembly
+    for (my $x = $first_inst; $x <= $last_inst; $x++) {
+      my $e = $instructions[$x];
+      my $address = $e->[0];
+      $address = AddressSub($address, $offset);  # Make relative to section
+      $address =~ s/^0x//;
+      $address =~ s/^0*//;
+
+      # Trim symbols
+      my $d = $e->[3];
+      while ($d =~ s/\([^()%]*\)(\s*const)?//g) { } # Argument types, not (%rax)
+      while ($d =~ s/(\w+)<[^<>]*>/$1/g)  { }       # Remove template arguments
+
+      printf("%6s %6s    %8s: %6s\n",
+             UnparseAlt($flat_count[$x]),
+             UnparseAlt($cum_count[$x]),
+             $address,
+             $d);
+    }
+  }
+}
+
+# Print DOT graph
+#   $prog           program name used in the graph title and legend
+#   $symbols        symbols passed to TranslateStack()
+#   $raw            profile whose keys are stacks and whose values are
+#                   counts; used to derive the edges
+#   $flat           flat profile (node font size, flat counts)
+#   $cumulative     cumulative profile (node selection, cumulative counts)
+#   $overall_total  total used for the title and the percentages
+# Where the output goes depends on the $main::opt_* output flags (see
+# "Open DOT output file" below).
+# Returns 1 if a graph was written, 0 if there were no nodes to print.
+sub PrintDot {
+  my $prog = shift;
+  my $symbols = shift;
+  my $raw = shift;
+  my $flat = shift;
+  my $cumulative = shift;
+  my $overall_total = shift;
+
+  # Get total
+  my $local_total = TotalProfile($flat);
+  # Thresholds are fractions of the local total, truncated to integers.
+  my $nodelimit = int($main::opt_nodefraction * $local_total);
+  my $edgelimit = int($main::opt_edgefraction * $local_total);
+  my $nodecount = $main::opt_nodecount;
+
+  # Find nodes to include
+  # Candidates are ordered by decreasing |cumulative count|, ties broken by
+  # name.  At most $nodecount are kept, and trailing candidates at or
+  # below $nodelimit are dropped as well.
+  my @list = (sort { abs(GetEntry($cumulative, $b)) <=>
+                     abs(GetEntry($cumulative, $a))
+                     || $a cmp $b }
+              keys(%{$cumulative}));
+  my $last = $nodecount - 1;
+  if ($last > $#list) {
+    $last = $#list;
+  }
+  while (($last >= 0) &&
+         (abs(GetEntry($cumulative, $list[$last])) <= $nodelimit)) {
+    $last--;
+  }
+  if ($last < 0) {
+    print STDERR "No nodes to print\n";
+    return 0;
+  }
+
+  if ($nodelimit > 0 || $edgelimit > 0) {
+    printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n",
+                   Unparse($nodelimit), Units(),
+                   Unparse($edgelimit), Units());
+  }
+
+  # Open DOT output file
+  # Except for the plain case, the output is a pipe into $DOT, optionally
+  # followed by $PS2PDF and/or a redirection into a temporary file.
+  my $output;
+  if ($main::opt_gv) {
+    $output = "| $DOT -Tps2 >" . TempName($main::next_tmpfile, "ps");
+  } elsif ($main::opt_evince) {
+    $output = "| $DOT -Tps2 | $PS2PDF - " . TempName($main::next_tmpfile, "pdf");
+  } elsif ($main::opt_ps) {
+    $output = "| $DOT -Tps2";
+  } elsif ($main::opt_pdf) {
+    $output = "| $DOT -Tps2 | $PS2PDF - -";
+  } elsif ($main::opt_web || $main::opt_svg) {
+    # We need to post-process the SVG, so write to a temporary file always.
+    $output = "| $DOT -Tsvg >" . TempName($main::next_tmpfile, "svg");
+  } elsif ($main::opt_gif) {
+    $output = "| $DOT -Tgif";
+  } else {
+    $output = ">&STDOUT";
+  }
+  open(DOT, $output) || error("$output: $!\n");
+
+  # Title
+  printf DOT ("digraph \"%s; %s %s\" {\n",
+              $prog,
+              Unparse($overall_total),
+              Units());
+  if ($main::opt_pdf) {
+    # The output is more printable if we set the page size for dot.
+    printf DOT ("size=\"8,11\"\n");
+  }
+  printf DOT ("node [width=0.375,height=0.25];\n");
+
+  # Print legend
+  printf DOT ("Legend [shape=box,fontsize=24,shape=plaintext," .
+              "label=\"%s\\l%s\\l%s\\l%s\\l%s\\l\"];\n",
+              $prog,
+              sprintf("Total %s: %s", Units(), Unparse($overall_total)),
+              sprintf("Focusing on: %s", Unparse($local_total)),
+              sprintf("Dropped nodes with <= %s abs(%s)",
+                      Unparse($nodelimit), Units()),
+              sprintf("Dropped edges with <= %s %s",
+                      Unparse($edgelimit), Units())
+              );
+
+  # Print nodes
+  # %node maps a name to its node number; the nodes are called N1, N2, ...
+  # in the DOT output.
+  my %node = ();
+  my $nextnode = 1;
+  foreach my $a (@list[0..$last]) {
+    # Pick font size
+    # (8 plus an amount that grows with the square root of the node's
+    # share of the local flat total)
+    my $f = GetEntry($flat, $a);
+    my $c = GetEntry($cumulative, $a);
+
+    my $fs = 8;
+    if ($local_total > 0) {
+      $fs = 8 + (50.0 * sqrt(abs($f * 1.0 / $local_total)));
+    }
+
+    $node{$a} = $nextnode++;
+    # Break the label at whitespace and at "::"
+    my $sym = $a;
+    $sym =~ s/\s+/\\n/g;
+    $sym =~ s/::/\\n/g;
+
+    # Extra cumulative info to print for non-leaves
+    my $extra = "";
+    if ($f != $c) {
+      $extra = sprintf("\\rof %s (%s)",
+                       Unparse($c),
+                       Percent($c, $overall_total));
+    }
+    my $style = "";
+    if ($main::opt_heapcheck) {
+      if ($f > 0) {
+        # make leak-causing nodes more visible (add a background)
+        $style = ",style=filled,fillcolor=gray"
+      } elsif ($f < 0) {
+        # make anti-leak-causing nodes (which almost never occur)
+        # stand out as well (triple border)
+        $style = ",peripheries=3"
+      }
+    }
+
+    printf DOT ("N%d [label=\"%s\\n%s (%s)%s\\r" .
+                "\",shape=box,fontsize=%.1f%s];\n",
+                $node{$a},
+                $sym,
+                Unparse($f),
+                Percent($f, $overall_total),
+                $extra,
+                $fs,
+                $style,
+               );
+  }
+
+  # Get edges and counts per edge
+  # Every adjacent pair of a translated stack yields an edge from entry
+  # $i to entry $i-1, provided both ends were kept as nodes.  Edges are
+  # keyed by "source\001destination".
+  my %edge = ();
+  my $n;
+  foreach my $k (keys(%{$raw})) {
+    # TODO: omit low %age edges
+    $n = $raw->{$k};
+    my @translated = TranslateStack($symbols, $k);
+    for (my $i = 1; $i <= $#translated; $i++) {
+      my $src = $translated[$i];
+      my $dst = $translated[$i-1];
+      #next if ($src eq $dst);  # Avoid self-edges?
+      if (exists($node{$src}) && exists($node{$dst})) {
+        my $edge_label = "$src\001$dst";
+        if (!exists($edge{$edge_label})) {
+          $edge{$edge_label} = 0;
+        }
+        $edge{$edge_label} += $n;
+      }
+    }
+  }
+
+  # Print edges (process in order of decreasing counts)
+  my %indegree = ();   # Number of incoming edges added per node so far
+  my %outdegree = ();  # Number of outgoing edges added per node so far
+  foreach my $e (sort { $edge{$b} <=> $edge{$a} } keys(%edge)) {
+    my @x = split(/\001/, $e);
+    $n = $edge{$e};
+
+    # Initialize degree of kept incoming and outgoing edges if necessary
+    my $src = $x[0];
+    my $dst = $x[1];
+    if (!exists($outdegree{$src})) { $outdegree{$src} = 0; }
+    if (!exists($indegree{$dst})) { $indegree{$dst} = 0; }
+
+    # The first incoming edge of a node is always kept; after that the
+    # count threshold and the per-node degree limit apply.
+    my $keep;
+    if ($indegree{$dst} == 0) {
+      # Keep edge if needed for reachability
+      $keep = 1;
+    } elsif (abs($n) <= $edgelimit) {
+      # Drop if we are below --edgefraction
+      $keep = 0;
+    } elsif ($outdegree{$src} >= $main::opt_maxdegree ||
+             $indegree{$dst} >= $main::opt_maxdegree) {
+      # Keep limited number of in/out edges per node
+      $keep = 0;
+    } else {
+      $keep = 1;
+    }
+
+    if ($keep) {
+      $outdegree{$src}++;
+      $indegree{$dst}++;
+
+      # Compute line width based on edge count
+      # (three times the edge's share of the local total, capped at 1,
+      # then doubled)
+      my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0);
+      if ($fraction > 1) { $fraction = 1; }
+      my $w = $fraction * 2;
+      if ($w < 1 && ($main::opt_web || $main::opt_svg)) {
+        # SVG output treats line widths < 1 poorly.
+        $w = 1;
+      }
+
+      # Dot sometimes segfaults if given edge weights that are too large, so
+      # we cap the weights at a large value
+      my $edgeweight = abs($n) ** 0.7;
+      if ($edgeweight > 100000) { $edgeweight = 100000; }
+      $edgeweight = int($edgeweight);
+
+      # Edges into a destination whose name contains "(inline)" are dashed.
+      my $style = sprintf("setlinewidth(%f)", $w);
+      if ($x[1] =~ m/\(inline\)/) {
+        $style .= ",dashed";
+      }
+
+      # Use a slightly squashed function of the edge count as the weight
+      printf DOT ("N%s -> N%s [label=%s, weight=%d, style=\"%s\"];\n",
+                  $node{$x[0]},
+                  $node{$x[1]},
+                  Unparse($n),
+                  $edgeweight,
+                  $style);
+    }
+  }
+
+  print DOT ("}\n");
+  close(DOT);
+
+  if ($main::opt_web || $main::opt_svg) {
+    # Rewrite SVG to be more usable inside web browser.
+    RewriteSvg(TempName($main::next_tmpfile, "svg"));
+  }
+
+  return 1;
+}
+
+# Post-processes the SVG file $svgfile produced by dot: makes the drawing
+# fill its window, embeds the script returned by SvgJavascript() and wraps
+# the graph in a "viewport" group.  The input file is removed after it
+# has been read.  With $main::opt_svg the result is printed on standard
+# out, otherwise it is written back to $svgfile.
+sub RewriteSvg {
+  my $svgfile = shift;
+
+  # Read the whole file, then remove it.
+  open(SVG, $svgfile) or die "open temp svg: $!";
+  my $svg = join('', <SVG>);
+  close(SVG);
+  unlink $svgfile;
+
+  # Dot's SVG output is
+  #
+  #    <svg width="___" height="___"
+  #     viewBox="___" xmlns=...>
+  #    <g id="graph0" transform="...">
+  #    ...
+  #    </g>
+  #    </svg>
+  #
+  # Change it to
+  #
+  #    <svg width="100%" height="100%"
+  #     xmlns=...>
+  #    $svg_javascript
+  #    <g id="viewport" transform="translate(0,0)">
+  #    <g id="graph0" transform="...">
+  #    ...
+  #    </g>
+  #    </g>
+  #    </svg>
+
+  # Fix width, height; drop viewBox.
+  $svg =~ s/(?s)<svg width="[^"]+" height="[^"]+"(.*?)viewBox="[^"]+"/<svg width="100%" height="100%"$1/;
+
+  # Insert script, viewport <g> above first <g>
+  my $svg_javascript = SvgJavascript();
+  my $viewport = "<g id=\"viewport\" transform=\"translate(0,0)\">\n";
+  $svg =~ s/<g id="graph\d"/$svg_javascript$viewport$&/;
+
+  # Insert final </g> above </svg>.
+  $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/;
+  $svg =~ s/<g id="graph\d"(.*?)/<g id="viewport"$1/;
+
+  # --svg: write to standard output.
+  if ($main::opt_svg) {
+    print $svg;
+    return;
+  }
+
+  # Otherwise write back to the temporary file.
+  open(SVG, ">$svgfile") or die "open $svgfile: $!";
+  print SVG $svg;
+  close(SVG);
+}
+
+# Return the <script> element (SVGPan, with local fixes) that RewriteSvg
+# embeds into the generated SVG.  The heredoc is single-quoted, so nothing
+# inside it is interpolated by Perl.
+sub SvgJavascript {
+  return <<'EOF';
+<script type="text/ecmascript"><![CDATA[
+// SVGPan
+// http://www.cyberz.org/blog/2009/12/08/svgpan-a-javascript-svg-panzoomdrag-library/
+// Local modification: if(true || ...) below to force panning, never moving.
+// Local modification: removed a dead duplicate "onmouseup" attribute and a
+// reference to an undefined variable in setupHandlers(); the wheel handler
+// initializes stateTf when no mouse button has been pressed yet.
+
+/**
+ *  SVGPan library 1.2
+ * ====================
+ *
+ * Given an unique existing element with id "viewport", including
+ * the library into any SVG adds the following capabilities:
+ *
+ *  - Mouse panning
+ *  - Mouse zooming (using the wheel)
+ *  - Object dragging
+ *
+ * Known issues:
+ *
+ *  - Zooming (while panning) on Safari has still some issues
+ *
+ * Releases:
+ *
+ * 1.2, Sat Mar 20 08:42:50 GMT 2010, Zeng Xiaohui
+ *	Fixed a bug with browser mouse handler interaction
+ *
+ * 1.1, Wed Feb  3 17:39:33 GMT 2010, Zeng Xiaohui
+ *	Updated the zoom code to support the mouse wheel on Safari/Chrome
+ *
+ * 1.0, Andrea Leofreddi
+ *	First release
+ *
+ * This code is licensed under the following BSD license:
+ *
+ * Copyright 2009-2010 Andrea Leofreddi <a.leofreddi@itcharm.com>. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are
+ * permitted provided that the following conditions are met:
+ *
+ *    1. Redistributions of source code must retain the above copyright notice, this list of
+ *       conditions and the following disclaimer.
+ *
+ *    2. Redistributions in binary form must reproduce the above copyright notice, this list
+ *       of conditions and the following disclaimer in the documentation and/or other materials
+ *       provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Andrea Leofreddi ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Andrea Leofreddi OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are those of the
+ * authors and should not be interpreted as representing official policies, either expressed
+ * or implied, of Andrea Leofreddi.
+ */
+
+var root = document.documentElement;
+
+var state = 'none', stateTarget, stateOrigin, stateTf;
+
+setupHandlers(root);
+
+/**
+ * Register handlers
+ */
+function setupHandlers(root){
+	setAttributes(root, {
+		"onmousedown" : "handleMouseDown(evt)",
+		"onmousemove" : "handleMouseMove(evt)",
+		"onmouseup" : "handleMouseUp(evt)"
+		//"onmouseout" : "handleMouseUp(evt)", // Decomment this to stop the pan functionality when dragging out of the SVG element
+	});
+
+	if(navigator.userAgent.toLowerCase().indexOf('webkit') >= 0)
+		window.addEventListener('mousewheel', handleMouseWheel, false); // Chrome/Safari
+	else
+		window.addEventListener('DOMMouseScroll', handleMouseWheel, false); // Others
+}
+
+/**
+ * Instance an SVGPoint object with given event coordinates.
+ */
+function getEventPoint(evt) {
+	var p = root.createSVGPoint();
+
+	p.x = evt.clientX;
+	p.y = evt.clientY;
+
+	return p;
+}
+
+/**
+ * Sets the current transform matrix of an element.
+ */
+function setCTM(element, matrix) {
+	var s = "matrix(" + matrix.a + "," + matrix.b + "," + matrix.c + "," + matrix.d + "," + matrix.e + "," + matrix.f + ")";
+
+	element.setAttribute("transform", s);
+}
+
+/**
+ * Dumps a matrix to a string (useful for debug).
+ */
+function dumpMatrix(matrix) {
+	var s = "[ " + matrix.a + ", " + matrix.c + ", " + matrix.e + "\n  " + matrix.b + ", " + matrix.d + ", " + matrix.f + "\n  0, 0, 1 ]";
+
+	return s;
+}
+
+/**
+ * Sets attributes of an element.
+ */
+function setAttributes(element, attributes){
+	for (var i in attributes)
+		element.setAttributeNS(null, i, attributes[i]);
+}
+
+/**
+ * Handle mouse wheel event.
+ */
+function handleMouseWheel(evt) {
+	if(evt.preventDefault)
+		evt.preventDefault();
+
+	evt.returnValue = false;
+
+	var svgDoc = evt.target.ownerDocument;
+
+	var delta;
+
+	if(evt.wheelDelta)
+		delta = evt.wheelDelta / 3600; // Chrome/Safari
+	else
+		delta = evt.detail / -90; // Mozilla
+
+	var z = 1 + delta; // Zoom factor: 0.9/1.1
+
+	var g = svgDoc.getElementById("viewport");
+
+	var p = getEventPoint(evt);
+
+	p = p.matrixTransform(g.getCTM().inverse());
+
+	// Compute new scale matrix in current mouse position
+	var k = root.createSVGMatrix().translate(p.x, p.y).scale(z).translate(-p.x, -p.y);
+
+	setCTM(g, g.getCTM().multiply(k));
+
+	// stateTf is only assigned on mouse down; zooming may happen first.
+	if(typeof(stateTf) == "undefined")
+		stateTf = g.getCTM().inverse();
+
+	stateTf = stateTf.multiply(k.inverse());
+}
+
+/**
+ * Handle mouse move event.
+ */
+function handleMouseMove(evt) {
+	if(evt.preventDefault)
+		evt.preventDefault();
+
+	evt.returnValue = false;
+
+	var svgDoc = evt.target.ownerDocument;
+
+	var g = svgDoc.getElementById("viewport");
+
+	if(state == 'pan') {
+		// Pan mode
+		var p = getEventPoint(evt).matrixTransform(stateTf);
+
+		setCTM(g, stateTf.inverse().translate(p.x - stateOrigin.x, p.y - stateOrigin.y));
+	} else if(state == 'move') {
+		// Move mode
+		var p = getEventPoint(evt).matrixTransform(g.getCTM().inverse());
+
+		setCTM(stateTarget, root.createSVGMatrix().translate(p.x - stateOrigin.x, p.y - stateOrigin.y).multiply(g.getCTM().inverse()).multiply(stateTarget.getCTM()));
+
+		stateOrigin = p;
+	}
+}
+
+/**
+ * Handle click event.
+ */
+function handleMouseDown(evt) {
+	if(evt.preventDefault)
+		evt.preventDefault();
+
+	evt.returnValue = false;
+
+	var svgDoc = evt.target.ownerDocument;
+
+	var g = svgDoc.getElementById("viewport");
+
+	if(true || evt.target.tagName == "svg") {
+		// Pan mode
+		state = 'pan';
+
+		stateTf = g.getCTM().inverse();
+
+		stateOrigin = getEventPoint(evt).matrixTransform(stateTf);
+	} else {
+		// Move mode
+		state = 'move';
+
+		stateTarget = evt.target;
+
+		stateTf = g.getCTM().inverse();
+
+		stateOrigin = getEventPoint(evt).matrixTransform(stateTf);
+	}
+}
+
+/**
+ * Handle mouse button release event.
+ */
+function handleMouseUp(evt) {
+	if(evt.preventDefault)
+		evt.preventDefault();
+
+	evt.returnValue = false;
+
+	var svgDoc = evt.target.ownerDocument;
+
+	if(state == 'pan' || state == 'move') {
+		// Quit pan mode
+		state = '';
+	}
+}
+
+]]></script>
+EOF
+}
+
+# Map an arbitrary key to a small integer id.
+# The first key seen gets 1, the next new key 2, and so on; asking again
+# for a known key yields the id it was given before.  Ids are remembered
+# in %main::uniqueid.
+sub ShortIdFor {
+  my ($key) = @_;
+  my $known = $main::uniqueid{$key};
+  return $known if defined($known);
+
+  # Not seen before: hand out the next sequential id.
+  my $fresh = keys(%main::uniqueid) + 1;
+  $main::uniqueid{$key} = $fresh;
+  return $fresh;
+}
+
+# Convert a newline-separated stack of addresses into a list of display
+# entries at the granularity selected on the command line.
+#
+# Arguments: the symbol table (address -> array of
+# shortname/fileline/fullname triples) and the stack key.
+# Returns the translated entries as a list.
+sub TranslateStack {
+  my ($symbols, $k) = @_;
+
+  my @addrs = split(/\n/, $k);
+  my @result = ();
+  foreach my $i (0 .. $#addrs) {
+    my $addr = $addrs[$i];
+
+    # Skip large addresses since they sometimes show up as fake entries on RH9
+    next if length($addr) > 8 && $addr gt "7fffffffffffffff";
+
+    # For disassembly and listings the bare address is the key.
+    if ($main::opt_disasm || $main::opt_list) {
+      push(@result, $addr);
+      next;
+    }
+
+    my $symlist = $symbols->{$addr};
+    $symlist = [$addr, "", $addr] unless defined($symlist);
+
+    # An entry may hold several triples when inlining is involved.
+    # Callers come before callees in the list, so walk it from the end:
+    # the translated stack must list callees before callers.
+    for (my $j = $#{$symlist}; $j >= 2; $j -= 3) {
+      my ($func, $fileline) = @{$symlist}[$j - 2, $j - 1];
+      $func = "$func (inline)" if $j > 2;
+
+      # Do not merge nodes corresponding to Callback::Run since that
+      # causes confusing cycles in dot display.  Instead, we synthesize
+      # a unique name for this frame per caller.
+      if ($func =~ m/Callback.*::Run$/) {
+        my $caller = ($i > 0) ? $addrs[$i-1] : 0;
+        $func = "Run#" . ShortIdFor($caller);
+      }
+
+      my $entry;
+      if ($main::opt_addresses) {
+        $entry = "$addr $func $fileline";
+      } elsif ($main::opt_lines) {
+        $entry = ($func eq '??' && $fileline eq '??:0')
+            ? "$addr" : "$func $fileline";
+      } elsif ($main::opt_functions) {
+        $entry = ($func eq '??') ? "$addr" : $func;
+      } elsif ($main::opt_files) {
+        if ($fileline eq '??:0' || $fileline eq '') {
+          $entry = "$addr";
+        } else {
+          # Keep only the file name: drop the trailing ":<line>".
+          ($entry = $fileline) =~ s/:\d+$//;
+        }
+      } else {
+        push(@result, $addr);
+        last;  # Do not print inlined info
+      }
+      push(@result, $entry);
+    }
+  }
+
+  # print join(",", @addrs), " => ", join(",", @result), "\n";
+  return @result;
+}
+
+# Format $num as a percentage of $tot with one decimal place.
+# When $tot is zero the result is "nan" for a zero $num, otherwise
+# "+inf" or "-inf" according to the sign of $num.
+sub Percent {
+  my ($num, $tot) = @_;
+  return sprintf("%.1f%%", $num * 100.0 / $tot) if $tot != 0;
+  return "nan" if $num == 0;
+  return ($num > 0) ? "+inf" : "-inf";
+}
+
+# Render a raw count in the display units of the current profile type:
+# heap/growth profiles print object counts or bytes as integers and
+# otherwise megabytes with one decimal; contention profiles print seconds
+# unless --contentions is set; everything else prints an integer.
+sub Unparse {
+  my ($num) = @_;
+
+  if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') {
+    my $counting_objects = $main::opt_inuse_objects || $main::opt_alloc_objects;
+    return sprintf("%d", $num) if $counting_objects || $main::opt_show_bytes;
+    return sprintf("%.1f", $num / 1048576.0);
+  }
+
+  if ($main::profile_type eq 'contention' && !$main::opt_contentions) {
+    return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds
+  }
+
+  return sprintf("%d", $num);
+}
+
+# Like Unparse, except that a zero count is shown as "."
+sub UnparseAlt {
+  my ($num) = @_;
+  return ($num == 0) ? "." : Unparse($num);
+}
+
+# Name of the unit that Unparse reports for the current profile type
+# and options.
+sub Units {
+  if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') {
+    return "objects" if $main::opt_inuse_objects || $main::opt_alloc_objects;
+    return $main::opt_show_bytes ? "B" : "MB";
+  }
+  if ($main::profile_type eq 'contention' && !$main::opt_contentions) {
+    return "seconds";
+  }
+  return "samples";
+}
+
+##### Profile manipulation code #####
+
+# Build the flat profile: the count of every stack [a,b,c,d] is charged
+# to its first entry [a] only.  Stacks with no entries are ignored.
+sub FlatProfile {
+  my ($profile) = @_;
+  my $flat = {};
+  foreach my $stack (keys(%{$profile})) {
+    my @frames = split(/\n/, $stack);
+    next unless @frames;
+    AddEntry($flat, $frames[0], $profile->{$stack});
+  }
+  return $flat;
+}
+
+# Build the cumulative profile: the count of every stack [a,b,c,d] is
+# charged to each of [a], [b], [c] and [d].
+sub CumulativeProfile {
+  my ($profile) = @_;
+  my $cumulative = {};
+  foreach my $stack (keys(%{$profile})) {
+    my $count = $profile->{$stack};
+    foreach my $frame (split(/\n/, $stack)) {
+      AddEntry($cumulative, $frame, $count);
+    }
+  }
+  return $cumulative;
+}
+
+# Return the second entry of the stacks in $profile when every stack has
+# one and they are all equal.  Return undef as soon as a stack has fewer
+# than two entries or a different second entry (and for an empty profile).
+sub IsSecondPcAlwaysTheSame {
+  my ($profile) = @_;
+
+  my $common;
+  foreach my $stack (keys(%{$profile})) {
+    my @frames = split(/\n/, $stack);
+    return undef if @frames < 2;
+    $common = $frames[1] unless defined($common);
+    return undef if $common ne $frames[1];
+  }
+  return $common;
+}
+
+# Build a "<file>:<name>" location string for $address from the first
+# symbol triple stored for it.  A file of "?" is reported as "??:0", the
+# spelling 'addr2line' uses for unknown locations; an address missing
+# from $symbols yields "??:0:unknown".
+sub ExtractSymbolLocation {
+  my ($symbols, $address) = @_;
+  return "??:0:unknown" unless exists $symbols->{$address};
+
+  my $entry = $symbols->{$address};
+  my $file = $entry->[1];
+  $file = "??:0" if $file eq "?";
+  return $file . ":" . $entry->[0];
+}
+
+# Extract the call graph from a profile.
+# For every stack the count is charged to the location of its first
+# entry and to one "source -> destination" edge for each pair of
+# adjacent entries.  Returns the resulting hash.
+sub ExtractCalls {
+  my ($symbols, $profile) = @_;
+
+  my $calls = {};
+  foreach my $stack_trace (keys(%{$profile})) {
+    my $count = $profile->{$stack_trace};
+    my @frames = split(/\n/, $stack_trace);
+
+    my $callee = ExtractSymbolLocation($symbols, $frames[0]);
+    AddEntry($calls, $callee, $count);
+
+    foreach my $idx (1 .. $#frames) {
+      my $caller = ExtractSymbolLocation($symbols, $frames[$idx]);
+      AddEntry($calls, "$caller -> $callee", $count);
+      $callee = $caller;
+    }
+  }
+
+  return $calls;
+}
+
+# Strip frames that belong to the allocator, the lock implementation or
+# the profiler itself from every stack in a profile.
+#
+# Arguments: the symbol table and the profile (stack key -> count).
+# Returns a new profile keyed by the reduced stacks; AddEntry sums the
+# counts of stacks that become identical.
+sub RemoveUninterestingFrames {
+  my ($symbols, $profile) = @_;
+
+  # Function names to skip, chosen by profile type.  Nothing is skipped
+  # for unknown types.
+  my %skip = ();
+  my $skip_regexp = 'NOMATCH';
+  if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') {
+    my @allocator_frames = (
+      'calloc',
+      'cfree',
+      'malloc',
+      'free',
+      'memalign',
+      'posix_memalign',
+      'pvalloc',
+      'valloc',
+      'realloc',
+      'tc_calloc',
+      'tc_cfree',
+      'tc_malloc',
+      'tc_free',
+      'tc_memalign',
+      'tc_posix_memalign',
+      'tc_pvalloc',
+      'tc_valloc',
+      'tc_realloc',
+      'tc_new',
+      'tc_delete',
+      'tc_newarray',
+      'tc_deletearray',
+      'tc_new_nothrow',
+      'tc_newarray_nothrow',
+      'do_malloc',
+      '::do_malloc',   # new name -- got moved to an unnamed ns
+      '::do_malloc_or_cpp_alloc',
+      'DoSampledAllocation',
+      'simple_alloc::allocate',
+      '__malloc_alloc_template::allocate',
+      '__builtin_delete',
+      '__builtin_new',
+      '__builtin_vec_delete',
+      '__builtin_vec_new',
+      'operator new',
+      'operator new[]',
+      # These mark the beginning/end of our custom sections
+      '__start_google_malloc',
+      '__stop_google_malloc',
+      '__start_malloc_hook',
+      '__stop_malloc_hook',
+    );
+    foreach my $name (@allocator_frames) {
+      # Mach (OS X) adds a _ prefix to everything, so skip both spellings.
+      $skip{$_} = 1 foreach ($name, "_" . $name);
+    }
+    # TODO: Remove TCMalloc once everything has been
+    # moved into the tcmalloc:: namespace and we have flushed
+    # old code out of the system.
+    $skip_regexp = "TCMalloc|^tcmalloc::";
+  } elsif ($main::profile_type eq 'contention') {
+    $skip{$_} = 1 foreach ('base::RecordLockProfileData',
+                           'base::SubmitMutexProfileData',
+                           'base::SubmitSpinLockProfileData',
+                           'Mutex::Unlock',
+                           'Mutex::UnlockSlow',
+                           'Mutex::ReaderUnlock',
+                           'MutexLock::~MutexLock',
+                           'SpinLock::Unlock',
+                           'SpinLock::SlowUnlock',
+                           'SpinLockHolder::~SpinLockHolder');
+  } elsif ($main::profile_type eq 'cpu') {
+    # Drop signal handlers used for CPU profile collection
+    # TODO(dpeng): this should not be necessary; it's taken
+    # care of by the general 2nd-pc mechanism below.
+    $skip{$_} = 1 foreach ('ProfileData::Add',           # historical
+                           'ProfileData::prof_handler',  # historical
+                           'CpuProfiler::prof_handler',
+                           '__FRAME_END__',
+                           '__pthread_sighandler',
+                           '__restore');
+  }
+
+  if ($main::profile_type eq 'cpu') {
+    # If all the second-youngest program counters are the same,
+    # this STRONGLY suggests that it is an artifact of measurement,
+    # i.e., stack frames pushed by the CPU profiler signal handler.
+    # Hence, we delete them.
+    # (The topmost PC is read from the signal structure, not from
+    # the stack, so it does not get involved.)
+    while (my $second_pc = IsSecondPcAlwaysTheSame($profile)) {
+      # Report the symbol name when one is known, the raw pc otherwise.
+      my $shown = exists($symbols->{$second_pc})
+          ? $symbols->{$second_pc}->[0] : $second_pc;
+      print STDERR "Removing $shown from all stack traces.\n";
+
+      my $trimmed = {};
+      foreach my $k (keys(%{$profile})) {
+        my @addrs = split(/\n/, $k);
+        splice(@addrs, 1, 1);
+        AddEntry($trimmed, join("\n", @addrs), $profile->{$k});
+      }
+      $profile = $trimmed;
+    }
+  }
+
+  my $result = {};
+  foreach my $k (keys(%{$profile})) {
+    my @path = ();
+    foreach my $addr (split(/\n/, $k)) {
+      my $skipped = 0;
+      if (exists($symbols->{$addr})) {
+        my $func = $symbols->{$addr}->[0];
+        $skipped = $skip{$func} || ($func =~ m/$skip_regexp/);
+      }
+      push(@path, $addr) unless $skipped;
+    }
+    AddEntry($result, join("\n", @path), $profile->{$k});
+  }
+  return $result;
+}
+
+# Re-key a profile at the granularity requested by the user.
+# Each stack is translated with TranslateStack; empty entries and
+# repeated entries (recursion would otherwise be double-counted) are
+# dropped, keeping the first occurrence.
+sub ReduceProfile {
+  my ($symbols, $profile) = @_;
+  my $reduced = {};
+  foreach my $stack (keys(%{$profile})) {
+    # Seeding with '' makes empty entries count as already seen.
+    my %seen = ('' => 1);
+    my @path = grep { !$seen{$_}++ } TranslateStack($symbols, $stack);
+    AddEntry($reduced, join("\n", @path), $profile->{$stack});
+  }
+  return $reduced;
+}
+
+# Return 1 when the regexp $re matches the first or second field of any
+# three-element group of the symbol array $sym, and 0 otherwise
+# (including when $sym is undefined).
+sub SymbolMatches {
+  my ($sym, $re) = @_;
+  return 0 unless defined($sym);
+
+  my $last = $#{$sym};
+  my $idx = 0;
+  while ($idx < $last) {
+    return 1 if $sym->[$idx] =~ m/$re/ || $sym->[$idx+1] =~ m/$re/;
+    $idx += 3;
+  }
+  return 0;
+}
+
+# Keep only the stacks in which at least one entry matches $focus,
+# either through its address text or through its symbol information.
+sub FocusProfile {
+  my ($symbols, $profile, $focus) = @_;
+  my $kept = {};
+  STACK: foreach my $stack (keys(%{$profile})) {
+    foreach my $addr (split(/\n/, $stack)) {
+      next unless ($addr =~ m/$focus/)
+          || SymbolMatches($symbols->{$addr}, $focus);
+      # The first matching entry is enough to keep the whole stack.
+      AddEntry($kept, $stack, $profile->{$stack});
+      next STACK;
+    }
+  }
+  return $kept;
+}
+
+# Keep only the stacks in which no entry matches $ignore, either through
+# its address text or through its symbol information.
+sub IgnoreProfile {
+  my ($symbols, $profile, $ignore) = @_;
+  my $kept = {};
+  STACK: foreach my $stack (keys(%{$profile})) {
+    foreach my $addr (split(/\n/, $stack)) {
+      # A single matching entry disqualifies the whole stack.
+      next STACK if ($addr =~ m/$ignore/)
+          || SymbolMatches($symbols->{$addr}, $ignore);
+    }
+    AddEntry($kept, $stack, $profile->{$stack});
+  }
+  return $kept;
+}
+
+# Sum of all counts stored in a profile (0 for an empty profile).
+sub TotalProfile {
+  my ($profile) = @_;
+  my $total = 0;
+  $total += $_ foreach values(%{$profile});
+  return $total;
+}
+
+# Return a new profile holding the entry-wise sum of profiles A and B.
+# Neither argument is modified.
+sub AddProfile {
+  my ($A, $B) = @_;
+
+  my $sum = {};
+  foreach my $source ($A, $B) {
+    foreach my $stack (keys(%{$source})) {
+      AddEntry($sum, $stack, $source->{$stack});
+    }
+  }
+  return $sum;
+}
+
+# Return a new symbol map containing the entries of A overlaid with the
+# entries of B; B may be undefined, and wins where a key is in both.
+sub MergeSymbols {
+  my ($A, $B) = @_;
+
+  my $merged = {};
+  $merged->{$_} = $A->{$_} foreach keys(%{$A});
+  if (defined($B)) {
+    $merged->{$_} = $B->{$_} foreach keys(%{$B});
+  }
+  return $merged;
+}
+
+
+# Return a new set (hash with every value 1) holding the union of the
+# keys of A and B.
+sub AddPcs {
+  my ($A, $B) = @_;
+
+  my $union = {};
+  foreach my $set ($A, $B) {
+    $union->{$_} = 1 foreach keys(%{$set});
+  }
+  return $union;
+}
+
+# Return a new profile holding A minus B, entry by entry.
+# With --drop_negative, negative differences are clamped to zero and
+# stacks present only in B are left out; otherwise such stacks appear
+# with a negated count.
+sub SubtractProfile {
+  my ($A, $B) = @_;
+
+  my $diff = {};
+  foreach my $stack (keys(%{$A})) {
+    my $delta = $A->{$stack} - GetEntry($B, $stack);
+    $delta = 0 if $delta < 0 && $main::opt_drop_negative;
+    AddEntry($diff, $stack, $delta);
+  }
+  return $diff if $main::opt_drop_negative;
+
+  # Take care of when subtracted profile has more entries
+  foreach my $stack (keys(%{$B})) {
+    next if exists($A->{$stack});
+    AddEntry($diff, $stack, 0 - $B->{$stack});
+  }
+  return $diff;
+}
+
+# Look up the count stored under $k in a profile; a missing key reads
+# as zero.
+sub GetEntry {
+  my ($profile, $k) = @_;
+  return exists($profile->{$k}) ? $profile->{$k} : 0;
+}
+
+# Add $n to the count stored under $k in a profile, creating the entry
+# (starting from zero) when it does not exist yet.
+sub AddEntry {
+  my ($profile, $k, $n) = @_;
+  $profile->{$k} = 0 unless exists($profile->{$k});
+  $profile->{$k} += $n;
+}
+
+# Charge $count to a stack given as whitespace-separated addresses.
+# Every address is passed through HexExtend, recorded in the $pcs set,
+# and the newline-joined result is used as the profile key.
+sub AddEntries {
+  my ($profile, $pcs, $stack, $count) = @_;
+
+  my @k = map { scalar(HexExtend($_)) } split(/\s+/, $stack);
+  $pcs->{$_} = 1 foreach @k;
+  AddEntry($profile, join("\n", @k), $count);
+}
+
+##### Code to profile a server dynamically #####
+
+# Verify that the symbol page of the profiled server is usable.
+# Fetches the page with $URL_FETCHER and reports an error when it cannot
+# be read, when its first line is not "num_symbols: <n>", or when <n> is
+# zero (stripped binary).
+sub CheckSymbolPage {
+  my $url = SymbolPageURL();
+  my $command_line = "$URL_FETCHER '$url'";
+  open(SYMBOL, "$command_line |") or error($command_line);
+  my $line = <SYMBOL>;
+  close(SYMBOL);
+  # Test for a missing page before touching $line, so that an empty
+  # response is reported instead of triggering an undef warning.
+  unless (defined($line)) {
+    error("$url doesn't exist\n");
+  }
+  $line =~ s/\r//g;         # turn windows-looking lines into unix-looking lines
+
+  if ($line =~ /^num_symbols:\s+(\d+)$/) {
+    if ($1 == 0) {
+      error("Stripped binary. No symbols available.\n");
+    }
+  } else {
+    error("Failed to get the number of symbols from $url\n");
+  }
+}
+
+# Decide whether a profile argument names a URL or a local file.
+# Returns 0 (after a note on STDERR) when $profile_name is an existing
+# plain file, and 1 otherwise.
+sub IsProfileURL {
+  my $profile_name = shift;
+  if (-f $profile_name) {
+    # print, not printf: the file name must not be parsed as a format.
+    print STDERR "Using local file $profile_name.\n";
+    return 0;
+  }
+  return 1;
+}
+
+# Split a profile URL into its parts.
+# Returns the list (host, base URL, profile path), or the empty list for
+# an undefined or empty argument.  The protocol defaults to "http://"
+# and the profile path to "/"; the host is the host:port part with
+# everything from the first ":" removed.
+sub ParseProfileURL {
+  my ($profile_name) = @_;
+
+  return () if !defined($profile_name) || $profile_name eq "";
+
+  # Split profile URL - matches all non-empty strings, so no test.
+  $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,;
+
+  my ($proto, $hostport, $prefix, $profile) =
+      ($1 || "http://", $2, $3, $4 || "/");
+
+  (my $host = $hostport) =~ s/:.*//;
+
+  return ($host, "$proto$hostport$prefix", $profile);
+}
+
+# URL of the symbol page, derived from the first profile argument: its
+# base URL followed by $SYMBOL_PAGE.
+sub SymbolPageURL {
+  my (undef, $baseURL) = ParseProfileURL($main::pfile_args[0]);
+  return "$baseURL$SYMBOL_PAGE";
+}
+
+# Fetch the program name from the $PROGRAM_NAME_PAGE of the server named
+# by the first profile argument.  Only the first line of the response is
+# used; everything from the first NUL byte on (argv[1] and later) and
+# all line feeds are removed.
+sub FetchProgramName() {
+  my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
+  my $url = "$baseURL$PROGRAM_NAME_PAGE";
+  my $command_line = "$URL_FETCHER '$url'";
+  open(CMDLINE, "$command_line |") or error($command_line);
+  my $cmdline = <CMDLINE>;
+  close(CMDLINE);
+  # Report an empty response before editing $cmdline; substituting on an
+  # undefined value would only add an unrelated warning.
+  error("Failed to get program name from $url\n") unless defined($cmdline);
+  $cmdline =~ s/\r//g;   # turn windows-looking lines into unix-looking lines
+  $cmdline =~ s/\x00.+//;  # Remove argv[1] and latters.
+  $cmdline =~ s!\n!!g;  # Remove LFs.
+  return $cmdline;
+}
+
+# Gee, curl's -L (--location) option isn't reliable at least
+# with its 7.12.3 version.  Curl will forget to post data if
+# there is a redirection.  This function is a workaround for
+# curl.  Redirection happens on borg hosts.
+#
+# Issues a --head request for $url and returns the target of the last
+# Location header seen, or $url unchanged when there is none.
+sub ResolveRedirectionForCurl {
+  my $url = shift;
+  my $command_line = "$URL_FETCHER --head '$url'";
+  open(CMDLINE, "$command_line |") or error($command_line);
+  while (<CMDLINE>) {
+    s/\r//g;         # turn windows-looking lines into unix-looking lines
+    # Header field names are case-insensitive and the whitespace after
+    # the colon is optional.
+    if (/^Location:\s*(.*)/i) {
+      $url = $1;
+    }
+  }
+  close(CMDLINE);
+  return $url;
+}
+
+# Append a timeout flag to a URL fetcher command line.
+# Returns $fetcher unchanged when $timeout is undefined or the fetcher
+# is neither "curl -s" nor rpcget; otherwise the matching flag is added
+# with the timeout formatted as an integer.
+sub AddFetchTimeout {
+  my ($fetcher, $timeout) = @_;
+  return $fetcher unless defined($timeout);
+
+  if ($fetcher =~ m/\bcurl -s/) {
+    return $fetcher . sprintf(" --max-time %d", $timeout);
+  }
+  if ($fetcher =~ m/\brpcget\b/) {
+    return $fetcher . sprintf(" --deadline=%d", $timeout);
+  }
+  return $fetcher;
+}
+
+# Read a symbol map from the file handle passed as the first argument
+# and return it as a hash reference.  Reading stops at end of input or
+# at the first line starting with "---".
+#
+# "0x<hex> <text>" lines become map entries keyed by the address without
+# its leading zeroes.  "name=value" lines are variables: 'binary'
+# updates $main::prog (warning when it replaces a different known name);
+# any other variable is reported and ignored.
+sub ReadSymbols {
+  my $in = shift;
+  my $map = {};
+  while (<$in>) {
+    s/\r//g;         # turn windows-looking lines into unix-looking lines
+
+    # Removes all the leading zeroes from the symbols, see comment below.
+    if (m/^0x0*([0-9a-f]+)\s+(.+)/) {
+      $map->{$1} = $2;
+      next;
+    }
+    last if m/^---/;
+    next unless m/^([a-z][^=]*)=(.*)$/;
+
+    my ($variable, $value) = ($1, $2);
+    for ($variable, $value) {
+      s/^\s+//;
+      s/\s+$//;
+    }
+    if ($variable ne "binary") {
+      printf STDERR ("Ignoring unknown variable in symbols list: " .
+          "'%s' = '%s'\n", $variable, $value);
+      next;
+    }
+    if ($main::prog ne $UNKNOWN_BINARY && $main::prog ne $value) {
+      printf STDERR ("Warning: Mismatched binary name '%s', using '%s'.\n",
+                     $main::prog, $value);
+    }
+    $main::prog = $value;
+  }
+  return $map;
+}
+
+# Fetches and processes symbols to prepare them for use in the profile output
+# code.  If the optional 'symbol_map' arg is not given, fetches symbols from
+# $SYMBOL_PAGE for all PC values found in profile.  Otherwise, the raw symbols
+# are assumed to have already been fetched into 'symbol_map' and are simply
+# extracted and processed.
+#
+# Arguments: the set of PCs (hash keys) and, optionally, a raw symbol map.
+# Returns a hash mapping each PC to an array of
+# (short name, "?", full name) triples, one triple per "--"-separated name.
+sub FetchSymbols {
+  my $pcset = shift;
+  my $symbol_map = shift;
+
+  my %seen = ();
+  my @pcs = grep { !$seen{$_}++ } keys(%$pcset);  # uniq
+
+  if (!defined($symbol_map)) {
+    my $post_data = join("+", sort((map {"0x" . "$_"} @pcs)));
+
+    # Fail loudly if the request body cannot be written; otherwise an
+    # empty request would be posted and no symbol would resolve.
+    open(POSTFILE, ">$main::tmpfile_sym")
+        or error("Unable to write $main::tmpfile_sym: $!\n");
+    print POSTFILE $post_data;
+    close(POSTFILE);
+
+    my $url = SymbolPageURL();
+
+    my $command_line;
+    if ($URL_FETCHER =~ m/\bcurl -s/) {
+      $url = ResolveRedirectionForCurl($url);
+      $command_line = "$URL_FETCHER -d '\@$main::tmpfile_sym' '$url'";
+    } else {
+      $command_line = "$URL_FETCHER --post '$url' < '$main::tmpfile_sym'";
+    }
+    # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols.
+    my $cppfilt = $obj_tool_map{"c++filt"};
+    open(SYMBOL, "$command_line | $cppfilt |") or error($command_line);
+    $symbol_map = ReadSymbols(*SYMBOL{IO});
+    close(SYMBOL);
+  }
+
+  my $symbols = {};
+  foreach my $pc (@pcs) {
+    # For 64 bits binaries, symbols are extracted with 8 leading zeroes.
+    # Then /symbol reads the long symbols in as uint64, and outputs
+    # the result with a "0x%08llx" format which get rid of the zeroes.
+    # By removing all the leading zeroes in both $pc and the symbols from
+    # /symbol, the symbols match and are retrievable from the map.
+    my $shortpc = $pc;
+    $shortpc =~ s/^0*//;
+    # Each line may have a list of names, which includes the function
+    # and also other functions it has inlined.  They are separated
+    # (in PrintSymbolizedFile), by --, which is illegal in function names.
+    my $fullnames;
+    if (defined($symbol_map->{$shortpc})) {
+      $fullnames = $symbol_map->{$shortpc};
+    } else {
+      $fullnames = "0x" . $pc;  # Just use addresses
+    }
+    my $sym = [];
+    $symbols->{$pc} = $sym;
+    foreach my $fullname (split("--", $fullnames)) {
+      my $name = ShortFunctionName($fullname);
+      push(@{$sym}, $name, "?", $fullname);
+    }
+  }
+  return $symbols;
+}
+
+# Return $file_name with everything up to and including its last "/"
+# removed.
+sub BaseName {
+  (my $base = shift) =~ s!^.*/!!;  # Remove directory name
+  return $base;
+}
+
+# Build the base file name for a fetched profile:
+# "<binary basename>.<$main::op_time>.<host of the profile URL>".
+sub MakeProfileBaseName {
+  my ($binary_name, $profile_name) = @_;
+  my ($host) = ParseProfileURL($profile_name);
+  return sprintf("%s.%s.%s",
+                 BaseName($binary_name), $main::op_time, $host);
+}
+
+# Fetch one profile from a running server into the local profile
+# directory ($PPROF_TMPDIR, or $HOME/pprof), creating it if needed.
+#
+# Arguments:
+#   $binary_name        - program name, used to build the file name
+#   $profile_name       - local file name or profile URL
+#   $fetch_name_only    - when > 0, only compute and return the target
+#                         file name without fetching anything
+#   $encourage_patience - when true, print "Be patient..." for timed
+#                         profiles
+# Returns $profile_name itself for an existing local file, otherwise the
+# path of the fetched profile (also stored in $main::collected_profile).
+sub FetchDynamicProfile {
+  my $binary_name = shift;
+  my $profile_name = shift;
+  my $fetch_name_only = shift;
+  my $encourage_patience = shift;
+
+  if (!IsProfileURL($profile_name)) {
+    return $profile_name;
+  } else {
+    my ($host, $baseURL, $path) = ParseProfileURL($profile_name);
+    if ($path eq "" || $path eq "/") {
+      # Missing type specifier defaults to cpu-profile
+      $path = $PROFILE_PAGE;
+    }
+
+    my $profile_file = MakeProfileBaseName($binary_name, $profile_name);
+
+    my $url = "$baseURL$path";
+    my $fetch_timeout = undef;
+    if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) {
+      if ($path =~ m/[?]/) {
+        $url .= "&";
+      } else {
+        $url .= "?";
+      }
+      $url .= sprintf("seconds=%d", $main::opt_seconds);
+      # Allow the requested duration plus some slack before giving up.
+      $fetch_timeout = $main::opt_seconds * 1.01 + 60;
+    } else {
+      # For non-CPU profiles, we add a type-extension to
+      # the target profile file name.
+      my $suffix = $path;
+      $suffix =~ s,/,.,g;
+      $profile_file .= $suffix;
+    }
+
+    my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . "/pprof");
+    if (! -d $profile_dir) {
+      mkdir($profile_dir)
+          || die("Unable to create profile directory $profile_dir: $!\n");
+    }
+    my $tmp_profile = "$profile_dir/.tmp.$profile_file";
+    my $real_profile = "$profile_dir/$profile_file";
+
+    if ($fetch_name_only > 0) {
+      return $real_profile;
+    }
+
+    my $fetcher = AddFetchTimeout($URL_FETCHER, $fetch_timeout);
+    my $cmd = "$fetcher '$url' > '$tmp_profile'";
+    if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){
+      print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n  ${real_profile}\n";
+      if ($encourage_patience) {
+        print STDERR "Be patient...\n";
+      }
+    } else {
+      print STDERR "Fetching $path profile from $url to\n  ${real_profile}\n";
+    }
+
+    (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n");
+    # Both paths are inside $profile_dir, so the builtin rename is enough.
+    # Unlike running "mv" through the shell, it is not broken by spaces
+    # or shell metacharacters in the path.
+    rename($tmp_profile, $real_profile) || error("Unable to rename profile\n");
+    print STDERR "Wrote profile to $real_profile\n";
+    $main::collected_profile = $real_profile;
+    return $main::collected_profile;
+  }
+}
+
+# Collect every profile named in @main::pfile_args and record the local
+# paths in @main::profile_files.  One profile is fetched in-process; several
+# are fetched in parallel by a binary tree of forked processes.
+sub FetchDynamicProfiles {
+  my $items = scalar(@main::pfile_args);
+  error("No profiles to fetch\n") if ($items == 0);
+  if ($items == 1) {
+    $main::profile_files[0] = FetchDynamicProfile($main::prog, $main::pfile_args[0], 0, 1);
+  } else {
+    # Smallest fork depth whose 2**$levels leaf processes cover every profile.
+    my $levels = 0;
+    $levels++ while ((1 << $levels) < $items);
+    # Name-only pass: record where each profile is going to be written.
+    for (my $i = 0; $i < $items; $i++) {
+      $main::profile_files[$i] = FetchDynamicProfile($main::prog, $main::pfile_args[$i], 1, 0);
+    }
+    print STDERR "Fetching $items profiles, Be patient...\n";
+    FetchDynamicProfilesRecurse($levels, 0, 0);
+    $main::collected_profile = join(" \\\n    ", @main::profile_files);
+  }
+}
+
+# Fork once: the parent continues as position 2*$position and the child as
+# 2*$position+1; each then recurses or fetches via TryCollectProfile.
+sub FetchDynamicProfilesRecurse {
+  my ($maxlevel, $level, $position) = @_;
+  my $pid = fork();
+  # undef means fork failed; don't let the parent run (and exit in) the child branch.
+  defined($pid) || error("Unable to fork profile collector: $!\n");
+  if ($pid) {
+    $position = 0 | ($position << 1);
+    TryCollectProfile($maxlevel, $level, $position);
+    wait;
+  } else {
+    $position = 1 | ($position << 1);
+    TryCollectProfile($maxlevel, $level, $position);
+    cleanup();
+    exit(0);
+  }
+}
+
+# One step of the fork tree.  At the deepest level, fetch the profile at
+# $position (if there is one); above that level, fork again.
+sub TryCollectProfile {
+  my ($maxlevel, $level, $position) = @_;
+
+  if ($level < ($maxlevel - 1)) {
+    # Not deep enough yet: split into two more collectors.
+    FetchDynamicProfilesRecurse($maxlevel, $level+1, $position);
+  } elsif ($position < scalar(@main::pfile_args)) {
+    # Deepest level.  Positions past the end of @main::pfile_args never
+    # reach this branch, so they fetch nothing.
+    FetchDynamicProfile($main::prog, $main::pfile_args[$position], 0, 0);
+  }
+}
+
+##### Parsing code #####
+
+# Provide a small streaming-read module to handle very large
+# cpu-profile files.  Stream in chunks along a sliding window.
+# Provides an interface to get one 'slot', correctly handling
+# endian-ness differences.  A slot is one 32-bit or 64-bit word
+# (depending on the input profile).  We tell endianness and bit-size
+# for the profile by looking at the first 8 bytes: in cpu profiles,
+# the second slot is always 3 (we'll accept anything that's not 0).
+BEGIN {
+  package CpuProfileStream;
+  # Constructor.  $file is the handle slots are read from; $fname only labels error messages.
+  sub new {
+    my ($class, $file, $fname) = @_;
+    my $self = { file        => $file,
+                 base        => 0,            # logical slot index of slots->[0]
+                 stride      => 512 * 1024,   # must be a multiple of bitsize/8
+                 slots       => [],           # window of decoded slots now in memory
+                 unpack_code => "",           # N for big-endian, V for little
+                 perl_is_64bit => 1,          # matters if profile is 64-bit
+    };
+    bless $self, $class;
+    # Let unittests adjust the stride
+    if ($main::opt_test_stride > 0) {
+      $self->{stride} = $main::opt_test_stride;
+    }
+    # Read the first two slots to figure out bitsize and endianness.
+    my $slots = $self->{slots};
+    my $str;
+    read($self->{file}, $str, 8);
+    # Set the global $address_length based on what we see here.
+    # 8 is 32-bit (8 hexadecimal chars); 16 is 64-bit (16 hexadecimal chars).
+    $address_length = ($str eq (chr(0)x8)) ? 16 : 8;
+    if ($address_length == 8) {
+      if (substr($str, 6, 2) eq chr(0)x2) {
+        $self->{unpack_code} = 'V';  # Little-endian.
+      } elsif (substr($str, 4, 2) eq chr(0)x2) {
+        $self->{unpack_code} = 'N';  # Big-endian
+      } else {
+        ::error("$fname: header size >= 2**16\n");
+      }
+      @$slots = unpack($self->{unpack_code} . "*", $str);
+    } else {
+      # If we're a 64-bit profile, check if we're a 64-bit-capable
+      # perl.  Otherwise, each slot will be represented as a float
+      # instead of an int64, losing precision and making all the
+      # 64-bit addresses wrong.  We won't complain yet, but will
+      # later if we ever see a value that doesn't fit in 32 bits.
+      my $has_q = 0;
+      eval { $has_q = pack("Q", "1") ? 1 : 1; };  # pack dies if 'Q' is unsupported, leaving 0
+      if (!$has_q) {
+	$self->{perl_is_64bit} = 0;
+      }
+      read($self->{file}, $str, 8);
+      if (substr($str, 4, 4) eq chr(0)x4) {
+        # We'd love to use 'Q', but it's a) not universal, b) not endian-proof.
+        $self->{unpack_code} = 'V';  # Little-endian.
+      } elsif (substr($str, 0, 4) eq chr(0)x4) {
+        $self->{unpack_code} = 'N';  # Big-endian
+      } else {
+        ::error("$fname: header size >= 2**32\n");
+      }
+      my @pair = unpack($self->{unpack_code} . "*", $str);
+      # Since we know one of the pair is 0, it's fine to just add them.
+      @$slots = (0, $pair[0] + $pair[1]);
+    }
+    return $self;
+  }
+
+  # Load more data when we access slots->get(X) which is not yet in memory.
+  sub overflow {
+    my ($self) = @_;
+    my $slots = $self->{slots};
+    $self->{base} += $#$slots + 1;   # skip over data we're replacing
+    my $str;
+    read($self->{file}, $str, $self->{stride});
+    if ($address_length == 8) {      # the 32-bit case
+      # This is the easy case: unpack provides 32-bit unpacking primitives.
+      @$slots = unpack($self->{unpack_code} . "*", $str);
+    } else {
+      # We need to unpack 32 bits at a time and combine.
+      my @b32_values = unpack($self->{unpack_code} . "*", $str);
+      my @b64_values = ();
+      for (my $i = 0; $i < $#b32_values; $i += 2) {  # an unpaired trailing word is skipped
+        # TODO(csilvers): if this is a 32-bit perl, the math below
+        #    could end up in a too-large int, which perl will promote
+        #    to a double, losing necessary precision.  Deal with that.
+	#    Right now, we just die.
+	my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]);
+        if ($self->{unpack_code} eq 'N') {    # big-endian
+	  ($lo, $hi) = ($hi, $lo);
+	}
+	my $value = $lo + $hi * (2**32);
+	if (!$self->{perl_is_64bit} &&   # check value is exactly represented
+	    (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) {
+	  ::error("Need a 64-bit perl to process this 64-bit profile.\n");
+	}
+	push(@b64_values, $value);
+      }
+      @$slots = @b64_values;
+    }
+  }
+
+  # Access the i-th long in the file (logically), or -1 at EOF.
+  sub get {
+    my ($self, $idx) = @_;
+    my $slots = $self->{slots};
+    while ($#$slots >= 0) {
+      if ($idx < $self->{base}) {
+        # The only time we expect a reference to $slots[$i - something]
+        # after referencing $slots[$i] is reading the very first header.
+        # Since $stride > |header|, that shouldn't cause any lookback
+        # errors.  And everything after the header is sequential.
+        print STDERR "Unexpected look-back reading CPU profile";
+        return -1;   # shrug, don't know what better to return
+      } elsif ($idx > $self->{base} + $#$slots) {
+        $self->overflow();
+      } else {
+        return $slots->[$idx - $self->{base}];
+      }
+    }
+    # If we get here, $slots is [], which means we've reached EOF
+    return -1;  # unique since slots is supposed to hold unsigned numbers
+  }
+}
+
+# Reads the top, 'header' section of a profile, and returns the last
+# line of the header, commonly called a 'header line'.  The header
+# section of a profile consists of zero or more 'command' lines that
+# are instructions to pprof, which pprof executes when reading the
+# header.  All 'command' lines start with a %.  After the command
+# lines is the 'header line', which is a profile-specific line that
+# indicates what type of profile it is, and perhaps other global
+# information about the profile.  For instance, here's a header line
+# for a heap profile:
+#   heap profile:     53:    38236 [  5525:  1284029] @ heapprofile
+# For historical reasons, the CPU profile does not contain a text-
+# readable header line.  If the profile looks like a CPU profile,
+# this function returns "".  If no header line could be found, this
+# function returns undef.
+#
+# The following commands are recognized:
+#   %warn -- emit the rest of this line to stderr, prefixed by 'WARNING:'
+#
+# The input file should be in binmode.
+sub ReadProfileHeader {
+  local *PROFILE = shift;
+  my ($firstchar, $line) = ("", "");
+  # Nothing to read means no header: return undef, not "" (binary CPU profile).
+  read(PROFILE, $firstchar, 1) || return undef;
+  seek(PROFILE, -1, 1);                    # unread the firstchar
+  if ($firstchar !~ /[[:print:]]/) {       # is not a text character
+    return "";
+  }
+  while (defined($line = <PROFILE>)) {
+    $line =~ s/\r//g;   # turn windows-looking lines into unix-looking lines
+    if ($line =~ /^%warn\s+(.*)/) {        # 'warn' command
+      # Note this matches both '%warn blah\n' and '%warn\n'.
+      print STDERR "WARNING: $1\n";        # print the rest of the line
+    } elsif ($line =~ /^%/) {
+      print STDERR "Ignoring unknown command from profile header: $line";
+    } else {
+      # End of commands, must be the header line.
+      return $line;
+    }
+  }
+  return undef;     # got to EOF without seeing a header line
+}
+
+# Returns true if $file_name exists, is readable, and its header line is
+# the "--- <last component of $SYMBOL_PAGE>" marker; false otherwise.
+sub IsSymbolizedProfileFile {
+  my $file_name = shift;
+  return 0 if (!(-e $file_name) || !(-r $file_name));
+
+  # Three-argument open takes the name literally (no mode characters or
+  # whitespace trimming).  A file that cannot be opened is "not symbolized".
+  open(TFILE, "<", $file_name) || return 0;
+  binmode TFILE;
+  my $firstline = ReadProfileHeader(*TFILE);
+  close(TFILE);
+  return 0 if (!$firstline);
+  $SYMBOL_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
+  my $symbol_marker = $&;
+  return $firstline =~ /^--- *$symbol_marker/;
+}
+
+# Parse a profile (see common/profiler.cc) and return a reference to a map:
+#      $result->{version}     Version number of profile file
+#      $result->{period}      Sampling period (in microseconds)
+#      $result->{profile}     Profile object
+#      $result->{libs}        Library info from the profile's memory map
+#      $result->{pcs}         Hash of all PC values seen, key is hex address
+#      $result->{symbols}     Symbol table (symbolized profiles only)
+sub ReadProfile {
+  my $prog = shift;
+  my $fname = shift;
+  my $result;            # return value
+
+  $CONTENTION_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
+  my $contention_marker = $&;
+  $GROWTH_PAGE  =~ m,[^/]+$,;    # matches everything after the last slash
+  my $growth_marker = $&;
+  $SYMBOL_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
+  my $symbol_marker = $&;
+  $PROFILE_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
+  my $profile_marker = $&;
+
+  # Look at first line to see if it is a heap or a CPU profile.
+  # CPU profile may start with no header at all, and just binary data
+  # (starting with \0\0\0\0) -- in that case, don't try to read the
+  # whole firstline, since it may be gigabytes(!) of data.
+  open(PROFILE, "<", $fname) || error("$fname: $!\n");  # 3-arg: name taken literally
+  binmode PROFILE;      # New perls do UTF-8 processing
+  my $header = ReadProfileHeader(*PROFILE);
+  if (!defined($header)) {   # means "at EOF"
+    error("Profile is empty.\n");
+  }
+
+  my $symbols;
+  if ($header =~ m/^--- *$symbol_marker/o) {
+    # Verify that the user asked for a symbolized profile
+    if (!$main::use_symbolized_profile) {
+      # we were given both a binary and a symbolized profile, abort
+      error("FATAL ERROR: Symbolized profile\n   $fname\ncannot be used with " .
+            "a binary arg. Try again without passing\n   $prog\n");
+    }
+    # Read the symbol section of the symbolized profile file.
+    $symbols = ReadSymbols(*PROFILE{IO});
+    # Read the next line to get the header for the remaining profile.
+    $header = ReadProfileHeader(*PROFILE) || "";
+  }
+
+  $main::profile_type = '';
+  if ($header =~ m/^heap profile:.*$growth_marker/o) {
+    $main::profile_type = 'growth';
+    $result =  ReadHeapProfile($prog, *PROFILE, $header);
+  } elsif ($header =~ m/^heap profile:/) {
+    $main::profile_type = 'heap';
+    $result =  ReadHeapProfile($prog, *PROFILE, $header);
+  } elsif ($header =~ m/^--- *$contention_marker/o) {
+    $main::profile_type = 'contention';
+    $result = ReadSynchProfile($prog, *PROFILE);
+  } elsif ($header =~ m/^--- *Stacks:/) {
+    print STDERR
+      "Old format contention profile: mistakenly reports " .
+      "condition variable signals as lock contentions.\n";
+    $main::profile_type = 'contention';
+    $result = ReadSynchProfile($prog, *PROFILE);
+  } elsif ($header =~ m/^--- *$profile_marker/) {
+    # the binary cpu profile data starts immediately after this line
+    $main::profile_type = 'cpu';
+    $result = ReadCPUProfile($prog, $fname, *PROFILE);
+  } else {
+    if (defined($symbols)) {
+      # a symbolized profile contains a format we don't recognize, bail out
+      error("$fname: Cannot recognize profile section after symbols.\n");
+    }
+    # no ascii header present -- must be a CPU profile
+    $main::profile_type = 'cpu';
+    $result = ReadCPUProfile($prog, $fname, *PROFILE);
+  }
+
+  close(PROFILE);
+
+  # if we got symbols along with the profile, return those as well
+  if (defined($symbols)) {
+    $result->{symbols} = $symbols;
+  }
+
+  return $result;
+}
+
+# Subtract one from caller pc so we map back to call instr.
+# However, don't do this if we're reading a symbolized profile
+# file, in which case the subtract-one was done when the file
+# was written.
+#
+# We apply the same logic to all readers, though ReadCPUProfile uses an
+# independent implementation.
+sub FixCallerAddresses {
+  my $stack = shift;
+  # Symbolized profiles had this adjustment applied when they were written.
+  return $stack if $main::use_symbolized_profile;
+
+  # The first whitespace character found in the input is reused as the
+  # separator when the addresses are joined back together.
+  $stack =~ /(\s)/;
+  my $delimiter = $1;
+
+  # Entry 0 is copied through untouched; every later entry is a caller
+  # address and has one subtracted from it.
+  my @addrs = split(' ', $stack);
+  my @fixedaddrs = @addrs;
+  foreach my $i (1 .. $#addrs) {
+    $fixedaddrs[$i] = AddressSub($addrs[$i], "0x1");
+  }
+  return join $delimiter, @fixedaddrs;
+}
+
+# CPU profile reader.  Decodes the binary sample records readable from
+# PROFILE, then parses the text memory map that follows them.  Returns a
+# hash ref with the keys version, period, profile, libs and pcs.
+sub ReadCPUProfile {
+  my $prog = shift;
+  my $fname = shift;       # just used for logging
+  local *PROFILE = shift;
+  my ($version, $period, $i);
+  my ($profile, $pcs) = ({}, {});
+
+  # Parse string into array of slots.
+  my $slots = CpuProfileStream->new(*PROFILE, $fname);
+
+  # Read header.  The current header version is a 5-element structure
+  # containing:
+  #   0: header count (always 0)
+  #   1: header "words" (after this one: 3)
+  #   2: format version (0)
+  #   3: sampling period (usec)
+  #   4: unused padding (always 0)
+  if ($slots->get(0) != 0 ) {
+    error("$fname: not a profile file, or old format profile file\n");
+  }
+  $i = 2 + $slots->get(1);
+  $version = $slots->get(2);
+  $period = $slots->get(3);
+  # Do some sanity checking on these header values.
+  if ($version > (2**32) || $period > (2**32) || $i > (2**32) || $i < 5) {
+    error("$fname: not a profile file, or corrupted profile file\n");
+  }
+
+  # Parse profile
+  while ($slots->get($i) != -1) {
+    my $n = $slots->get($i++);
+    my $d = $slots->get($i++);
+    if ($d > (2**16)) {  # TODO(csilvers): what's a reasonable max-stack-depth?
+      my $addr = sprintf("0%o", $i * ($address_length == 8 ? 4 : 8));
+      print STDERR "At index $i (address $addr):\n";
+      error("$fname: stack trace depth >= 2**16\n");
+    }
+    if ($slots->get($i) == 0) {
+      # End of profile data marker
+      $i += $d;
+      last;
+    }
+
+    # Make key out of the stack entries
+    my @k = ();
+    for (my $j = 0; $j < $d; $j++) {
+      my $pc = $slots->get($i+$j);
+      # Subtract one from caller pc so we map back to call instr.
+      # However, don't do this if we're reading a symbolized profile
+      # file, in which case the subtract-one was done when the file
+      # was written.
+      if ($j > 0 && !$main::use_symbolized_profile) {
+        $pc--;
+      }
+      $pc = sprintf("%0*x", $address_length, $pc);
+      $pcs->{$pc} = 1;
+      push @k, $pc;
+    }
+
+    AddEntry($profile, (join "\n", @k), $n);
+    $i += $d;
+  }
+
+  # Parse map.  It starts right after slot $i-1, and a slot is 4 bytes in a
+  # 32-bit profile but 8 bytes in a 64-bit one ($address_length 8 vs. 16).
+  my $map = '';
+  seek(PROFILE, $i * ($address_length == 8 ? 4 : 8), 0);
+  read(PROFILE, $map, (stat PROFILE)[7]);
+
+  my $r = {};
+  $r->{version} = $version;
+  $r->{period} = $period;
+  $r->{profile} = $profile;
+  $r->{libs} = ParseLibraries($prog, $map, $pcs);
+  $r->{pcs} = $pcs;
+
+  return $r;
+}
+
+# Heap profile reader.  $header is the already-read "heap profile:" line and
+# PROFILE supplies the entries after it.  Returns a hash ref like ReadCPUProfile.
+sub ReadHeapProfile {
+  my $prog = shift;
+  local *PROFILE = shift;
+  my $header = shift;
+
+  my $index = 1;
+  if ($main::opt_inuse_space) {
+    $index = 1;
+  } elsif ($main::opt_inuse_objects) {
+    $index = 0;
+  } elsif ($main::opt_alloc_space) {
+    $index = 3;
+  } elsif ($main::opt_alloc_objects) {
+    $index = 2;
+  }
+
+  # Find the type of this profile.  The header line looks like:
+  #    heap profile:   1246:  8800744 [  1246:  8800744] @ <heap-url>/266053
+  # There are two pairs <count: size>, the first inuse objects/space, and the
+  # second allocated objects/space.  This is followed optionally by a profile
+  # type, and if that is present, optionally by a sampling frequency.
+  # For remote heap profiles (v1):
+  # The interpretation of the sampling frequency is that the profiler, for
+  # each sample, calculates a uniformly distributed random integer less than
+  # the given value, and records the next sample after that many bytes have
+  # been allocated.  Therefore, the expected sample interval is half of the
+  # given frequency.  By default, if not specified, the expected sample
+  # interval is 128KB.  Only remote-heap-page profiles are adjusted for
+  # sample size.
+  # For remote heap profiles (v2):
+  # The sampling frequency is the rate of a Poisson process. This means that
+  # the probability of sampling an allocation of size X with sampling rate Y
+  # is 1 - exp(-X/Y)
+  # For version 2, a typical header line might look like this:
+  # heap profile:   1922: 127792360 [  1922: 127792360] @ <heap-url>_v2/524288
+  # the trailing number (524288) is the sampling rate. (Version 1 showed
+  # double the 'rate' here)
+  my ($sampling_algorithm, $sample_adjustment) = (0, 0);
+  chomp($header);
+  my $type = "unknown";
+  if ($header =~ m"^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*([^/]*)(/(\d+))?)?") {
+    if (defined($6) && ($6 ne '')) {
+      $type = $6;
+      my $sample_period = $8;
+      # $type is "heapprofile" for profiles generated by the
+      # heap-profiler, and either "heap" or "heap_v2" for profiles
+      # generated by sampling directly within tcmalloc.  It can also
+      # be "growth" for heap-growth profiles.  The first is typically
+      # found for profiles generated locally, and the others for
+      # remote profiles.
+      if (($type eq "heapprofile") || ($type !~ /heap/) ) {
+        # No need to adjust for the sampling rate with heap-profiler-derived data
+        $sampling_algorithm = 0;
+      } elsif ($type =~ /_v2/) {
+        $sampling_algorithm = 2;     # version 2 sampling
+        if (defined($sample_period) && ($sample_period ne '')) {
+          $sample_adjustment = int($sample_period);
+        }
+      } else {
+        $sampling_algorithm = 1;     # version 1 sampling
+        if (defined($sample_period) && ($sample_period ne '')) {
+          $sample_adjustment = int($sample_period)/2;
+        }
+      }
+    } else {
+      # We detect whether or not this is a remote-heap profile by checking
+      # that the total-allocated stats ($n2,$s2) are exactly the
+      # same as the in-use stats ($n1,$s1).  It is remotely conceivable
+      # that a non-remote-heap profile may pass this check, but it is hard
+      # to imagine how that could happen.
+      # In this case it's so old it's guaranteed to be remote-heap version 1.
+      my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);
+      if (($n1 == $n2) && ($s1 == $s2)) {
+        # This is likely to be a remote-heap based sample profile
+        $sampling_algorithm = 1;
+      }
+    }
+  }
+
+  if ($sampling_algorithm > 0) {
+    # For remote-heap generated profiles, adjust the counts and sizes to
+    # account for the sample rate (we sample once every 128KB by default).
+    if ($sample_adjustment == 0) {
+      # Turn on profile adjustment.
+      $sample_adjustment = 128*1024;
+      print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n";
+    } else {
+      printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n",
+                     $sample_adjustment);
+    }
+    if ($sampling_algorithm > 1) {
+      # We don't bother printing anything for the original version (version 1)
+      printf STDERR "Heap version $sampling_algorithm\n";
+    }
+  }
+
+  my ($profile, $pcs, $map) = ({}, {}, "");
+
+  while (<PROFILE>) {
+    s/\r//g;         # turn windows-looking lines into unix-looking lines
+    if (/^MAPPED_LIBRARIES:/) {
+      # Read the /proc/self/maps data
+      while (<PROFILE>) {
+        s/\r//g;         # turn windows-looking lines into unix-looking lines
+        $map .= $_;
+      }
+      last;
+    }
+
+    if (/^--- Memory map:/) {
+      # Read /proc/self/maps data as formatted by DumpAddressMap()
+      my $buildvar = "";
+      while (<PROFILE>) {
+        s/\r//g;         # turn windows-looking lines into unix-looking lines
+        # Parse "build=<dir>" specification if supplied
+        if (m/^\s*build=(.*)\n/) {
+          $buildvar = $1;
+        }
+        # Expand "$build" variable if available
+        $_ =~ s/\$build\b/$buildvar/g;
+        $map .= $_;
+      }
+      last;
+    }
+
+    # Read entry of the form:
+    #  <count1>: <bytes1> [<count2>: <bytes2>] @ a1 a2 a3 ... an
+    s/^\s*//;
+    s/\s*$//;
+    if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) {
+      my $stack = $5;
+      my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);
+
+      if ($sample_adjustment) {
+        if ($sampling_algorithm == 2) {
+          # Remote-heap version 2
+          # The sampling frequency is the rate of a Poisson process.
+          # This means that the probability of sampling an allocation of
+          # size X with sampling rate Y is 1 - exp(-X/Y)
+          if ($n1 != 0) {
+            my $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
+            my $scale_factor = 1/(1 - exp(-$ratio));
+            $n1 *= $scale_factor;
+            $s1 *= $scale_factor;
+          }
+          if ($n2 != 0) {
+            my $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
+            my $scale_factor = 1/(1 - exp(-$ratio));
+            $n2 *= $scale_factor;
+            $s2 *= $scale_factor;
+          }
+        } else {
+          # Remote-heap version 1.  As in version 2, a zero count is left alone
+          # so the per-object average below never divides by zero.
+          if ($n1 != 0) {
+            my $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
+            if ($ratio < 1) {
+              $n1 /= $ratio;
+              $s1 /= $ratio;
+            }
+          }
+          if ($n2 != 0) {
+            my $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
+            if ($ratio < 1) {
+              $n2 /= $ratio;
+              $s2 /= $ratio;
+            }
+          }
+        }
+      }
+      my @counts = ($n1, $s1, $n2, $s2);
+      AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]);
+    }
+  }
+
+  my $r = {};
+  $r->{version} = "heap";
+  $r->{period} = 1;
+  $r->{profile} = $profile;
+  $r->{libs} = ParseLibraries($prog, $map, $pcs);
+  $r->{pcs} = $pcs;
+  return $r;
+}
+
+# Contention (synchronization) profile reader.  Sample lines are turned into
+# profile entries, "name = value" lines set parameters, and every other line
+# is kept as memory-map text.  $header is accepted but not used.
+sub ReadSynchProfile {
+  my $prog = shift;
+  local *PROFILE = shift;
+  my $header = shift;
+
+  my ($map, $profile, $pcs) = ('', {}, {});
+  my $sampling_period = 1;
+  my $cyclespernanosec = 2.8;   # Default assumption for old binaries
+  my $seen_clockrate = 0;
+  my $line;
+
+  my $index = 0;
+  if ($main::opt_total_delay) {
+    $index = 0;
+  } elsif ($main::opt_contentions) {
+    $index = 1;
+  } elsif ($main::opt_mean_delay) {
+    $index = 2;
+  }
+
+  while ( $line = <PROFILE> ) {
+    $line =~ s/\r//g;      # turn windows-looking lines into unix-looking lines
+    if ( $line =~ /^\s*(\d+)\s+(\d+) \@\s*(.*?)\s*$/ ) {
+      my ($cycles, $count, $stack) = ($1, $2, $3);
+
+      # Convert cycles to nanoseconds
+      $cycles /= $cyclespernanosec;
+
+      # Adjust for sampling done by application
+      $cycles *= $sampling_period;
+      $count *= $sampling_period;
+      # Mean delay is reported as 0 for a zero count instead of dividing by zero.
+      my @values = ($cycles, $count, ($count != 0) ? $cycles / $count : 0);
+      AddEntries($profile, $pcs, FixCallerAddresses($stack), $values[$index]);
+    } elsif ( $line =~ /^(slow release).*thread \d+  \@\s*(.*?)\s*$/ ||
+              $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) {
+      my ($cycles, $stack) = ($1, $2);
+      if ($cycles !~ /^\d+$/) {
+        next;
+      }
+
+      # Convert cycles to nanoseconds
+      $cycles /= $cyclespernanosec;
+
+      # Adjust for sampling done by application
+      $cycles *= $sampling_period;
+
+      AddEntries($profile, $pcs, FixCallerAddresses($stack), $cycles);
+
+    } elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) {
+      my ($variable, $value) = ($1,$2);
+      for ($variable, $value) {
+        s/^\s+//;
+        s/\s+$//;
+      }
+      if ($variable eq "cycles/second") {
+        $cyclespernanosec = $value / 1e9;
+        $seen_clockrate = 1;
+      } elsif ($variable eq "sampling period") {
+        $sampling_period = $value;
+      } elsif ($variable eq "ms since reset") {
+        # Currently nothing is done with this value in pprof
+        # So we just silently ignore it for now
+      } elsif ($variable eq "discarded samples") {
+        # Currently nothing is done with this value in pprof
+        # So we just silently ignore it for now
+      } else {
+        printf STDERR ("Ignoring unnknown variable in /contention output: " .
+                       "'%s' = '%s'\n",$variable,$value);
+      }
+    } else {
+      # Memory map entry
+      $map .= $line;
+    }
+  }
+
+  if (!$seen_clockrate) {
+    printf STDERR ("No cycles/second entry in profile; Guessing %.1f GHz\n",
+                   $cyclespernanosec);
+  }
+
+  my $r = {};
+  $r->{version} = 0;
+  $r->{period} = $sampling_period;
+  $r->{profile} = $profile;
+  $r->{libs} = ParseLibraries($prog, $map, $pcs);
+  $r->{pcs} = $pcs;
+  return $r;
+}
+
+# Given a hex value in the form "0x1abcd" return "0001abcd" or
+# "000000000001abcd", depending on the current address length.
+# There's probably a more idiomatic (or faster) way to do this...
+sub HexExtend {
+  my $addr = shift;
+  $addr =~ s/^0x//;
+
+  # Over-long input only draws a warning; substr below keeps its low digits.
+  printf STDERR "Warning:  address $addr is longer than address length $address_length\n"
+      if (length($addr) > $address_length);
+
+  # Left-pad with zeros, then keep the rightmost $address_length characters.
+  return substr(("0" x 15) . $addr, -$address_length);
+}
+
+##### Symbol extraction #####
+
+# Aggressively search the lib_prefix values for the given library
+# If all else fails, just return the name of the library unmodified.
+# If the lib_prefix is "/my/path,/other/path" and $file is "/lib/dir/mylib.so"
+# it will search the following locations in this order, until it finds a file:
+#   /my/path/lib/dir/mylib.so
+#   /other/path/lib/dir/mylib.so
+#   /my/path/dir/mylib.so
+#   /other/path/dir/mylib.so
+#   /my/path/mylib.so
+#   /other/path/mylib.so
+#   /lib/dir/mylib.so              (returned as last resort)
+sub FindLibrary {
+  my $file = shift;
+
+  # Try every prefix against the full path, then against the path with one
+  # more leading directory removed, until no directory is left to remove.
+  my $tail = $file;
+  while (1) {
+    foreach my $dir (@prefix_list) {
+      my $candidate = $dir . $tail;
+      return $candidate if (-e $candidate);
+    }
+    last unless ($tail =~ s|^/[^/]+/|/|);
+  }
+  return $file;   # last resort: the name as given
+}
+
+# Return path to library with debugging symbols.
+# For libc libraries, the copy in /usr/lib/debug contains debugging symbols
+sub DebuggingLibrary {
+  my ($lib) = @_;
+  # The symbol-bearing copy lives at the same absolute path under /usr/lib/debug.
+  my $debug_copy = "/usr/lib/debug$lib";
+  return $debug_copy if ($lib =~ m|^/| && -f $debug_copy);
+  return undef;
+}
+
+# Parse text section header of a library using objdump.  Returns a hash ref
+# with the .text section's size, vma and file_offset, each as the hex string
+# printed by "objdump -h", or undef if no .text line is found.
+sub ParseTextSectionHeaderFromObjdump {
+  my $lib = shift;
+
+  # $lib may come from a profile's memory map, so quote it for the shell
+  # unless it is made up solely of characters the shell leaves alone.
+  my $lib_arg = $lib;
+  if ($lib_arg =~ m![^a-zA-Z0-9/.,:_=+-]!) {
+    $lib_arg =~ s/'/'\\''/g;
+    $lib_arg = "'$lib_arg'";
+  }
+
+  my ($size, $vma, $file_offset);
+  # Get objdump output from the library file to figure out how to
+  # map between mapped addresses and addresses in the library.
+  my $objdump = $obj_tool_map{"objdump"};
+  open(OBJDUMP, "$objdump -h $lib_arg |")
+                || error("$objdump $lib: $!\n");
+  while (<OBJDUMP>) {
+    s/\r//g;         # turn windows-looking lines into unix-looking lines
+    # Idx Name          Size      VMA       LMA       File off  Algn
+    #  10 .text         00104b2c  420156f0  420156f0  000156f0  2**4
+    # For 64-bit objects, VMA and LMA will be 16 hex digits, size and file
+    # offset may still be 8.  But AddressSub below will still handle that.
+    my @x = split;
+    if (($#x >= 6) && ($x[1] eq '.text')) {
+      $size = $x[2];
+      $vma = $x[3];
+      $file_offset = $x[5];
+      last;
+    }
+  }
+  close(OBJDUMP);
+
+  return undef if (!defined($size));
+  return { size => $size, vma => $vma, file_offset => $file_offset };
+}
+
+# Parse text section header of a library using otool (on OS X).  Returns a
+# hash ref with the __TEXT,__text section's size, vma and file_offset as hex
+# strings (no leading 0x), or undef if any of the three is missing.
+sub ParseTextSectionHeaderFromOtool {
+  my $lib = shift;
+
+  # $lib may come from a profile's memory map, so quote it for the shell
+  # unless it is made up solely of characters the shell leaves alone.
+  my $lib_arg = $lib;
+  if ($lib_arg =~ m![^a-zA-Z0-9/.,:_=+-]!) {
+    $lib_arg =~ s/'/'\\''/g;
+    $lib_arg = "'$lib_arg'";
+  }
+
+  my ($size, $vma, $file_offset);
+  # Get otool output from the library file to figure out how to
+  # map between mapped addresses and addresses in the library.
+  my $otool = $obj_tool_map{"otool"};
+  open(OTOOL, "$otool -l $lib_arg |")
+                || error("$otool $lib: $!\n");
+  my ($cmd, $sectname, $segname) = ("", "", "");
+  # Read line by line instead of slurping the whole listing into a list.
+  while (my $line = <OTOOL>) {
+    $line =~ s/\r//g;      # turn windows-looking lines into unix-looking lines
+    # Load command <#>
+    #       cmd LC_SEGMENT
+    # [...]
+    # Section
+    #   sectname __text
+    #    segname __TEXT
+    #       addr 0x000009f8
+    #       size 0x00018b9e
+    #     offset 2552
+    #      align 2^2 (4)
+    # We will need to strip off the leading 0x from the hex addresses,
+    # and convert the offset into hex.
+    if ($line =~ /Load command/) {
+      $cmd = "";
+      $sectname = "";
+      $segname = "";
+    } elsif ($line =~ /Section/) {
+      $sectname = "";
+      $segname = "";
+    } elsif ($line =~ /cmd (\w+)/) {
+      $cmd = $1;
+    } elsif ($line =~ /sectname (\w+)/) {
+      $sectname = $1;
+    } elsif ($line =~ /segname (\w+)/) {
+      $segname = $1;
+    } elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") &&
+               $sectname eq "__text" &&
+               $segname eq "__TEXT")) {
+      next;
+    } elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) {
+      $vma = $1;
+    } elsif ($line =~ /\bsize 0x([0-9a-fA-F]+)/) {
+      $size = $1;
+    } elsif ($line =~ /\boffset ([0-9]+)/) {
+      $file_offset = sprintf("%016x", $1);
+    }
+    if (defined($vma) && defined($size) && defined($file_offset)) {
+      last;
+    }
+  }
+  close(OTOOL);
+
+  return undef if (!defined($vma) || !defined($size) || !defined($file_offset));
+
+  return { size => $size, vma => $vma, file_offset => $file_offset };
+}
+
+sub ParseTextSectionHeader {
+  # %obj_tool_map only has an "otool" entry in a Mach-O environment; when it
+  # does, otool gets the first attempt.
+  my $header = undef;
+  if (defined($obj_tool_map{"otool"})) {
+    $header = ParseTextSectionHeaderFromOtool(@_);
+  }
+  return $header if defined($header);
+  # otool is unavailable or found nothing: fall back to objdump.
+  return ParseTextSectionHeaderFromObjdump(@_);
+}
+
+# Split a /proc/pid/maps dump into a list of libraries.
+#   $prog - name recorded in the final catch-all entry for the main program
+#   $map  - text of the maps dump, one mapping per line
+#   $pcs  - hash ref whose keys are the sampled PC values
+# Returns an array ref of [library, start, finish, offset] entries, or
+# nothing at all when $main::use_symbol_page is set.
+sub ParseLibraries {
+  return if $main::use_symbol_page;  # We don't need libraries info.
+  my ($prog, $map, $pcs) = @_;
+
+  my @entries = ();
+  my $h = "[a-f0-9]+";
+  my $zero_offset = HexExtend("0");
+
+  my $buildvar = "";
+  foreach my $l (split("\n", $map)) {
+    # Remember the most recent "build=" value for later $build expansion.
+    $buildvar = $1 if ($l =~ m/^\s*build=(.*)$/);
+
+    my ($start, $finish, $offset, $lib);
+    if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?)$/i) {
+      # Full line from /proc/self/maps.  Example:
+      #   40000000-40015000 r-xp 00000000 03:01 12845071   /lib/ld-2.3.2.so
+      my ($lo, $hi, $off, $path) = ($1, $2, $3, $4);
+      $start = HexExtend($lo);
+      $finish = HexExtend($hi);
+      $offset = HexExtend($off);
+      $lib = $path;
+      $lib =~ s|\\|/|g;     # turn windows-style paths into unix-style paths
+    } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) {
+      # Cooked line from DumpAddressMap.  Example:
+      #   40000000-40015000: /lib/ld-2.3.2.so
+      my ($lo, $hi, $path) = ($1, $2, $3);
+      $start = HexExtend($lo);
+      $finish = HexExtend($hi);
+      $offset = $zero_offset;
+      $lib = $path;
+    } else {
+      next;   # not a line describing an executable library mapping
+    }
+
+    # Expand "$build" variable if available
+    $lib =~ s/\$build\b/$buildvar/g;
+
+    $lib = FindLibrary($lib);
+
+    # Pre-relocated libraries use pre-relocated symbol tables, so the offset
+    # used to translate VM addresses into symbol table addresses has to be
+    # adjusted.  Skip this when the symbol table will come from a debugging
+    # copy of the library instead.
+    unless (DebuggingLibrary($lib)) {
+      my $text = ParseTextSectionHeader($lib);
+      if (defined($text)) {
+        my $vma_offset = AddressSub($text->{vma}, $text->{file_offset});
+        $offset = AddressAdd($offset, $vma_offset);
+      }
+    }
+
+    push(@entries, [$lib, $start, $finish, $offset]);
+  }
+
+  # Append special entry for additional library (not relocated)
+  if ($main::opt_lib ne "") {
+    my $text = ParseTextSectionHeader($main::opt_lib);
+    if (defined($text)) {
+      my $lib_start = $text->{vma};
+      my $lib_finish = AddressAdd($lib_start, $text->{size});
+      push(@entries, [$main::opt_lib, $lib_start, $lib_finish, $lib_start]);
+    }
+  }
+
+  # Append special entry for the main program.  It spans
+  # 0..max_pc_value_seen, so pc values that fall in none of the library
+  # ranges are treated as coming from the main program binary.
+  my $min_pc = HexExtend("0");
+  my $max_pc = $min_pc;          # largest PC value seen in any sample
+  foreach my $pc (keys(%{$pcs})) {
+    next unless (HexExtend($pc) gt $max_pc);
+    $max_pc = HexExtend($pc);
+  }
+  push(@entries, [$prog, $min_pc, $max_pc, $zero_offset]);
+
+  return \@entries;
+}
+
+# Add two hex addresses of length $address_length, wrapping on overflow.
+# Both arguments and the result are hex strings without a "0x" prefix.
+# Run pprof --test for unit test if this is changed.
+sub AddressAdd {
+  my ($addr1, $addr2) = @_;
+
+  if ($address_length == 8) {
+    # Perl doesn't cope with wraparound arithmetic, so do it explicitly:
+    my $wrapped = (hex($addr1) + hex($addr2)) % (0x10000000 * 16);
+    return sprintf("%08x", $wrapped);
+  }
+
+  # 16-nibble case: add in 7-nibble chunks to trivialize carry handling.
+  if ($main::opt_debug and $main::opt_test) {
+    print STDERR "AddressAdd $addr1 + $addr2 = ";
+  }
+
+  my $carry = 0;
+  my $digits = "";
+  # The two low-order 7-nibble chunks, least significant first.
+  foreach my $chunk (1, 2) {
+    my $low1 = substr($addr1,-7);
+    $addr1 = substr($addr1,0,-7);
+    my $low2 = substr($addr2,-7);
+    $addr2 = substr($addr2,0,-7);
+    my $partial = hex($low1) + hex($low2) + $carry;
+    $carry = 0;
+    if ($partial > 0xfffffff) {
+      $carry = 1;
+      $partial -= 0x10000000;
+    }
+    $digits = sprintf("%07x", $partial) . $digits;
+  }
+
+  # Whatever is left is the top two nibbles; a carry out of them is dropped.
+  my $top = hex($addr1) + hex($addr2) + $carry;
+  if ($top > 0xff) { $top -= 0x100; }
+  $digits = sprintf("%02x", $top) . $digits;
+
+  if ($main::opt_debug and $main::opt_test) { print STDERR "$digits\n"; }
+
+  return $digits;
+}
+
+
+
+# Subtract the second hex address from the first, both of length
+# $address_length, wrapping on underflow.  Arguments and result are hex
+# strings without a "0x" prefix.
+# Run pprof --test for unit test if this is changed.
+sub AddressSub {
+  my ($addr1, $addr2) = @_;
+
+  if ($address_length == 8) {
+    # Perl doesn't cope with wraparound arithmetic, so do it explicitly:
+    my $wrapped = (hex($addr1) - hex($addr2)) % (0x10000000 * 16);
+    return sprintf("%08x", $wrapped);
+  }
+
+  # 16-nibble case: subtract in 7-nibble chunks to trivialize borrow handling.
+  my $borrow = 0;
+  my $digits = "";
+  # The two low-order 7-nibble chunks, least significant first.
+  foreach my $chunk (1, 2) {
+    my $minuend = hex(substr($addr1,-7));
+    $addr1 = substr($addr1,0,-7);
+    my $subtrahend = hex(substr($addr2,-7)) + $borrow;
+    $addr2 = substr($addr2,0,-7);
+    $borrow = 0;
+    if ($subtrahend > $minuend) {
+      $borrow = 1;
+      $minuend += 0x10000000;
+    }
+    $digits = sprintf("%07x", $minuend - $subtrahend) . $digits;
+  }
+
+  # Top two nibbles; a borrow out of them is dropped.
+  my $top_minuend = hex($addr1);
+  my $top_subtrahend = hex($addr2) + $borrow;
+  if ($top_subtrahend > $top_minuend) { $top_minuend += 0x100; }
+  $digits = sprintf("%02x", $top_minuend - $top_subtrahend) . $digits;
+
+  return $digits;
+}
+
+# Increment a hex address of length $address_length by one, wrapping on
+# overflow.
+# Run pprof --test for unit test if this is changed.
+sub AddressInc {
+  my ($addr) = @_;
+
+  if ($address_length == 8) {
+    # Perl doesn't cope with wraparound arithmetic, so do it explicitly:
+    my $wrapped = (hex($addr) + 1) % (0x10000000 * 16);
+    return sprintf("%08x", $wrapped);
+  }
+
+  # 16-nibble case: work in 7-nibble chunks to trivialize carry handling.
+  # This is used to step through the addresses in a function, so the first
+  # chunk almost never overflows; return as soon as a chunk absorbs the
+  # increment.
+  my $zeros = "";   # chunks that have already rolled over to all zeros
+  foreach my $chunk (1, 2) {
+    my $low = substr($addr,-7);
+    $addr = substr($addr,0,-7);
+    my $bumped = hex($low) + 1;
+    if ($bumped <= 0xfffffff) {
+      return HexExtend($addr . sprintf("%07x", $bumped) . $zeros);
+    }
+    $zeros .= "0000000";
+  }
+
+  # Both low chunks overflowed: bump the top two nibbles, dropping any carry.
+  my $top = hex($addr) + 1;
+  if ($top > 0xff) { $top -= 0x100; }
+  return sprintf("%02x", $top) . $zeros;
+}
+
+# Extract symbols for all PC values found in profile.
+#   $libs  - array ref of [name, start, finish, offset] entries
+#   $pcset - hash ref whose keys are 0-extended PC strings
+# Returns the hash ref that MapToSymbols() fills in for each library.
+sub ExtractSymbols {
+  my ($libs, $pcset) = @_;
+
+  my $symbols = {};
+
+  # Map each PC value to the containing library.  Libraries are visited by
+  # starting pc value, highest first, and matching PCs are removed from the
+  # sorted PC list as we go.  Library addresses may overlap with those of the
+  # main binary; visiting the libraries in this order makes the libraries
+  # 'win' (assuming the binary doesn't start in the middle of a library,
+  # which seems a fair assumption).
+  my @pcs = sort { $a cmp $b } keys(%{$pcset});
+  my @by_start_desc = sort { $b->[1] cmp $a->[1] } @{$libs};
+  foreach my $lib (@by_start_desc) {
+    my ($libname, $start, $finish, $offset) =
+        ($lib->[0], $lib->[1], $lib->[2], $lib->[3]);
+
+    # Walk down from the top of @pcs past every PC greater than $finish.
+    my $finish_pc_index = $#pcs + 1;
+    while ($finish_pc_index > 0 && $pcs[$finish_pc_index - 1] gt $finish) {
+      $finish_pc_index--;
+    }
+    # Keep walking down while PCs are still at or above $start.
+    my $start_pc_index = $finish_pc_index;
+    while ($start_pc_index > 0 && $pcs[$start_pc_index - 1] ge $start) {
+      $start_pc_index--;
+    }
+
+    # Remove exactly the PCs in [$start, $finish]; higher PC values stay in
+    # @pcs in case there are overlaps in libraries and the main binary.
+    my @contained = splice(@pcs, $start_pc_index,
+                           $finish_pc_index - $start_pc_index);
+    # Map to symbols
+    MapToSymbols($libname, AddressSub($start, $offset), \@contained, $symbols);
+  }
+
+  return $symbols;
+}
+
+# Map list of PC values to symbols for a given image.
+#   $image   - file handed to addr2line via "-e" (and to nm)
+#   $offset  - amount subtracted (AddressSub) from each PC before lookup
+#   $pclist  - array ref of PC strings
+#   $symbols - hash ref updated in place; each PC string maps to a flat list
+#              of (short function, file:line, full function) triples
+# When "addr2line --help" fails, only MapSymbolsWithNM() is used.
+# Side effect: resets the file-level $sep_address before calling nm.
+sub MapToSymbols {
+  my $image = shift;
+  my $offset = shift;
+  my $pclist = shift;
+  my $symbols = shift;
+
+  # Set to a true value by hand to trace the addr2line exchange.
+  my $debug = 0;
+
+  # Ignore empty binaries
+  if ($#{$pclist} < 0) { return; }
+
+  # Figure out the addr2line command to use
+  my $addr2line = $obj_tool_map{"addr2line"};
+  my $cmd = "$addr2line -f -C -e $image";
+  if (exists $obj_tool_map{"addr2line_pdb"}) {
+    $addr2line = $obj_tool_map{"addr2line_pdb"};
+    $cmd = "$addr2line --demangle -f -C -e $image";
+  }
+
+  # If "addr2line" isn't installed on the system at all, just use
+  # nm to get what info we can (function names, but not line numbers).
+  if (system("$addr2line --help >/dev/null 2>&1") != 0) {
+    MapSymbolsWithNM($image, $offset, $pclist, $symbols);
+    return;
+  }
+
+  # "addr2line -i" can produce a variable number of lines per input
+  # address, with no separator that allows us to tell when data for
+  # the next address starts.  So we find the address for a special
+  # symbol (_fini) and interleave this address between all real
+  # addresses passed to addr2line.  The name of this special symbol
+  # can then be used as a separator.
+  $sep_address = undef;  # May be filled in by MapSymbolsWithNM()
+  # nm results go into a scratch table; they are only consulted below when
+  # addr2line answers '??' for a function.
+  my $nm_symbols = {};
+  MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols);
+  # '-i' is only added when a separator address is known and addr2line
+  # accepts the flag.
+  if (defined($sep_address)) {
+    # Only add " -i" to addr2line if the binary supports it.
+    # addr2line --help returns 0, but not if it sees an unknown flag first.
+    if (system("$cmd -i --help >/dev/null 2>&1") == 0) {
+      $cmd .= " -i";
+    } else {
+      $sep_address = undef;   # no need for sep_address if we don't support -i
+    }
+  }
+
+  # Make file with all PC values with intervening 'sep_address' so
+  # that we can reliably detect the end of inlined function list
+  open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n");
+  if ($debug) { print("---- $image ---\n"); }
+  for (my $i = 0; $i <= $#{$pclist}; $i++) {
+    # addr2line always reads hex addresses, and does not need '0x' prefix.
+    if ($debug) { printf STDERR ("%s\n", $pclist->[$i]); }
+    printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset));
+    if (defined($sep_address)) {
+      printf ADDRESSES ("%s\n", $sep_address);
+    }
+  }
+  close(ADDRESSES);
+  if ($debug) {
+    print("----\n");
+    system("cat $main::tmpfile_sym");
+    print("----\n");
+    system("$cmd <$main::tmpfile_sym");
+    print("----\n");
+  }
+
+  open(SYMBOLS, "$cmd <$main::tmpfile_sym |") || error("$cmd: $!\n");
+  my $count = 0;   # Index in pclist
+  while (<SYMBOLS>) {
+    # Read fullfunction and filelineinfo from next pair of lines
+    s/\r?\n$//g;
+    my $fullfunction = $_;
+    # NOTE(review): this second read is not checked for end-of-file; if the
+    # output has an odd number of lines, $filelinenum ends up undefined.
+    $_ = <SYMBOLS>;
+    s/\r?\n$//g;
+    my $filelinenum = $_;
+
+    if (defined($sep_address) && $fullfunction eq $sep_symbol) {
+      # Terminating marker for data for this address
+      $count++;
+      next;
+    }
+
+    $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths
+
+    my $pcstr = $pclist->[$count];
+    my $function = ShortFunctionName($fullfunction);
+    if ($fullfunction eq '??') {
+      # See if nm found a symbol
+      my $nms = $nm_symbols->{$pcstr};
+      if (defined($nms)) {
+        $function = $nms->[0];
+        $fullfunction = $nms->[2];
+      }
+    }
+
+    # Prepend to accumulated symbols for pcstr
+    # (so that caller comes before callee)
+    my $sym = $symbols->{$pcstr};
+    if (!defined($sym)) {
+      $sym = [];
+      $symbols->{$pcstr} = $sym;
+    }
+    unshift(@{$sym}, $function, $filelinenum, $fullfunction);
+    if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); }
+    if (!defined($sep_address)) {
+      # Inlining is off, so this entry ends immediately
+      $count++;
+    }
+  }
+  close(SYMBOLS);
+}
+
+# Use nm to map the list of referenced PCs to symbols.
+#   $image   - file whose symbol table is read via GetProcedureBoundaries()
+#   $offset  - amount subtracted (AddressSub) from each PC before lookup
+#   $pclist  - array ref of PC strings
+#   $symbols - hash ref updated in place: PC => [short name, "?", full name],
+#              or ["0x<pc>", "?", "0x<pc>"] when no symbol covers the PC
+# Returns true iff we are able to read procedure information via nm; when
+# the symbol table is empty, $symbols is left untouched and 0 is returned.
+sub MapSymbolsWithNM {
+  my $image = shift;
+  my $offset = shift;
+  my $pclist = shift;
+  my $symbols = shift;
+
+  # Get nm output sorted by increasing address
+  my $symbol_table = GetProcedureBoundaries($image, ".");
+  if (!%{$symbol_table}) {
+    return 0;
+  }
+  # Start addresses are already the right length (8 or 16 hex digits).
+  # The table is known to be non-empty here, so @names has at least one
+  # entry and $names[0] below is always defined.
+  my @names = sort { $symbol_table->{$a}->[0] cmp $symbol_table->{$b}->[0] }
+    keys(%{$symbol_table});
+
+  # Sort addresses so we can do a join against nm output
+  my $index = 0;
+  my $fullname = $names[0];
+  my $name = ShortFunctionName($fullname);
+  foreach my $pc (sort { $a cmp $b } @{$pclist}) {
+    # Adjust for mapped offset
+    my $mpc = AddressSub($pc, $offset);
+    # Advance to the first symbol whose end address lies beyond $mpc, or
+    # stop at the last symbol.
+    while (($index < $#names) && ($mpc ge $symbol_table->{$fullname}->[1])){
+      $index++;
+      $fullname = $names[$index];
+      $name = ShortFunctionName($fullname);
+    }
+    if ($mpc lt $symbol_table->{$fullname}->[1]) {
+      $symbols->{$pc} = [$name, "?", $fullname];
+    } else {
+      # Past the end of the last symbol: fall back to the raw address.
+      my $pcstr = "0x" . $pc;
+      $symbols->{$pc} = [$pcstr, "?", $pcstr];
+    }
+  }
+  return 1;
+}
+
+sub ShortFunctionName {
+  # Reduce a demangled function name to a short form: drop parenthesized
+  # argument lists (with any trailing "const"), drop <...> template argument
+  # lists, then drop a leading return type in front of a qualified name.
+  my ($function) = @_;
+  # Each substitution is repeated until it no longer changes anything, so
+  # nested parentheses and nested templates are peeled from the inside out.
+  1 while ($function =~ s/\([^()]*\)(\s*const)?//g);   # Argument types
+  1 while ($function =~ s/<[^<>]*>//g);                # Template arguments
+  $function =~ s/^.*\s+(\w+::)/$1/;                    # Leading type
+  return $function;
+}
+
+##### Miscellaneous #####
+
+# Find the right versions of the above object tools to use.  The
+# argument is the program file being analyzed, and should be an ELF
+# 32-bit or ELF 64-bit executable file.  The location of the tools
+# is determined by considering the following options in this order:
+#   1) --tools option, if set
+#   2) PPROF_TOOLS environment variable, if set
+#   3) the environment
+# Side effects: may set $address_length to 16, may add entries to
+# %obj_tool_map, and rewrites every %obj_tool_map value through
+# ConfigureTool().  Calls error() if the program file does not exist.
+sub ConfigureObjTools {
+  my $prog_file = shift;
+
+  # Check for the existence of $prog_file because /usr/bin/file does not
+  # predictably return error status in prod.
+  (-e $prog_file)  || error("$prog_file does not exist.\n");
+
+  # The file name is passed through the shell, so single-quote it (escaping
+  # embedded single quotes) to keep spaces and shell metacharacters in the
+  # path from being interpreted.
+  (my $quoted_prog_file = $prog_file) =~ s/'/'\\''/g;
+  $quoted_prog_file = "'$quoted_prog_file'";
+
+  # Follow symlinks (at least for systems where "file" supports that)
+  my $file_type = `/usr/bin/file -L $quoted_prog_file 2>/dev/null || /usr/bin/file $quoted_prog_file`;
+  # Backticks yield undef when the command cannot be run at all; treat that
+  # as "unknown type" instead of matching against an undefined value.
+  $file_type = "" if !defined($file_type);
+
+  if ($file_type =~ /64-bit/) {
+    # Change $address_length to 16 if the program file is ELF 64-bit.
+    # We can't detect this from many (most?) heap or lock contention
+    # profiles, since the actual addresses referenced are generally in low
+    # memory even for 64-bit programs.
+    $address_length = 16;
+  }
+
+  if ($file_type =~ /MS Windows/) {
+    # For windows, we provide a version of nm and addr2line as part of
+    # the opensource release, which is capable of parsing
+    # Windows-style PDB executables.  It should live in the path, or
+    # in the same directory as pprof.
+    $obj_tool_map{"nm_pdb"} = "nm-pdb";
+    $obj_tool_map{"addr2line_pdb"} = "addr2line-pdb";
+  }
+
+  if ($file_type =~ /Mach-O/) {
+    # OS X uses otool to examine Mach-O files, rather than objdump.
+    $obj_tool_map{"otool"} = "otool";
+    $obj_tool_map{"addr2line"} = "false";  # no addr2line
+    $obj_tool_map{"objdump"} = "false";  # no objdump
+  }
+
+  # Go fill in %obj_tool_map with the pathnames to use:
+  foreach my $tool (keys %obj_tool_map) {
+    $obj_tool_map{$tool} = ConfigureTool($obj_tool_map{$tool});
+  }
+}
+
+# Returns the path of a caller-specified object tool.  If --tools or
+# PPROF_TOOLS are specified, then returns the full path to the tool
+# with that prefix.  Otherwise, returns the copy sitting next to pprof
+# when that is executable, or else the tool name unmodified (which
+# means we will look for it on PATH).
+sub ConfigureTool {
+  my ($tool) = @_;
+  my $path;
+
+  # --tools (or $PPROF_TOOLS) is a comma separated list, where each
+  # item is either a) a pathname prefix, or b) a map of the form
+  # <tool>:<path>.  An entry of type (b) for our tool wins outright.
+  # Otherwise the pathname prefixes are tried in turn until one yields
+  # an executable file; it is an error if none does.  With no list at
+  # all, a default path is used.
+  my $tools = $main::opt_tools || $ENV{"PPROF_TOOLS"} || "";
+  my (undef, $mapped) = ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/);
+
+  if (defined($mapped)) {
+    $path = $mapped;
+    # TODO(csilvers): sanity-check that $path exists?  Hard if it's relative.
+  } elsif ($tools ne '') {
+    foreach my $prefix (split(',', $tools)) {
+      next if ($prefix =~ /:/);    # ignore "tool:fullpath" entries in the list
+      my $candidate = $prefix . $tool;
+      next unless (-x $candidate);
+      $path = $candidate;
+      last;
+    }
+    if (!$path) {
+      error("No '$tool' found with prefix specified by " .
+            "--tools (or \$PPROF_TOOLS) '$tools'\n");
+    }
+  } else {
+    # ... otherwise use the version that exists in the same directory as
+    # pprof.  If there's nothing there, use $PATH.
+    # Strip everything after the last slash of $0, keeping the slash.
+    (my $dirname = $0) =~ s,[^/]*$,,;
+    my $sibling = "$dirname$tool";
+    $path = (-x $sibling) ? $sibling : $tool;
+  }
+  if ($main::opt_debug) { print STDERR "Using '$path' for '$tool'.\n"; }
+  return $path;
+}
+
+sub cleanup {
+  # Remove the temporary symbol file and every file named by a key of
+  # %main::tempnames, then tell the user where a dynamically collected
+  # profile (if any) was kept.
+  unlink($main::tmpfile_sym);
+  unlink(keys %main::tempnames);
+
+  # We leave any collected profiles in $HOME/pprof in case the user wants
+  # to look at them later.  We print a message informing them of this.
+  my $have_collected = (scalar(@main::profile_files) > 0) &&
+                       defined($main::collected_profile);
+  if ($have_collected) {
+    if (scalar(@main::profile_files) == 1) {
+      print STDERR "Dynamically gathered profile is in $main::collected_profile\n";
+    }
+    foreach my $line ("If you want to investigate this profile further, you can do:\n",
+                      "\n",
+                      "  pprof \\\n",
+                      "    $main::prog \\\n",
+                      "    $main::collected_profile\n",
+                      "\n") {
+      print STDERR $line;
+    }
+  }
+}
+
+sub sighandler {
+  # Presumably installed as a signal handler elsewhere in the script:
+  # runs cleanup() and then terminates with exit status 1.
+  cleanup();
+  exit 1;
+}
+
+sub error {
+  # Fatal error: write $msg to STDERR exactly as given (no newline is
+  # added), run cleanup(), and terminate with exit status 1.
+  my ($msg) = @_;
+  print STDERR $msg;
+  cleanup();
+  exit 1;
+}
+
+
+# Run $nm_command and get all the resulting procedure boundaries whose
+# names match "$regexp" and returns them in a hashtable mapping from
+# procedure name to a two-element vector of [start address, end address]
+# Each key is the symbol name with "<start address>" appended.  A symbol's
+# end address is the start address of the next symbol seen at a different
+# address.  Side effect: sets the file-level $sep_address when a symbol
+# named $sep_symbol is encountered.
+sub GetProcedureBoundariesViaNm {
+  my $nm_command = shift;
+  my $regexp = shift;
+
+  my $symbol_table = {};
+  open(NM, "$nm_command |") || error("$nm_command: $!\n");
+  # $routine/$last_start describe the symbol that is queued but not yet
+  # recorded; it is recorded once a symbol at a different address shows up.
+  my $last_start = "0";
+  my $routine = "";
+  while (<NM>) {
+    s/\r//g;         # turn windows-looking lines into unix-looking lines
+    # Symbol lines look like "<hex address> <one-char type> <name>".
+    if (m/^\s*([0-9a-f]+) (.) (..*)/) {
+      my $start_val = $1;
+      my $type = $2;
+      my $this_routine = $3;
+
+      # It's possible for two symbols to share the same address, if
+      # one is a zero-length variable (like __start_google_malloc) or
+      # one symbol is a weak alias to another (like __libc_malloc).
+      # In such cases, we want to ignore all values except for the
+      # actual symbol, which in nm-speak has type "T".  The logic
+      # below does this, though it's a bit tricky: what happens when
+      # we have a series of lines with the same address, is the first
+      # one gets queued up to be processed.  However, it won't
+      # *actually* be processed until later, when we read a line with
+      # a different address.  That means that as long as we're reading
+      # lines with the same address, we have a chance to replace that
+      # item in the queue, which we do whenever we see a 'T' entry --
+      # that is, a line with type 'T'.  If we never see a 'T' entry,
+      # we'll just go ahead and process the first entry (which never
+      # got touched in the queue), and ignore the others.
+      if ($start_val eq $last_start && $type =~ /t/i) {
+        # We are the 'T' symbol at this address, replace previous symbol.
+        # (The replacement name is stored without the "<address>" tag.)
+        $routine = $this_routine;
+        next;
+      } elsif ($start_val eq $last_start) {
+        # We're not the 'T' symbol at this address, so ignore us.
+        next;
+      }
+
+      if ($this_routine eq $sep_symbol) {
+        $sep_address = HexExtend($start_val);
+      }
+
+      # Tag this routine with the starting address in case the image
+      # has multiple occurrences of this routine.  We use a syntax
+      # that resembles template parameters that are automatically
+      # stripped out by ShortFunctionName()
+      $this_routine .= "<$start_val>";
+
+      # Record the queued symbol now that its end address is known.
+      if (defined($routine) && $routine =~ m/$regexp/) {
+        $symbol_table->{$routine} = [HexExtend($last_start),
+                                     HexExtend($start_val)];
+      }
+      $last_start = $start_val;
+      $routine = $this_routine;
+    } elsif (m/^Loaded image name: (.+)/) {
+      # The win32 nm workalike emits information about the binary it is using.
+      if ($main::opt_debug) { print STDERR "Using Image $1\n"; }
+    } elsif (m/^PDB file name: (.+)/) {
+      # The win32 nm workalike emits information about the pdb it is using.
+      if ($main::opt_debug) { print STDERR "Using PDB $1\n"; }
+    }
+  }
+  close(NM);
+  # Handle the last line in the nm output.  Unfortunately, we don't know
+  # how big this last symbol is, because we don't know how big the file
+  # is.  For now, we just give it a size of 0.
+  # TODO(csilvers): do better here.
+  if (defined($routine) && $routine =~ m/$regexp/) {
+    $symbol_table->{$routine} = [HexExtend($last_start),
+                                 HexExtend($last_start)];
+  }
+  return $symbol_table;
+}
+
+# Gets the procedure boundaries for all routines in "$image" whose names
+# match "$regexp" and returns them in a hashtable mapping from procedure
+# name to a two-element vector of [start address, end address].
+# Several nm invocations are tried in turn; the first non-empty table wins.
+# Will return an empty map if nm is not installed or not working properly.
+sub GetProcedureBoundaries {
+  my ($image, $regexp) = @_;
+
+  # For libc libraries, the copy in /usr/lib/debug contains debugging symbols
+  $image = DebuggingLibrary($image) || $image;
+
+  my $nm = $obj_tool_map{"nm"};
+  my $cppfilt = $obj_tool_map{"c++filt"};
+
+  # nm can fail for two reasons: 1) $image isn't a debug library; 2) nm
+  # binary doesn't support --demangle.  In addition, for OS X we need
+  # to use the -f flag to get 'flat' nm output (otherwise we don't sort
+  # properly and get incorrect results).  Unfortunately, GNU nm uses -f
+  # in an incompatible way.  So first we test whether our nm supports
+  # --demangle and -f.
+  my ($demangle_flag, $cppfilt_flag, $flatten_flag) = ("", "", "");
+  if (system("$nm --demangle $image >/dev/null 2>&1") == 0) {
+    # In this mode, we do "nm --demangle <foo>"
+    $demangle_flag = "--demangle";
+  } elsif (system("$cppfilt $image >/dev/null 2>&1") == 0) {
+    # In this mode, we do "nm <foo> | c++filt"
+    $cppfilt_flag = " | $cppfilt";
+  }
+  $flatten_flag = "-f" if (system("$nm -f $image >/dev/null 2>&1") == 0);
+
+  # Finally, in the case $image isn't a debug library, we try again with
+  # -D to at least get *exported* symbols.  If we can't use --demangle,
+  # we use c++filt instead, if it exists on this system.
+  my $nm_options = "-n $flatten_flag $demangle_flag";
+  my $nm_tail = "$image 2>/dev/null $cppfilt_flag";
+  my @nm_commands = (
+    "$nm $nm_options $nm_tail",
+    "$nm -D $nm_options $nm_tail",
+    # 6nm is for Go binaries
+    "6nm $image 2>/dev/null | sort",
+  );
+
+  # If the executable is an MS Windows PDB-format executable, we'll
+  # have set up obj_tool_map("nm_pdb").  In this case, we actually
+  # want to use both unix nm and windows-specific nm_pdb, since
+  # PDB-format executables can apparently include dwarf .o files.
+  if (exists $obj_tool_map{"nm_pdb"}) {
+    my $nm_pdb = $obj_tool_map{"nm_pdb"};
+    push(@nm_commands, "$nm_pdb --demangle $image 2>/dev/null");
+  }
+
+  foreach my $nm_command (@nm_commands) {
+    my $found = GetProcedureBoundariesViaNm($nm_command, $regexp);
+    return $found if (%{$found});
+  }
+  # Nothing produced any symbols.
+  return {};
+}
+
+
+# The test vectors for AddressAdd/Sub/Inc are 8-16-nibble hex strings,
+# written with underscores at interesting places to make them readable.
+# This routine strips the underscores, producing the canonical representation
+# used by pprof to represent addresses, particularly in the tested routines.
+sub CanonicalHex {
+  my ($arg) = @_;
+  my @pieces = split('_', $arg);
+  return join('', @pieces);
+}
+
+
+# Unit test for AddressAdd.
+#   $test_data_8  - array ref of rows [a, b, a+b, ...] of 8-nibble strings
+#   $test_data_16 - same layout, 16-nibble strings that may contain
+#                   underscores (removed with CanonicalHex before use)
+# Prints a pass/fail summary per address size on STDERR and returns the
+# total number of failures.  Leaves $address_length set to 16.
+sub AddressAddUnitTest {
+  my $test_data_8 = shift;
+  my $test_data_16 = shift;
+  my $error_count = 0;
+  my $fail_count = 0;
+  my $pass_count = 0;
+  # print STDERR "AddressAddUnitTest: ", 1+$#{$test_data_8}, " tests\n";
+
+  # First a few 8-nibble addresses.  Note that this implementation uses
+  # plain old arithmetic, so a quick sanity check along with verifying what
+  # happens to overflow (we want it to wrap):
+  $address_length = 8;
+  foreach my $row (@{$test_data_8}) {
+    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
+    my $sum = AddressAdd ($row->[0], $row->[1]);
+    if ($sum ne $row->[2]) {
+      printf STDERR "ERROR: %s != %s + %s = %s\n", $sum,
+             $row->[0], $row->[1], $row->[2];
+      ++$fail_count;
+    } else {
+      ++$pass_count;
+    }
+  }
+  printf STDERR "AddressAdd 32-bit tests: %d passes, %d failures\n",
+         $pass_count, $fail_count;
+  $error_count = $fail_count;
+  $fail_count = 0;
+  $pass_count = 0;
+
+  # Now 16-nibble addresses.
+  $address_length = 16;
+  foreach my $row (@{$test_data_16}) {
+    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
+    my $sum = AddressAdd (CanonicalHex($row->[0]), CanonicalHex($row->[1]));
+    # Canonicalize the expected value once, the same way as the operands.
+    my $expected = CanonicalHex($row->[2]);
+    if ($sum ne $expected) {
+      printf STDERR "ERROR: %s != %s + %s = %s\n", $sum,
+             $row->[0], $row->[1], $row->[2];
+      ++$fail_count;
+    } else {
+      ++$pass_count;
+    }
+  }
+  printf STDERR "AddressAdd 64-bit tests: %d passes, %d failures\n",
+         $pass_count, $fail_count;
+  $error_count += $fail_count;
+
+  return $error_count;
+}
+
+
+# Unit test for AddressSub.  Rows are [a, b, a+b, a-b, a+1]; only columns
+# 0, 1 and 3 are used here.  Prints a pass/fail summary per address size on
+# STDERR and returns the total number of failures.
+sub AddressSubUnitTest {
+  my ($test_data_8, $test_data_16) = @_;
+  my ($passed, $failed) = (0, 0);
+  # print STDERR "AddressSubUnitTest: ", 1+$#{$test_data_8}, " tests\n";
+
+  # 8-nibble addresses first.  That code path is plain arithmetic, so this
+  # is a quick sanity check plus a check that overflow wraps:
+  $address_length = 8;
+  foreach my $row (@{$test_data_8}) {
+    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
+    my $got = AddressSub ($row->[0], $row->[1]);
+    if ($got eq $row->[3]) {
+      $passed++;
+      next;
+    }
+    printf STDERR "ERROR: %s != %s - %s = %s\n", $got,
+           $row->[0], $row->[1], $row->[3];
+    $failed++;
+  }
+  printf STDERR "AddressSub 32-bit tests: %d passes, %d failures\n",
+         $passed, $failed;
+  my $failed_8 = $failed;
+  ($passed, $failed) = (0, 0);
+
+  # Now 16-nibble addresses.
+  $address_length = 16;
+  foreach my $row (@{$test_data_16}) {
+    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
+    my $got = AddressSub (CanonicalHex($row->[0]), CanonicalHex($row->[1]));
+    if ($got eq CanonicalHex($row->[3])) {
+      $passed++;
+      next;
+    }
+    printf STDERR "ERROR: %s != %s - %s = %s\n", $got,
+           $row->[0], $row->[1], $row->[3];
+    $failed++;
+  }
+  printf STDERR "AddressSub 64-bit tests: %d passes, %d failures\n",
+         $passed, $failed;
+
+  return $failed_8 + $failed;
+}
+
+
+# Unit test for AddressInc.  Rows are [a, b, a+b, a-b, a+1]; only columns
+# 0 and 4 are used here.  Prints a pass/fail summary per address size on
+# STDERR and returns the total number of failures.
+sub AddressIncUnitTest {
+  my ($test_data_8, $test_data_16) = @_;
+  my ($passed, $failed) = (0, 0);
+  # print STDERR "AddressIncUnitTest: ", 1+$#{$test_data_8}, " tests\n";
+
+  # 8-nibble addresses first.  That code path is plain arithmetic, so this
+  # is a quick sanity check plus a check that overflow wraps:
+  $address_length = 8;
+  foreach my $row (@{$test_data_8}) {
+    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
+    my $got = AddressInc ($row->[0]);
+    if ($got eq $row->[4]) {
+      $passed++;
+      next;
+    }
+    printf STDERR "ERROR: %s != %s + 1 = %s\n", $got,
+           $row->[0], $row->[4];
+    $failed++;
+  }
+  printf STDERR "AddressInc 32-bit tests: %d passes, %d failures\n",
+         $passed, $failed;
+  my $failed_8 = $failed;
+  ($passed, $failed) = (0, 0);
+
+  # Now 16-nibble addresses.
+  $address_length = 16;
+  foreach my $row (@{$test_data_16}) {
+    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
+    my $got = AddressInc (CanonicalHex($row->[0]));
+    if ($got eq CanonicalHex($row->[4])) {
+      $passed++;
+      next;
+    }
+    printf STDERR "ERROR: %s != %s + 1 = %s\n", $got,
+           $row->[0], $row->[4];
+    $failed++;
+  }
+  printf STDERR "AddressInc 64-bit tests: %d passes, %d failures\n",
+         $passed, $failed;
+
+  return $failed_8 + $failed;
+}
+
+
+# Driver for unit tests.
+# Currently covers just the address add/subtract/increment routines, for
+# both 8-nibble and 16-nibble addresses.  Prints PASS or a failure count on
+# STDERR and exits with the failure count as the exit status.
+sub RunUnitTests {
+  # Each row is a tuple [a, b, a+b, a-b, a+1]
+  my $unit_test_data_8 = [
+    [qw(aaaaaaaa 50505050 fafafafa 5a5a5a5a aaaaaaab)],
+    [qw(50505050 aaaaaaaa fafafafa a5a5a5a6 50505051)],
+    [qw(ffffffff aaaaaaaa aaaaaaa9 55555555 00000000)],
+    [qw(00000001 ffffffff 00000000 00000002 00000002)],
+    [qw(00000001 fffffff0 fffffff1 00000011 00000002)],
+  ];
+  my $unit_test_data_16 = [
+    # The implementation handles data in 7-nibble chunks, so those are the
+    # interesting boundaries.
+    [qw(aaaaaaaa 50505050
+        00_000000f_afafafa 00_0000005_a5a5a5a 00_000000a_aaaaaab)],
+    [qw(50505050 aaaaaaaa
+        00_000000f_afafafa ff_ffffffa_5a5a5a6 00_0000005_0505051)],
+    [qw(ffffffff aaaaaaaa
+        00_000001a_aaaaaa9 00_0000005_5555555 00_0000010_0000000)],
+    [qw(00000001 ffffffff
+        00_0000010_0000000 ff_ffffff0_0000002 00_0000000_0000002)],
+    [qw(00000001 fffffff0
+        00_000000f_ffffff1 ff_ffffff0_0000011 00_0000000_0000002)],
+
+    [qw(00_a00000a_aaaaaaa 50505050
+        00_a00000f_afafafa 00_a000005_a5a5a5a 00_a00000a_aaaaaab)],
+    [qw(0f_fff0005_0505050 aaaaaaaa
+        0f_fff000f_afafafa 0f_ffefffa_5a5a5a6 0f_fff0005_0505051)],
+    [qw(00_000000f_fffffff 01_800000a_aaaaaaa
+        01_800001a_aaaaaa9 fe_8000005_5555555 00_0000010_0000000)],
+    [qw(00_0000000_0000001 ff_fffffff_fffffff
+        00_0000000_0000000 00_0000000_0000002 00_0000000_0000002)],
+    [qw(00_0000000_0000001 ff_fffffff_ffffff0
+        ff_fffffff_ffffff1 00_0000000_0000011 00_0000000_0000002)],
+  ];
+
+  # Run the three suites in order over the same data, summing failures.
+  my $error_count = 0;
+  foreach my $suite (\&AddressAddUnitTest,
+                     \&AddressSubUnitTest,
+                     \&AddressIncUnitTest) {
+    $error_count += $suite->($unit_test_data_8, $unit_test_data_16);
+  }
+
+  if ($error_count > 0) {
+    print STDERR $error_count, " errors: FAILED\n";
+  } else {
+    print STDERR "PASS\n";
+  }
+  exit ($error_count);
+}
diff --git a/deps/jemalloc.orig/config.guess b/deps/jemalloc.orig/config.guess
new file mode 100755
index 00000000..0773d0f6
--- /dev/null
+++ b/deps/jemalloc.orig/config.guess
@@ -0,0 +1,1456 @@
+#! /bin/sh
+# Attempt to guess a canonical system name.
+#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+#   2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+timestamp='2004-03-03'
+
+# This file is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Originally written by Per Bothner <per@bothner.com>.
+# Please send patches to <config-patches@gnu.org>.  Submit a context
+# diff and a properly formatted ChangeLog entry.
+#
+# This script attempts to guess a canonical system name similar to
+# config.sub.  If it succeeds, it prints the system name on stdout, and
+# exits with 0.  Otherwise, it exits with 1.
+#
+# The plan is that this can be called by configure scripts if you
+# don't specify an explicit build system type.
+
+me=`echo "$0" | sed -e 's,.*/,,'`
+
+usage="\
+Usage: $0 [OPTION]
+
+Output the configuration name of the system \`$me' is run on.
+
+Operation modes:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+version="\
+GNU config.guess ($timestamp)
+
+Originally written by Per Bothner.
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
+Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+help="
+Try \`$me --help' for more information."
+
+# Parse command line
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit 0 ;;
+    --version | -v )
+       echo "$version" ; exit 0 ;;
+    --help | --h* | -h )
+       echo "$usage"; exit 0 ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )	# Use stdin as input.
+       break ;;
+    -* )
+       echo "$me: invalid option $1$help" >&2
+       exit 1 ;;
+    * )
+       break ;;
+  esac
+done
+
+if test $# != 0; then
+  echo "$me: too many arguments$help" >&2
+  exit 1
+fi
+
+trap 'exit 1' 1 2 15
+
+# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
+# compiler to aid in system detection is discouraged as it requires
+# temporary files to be created and, as you can see below, it is a
+# headache to deal with in a portable fashion.
+
+# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
+# use `HOST_CC' if defined, but it is deprecated.
+
+# Portable tmp directory creation inspired by the Autoconf team.
+
+set_cc_for_build='
+trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
+trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
+: ${TMPDIR=/tmp} ;
+ { tmp=`(umask 077 && mktemp -d -q "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
+ { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
+ { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
+ { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
+dummy=$tmp/dummy ;
+tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
+case $CC_FOR_BUILD,$HOST_CC,$CC in
+ ,,)    echo "int x;" > $dummy.c ;
+	for c in cc gcc c89 c99 ; do
+	  if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then
+	     CC_FOR_BUILD="$c"; break ;
+	  fi ;
+	done ;
+	if test x"$CC_FOR_BUILD" = x ; then
+	  CC_FOR_BUILD=no_compiler_found ;
+	fi
+	;;
+ ,,*)   CC_FOR_BUILD=$CC ;;
+ ,*,*)  CC_FOR_BUILD=$HOST_CC ;;
+esac ;'
+
+# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
+# (ghazi@noc.rutgers.edu 1994-08-24)
+if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
+	PATH=$PATH:/.attbin ; export PATH
+fi
+
+UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
+UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
+UNAME_SYSTEM=`(uname -s) 2>/dev/null`  || UNAME_SYSTEM=unknown
+UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
+
+# Note: order is significant - the case branches are not exclusive.
+
+case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
+    *:NetBSD:*:*)
+	# NetBSD (nbsd) targets should (where applicable) match one or
+	# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
+	# *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
+	# switched to ELF, *-*-netbsd* would select the old
+	# object file format.  This provides both forward
+	# compatibility and a consistent mechanism for selecting the
+	# object file format.
+	#
+	# Note: NetBSD doesn't particularly care about the vendor
+	# portion of the name.  We always set it to "unknown".
+	sysctl="sysctl -n hw.machine_arch"
+	UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \
+	    /usr/sbin/$sysctl 2>/dev/null || echo unknown)`
+	case "${UNAME_MACHINE_ARCH}" in
+	    armeb) machine=armeb-unknown ;;
+	    arm*) machine=arm-unknown ;;
+	    sh3el) machine=shl-unknown ;;
+	    sh3eb) machine=sh-unknown ;;
+	    *) machine=${UNAME_MACHINE_ARCH}-unknown ;;
+	esac
+	# The Operating System including object format, if it has switched
+	# to ELF recently, or will in the future.
+	case "${UNAME_MACHINE_ARCH}" in
+	    arm*|i386|m68k|ns32k|sh3*|sparc|vax)
+		eval $set_cc_for_build
+		if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
+			| grep __ELF__ >/dev/null
+		then
+		    # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
+		    # Return netbsd for either.  FIX?
+		    os=netbsd
+		else
+		    os=netbsdelf
+		fi
+		;;
+	    *)
+	        os=netbsd
+		;;
+	esac
+	# The OS release
+	# Debian GNU/NetBSD machines have a different userland, and
+	# thus, need a distinct triplet. However, they do not need
+	# kernel version information, so it can be replaced with a
+	# suitable tag, in the style of linux-gnu.
+	case "${UNAME_VERSION}" in
+	    Debian*)
+		release='-gnu'
+		;;
+	    *)
+		release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'`
+		;;
+	esac
+	# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
+	# contains redundant information, the shorter form:
+	# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
+	echo "${machine}-${os}${release}"
+	exit 0 ;;
+    amd64:OpenBSD:*:*)
+	echo x86_64-unknown-openbsd${UNAME_RELEASE}
+	exit 0 ;;
+    amiga:OpenBSD:*:*)
+	echo m68k-unknown-openbsd${UNAME_RELEASE}
+	exit 0 ;;
+    arc:OpenBSD:*:*)
+	echo mipsel-unknown-openbsd${UNAME_RELEASE}
+	exit 0 ;;
+    cats:OpenBSD:*:*)
+	echo arm-unknown-openbsd${UNAME_RELEASE}
+	exit 0 ;;
+    hp300:OpenBSD:*:*)
+	echo m68k-unknown-openbsd${UNAME_RELEASE}
+	exit 0 ;;
+    mac68k:OpenBSD:*:*)
+	echo m68k-unknown-openbsd${UNAME_RELEASE}
+	exit 0 ;;
+    macppc:OpenBSD:*:*)
+	echo powerpc-unknown-openbsd${UNAME_RELEASE}
+	exit 0 ;;
+    mvme68k:OpenBSD:*:*)
+	echo m68k-unknown-openbsd${UNAME_RELEASE}
+	exit 0 ;;
+    mvme88k:OpenBSD:*:*)
+	echo m88k-unknown-openbsd${UNAME_RELEASE}
+	exit 0 ;;
+    mvmeppc:OpenBSD:*:*)
+	echo powerpc-unknown-openbsd${UNAME_RELEASE}
+	exit 0 ;;
+    pegasos:OpenBSD:*:*)
+	echo powerpc-unknown-openbsd${UNAME_RELEASE}
+	exit 0 ;;
+    pmax:OpenBSD:*:*)
+	echo mipsel-unknown-openbsd${UNAME_RELEASE}
+	exit 0 ;;
+    sgi:OpenBSD:*:*)
+	echo mipseb-unknown-openbsd${UNAME_RELEASE}
+	exit 0 ;;
+    sun3:OpenBSD:*:*)
+	echo m68k-unknown-openbsd${UNAME_RELEASE}
+	exit 0 ;;
+    wgrisc:OpenBSD:*:*)
+	echo mipsel-unknown-openbsd${UNAME_RELEASE}
+	exit 0 ;;
+    *:OpenBSD:*:*)
+	echo ${UNAME_MACHINE}-unknown-openbsd${UNAME_RELEASE}
+	exit 0 ;;
+    *:ekkoBSD:*:*)
+	echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}
+	exit 0 ;;
+    macppc:MirBSD:*:*)	# CPU name must be "powerpc", as for macppc:OpenBSD
+	echo powerpc-unknown-mirbsd${UNAME_RELEASE}
+	exit 0 ;;
+    *:MirBSD:*:*)
+	echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE}
+	exit 0 ;;
+    alpha:OSF1:*:*)
+	case $UNAME_RELEASE in
+	*4.0)
+		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
+		;;
+	*5.*)
+	        UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+		;;
+	esac
+	# According to Compaq, /usr/sbin/psrinfo has been available on
+	# OSF/1 and Tru64 systems produced since 1995.  I hope that
+	# covers most systems running today.  This code pipes the CPU
+	# types through head -n 1, so we only detect the type of CPU 0.
+	ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^  The alpha \(.*\) processor.*$/\1/p' | head -n 1`
+	case "$ALPHA_CPU_TYPE" in
+	    "EV4 (21064)")
+		UNAME_MACHINE="alpha" ;;
+	    "EV4.5 (21064)")
+		UNAME_MACHINE="alpha" ;;
+	    "LCA4 (21066/21068)")
+		UNAME_MACHINE="alpha" ;;
+	    "EV5 (21164)")
+		UNAME_MACHINE="alphaev5" ;;
+	    "EV5.6 (21164A)")
+		UNAME_MACHINE="alphaev56" ;;
+	    "EV5.6 (21164PC)")
+		UNAME_MACHINE="alphapca56" ;;
+	    "EV5.7 (21164PC)")
+		UNAME_MACHINE="alphapca57" ;;
+	    "EV6 (21264)")
+		UNAME_MACHINE="alphaev6" ;;
+	    "EV6.7 (21264A)")
+		UNAME_MACHINE="alphaev67" ;;
+	    "EV6.8CB (21264C)")
+		UNAME_MACHINE="alphaev68" ;;
+	    "EV6.8AL (21264B)")
+		UNAME_MACHINE="alphaev68" ;;
+	    "EV6.8CX (21264D)")
+		UNAME_MACHINE="alphaev68" ;;
+	    "EV6.9A (21264/EV69A)")
+		UNAME_MACHINE="alphaev69" ;;
+	    "EV7 (21364)")
+		UNAME_MACHINE="alphaev7" ;;
+	    "EV7.9 (21364A)")
+		UNAME_MACHINE="alphaev79" ;;
+	esac
+	# A Pn.n version is a patched version.
+	# A Vn.n version is a released version.
+	# A Tn.n version is a released field test version.
+	# A Xn.n version is an unreleased experimental baselevel.
+	# 1.2 uses "1.2" for uname -r.
+	echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+	exit 0 ;;
+    Alpha*:OpenVMS:*:*)
+	echo alpha-hp-vms
+	exit 0 ;;
+    Alpha\ *:Windows_NT*:*)
+	# How do we know it's Interix rather than the generic POSIX subsystem?
+	# Should we change UNAME_MACHINE based on the output of uname instead
+	# of the specific Alpha model?
+	echo alpha-pc-interix
+	exit 0 ;;
+    21064:Windows_NT:50:3)
+	echo alpha-dec-winnt3.5
+	exit 0 ;;
+    Amiga*:UNIX_System_V:4.0:*)
+	echo m68k-unknown-sysv4
+	exit 0;;
+    *:[Aa]miga[Oo][Ss]:*:*)
+	echo ${UNAME_MACHINE}-unknown-amigaos
+	exit 0 ;;
+    *:[Mm]orph[Oo][Ss]:*:*)
+	echo ${UNAME_MACHINE}-unknown-morphos
+	exit 0 ;;
+    *:OS/390:*:*)
+	echo i370-ibm-openedition
+	exit 0 ;;
+    *:OS400:*:*)
+        echo powerpc-ibm-os400
+	exit 0 ;;
+    arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
+	echo arm-acorn-riscix${UNAME_RELEASE}
+	exit 0;;
+    SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
+	echo hppa1.1-hitachi-hiuxmpp
+	exit 0;;
+    Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
+	# akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
+	if test "`(/bin/universe) 2>/dev/null`" = att ; then
+		echo pyramid-pyramid-sysv3
+	else
+		echo pyramid-pyramid-bsd
+	fi
+	exit 0 ;;
+    NILE*:*:*:dcosx)
+	echo pyramid-pyramid-svr4
+	exit 0 ;;
+    DRS?6000:unix:4.0:6*)
+	echo sparc-icl-nx6
+	exit 0 ;;
+    DRS?6000:UNIX_SV:4.2*:7*)
+	case `/usr/bin/uname -p` in
+	    sparc) echo sparc-icl-nx7 && exit 0 ;;
+	esac ;;
+    sun4H:SunOS:5.*:*)
+	echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+	exit 0 ;;
+    sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
+	echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+	exit 0 ;;
+    i86pc:SunOS:5.*:*)
+	echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+	exit 0 ;;
+    sun4*:SunOS:6*:*)
+	# According to config.sub, this is the proper way to canonicalize
+	# SunOS6.  Hard to guess exactly what SunOS6 will be like, but
+	# it's likely to be more like Solaris than SunOS4.
+	echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+	exit 0 ;;
+    sun4*:SunOS:*:*)
+	case "`/usr/bin/arch -k`" in
+	    Series*|S4*)
+		UNAME_RELEASE=`uname -v`
+		;;
+	esac
+	# Japanese Language versions have a version number like `4.1.3-JL'.
+	echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
+	exit 0 ;;
+    sun3*:SunOS:*:*)
+	echo m68k-sun-sunos${UNAME_RELEASE}
+	exit 0 ;;
+    sun*:*:4.2BSD:*)
+	UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
+	test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
+	case "`/bin/arch`" in
+	    sun3)
+		echo m68k-sun-sunos${UNAME_RELEASE}
+		;;
+	    sun4)
+		echo sparc-sun-sunos${UNAME_RELEASE}
+		;;
+	esac
+	exit 0 ;;
+    aushp:SunOS:*:*)
+	echo sparc-auspex-sunos${UNAME_RELEASE}
+	exit 0 ;;
+    # The situation for MiNT is a little confusing.  The machine name
+    # can be virtually everything (everything which is not
+    # "atarist" or "atariste" at least should have a processor
+    # > m68000).  The system name ranges from "MiNT" over "FreeMiNT"
+    # to the lowercase version "mint" (or "freemint").  Finally
+    # the system name "TOS" denotes a system which is actually not
+    # MiNT.  But MiNT is downward compatible to TOS, so this should
+    # be no problem.
+    atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
+        echo m68k-atari-mint${UNAME_RELEASE}
+	exit 0 ;;
+    atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
+	echo m68k-atari-mint${UNAME_RELEASE}
+        exit 0 ;;
+    *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
+        echo m68k-atari-mint${UNAME_RELEASE}
+	exit 0 ;;
+    milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
+        echo m68k-milan-mint${UNAME_RELEASE}
+        exit 0 ;;
+    hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
+        echo m68k-hades-mint${UNAME_RELEASE}
+        exit 0 ;;
+    *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
+        echo m68k-unknown-mint${UNAME_RELEASE}
+        exit 0 ;;
+    m68k:machten:*:*)
+	echo m68k-apple-machten${UNAME_RELEASE}
+	exit 0 ;;
+    powerpc:machten:*:*)
+	echo powerpc-apple-machten${UNAME_RELEASE}
+	exit 0 ;;
+    RISC*:Mach:*:*)
+	echo mips-dec-mach_bsd4.3
+	exit 0 ;;
+    RISC*:ULTRIX:*:*)
+	echo mips-dec-ultrix${UNAME_RELEASE}
+	exit 0 ;;
+    VAX*:ULTRIX*:*:*)
+	echo vax-dec-ultrix${UNAME_RELEASE}
+	exit 0 ;;
+    2020:CLIX:*:* | 2430:CLIX:*:*)
+	echo clipper-intergraph-clix${UNAME_RELEASE}
+	exit 0 ;;
+    mips:*:*:UMIPS | mips:*:*:RISCos)
+	eval $set_cc_for_build
+	sed 's/^	//' << EOF >$dummy.c
+#ifdef __cplusplus
+#include <stdio.h>  /* for printf() prototype */
+	int main (int argc, char *argv[]) {
+#else
+	int main (argc, argv) int argc; char *argv[]; {
+#endif
+	#if defined (host_mips) && defined (MIPSEB)
+	#if defined (SYSTYPE_SYSV)
+	  printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0);
+	#endif
+	#if defined (SYSTYPE_SVR4)
+	  printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0);
+	#endif
+	#if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
+	  printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0);
+	#endif
+	#endif
+	  exit (-1);
+	}
+EOF
+	$CC_FOR_BUILD -o $dummy $dummy.c \
+	  && $dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \
+	  && exit 0
+	echo mips-mips-riscos${UNAME_RELEASE}
+	exit 0 ;;
+    Motorola:PowerMAX_OS:*:*)
+	echo powerpc-motorola-powermax
+	exit 0 ;;
+    Motorola:*:4.3:PL8-*)
+	echo powerpc-harris-powermax
+	exit 0 ;;
+    Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
+	echo powerpc-harris-powermax
+	exit 0 ;;
+    Night_Hawk:Power_UNIX:*:*)
+	echo powerpc-harris-powerunix
+	exit 0 ;;
+    m88k:CX/UX:7*:*)
+	echo m88k-harris-cxux7
+	exit 0 ;;
+    m88k:*:4*:R4*)
+	echo m88k-motorola-sysv4
+	exit 0 ;;
+    m88k:*:3*:R3*)
+	echo m88k-motorola-sysv3
+	exit 0 ;;
+    AViiON:dgux:*:*)
+        # DG/UX returns AViiON for all architectures
+        UNAME_PROCESSOR=`/usr/bin/uname -p`
+	if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
+	then
+	    if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
+	       [ ${TARGET_BINARY_INTERFACE}x = x ]
+	    then
+		echo m88k-dg-dgux${UNAME_RELEASE}
+	    else
+		echo m88k-dg-dguxbcs${UNAME_RELEASE}
+	    fi
+	else
+	    echo i586-dg-dgux${UNAME_RELEASE}
+	fi
+ 	exit 0 ;;
+    M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
+	echo m88k-dolphin-sysv3
+	exit 0 ;;
+    M88*:*:R3*:*)
+	# Delta 88k system running SVR3
+	echo m88k-motorola-sysv3
+	exit 0 ;;
+    XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
+	echo m88k-tektronix-sysv3
+	exit 0 ;;
+    Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
+	echo m68k-tektronix-bsd
+	exit 0 ;;
+    *:IRIX*:*:*)
+	echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
+	exit 0 ;;
+    ????????:AIX?:[12].1:2)   # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
+	echo romp-ibm-aix      # uname -m gives an 8 hex-code CPU id
+	exit 0 ;;              # Note that: echo "'`uname -s`'" gives 'AIX '
+    i*86:AIX:*:*)
+	echo i386-ibm-aix
+	exit 0 ;;
+    ia64:AIX:*:*)
+	if [ -x /usr/bin/oslevel ] ; then
+		IBM_REV=`/usr/bin/oslevel`
+	else
+		IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
+	fi
+	echo ${UNAME_MACHINE}-ibm-aix${IBM_REV}
+	exit 0 ;;
+    *:AIX:2:3)
+	if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
+		eval $set_cc_for_build
+		sed 's/^		//' << EOF >$dummy.c
+		#include <sys/systemcfg.h>
+
+		main()
+			{
+			if (!__power_pc())
+				exit(1);
+			puts("powerpc-ibm-aix3.2.5");
+			exit(0);
+			}
+EOF
+		$CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0
+		echo rs6000-ibm-aix3.2.5
+	elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
+		echo rs6000-ibm-aix3.2.4
+	else
+		echo rs6000-ibm-aix3.2
+	fi
+	exit 0 ;;
+    *:AIX:*:[45])
+	IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
+	if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
+		IBM_ARCH=rs6000
+	else
+		IBM_ARCH=powerpc
+	fi
+	if [ -x /usr/bin/oslevel ] ; then
+		IBM_REV=`/usr/bin/oslevel`
+	else
+		IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
+	fi
+	echo ${IBM_ARCH}-ibm-aix${IBM_REV}
+	exit 0 ;;
+    *:AIX:*:*)
+	echo rs6000-ibm-aix
+	exit 0 ;;
+    ibmrt:4.4BSD:*|romp-ibm:BSD:*)
+	echo romp-ibm-bsd4.4
+	exit 0 ;;
+    ibmrt:*BSD:*|romp-ibm:BSD:*)            # covers RT/PC BSD and
+	echo romp-ibm-bsd${UNAME_RELEASE}   # 4.3 with uname added to
+	exit 0 ;;                           # report: romp-ibm BSD 4.3
+    *:BOSX:*:*)
+	echo rs6000-bull-bosx
+	exit 0 ;;
+    DPX/2?00:B.O.S.:*:*)
+	echo m68k-bull-sysv3
+	exit 0 ;;
+    9000/[34]??:4.3bsd:1.*:*)
+	echo m68k-hp-bsd
+	exit 0 ;;
+    hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
+	echo m68k-hp-bsd4.4
+	exit 0 ;;
+    9000/[34678]??:HP-UX:*:*)
+	HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+	case "${UNAME_MACHINE}" in
+	    9000/31? )            HP_ARCH=m68000 ;;
+	    9000/[34]?? )         HP_ARCH=m68k ;;
+	    9000/[678][0-9][0-9])
+		if [ -x /usr/bin/getconf ]; then
+		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
+                    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+                    case "${sc_cpu_version}" in
+                      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
+                      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
+                      532)                      # CPU_PA_RISC2_0
+                        case "${sc_kernel_bits}" in
+                          32) HP_ARCH="hppa2.0n" ;;
+                          64) HP_ARCH="hppa2.0w" ;;
+			  '') HP_ARCH="hppa2.0" ;;   # HP-UX 10.20
+                        esac ;;
+                    esac
+		fi
+		if [ "${HP_ARCH}" = "" ]; then
+		    eval $set_cc_for_build
+		    sed 's/^              //' << EOF >$dummy.c
+
+              #define _HPUX_SOURCE
+              #include <stdlib.h>
+              #include <unistd.h>
+
+              int main ()
+              {
+              #if defined(_SC_KERNEL_BITS)
+                  long bits = sysconf(_SC_KERNEL_BITS);
+              #endif
+                  long cpu  = sysconf (_SC_CPU_VERSION);
+
+                  switch (cpu)
+              	{
+              	case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+              	case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+              	case CPU_PA_RISC2_0:
+              #if defined(_SC_KERNEL_BITS)
+              	    switch (bits)
+              		{
+              		case 64: puts ("hppa2.0w"); break;
+              		case 32: puts ("hppa2.0n"); break;
+              		default: puts ("hppa2.0"); break;
+              		} break;
+              #else  /* !defined(_SC_KERNEL_BITS) */
+              	    puts ("hppa2.0"); break;
+              #endif
+              	default: puts ("hppa1.0"); break;
+              	}
+                  exit (0);
+              }
+EOF
+		    (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
+		    test -z "$HP_ARCH" && HP_ARCH=hppa
+		fi ;;
+	esac
+	if [ ${HP_ARCH} = "hppa2.0w" ]
+	then
+	    # avoid double evaluation of $set_cc_for_build
+	    test -n "$CC_FOR_BUILD" || eval $set_cc_for_build
+	    if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E -) | grep __LP64__ >/dev/null
+	    then
+		HP_ARCH="hppa2.0w"
+	    else
+		HP_ARCH="hppa64"
+	    fi
+	fi
+	echo ${HP_ARCH}-hp-hpux${HPUX_REV}
+	exit 0 ;;
+    ia64:HP-UX:*:*)
+	HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+	echo ia64-hp-hpux${HPUX_REV}
+	exit 0 ;;
+    3050*:HI-UX:*:*)
+	eval $set_cc_for_build
+	sed 's/^	//' << EOF >$dummy.c
+	#include <unistd.h>
+	int
+	main ()
+	{
+	  long cpu = sysconf (_SC_CPU_VERSION);
+	  /* The order matters, because CPU_IS_HP_MC68K erroneously returns
+	     true for CPU_PA_RISC1_0.  CPU_IS_PA_RISC returns correct
+	     results, however.  */
+	  if (CPU_IS_PA_RISC (cpu))
+	    {
+	      switch (cpu)
+		{
+		  case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break;
+		  case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break;
+		  case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break;
+		  default: puts ("hppa-hitachi-hiuxwe2"); break;
+		}
+	    }
+	  else if (CPU_IS_HP_MC68K (cpu))
+	    puts ("m68k-hitachi-hiuxwe2");
+	  else puts ("unknown-hitachi-hiuxwe2");
+	  exit (0);
+	}
+EOF
+	$CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0
+	echo unknown-hitachi-hiuxwe2
+	exit 0 ;;
+    9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
+	echo hppa1.1-hp-bsd
+	exit 0 ;;
+    9000/8??:4.3bsd:*:*)
+	echo hppa1.0-hp-bsd
+	exit 0 ;;
+    *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
+	echo hppa1.0-hp-mpeix
+	exit 0 ;;
+    hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
+	echo hppa1.1-hp-osf
+	exit 0 ;;
+    hp8??:OSF1:*:*)
+	echo hppa1.0-hp-osf
+	exit 0 ;;
+    i*86:OSF1:*:*)
+	if [ -x /usr/sbin/sysversion ] ; then
+	    echo ${UNAME_MACHINE}-unknown-osf1mk
+	else
+	    echo ${UNAME_MACHINE}-unknown-osf1
+	fi
+	exit 0 ;;
+    parisc*:Lites*:*:*)
+	echo hppa1.1-hp-lites
+	exit 0 ;;
+    C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
+	echo c1-convex-bsd
+        exit 0 ;;
+    C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
+	if getsysinfo -f scalar_acc
+	then echo c32-convex-bsd
+	else echo c2-convex-bsd
+	fi
+        exit 0 ;;
+    C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
+	echo c34-convex-bsd
+        exit 0 ;;
+    C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
+	echo c38-convex-bsd
+        exit 0 ;;
+    C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
+	echo c4-convex-bsd
+        exit 0 ;;
+    CRAY*Y-MP:*:*:*)
+	echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+	exit 0 ;;
+    CRAY*[A-Z]90:*:*:*)
+	echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \
+	| sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
+	      -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
+	      -e 's/\.[^.]*$/.X/'
+	exit 0 ;;
+    CRAY*TS:*:*:*)
+	echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+	exit 0 ;;
+    CRAY*T3E:*:*:*)
+	echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+	exit 0 ;;
+    CRAY*SV1:*:*:*)
+	echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+	exit 0 ;;
+    *:UNICOS/mp:*:*)
+	echo nv1-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+	exit 0 ;;
+    F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
+	FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
+        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+        FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
+        echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+        exit 0 ;;
+    5000:UNIX_System_V:4.*:*)
+        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+        FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+        echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	exit 0 ;;
+    i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
+	echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
+	exit 0 ;;
+    sparc*:BSD/OS:*:*)
+	echo sparc-unknown-bsdi${UNAME_RELEASE}
+	exit 0 ;;
+    *:BSD/OS:*:*)
+	echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
+	exit 0 ;;
+    *:FreeBSD:*:*)
+	# Determine whether the default compiler uses glibc.
+	eval $set_cc_for_build
+	sed 's/^	//' << EOF >$dummy.c
+	#include <features.h>
+	#if __GLIBC__ >= 2
+	LIBC=gnu
+	#else
+	LIBC=
+	#endif
+EOF
+	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=`
+	# GNU/KFreeBSD systems have a "k" prefix to indicate we are using
+	# FreeBSD's kernel, but not the complete OS.
+	case ${LIBC} in gnu) kernel_only='k' ;; esac
+	echo ${UNAME_MACHINE}-unknown-${kernel_only}freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`${LIBC:+-$LIBC}
+	exit 0 ;;
+    i*:CYGWIN*:*)
+	echo ${UNAME_MACHINE}-pc-cygwin
+	exit 0 ;;
+    i*:MINGW*:*)
+	echo ${UNAME_MACHINE}-pc-mingw32
+	exit 0 ;;
+    i*:PW*:*)
+	echo ${UNAME_MACHINE}-pc-pw32
+	exit 0 ;;
+    x86:Interix*:[34]*)
+	echo i586-pc-interix${UNAME_RELEASE}|sed -e 's/\..*//'
+	exit 0 ;;
+    [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
+	echo i${UNAME_MACHINE}-pc-mks
+	exit 0 ;;
+    i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
+	# How do we know it's Interix rather than the generic POSIX subsystem?
+	# It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
+	# change UNAME_MACHINE based on the output of uname instead of i386?
+	echo i586-pc-interix
+	exit 0 ;;
+    i*:UWIN*:*)
+	echo ${UNAME_MACHINE}-pc-uwin
+	exit 0 ;;
+    p*:CYGWIN*:*)
+	echo powerpcle-unknown-cygwin
+	exit 0 ;;
+    prep*:SunOS:5.*:*)
+	echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+	exit 0 ;;
+    *:GNU:*:*)
+	# the GNU system
+	echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
+	exit 0 ;;
+    *:GNU/*:*:*)
+	# other systems with GNU libc and userland
+	echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu
+	exit 0 ;;
+    i*86:Minix:*:*)
+	echo ${UNAME_MACHINE}-pc-minix
+	exit 0 ;;
+    arm*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit 0 ;;
+    cris:Linux:*:*)
+	echo cris-axis-linux-gnu
+	exit 0 ;;
+    ia64:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit 0 ;;
+    m68*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit 0 ;;
+    mips:Linux:*:*)
+	eval $set_cc_for_build
+	sed 's/^	//' << EOF >$dummy.c
+	#undef CPU
+	#undef mips
+	#undef mipsel
+	#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+	CPU=mipsel
+	#else
+	#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
+	CPU=mips
+	#else
+	CPU=
+	#endif
+	#endif
+EOF
+	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=`
+	test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0
+	;;
+    mips64:Linux:*:*)
+	eval $set_cc_for_build
+	sed 's/^	//' << EOF >$dummy.c
+	#undef CPU
+	#undef mips64
+	#undef mips64el
+	#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
+	CPU=mips64el
+	#else
+	#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
+	CPU=mips64
+	#else
+	CPU=
+	#endif
+	#endif
+EOF
+	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=`
+	test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0
+	;;
+    ppc:Linux:*:*)
+	echo powerpc-unknown-linux-gnu
+	exit 0 ;;
+    ppc64:Linux:*:*)
+	echo powerpc64-unknown-linux-gnu
+	exit 0 ;;
+    alpha:Linux:*:*)
+	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+	  EV5)   UNAME_MACHINE=alphaev5 ;;
+	  EV56)  UNAME_MACHINE=alphaev56 ;;
+	  PCA56) UNAME_MACHINE=alphapca56 ;;
+	  PCA57) UNAME_MACHINE=alphapca56 ;;
+	  EV6)   UNAME_MACHINE=alphaev6 ;;
+	  EV67)  UNAME_MACHINE=alphaev67 ;;
+	  EV68*) UNAME_MACHINE=alphaev68 ;;
+        esac
+	objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null
+	if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
+	echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
+	exit 0 ;;
+    parisc:Linux:*:* | hppa:Linux:*:*)
+	# Look for CPU level
+	case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
+	  PA7*) echo hppa1.1-unknown-linux-gnu ;;
+	  PA8*) echo hppa2.0-unknown-linux-gnu ;;
+	  *)    echo hppa-unknown-linux-gnu ;;
+	esac
+	exit 0 ;;
+    parisc64:Linux:*:* | hppa64:Linux:*:*)
+	echo hppa64-unknown-linux-gnu
+	exit 0 ;;
+    s390:Linux:*:* | s390x:Linux:*:*)
+	echo ${UNAME_MACHINE}-ibm-linux
+	exit 0 ;;
+    sh64*:Linux:*:*)
+    	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit 0 ;;
+    sh*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit 0 ;;
+    sparc:Linux:*:* | sparc64:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit 0 ;;
+    x86_64:Linux:*:*)
+	echo x86_64-unknown-linux-gnu
+	exit 0 ;;
+    i*86:Linux:*:*)
+	# The BFD linker knows what the default object file format is, so
+	# first see if it will tell us. cd to the root directory to prevent
+	# problems with other programs or directories called `ld' in the path.
+	# Set LC_ALL=C to ensure ld outputs messages in English.
+	ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \
+			 | sed -ne '/supported targets:/!d
+				    s/[ 	][ 	]*/ /g
+				    s/.*supported targets: *//
+				    s/ .*//
+				    p'`
+        case "$ld_supported_targets" in
+	  elf32-i386)
+		TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu"
+		;;
+	  a.out-i386-linux)
+		echo "${UNAME_MACHINE}-pc-linux-gnuaout"
+		exit 0 ;;
+	  coff-i386)
+		echo "${UNAME_MACHINE}-pc-linux-gnucoff"
+		exit 0 ;;
+	  "")
+		# Either a pre-BFD a.out linker (linux-gnuoldld) or
+		# one that does not give us useful --help.
+		echo "${UNAME_MACHINE}-pc-linux-gnuoldld"
+		exit 0 ;;
+	esac
+	# Determine whether the default compiler is a.out or elf
+	eval $set_cc_for_build
+	sed 's/^	//' << EOF >$dummy.c
+	#include <features.h>
+	#ifdef __ELF__
+	# ifdef __GLIBC__
+	#  if __GLIBC__ >= 2
+	LIBC=gnu
+	#  else
+	LIBC=gnulibc1
+	#  endif
+	# else
+	LIBC=gnulibc1
+	# endif
+	#else
+	#ifdef __INTEL_COMPILER
+	LIBC=gnu
+	#else
+	LIBC=gnuaout
+	#endif
+	#endif
+	#ifdef __dietlibc__
+	LIBC=dietlibc
+	#endif
+EOF
+	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=`
+	test x"${LIBC}" != x && echo "${UNAME_MACHINE}-pc-linux-${LIBC}" && exit 0
+	test x"${TENTATIVE}" != x && echo "${TENTATIVE}" && exit 0
+	;;
+    i*86:DYNIX/ptx:4*:*)
+	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
+	# earlier versions are messed up and put the nodename in both
+	# sysname and nodename.
+	echo i386-sequent-sysv4
+	exit 0 ;;
+    i*86:UNIX_SV:4.2MP:2.*)
+        # Unixware is an offshoot of SVR4, but it has its own version
+        # number series starting with 2...
+        # I am not positive that other SVR4 systems won't match this,
+	# I just have to hope.  -- rms.
+        # Use sysv4.2uw... so that sysv4* matches it.
+	echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
+	exit 0 ;;
+    i*86:OS/2:*:*)
+	# If we were able to find `uname', then EMX Unix compatibility
+	# is probably installed.
+	echo ${UNAME_MACHINE}-pc-os2-emx
+	exit 0 ;;
+    i*86:XTS-300:*:STOP)
+	echo ${UNAME_MACHINE}-unknown-stop
+	exit 0 ;;
+    i*86:atheos:*:*)
+	echo ${UNAME_MACHINE}-unknown-atheos
+	exit 0 ;;
+	i*86:syllable:*:*)
+	echo ${UNAME_MACHINE}-pc-syllable
+	exit 0 ;;
+    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*)
+	echo i386-unknown-lynxos${UNAME_RELEASE}
+	exit 0 ;;
+    i*86:*DOS:*:*)
+	echo ${UNAME_MACHINE}-pc-msdosdjgpp
+	exit 0 ;;
+    i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
+	UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
+	if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
+		echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL}
+	else
+		echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL}
+	fi
+	exit 0 ;;
+    i*86:*:5:[78]*)
+	case `/bin/uname -X | grep "^Machine"` in
+	    *486*)	     UNAME_MACHINE=i486 ;;
+	    *Pentium)	     UNAME_MACHINE=i586 ;;
+	    *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
+	esac
+	echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
+	exit 0 ;;
+    i*86:*:3.2:*)
+	if test -f /usr/options/cb.name; then
+		UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
+		echo ${UNAME_MACHINE}-pc-isc$UNAME_REL
+	elif /bin/uname -X 2>/dev/null >/dev/null ; then
+		UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
+		(/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
+		(/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
+			&& UNAME_MACHINE=i586
+		(/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
+			&& UNAME_MACHINE=i686
+		(/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
+			&& UNAME_MACHINE=i686
+		echo ${UNAME_MACHINE}-pc-sco$UNAME_REL
+	else
+		echo ${UNAME_MACHINE}-pc-sysv32
+	fi
+	exit 0 ;;
+    pc:*:*:*)
+	# Left here for compatibility:
+        # uname -m prints for DJGPP always 'pc', but it prints nothing about
+        # the processor, so we play safe by assuming i386.
+	echo i386-pc-msdosdjgpp
+        exit 0 ;;
+    Intel:Mach:3*:*)
+	echo i386-pc-mach3
+	exit 0 ;;
+    paragon:*:*:*)
+	echo i860-intel-osf1
+	exit 0 ;;
+    i860:*:4.*:*) # i860-SVR4
+	if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
+	  echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
+	else # Add other i860-SVR4 vendors below as they are discovered.
+	  echo i860-unknown-sysv${UNAME_RELEASE}  # Unknown i860-SVR4
+	fi
+	exit 0 ;;
+    mini*:CTIX:SYS*5:*)
+	# "miniframe"
+	echo m68010-convergent-sysv
+	exit 0 ;;
+    mc68k:UNIX:SYSTEM5:3.51m)
+	echo m68k-convergent-sysv
+	exit 0 ;;
+    M680?0:D-NIX:5.3:*)
+	echo m68k-diab-dnix
+	exit 0 ;;
+    M68*:*:R3V[567]*:*)
+	test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;;
+    3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0)
+	OS_REL=''
+	test -r /etc/.relid \
+	&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	  && echo i486-ncr-sysv4.3${OS_REL} && exit 0
+	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+	  && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;;
+    3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
+        /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+          && echo i486-ncr-sysv4 && exit 0 ;;
+    m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
+	echo m68k-unknown-lynxos${UNAME_RELEASE}
+	exit 0 ;;
+    mc68030:UNIX_System_V:4.*:*)
+	echo m68k-atari-sysv4
+	exit 0 ;;
+    TSUNAMI:LynxOS:2.*:*)
+	echo sparc-unknown-lynxos${UNAME_RELEASE}
+	exit 0 ;;
+    rs6000:LynxOS:2.*:*)
+	echo rs6000-unknown-lynxos${UNAME_RELEASE}
+	exit 0 ;;
+    PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*)
+	echo powerpc-unknown-lynxos${UNAME_RELEASE}
+	exit 0 ;;
+    SM[BE]S:UNIX_SV:*:*)
+	echo mips-dde-sysv${UNAME_RELEASE}
+	exit 0 ;;
+    RM*:ReliantUNIX-*:*:*)
+	echo mips-sni-sysv4
+	exit 0 ;;
+    RM*:SINIX-*:*:*)
+	echo mips-sni-sysv4
+	exit 0 ;;
+    *:SINIX-*:*:*)
+	if uname -p 2>/dev/null >/dev/null ; then
+		UNAME_MACHINE=`(uname -p) 2>/dev/null`
+		echo ${UNAME_MACHINE}-sni-sysv4
+	else
+		echo ns32k-sni-sysv
+	fi
+	exit 0 ;;
+    PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+                      # says <Richard.M.Bartel@ccMail.Census.GOV>
+        echo i586-unisys-sysv4
+        exit 0 ;;
+    *:UNIX_System_V:4*:FTX*)
+	# From Gerald Hewes <hewes@openmarket.com>.
+	# How about differentiating between stratus architectures? -djm
+	echo hppa1.1-stratus-sysv4
+	exit 0 ;;
+    *:*:*:FTX*)
+	# From seanf@swdc.stratus.com.
+	echo i860-stratus-sysv4
+	exit 0 ;;
+    *:VOS:*:*)
+	# From Paul.Green@stratus.com.
+	echo hppa1.1-stratus-vos
+	exit 0 ;;
+    mc68*:A/UX:*:*)
+	echo m68k-apple-aux${UNAME_RELEASE}
+	exit 0 ;;
+    news*:NEWS-OS:6*:*)
+	echo mips-sony-newsos6
+	exit 0 ;;
+    R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
+	if [ -d /usr/nec ]; then
+	        echo mips-nec-sysv${UNAME_RELEASE}
+	else
+	        echo mips-unknown-sysv${UNAME_RELEASE}
+	fi
+        exit 0 ;;
+    BeBox:BeOS:*:*)	# BeOS running on hardware made by Be, PPC only.
+	echo powerpc-be-beos
+	exit 0 ;;
+    BeMac:BeOS:*:*)	# BeOS running on Mac or Mac clone, PPC only.
+	echo powerpc-apple-beos
+	exit 0 ;;
+    BePC:BeOS:*:*)	# BeOS running on Intel PC compatible.
+	echo i586-pc-beos
+	exit 0 ;;
+    SX-4:SUPER-UX:*:*)
+	echo sx4-nec-superux${UNAME_RELEASE}
+	exit 0 ;;
+    SX-5:SUPER-UX:*:*)
+	echo sx5-nec-superux${UNAME_RELEASE}
+	exit 0 ;;
+    SX-6:SUPER-UX:*:*)
+	echo sx6-nec-superux${UNAME_RELEASE}
+	exit 0 ;;
+    Power*:Rhapsody:*:*)
+	echo powerpc-apple-rhapsody${UNAME_RELEASE}
+	exit 0 ;;
+    *:Rhapsody:*:*)
+	echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
+	exit 0 ;;
+    *:Darwin:*:*)
+	# Take the CPU field from `uname -p' for every processor, not only x86 and
+	# powerpc; an empty, failed or `unknown' answer falls back to powerpc.
+	UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
+	case $UNAME_PROCESSOR in *86) UNAME_PROCESSOR=i686 ;; unknown | "") UNAME_PROCESSOR=powerpc ;; esac
+	echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
+	exit 0 ;;
+    *:procnto*:*:* | *:QNX:[0123456789]*:*)
+	UNAME_PROCESSOR=`uname -p`
+	if test "$UNAME_PROCESSOR" = "x86"; then
+		UNAME_PROCESSOR=i386
+		UNAME_MACHINE=pc
+	fi
+	echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE}
+	exit 0 ;;
+    *:QNX:*:4*)
+	echo i386-pc-qnx
+	exit 0 ;;
+    NSR-?:NONSTOP_KERNEL:*:*)
+	echo nsr-tandem-nsk${UNAME_RELEASE}
+	exit 0 ;;
+    *:NonStop-UX:*:*)
+	echo mips-compaq-nonstopux
+	exit 0 ;;
+    BS2000:POSIX*:*:*)
+	echo bs2000-siemens-sysv
+	exit 0 ;;
+    DS/*:UNIX_System_V:*:*)
+	echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE}
+	exit 0 ;;
+    *:Plan9:*:*)
+	# "uname -m" is not consistent, so use $cputype instead. 386
+	# is converted to i386 for consistency with other x86
+	# operating systems.
+	if test "$cputype" = "386"; then
+	    UNAME_MACHINE=i386
+	else
+	    UNAME_MACHINE="$cputype"
+	fi
+	echo ${UNAME_MACHINE}-unknown-plan9
+	exit 0 ;;
+    *:TOPS-10:*:*)
+	echo pdp10-unknown-tops10
+	exit 0 ;;
+    *:TENEX:*:*)
+	echo pdp10-unknown-tenex
+	exit 0 ;;
+    KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
+	echo pdp10-dec-tops20
+	exit 0 ;;
+    XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
+	echo pdp10-xkl-tops20
+	exit 0 ;;
+    *:TOPS-20:*:*)
+	echo pdp10-unknown-tops20
+	exit 0 ;;
+    *:ITS:*:*)
+	echo pdp10-unknown-its
+	exit 0 ;;
+    SEI:*:*:SEIUX)
+        echo mips-sei-seiux${UNAME_RELEASE}
+	exit 0 ;;
+    *:DragonFly:*:*)
+	echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
+	exit 0 ;;
+esac
+
+#echo '(No uname command or uname output not recognized.)' 1>&2
+#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
+
+eval $set_cc_for_build
+cat >$dummy.c <<EOF
+#ifdef _SEQUENT_
+# include <sys/types.h>
+# include <sys/utsname.h>
+#endif
+main ()
+{
+#if defined (sony)
+#if defined (MIPSEB)
+  /* BFD wants "bsd" instead of "newsos".  Perhaps BFD should be changed,
+     I don't know....  */
+  printf ("mips-sony-bsd\n"); exit (0);
+#else
+#include <sys/param.h>
+  printf ("m68k-sony-newsos%s\n",
+#ifdef NEWSOS4
+          "4"
+#else
+	  ""
+#endif
+         ); exit (0);
+#endif
+#endif
+
+#if defined (__arm) && defined (__acorn) && defined (__unix)
+  printf ("arm-acorn-riscix\n"); exit (0);	/* newline-terminated like the other triplets */
+#endif
+
+#if defined (hp300) && !defined (hpux)
+  printf ("m68k-hp-bsd\n"); exit (0);
+#endif
+
+#if defined (NeXT)
+#if !defined (__ARCHITECTURE__)
+#define __ARCHITECTURE__ "m68k"
+#endif
+  int version;
+  version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
+  if (version < 4)
+    printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
+  else
+    printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
+  exit (0);
+#endif
+
+#if defined (MULTIMAX) || defined (n16)
+#if defined (UMAXV)
+  printf ("ns32k-encore-sysv\n"); exit (0);
+#else
+#if defined (CMU)
+  printf ("ns32k-encore-mach\n"); exit (0);
+#else
+  printf ("ns32k-encore-bsd\n"); exit (0);
+#endif
+#endif
+#endif
+
+#if defined (__386BSD__)
+  printf ("i386-pc-bsd\n"); exit (0);
+#endif
+
+#if defined (sequent)
+#if defined (i386)
+  printf ("i386-sequent-dynix\n"); exit (0);
+#endif
+#if defined (ns32000)
+  printf ("ns32k-sequent-dynix\n"); exit (0);
+#endif
+#endif
+
+#if defined (_SEQUENT_)
+    struct utsname un;
+
+    uname(&un);
+
+    if (strncmp(un.version, "V2", 2) == 0) {
+	printf ("i386-sequent-ptx2\n"); exit (0);
+    }
+    if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
+	printf ("i386-sequent-ptx1\n"); exit (0);
+    }
+    printf ("i386-sequent-ptx\n"); exit (0);
+
+#endif
+
+#if defined (vax)
+# if !defined (ultrix)
+#  include <sys/param.h>
+#  if defined (BSD)
+#   if BSD == 43
+      printf ("vax-dec-bsd4.3\n"); exit (0);
+#   else
+#    if BSD == 199006
+      printf ("vax-dec-bsd4.3reno\n"); exit (0);
+#    else
+      printf ("vax-dec-bsd\n"); exit (0);
+#    endif
+#   endif
+#  else
+    printf ("vax-dec-bsd\n"); exit (0);
+#  endif
+# else
+    printf ("vax-dec-ultrix\n"); exit (0);
+# endif
+#endif
+
+#if defined (alliant) && defined (i860)
+  printf ("i860-alliant-bsd\n"); exit (0);
+#endif
+
+  exit (1);
+}
+EOF
+
+$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && $dummy && exit 0
+
+# Apollos put the system type in the environment: the triplet is built from $ISP and $SYSTYPE.
+
+test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; }
+
+# Convex versions that predate uname can use getsysinfo(1)
+# NOTE(review): the -x test names /usr/convex/getsysinfo, but the calls below find getsysinfo via PATH -- confirm.
+if [ -x /usr/convex/getsysinfo ]
+then
+    case `getsysinfo -f cpu_type` in
+    c1*)
+	echo c1-convex-bsd
+	exit 0 ;;
+    c2*)
+	if getsysinfo -f scalar_acc	# exit status picks c32 over c2
+	then echo c32-convex-bsd
+	else echo c2-convex-bsd
+	fi
+	exit 0 ;;
+    c34*)
+	echo c34-convex-bsd
+	exit 0 ;;
+    c38*)
+	echo c38-convex-bsd
+	exit 0 ;;
+    c4*)
+	echo c4-convex-bsd
+	exit 0 ;;
+    esac
+fi
+# Nothing above produced a triplet: print a diagnostic report on stderr and exit with status 1.
+cat >&2 <<EOF
+$0: unable to guess system type
+
+This script, last modified $timestamp, has failed to recognize
+the operating system you are using. It is advised that you
+download the most up to date version of the config scripts from
+
+    ftp://ftp.gnu.org/pub/gnu/config/
+
+If the version you run ($0) is already up to date, please
+send the following data and any information you think might be
+pertinent to <config-patches@gnu.org> in order to provide the needed
+information to handle your system.
+
+config.guess timestamp = $timestamp
+
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`
+/bin/uname -X     = `(/bin/uname -X) 2>/dev/null`
+
+hostinfo               = `(hostinfo) 2>/dev/null`
+/bin/universe          = `(/bin/universe) 2>/dev/null`
+/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null`
+/bin/arch              = `(/bin/arch) 2>/dev/null`
+/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`
+
+UNAME_MACHINE = ${UNAME_MACHINE}
+UNAME_RELEASE = ${UNAME_RELEASE}
+UNAME_SYSTEM  = ${UNAME_SYSTEM}
+UNAME_VERSION = ${UNAME_VERSION}
+EOF
+
+exit 1
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/deps/jemalloc.orig/config.stamp.in b/deps/jemalloc.orig/config.stamp.in
new file mode 100644
index 00000000..e69de29b
diff --git a/deps/jemalloc.orig/config.sub b/deps/jemalloc.orig/config.sub
new file mode 100755
index 00000000..264f820a
--- /dev/null
+++ b/deps/jemalloc.orig/config.sub
@@ -0,0 +1,1549 @@
+#! /bin/sh
+# Configuration validation subroutine script.
+#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+#   2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+timestamp='2004-02-23'
+
+# This file is (in principle) common to ALL GNU software.
+# The presence of a machine in this file suggests that SOME GNU software
+# can handle that machine.  It does not imply ALL GNU software can.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Please send patches to <config-patches@gnu.org>.  Submit a context
+# diff and a properly formatted ChangeLog entry.
+#
+# Configuration subroutine to validate and canonicalize a configuration type.
+# Supply the specified configuration type as an argument.
+# If it is invalid, we print an error message on stderr and exit with code 1.
+# Otherwise, we print the canonical config type on stdout and succeed.
+
+# This file is supposed to be the same for all GNU packages
+# and recognize all the CPU types, system types and aliases
+# that are meaningful with *any* GNU software.
+# Each package is responsible for reporting which valid configurations
+# it does not support.  The user should be able to distinguish
+# a failure to support a valid configuration from a meaningless
+# configuration.
+
+# The goal of this file is to map all the various variations of a given
+# machine specification into a single specification in the form:
+#	CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
+# or in some cases, the newer four-part form:
+#	CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
+# It is wrong to echo any other type of specification.
+
+me=`echo "$0" | sed -e 's,.*/,,'`	# script name with any directory prefix stripped
+
+usage="\
+Usage: $0 [OPTION] CPU-MFR-OPSYS
+       $0 [OPTION] ALIAS
+
+Canonicalize a configuration name.
+
+Operation modes:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+version="\
+GNU config.sub ($timestamp)
+
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
+Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+# Not the --help text (that is $usage): a hint, starting with a newline, appended to error messages.
+help="
+Try \`$me --help' for more information."
+
+# Parse command line.  Every arm exits or breaks, so only the first argument is examined; errors go to stderr.
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit 0 ;;
+    --version | -v )
+       echo "$version" ; exit 0 ;;
+    --help | --h* | -h )
+       echo "$usage"; exit 0 ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )	# Use stdin as input.
+       break ;;
+    -* )
+       echo "$me: invalid option $1$help" >&2
+       exit 1 ;;
+
+    *local*)
+       # First pass through any local machine types.
+       echo "$1"
+       exit 0;;
+
+    * )
+       break ;;
+  esac
+done
+
+if test $# -eq 0 ; then		# no configuration name was given
+    echo "$me: missing argument$help" >&2
+    exit 1
+elif test $# -gt 1 ; then	# exactly one configuration name is accepted
+    echo "$me: too many arguments$help" >&2
+    exit 1
+fi
+
+# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
+# Here we must recognize all the valid KERNEL-OS combinations.
+maybe_os=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`	# trailing KERNEL-OS candidate
+case $maybe_os in
+  nto-qnx* | linux-gnu* | linux-dietlibc | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | \
+  kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*)
+    os=-$maybe_os
+    basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
+    ;;
+  *)
+    basic_machine=`echo "$1" | sed 's/-[^-]*$//'`	# drop the last dash-separated field
+    if [ "$basic_machine" != "$1" ]	# quoted so empty or odd arguments cannot break `test'
+    then os=`echo "$1" | sed 's/.*-/-/'`
+    else os=; fi
+    ;;
+esac
+
+### Let's recognize common machines as not being operating systems so
+### that things like config.sub decstation-3100 work.  We also
+### recognize some manufacturers as not being operating systems, so we
+### can provide default operating systems below.
+case $os in
+	-sun*os*)
+		# Prevent following clause from handling this invalid input.
+		;;
+	-dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
+	-att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
+	-unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
+	-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
+	-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
+	-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
+	-apple | -axis)
+		os=		# the last field was a machine or vendor, not an OS
+		basic_machine=$1
+		;;
+	-sim | -cisco | -oki | -wec | -winbond)
+		os=
+		basic_machine=$1
+		;;
+	-scout)
+		;;		# left unchanged
+	-wrs)
+		os=-vxworks
+		basic_machine=$1
+		;;
+	-chorusos*)
+		os=-chorusos
+		basic_machine=$1
+		;;
+ 	-chorusrdb)
+ 		os=-chorusrdb
+		basic_machine=$1
+ 		;;
+	-hiux*)
+		os=-hiuxwe2
+		;;
+	-sco5)
+		os=-sco3.2v5
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`	# everything after "86-" becomes the pc vendor
+		;;
+	-sco4)
+		os=-sco3.2v4
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco3.2.[4-9]*)
+		os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco3.2v[4-9]*)
+		# Don't forget version if it is 3.2v4 or newer.
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-sco*)
+		os=-sco3.2v2	# any sco spelling not matched above
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-udk*)
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-isc)
+		os=-isc2.2
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-clix*)
+		basic_machine=clipper-intergraph
+		;;
+	-isc*)
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
+	-lynx*)
+		os=-lynxos	# anything following -lynx is dropped
+		;;
+	-ptx*)
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`
+		;;
+	-windowsnt*)
+		os=`echo $os | sed -e 's/windowsnt/winnt/'`
+		;;
+	-psos*)
+		os=-psos	# anything following -psos is dropped
+		;;
+	-mint | -mint[0-9]*)
+		basic_machine=m68k-atari
+		os=-mint
+		;;
+esac
+
+# Decode aliases for certain CPU-COMPANY combinations.
+case $basic_machine in
+	# Recognize the basic CPU types without company name.
+	# Some are omitted here because they have special meanings below.
+	1750a | 580 \
+	| a29k \
+	| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
+	| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
+	| am33_2.0 \
+	| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr \
+	| c4x | clipper \
+	| d10v | d30v | dlx | dsp16xx \
+	| fr30 | frv \
+	| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+	| i370 | i860 | i960 | ia64 \
+	| ip2k | iq2000 \
+	| m32r | m68000 | m68k | m88k | mcore \
+	| mips | mipsbe | mipseb | mipsel | mipsle \
+	| mips16 \
+	| mips64 | mips64el \
+	| mips64vr | mips64vrel \
+	| mips64orion | mips64orionel \
+	| mips64vr4100 | mips64vr4100el \
+	| mips64vr4300 | mips64vr4300el \
+	| mips64vr5000 | mips64vr5000el \
+	| mipsisa32 | mipsisa32el \
+	| mipsisa32r2 | mipsisa32r2el \
+	| mipsisa64 | mipsisa64el \
+	| mipsisa64r2 | mipsisa64r2el \
+	| mipsisa64sb1 | mipsisa64sb1el \
+	| mipsisa64sr71k | mipsisa64sr71kel \
+	| mipstx39 | mipstx39el \
+	| mn10200 | mn10300 \
+	| msp430 \
+	| ns16k | ns32k \
+	| openrisc | or32 \
+	| pdp10 | pdp11 | pj | pjl \
+	| powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
+	| pyramid \
+	| sh | sh[1234] | sh[23]e | sh[34]eb | shbe | shle | sh[1234]le | sh3ele \
+	| sh64 | sh64le \
+	| sparc | sparc64 | sparc86x | sparclet | sparclite | sparcv9 | sparcv9b \
+	| strongarm \
+	| tahoe | thumb | tic4x | tic80 | tron \
+	| v850 | v850e \
+	| we32k \
+	| x86 | xscale | xstormy16 | xtensa \
+	| z8k)
+		basic_machine=$basic_machine-unknown
+		;;
+	m6811 | m68hc11 | m6812 | m68hc12)
+		# Motorola 68HC11/12.
+		basic_machine=$basic_machine-unknown
+		os=-none
+		;;
+	m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)
+		;;
+
+	# We use `pc' rather than `unknown'
+	# because (1) that's what they normally are, and
+	# (2) the word "unknown" tends to confuse beginning users.
+	i*86 | x86_64)
+	  basic_machine=$basic_machine-pc
+	  ;;
+	# Object if more than one company name word.
+	*-*-*)
+		echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
+		exit 1
+		;;
+	# Recognize the basic CPU types with company name.
+	580-* \
+	| a29k-* \
+	| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
+	| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
+	| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
+	| arm-*  | armbe-* | armle-* | armeb-* | armv*-* \
+	| avr-* \
+	| bs2000-* \
+	| c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \
+	| clipper-* | cydra-* \
+	| d10v-* | d30v-* | dlx-* \
+	| elxsi-* \
+	| f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \
+	| h8300-* | h8500-* \
+	| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
+	| i*86-* | i860-* | i960-* | ia64-* \
+	| ip2k-* | iq2000-* \
+	| m32r-* \
+	| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
+	| m88110-* | m88k-* | mcore-* \
+	| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
+	| mips16-* \
+	| mips64-* | mips64el-* \
+	| mips64vr-* | mips64vrel-* \
+	| mips64orion-* | mips64orionel-* \
+	| mips64vr4100-* | mips64vr4100el-* \
+	| mips64vr4300-* | mips64vr4300el-* \
+	| mips64vr5000-* | mips64vr5000el-* \
+	| mipsisa32-* | mipsisa32el-* \
+	| mipsisa32r2-* | mipsisa32r2el-* \
+	| mipsisa64-* | mipsisa64el-* \
+	| mipsisa64r2-* | mipsisa64r2el-* \
+	| mipsisa64sb1-* | mipsisa64sb1el-* \
+	| mipsisa64sr71k-* | mipsisa64sr71kel-* \
+	| mipstx39-* | mipstx39el-* \
+	| msp430-* \
+	| none-* | np1-* | nv1-* | ns16k-* | ns32k-* \
+	| orion-* \
+	| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
+	| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
+	| pyramid-* \
+	| romp-* | rs6000-* \
+	| sh-* | sh[1234]-* | sh[23]e-* | sh[34]eb-* | shbe-* \
+	| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
+	| sparc-* | sparc64-* | sparc86x-* | sparclet-* | sparclite-* \
+	| sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \
+	| tahoe-* | thumb-* \
+	| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
+	| tron-* \
+	| v850-* | v850e-* | vax-* \
+	| we32k-* \
+	| x86-* | x86_64-* | xps100-* | xscale-* | xstormy16-* \
+	| xtensa-* \
+	| ymp-* \
+	| z8k-*)
+		;;
+	# Recognize the various machine names and aliases which stand
+	# for a CPU type and a company and sometimes even an OS.
+	386bsd)
+		basic_machine=i386-unknown
+		os=-bsd
+		;;
+	3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
+		basic_machine=m68000-att
+		;;
+	3b*)
+		basic_machine=we32k-att
+		;;
+	a29khif)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+    	abacus)
+		basic_machine=abacus-unknown
+		;;
+	adobe68k)
+		basic_machine=m68010-adobe
+		os=-scout
+		;;
+	alliant | fx80)
+		basic_machine=fx80-alliant
+		;;
+	altos | altos3068)
+		basic_machine=m68k-altos
+		;;
+	am29k)
+		basic_machine=a29k-none
+		os=-bsd
+		;;
+	amd64)
+		basic_machine=x86_64-pc
+		;;
+	amd64-*)
+		basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	amdahl)
+		basic_machine=580-amdahl
+		os=-sysv
+		;;
+	amiga | amiga-*)
+		basic_machine=m68k-unknown
+		;;
+	amigaos | amigados)
+		basic_machine=m68k-unknown
+		os=-amigaos
+		;;
+	amigaunix | amix)
+		basic_machine=m68k-unknown
+		os=-sysv4
+		;;
+	apollo68)
+		basic_machine=m68k-apollo
+		os=-sysv
+		;;
+	apollo68bsd)
+		basic_machine=m68k-apollo
+		os=-bsd
+		;;
+	aux)
+		basic_machine=m68k-apple
+		os=-aux
+		;;
+	balance)
+		basic_machine=ns32k-sequent
+		os=-dynix
+		;;
+	c90)
+		basic_machine=c90-cray
+		os=-unicos
+		;;
+	convex-c1)
+		basic_machine=c1-convex
+		os=-bsd
+		;;
+	convex-c2)
+		basic_machine=c2-convex
+		os=-bsd
+		;;
+	convex-c32)
+		basic_machine=c32-convex
+		os=-bsd
+		;;
+	convex-c34)
+		basic_machine=c34-convex
+		os=-bsd
+		;;
+	convex-c38)
+		basic_machine=c38-convex
+		os=-bsd
+		;;
+	cray | j90)
+		basic_machine=j90-cray
+		os=-unicos
+		;;
+	cr16c)
+		basic_machine=cr16c-unknown
+		os=-elf
+		;;
+	crds | unos)
+		basic_machine=m68k-crds
+		;;
+	cris | cris-* | etrax*)
+		basic_machine=cris-axis
+		;;
+	crx)
+		basic_machine=crx-unknown
+		os=-elf
+		;;
+	da30 | da30-*)
+		basic_machine=m68k-da30
+		;;
+	decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
+		basic_machine=mips-dec
+		;;
+	decsystem10* | dec10*)
+		basic_machine=pdp10-dec
+		os=-tops10
+		;;
+	decsystem20* | dec20*)
+		basic_machine=pdp10-dec
+		os=-tops20
+		;;
+	delta | 3300 | motorola-3300 | motorola-delta \
+	      | 3300-motorola | delta-motorola)
+		basic_machine=m68k-motorola
+		;;
+	delta88)
+		basic_machine=m88k-motorola
+		os=-sysv3
+		;;
+	dpx20 | dpx20-*)
+		basic_machine=rs6000-bull
+		os=-bosx
+		;;
+	dpx2* | dpx2*-bull)
+		basic_machine=m68k-bull
+		os=-sysv3
+		;;
+	ebmon29k)
+		basic_machine=a29k-amd
+		os=-ebmon
+		;;
+	elxsi)
+		basic_machine=elxsi-elxsi
+		os=-bsd
+		;;
+	encore | umax | mmax)
+		basic_machine=ns32k-encore
+		;;
+	es1800 | OSE68k | ose68k | ose | OSE)
+		basic_machine=m68k-ericsson
+		os=-ose
+		;;
+	fx2800)
+		basic_machine=i860-alliant
+		;;
+	genix)
+		basic_machine=ns32k-ns
+		;;
+	gmicro)
+		basic_machine=tron-gmicro
+		os=-sysv
+		;;
+	go32)
+		basic_machine=i386-pc
+		os=-go32
+		;;
+	h3050r* | hiux*)
+		basic_machine=hppa1.1-hitachi
+		os=-hiuxwe2
+		;;
+	h8300hms)
+		basic_machine=h8300-hitachi
+		os=-hms
+		;;
+	h8300xray)
+		basic_machine=h8300-hitachi
+		os=-xray
+		;;
+	h8500hms)
+		basic_machine=h8500-hitachi
+		os=-hms
+		;;
+	harris)
+		basic_machine=m88k-harris
+		os=-sysv3
+		;;
+	hp300-*)
+		basic_machine=m68k-hp
+		;;
+	hp300bsd)
+		basic_machine=m68k-hp
+		os=-bsd
+		;;
+	hp300hpux)
+		basic_machine=m68k-hp
+		os=-hpux
+		;;
+	hp3k9[0-9][0-9] | hp9[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hp9k2[0-9][0-9] | hp9k31[0-9])
+		basic_machine=m68000-hp
+		;;
+	hp9k3[2-9][0-9])
+		basic_machine=m68k-hp
+		;;
+	hp9k6[0-9][0-9] | hp6[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hp9k7[0-79][0-9] | hp7[0-79][0-9])
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k78[0-9] | hp78[0-9])
+		# FIXME: really hppa2.0-hp
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
+		# FIXME: really hppa2.0-hp
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[0-9][13679] | hp8[0-9][13679])
+		basic_machine=hppa1.1-hp
+		;;
+	hp9k8[0-9][0-9] | hp8[0-9][0-9])
+		basic_machine=hppa1.0-hp
+		;;
+	hppa-next)
+		os=-nextstep3
+		;;
+	hppaosf)
+		basic_machine=hppa1.1-hp
+		os=-osf
+		;;
+	hppro)
+		basic_machine=hppa1.1-hp
+		os=-proelf
+		;;
+	i370-ibm* | ibm*)
+		basic_machine=i370-ibm
+		;;
+# I'm not sure what "Sysv32" means.  Should this be sysv3.2?
+	i*86v32)
+		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+		os=-sysv32
+		;;
+	i*86v4*)
+		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+		os=-sysv4
+		;;
+	i*86v)
+		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+		os=-sysv
+		;;
+	i*86sol2)
+		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
+		os=-solaris2
+		;;
+	i386mach)
+		basic_machine=i386-mach
+		os=-mach
+		;;
+	i386-vsta | vsta)
+		basic_machine=i386-unknown
+		os=-vsta
+		;;
+	iris | iris4d)
+		basic_machine=mips-sgi
+		case $os in
+		    -irix*)
+			;;
+		    *)
+			os=-irix4
+			;;
+		esac
+		;;
+	isi68 | isi)
+		basic_machine=m68k-isi
+		os=-sysv
+		;;
+	m88k-omron*)
+		basic_machine=m88k-omron
+		;;
+	magnum | m3230)
+		basic_machine=mips-mips
+		os=-sysv
+		;;
+	merlin)
+		basic_machine=ns32k-utek
+		os=-sysv
+		;;
+	mingw32)
+		basic_machine=i386-pc
+		os=-mingw32
+		;;
+	miniframe)
+		basic_machine=m68000-convergent
+		;;
+	*mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
+		basic_machine=m68k-atari
+		os=-mint
+		;;
+	mips3*-*)
+		basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
+		;;
+	mips3*)
+		basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
+		;;
+	mmix*)
+		basic_machine=mmix-knuth
+		os=-mmixware
+		;;
+	monitor)
+		basic_machine=m68k-rom68k
+		os=-coff
+		;;
+	morphos)
+		basic_machine=powerpc-unknown
+		os=-morphos
+		;;
+	msdos)
+		basic_machine=i386-pc
+		os=-msdos
+		;;
+	mvs)
+		basic_machine=i370-ibm
+		os=-mvs
+		;;
+	ncr3000)
+		basic_machine=i486-ncr
+		os=-sysv4
+		;;
+	netbsd386)
+		basic_machine=i386-unknown
+		os=-netbsd
+		;;
+	netwinder)
+		basic_machine=armv4l-rebel
+		os=-linux
+		;;
+	news | news700 | news800 | news900)
+		basic_machine=m68k-sony
+		os=-newsos
+		;;
+	news1000)
+		basic_machine=m68030-sony
+		os=-newsos
+		;;
+	news-3600 | risc-news)
+		basic_machine=mips-sony
+		os=-newsos
+		;;
+	necv70)
+		basic_machine=v70-nec
+		os=-sysv
+		;;
+	next | m*-next )
+		basic_machine=m68k-next
+		case $os in
+		    -nextstep* )
+			;;
+		    -ns2*)
+		      os=-nextstep2
+			;;
+		    *)
+		      os=-nextstep3
+			;;
+		esac
+		;;
+	nh3000)
+		basic_machine=m68k-harris
+		os=-cxux
+		;;
+	nh[45]000)
+		basic_machine=m88k-harris
+		os=-cxux
+		;;
+	nindy960)
+		basic_machine=i960-intel
+		os=-nindy
+		;;
+	mon960)
+		basic_machine=i960-intel
+		os=-mon960
+		;;
+	nonstopux)
+		basic_machine=mips-compaq
+		os=-nonstopux
+		;;
+	np1)
+		basic_machine=np1-gould
+		;;
+	nv1)
+		basic_machine=nv1-cray
+		os=-unicosmp
+		;;
+	nsr-tandem)
+		basic_machine=nsr-tandem
+		;;
+	op50n-* | op60c-*)
+		basic_machine=hppa1.1-oki
+		os=-proelf
+		;;
+	or32 | or32-*)
+		basic_machine=or32-unknown
+		os=-coff
+		;;
+	os400)
+		basic_machine=powerpc-ibm
+		os=-os400
+		;;
+	OSE68000 | ose68000)
+		basic_machine=m68000-ericsson
+		os=-ose
+		;;
+	os68k)
+		basic_machine=m68k-none
+		os=-os68k
+		;;
+	pa-hitachi)
+		basic_machine=hppa1.1-hitachi
+		os=-hiuxwe2
+		;;
+	paragon)
+		basic_machine=i860-intel
+		os=-osf
+		;;
+	pbd)
+		basic_machine=sparc-tti
+		;;
+	pbb)
+		basic_machine=m68k-tti
+		;;
+	pc532 | pc532-*)
+		basic_machine=ns32k-pc532
+		;;
+	pentium | p5 | k5 | k6 | nexgen | viac3)
+		basic_machine=i586-pc
+		;;
+	pentiumpro | p6 | 6x86 | athlon | athlon_*)
+		basic_machine=i686-pc
+		;;
+	pentiumii | pentium2 | pentiumiii | pentium3)
+		basic_machine=i686-pc
+		;;
+	pentium4)
+		basic_machine=i786-pc
+		;;
+	pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
+		basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	pentiumpro-* | p6-* | 6x86-* | athlon-*)
+		basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
+		basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	pentium4-*)
+		basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	pn)
+		basic_machine=pn-gould
+		;;
+	power)	basic_machine=power-ibm
+		;;
+	ppc)	basic_machine=powerpc-unknown
+		;;
+	ppc-*)	basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	ppcle | powerpclittle | ppc-le | powerpc-little)
+		basic_machine=powerpcle-unknown
+		;;
+	ppcle-* | powerpclittle-*)
+		basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	ppc64)	basic_machine=powerpc64-unknown
+		;;
+	ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	ppc64le | powerpc64little | ppc64-le | powerpc64-little)
+		basic_machine=powerpc64le-unknown
+		;;
+	ppc64le-* | powerpc64little-*)
+		basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	ps2)
+		basic_machine=i386-ibm
+		;;
+	pw32)
+		basic_machine=i586-unknown
+		os=-pw32
+		;;
+	rom68k)
+		basic_machine=m68k-rom68k
+		os=-coff
+		;;
+	rm[46]00)
+		basic_machine=mips-siemens
+		;;
+	rtpc | rtpc-*)
+		basic_machine=romp-ibm
+		;;
+	s390 | s390-*)
+		basic_machine=s390-ibm
+		;;
+	s390x | s390x-*)
+		basic_machine=s390x-ibm
+		;;
+	sa29200)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+	sb1)
+		basic_machine=mipsisa64sb1-unknown
+		;;
+	sb1el)
+		basic_machine=mipsisa64sb1el-unknown
+		;;
+	sei)
+		basic_machine=mips-sei
+		os=-seiux
+		;;
+	sequent)
+		basic_machine=i386-sequent
+		;;
+	sh)
+		basic_machine=sh-hitachi
+		os=-hms
+		;;
+	sh64)
+		basic_machine=sh64-unknown
+		;;
+	sparclite-wrs | simso-wrs)
+		basic_machine=sparclite-wrs
+		os=-vxworks
+		;;
+	sps7)
+		basic_machine=m68k-bull
+		os=-sysv2
+		;;
+	spur)
+		basic_machine=spur-unknown
+		;;
+	st2000)
+		basic_machine=m68k-tandem
+		;;
+	stratus)
+		basic_machine=i860-stratus
+		os=-sysv4
+		;;
+	sun2)
+		basic_machine=m68000-sun
+		;;
+	sun2os3)
+		basic_machine=m68000-sun
+		os=-sunos3
+		;;
+	sun2os4)
+		basic_machine=m68000-sun
+		os=-sunos4
+		;;
+	sun3os3)
+		basic_machine=m68k-sun
+		os=-sunos3
+		;;
+	sun3os4)
+		basic_machine=m68k-sun
+		os=-sunos4
+		;;
+	sun4os3)
+		basic_machine=sparc-sun
+		os=-sunos3
+		;;
+	sun4os4)
+		basic_machine=sparc-sun
+		os=-sunos4
+		;;
+	sun4sol2)
+		basic_machine=sparc-sun
+		os=-solaris2
+		;;
+	sun3 | sun3-*)
+		basic_machine=m68k-sun
+		;;
+	sun4)
+		basic_machine=sparc-sun
+		;;
+	sun386 | sun386i | roadrunner)
+		basic_machine=i386-sun
+		;;
+	sv1)
+		basic_machine=sv1-cray
+		os=-unicos
+		;;
+	symmetry)
+		basic_machine=i386-sequent
+		os=-dynix
+		;;
+	t3e)
+		basic_machine=alphaev5-cray
+		os=-unicos
+		;;
+	t90)
+		basic_machine=t90-cray
+		os=-unicos
+		;;
+	tic54x | c54x*)
+		basic_machine=tic54x-unknown
+		os=-coff
+		;;
+	tic55x | c55x*)
+		basic_machine=tic55x-unknown
+		os=-coff
+		;;
+	tic6x | c6x*)
+		basic_machine=tic6x-unknown
+		os=-coff
+		;;
+	tx39)
+		basic_machine=mipstx39-unknown
+		;;
+	tx39el)
+		basic_machine=mipstx39el-unknown
+		;;
+	toad1)
+		basic_machine=pdp10-xkl
+		os=-tops20
+		;;
+	tower | tower-32)
+		basic_machine=m68k-ncr
+		;;
+	tpf)
+		basic_machine=s390x-ibm
+		os=-tpf
+		;;
+	udi29k)
+		basic_machine=a29k-amd
+		os=-udi
+		;;
+	ultra3)
+		basic_machine=a29k-nyu
+		os=-sym1
+		;;
+	v810 | necv810)
+		basic_machine=v810-nec
+		os=-none
+		;;
+	vaxv)
+		basic_machine=vax-dec
+		os=-sysv
+		;;
+	vms)
+		basic_machine=vax-dec
+		os=-vms
+		;;
+	vpp*|vx|vx-*)
+		basic_machine=f301-fujitsu
+		;;
+	vxworks960)
+		basic_machine=i960-wrs
+		os=-vxworks
+		;;
+	vxworks68)
+		basic_machine=m68k-wrs
+		os=-vxworks
+		;;
+	vxworks29k)
+		basic_machine=a29k-wrs
+		os=-vxworks
+		;;
+	w65*)
+		basic_machine=w65-wdc
+		os=-none
+		;;
+	w89k-*)
+		basic_machine=hppa1.1-winbond
+		os=-proelf
+		;;
+	xps | xps100)
+		basic_machine=xps100-honeywell
+		;;
+	ymp)
+		basic_machine=ymp-cray
+		os=-unicos
+		;;
+	z8k-*-coff)
+		basic_machine=z8k-unknown
+		os=-sim
+		;;
+	none)
+		basic_machine=none-none
+		os=-none
+		;;
+
+# Here we handle the default manufacturer of certain CPU types.  It is in
+# some cases the only manufacturer, in others, it is the most popular.
+	w89k)
+		basic_machine=hppa1.1-winbond
+		;;
+	op50n)
+		basic_machine=hppa1.1-oki
+		;;
+	op60c)
+		basic_machine=hppa1.1-oki
+		;;
+	romp)
+		basic_machine=romp-ibm
+		;;
+	rs6000)
+		basic_machine=rs6000-ibm
+		;;
+	vax)
+		basic_machine=vax-dec
+		;;
+	pdp10)
+		# there are many clones, so DEC is not a safe bet
+		basic_machine=pdp10-unknown
+		;;
+	pdp11)
+		basic_machine=pdp11-dec
+		;;
+	we32k)
+		basic_machine=we32k-att
+		;;
+	sh3 | sh4 | sh[34]eb | sh[1234]le | sh[23]ele)
+		basic_machine=sh-unknown
+		;;
+	sh64)
+		basic_machine=sh64-unknown
+		;;
+	sparc | sparcv9 | sparcv9b)
+		basic_machine=sparc-sun
+		;;
+	cydra)
+		basic_machine=cydra-cydrome
+		;;
+	orion)
+		basic_machine=orion-highlevel
+		;;
+	orion105)
+		basic_machine=clipper-highlevel
+		;;
+	mac | mpw | mac-mpw)
+		basic_machine=m68k-apple
+		;;
+	pmac | pmac-mpw)
+		basic_machine=powerpc-apple
+		;;
+	*-unknown)
+		# Make sure to match an already-canonicalized machine name.
+		;;
+	*)
+		echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
+		exit 1
+		;;
+esac
+
+# Here we canonicalize certain aliases for manufacturers.
+case $basic_machine in
+	*-digital*)	# vendor written as "digital": replace it and anything after it with "dec"
+		basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'`
+		;;
+	*-commodore*)	# vendor written as "commodore": replace it and anything after it with "cbm"
+		basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'`
+		;;
+	*)	# any other machine name is left unchanged
+		;;
+esac
+
+# Decode manufacturer-specific aliases for certain operating systems.
+
+if [ x"$os" != x"" ]
+then
+case $os in
+        # First match some system type aliases
+        # that might get confused with valid system types.
+	# -solaris* is a basic system type, with this one exception.
+	-solaris1 | -solaris1.*)
+		os=`echo $os | sed -e 's|solaris1|sunos4|'`
+		;;
+	-solaris)
+		os=-solaris2
+		;;
+	-svr4*)
+		os=-sysv4
+		;;
+	-unixware*)
+		os=-sysv4.2uw
+		;;
+	-gnu/linux*)
+		os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
+		;;
+	# Now accept the basic system types.
+	# The portable systems come first.
+	# Each alternative MUST END IN A *, to match a version number.
+	# -sysv* is not here because it comes later, after sysvr4.
+	-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
+	      | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\
+	      | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \
+	      | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
+	      | -aos* \
+	      | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
+	      | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
+	      | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* | -openbsd* \
+	      | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
+	      | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
+	      | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
+	      | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
+	      | -chorusos* | -chorusrdb* \
+	      | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+	      | -mingw32* | -linux-gnu* | -linux-uclibc* | -uxpv* | -beos* | -mpeix* | -udk* \
+	      | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
+	      | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
+	      | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
+	      | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
+	      | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
+	      | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly*)
+	# Remember, each alternative MUST END IN *, to match a version number.
+		;;
+	-qnx*)
+		case $basic_machine in
+		    x86-* | i*86-*)
+			;;
+		    *)
+			os=-nto$os
+			;;
+		esac
+		;;
+	-nto-qnx*)
+		;;
+	-nto*)
+		os=`echo $os | sed -e 's|nto|nto-qnx|'`
+		;;
+	-sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
+	      | -windows* | -osx | -abug | -netware* | -os9* | -beos* \
+	      | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
+		;;
+	-mac*)
+		os=`echo $os | sed -e 's|mac|macos|'`
+		;;
+	-linux-dietlibc)
+		os=-linux-dietlibc
+		;;
+	-linux*)
+		os=`echo $os | sed -e 's|linux|linux-gnu|'`
+		;;
+	-sunos5*)
+		os=`echo $os | sed -e 's|sunos5|solaris2|'`
+		;;
+	-sunos6*)
+		os=`echo $os | sed -e 's|sunos6|solaris3|'`
+		;;
+	-opened*)
+		os=-openedition
+		;;
+        -os400*)
+		os=-os400
+		;;
+	-wince*)
+		os=-wince
+		;;
+	-osfrose*)
+		os=-osfrose
+		;;
+	-osf*)
+		os=-osf
+		;;
+	-utek*)
+		os=-bsd
+		;;
+	-dynix*)
+		os=-bsd
+		;;
+	-acis*)
+		os=-aos
+		;;
+	-atheos*)
+		os=-atheos
+		;;
+	-syllable*)
+		os=-syllable
+		;;
+	-386bsd)
+		os=-bsd
+		;;
+	-ctix* | -uts*)
+		os=-sysv
+		;;
+	-nova*)
+		os=-rtmk-nova
+		;;
+	-ns2 )
+		os=-nextstep2
+		;;
+	-nsk*)
+		os=-nsk
+		;;
+	# Preserve the version number of sinix5.
+	-sinix5.*)
+		os=`echo $os | sed -e 's|sinix|sysv|'`
+		;;
+	-sinix*)
+		os=-sysv4
+		;;
+        -tpf*)
+		os=-tpf
+		;;
+	-triton*)
+		os=-sysv3
+		;;
+	-oss*)
+		os=-sysv3
+		;;
+	-svr4)
+		os=-sysv4
+		;;
+	-svr3)
+		os=-sysv3
+		;;
+	-sysvr4)
+		os=-sysv4
+		;;
+	# This must come after -sysvr4.
+	-sysv*)
+		;;
+	-ose*)
+		os=-ose
+		;;
+	-es1800*)
+		os=-ose
+		;;
+	-xenix)
+		os=-xenix
+		;;
+	-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+		os=-mint
+		;;
+	-aros*)
+		os=-aros
+		;;
+	-kaos*)
+		os=-kaos
+		;;
+	-none)
+		;;
+	*)
+		# Get rid of the `-' at the beginning of $os.
+		os=`echo $os | sed 's/[^-]*-//'`
+		echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2
+		exit 1
+		;;
+esac
+else
+
+# Here we handle the default operating systems that come with various machines.
+# The value should be what the vendor currently ships out the door with their
+# machine or, put another way, the most popular OS provided with the machine.
+
+# Note that if you're going to try to match "-MANUFACTURER" here (say,
+# "-sun"), then you have to tell the case statement up towards the top
+# that MANUFACTURER isn't an operating system.  Otherwise, code above
+# will signal an error saying that MANUFACTURER isn't an operating
+# system, and we'll never get to this point.
+
+case $basic_machine in
+	*-acorn)
+		os=-riscix1.2
+		;;
+	arm*-rebel)
+		os=-linux
+		;;
+	arm*-semi)
+		os=-aout
+		;;
+    c4x-* | tic4x-*)
+        os=-coff
+        ;;
+	# This must come before the *-dec entry.
+	pdp10-*)
+		os=-tops20
+		;;
+	pdp11-*)
+		os=-none
+		;;
+	*-dec | vax-*)
+		os=-ultrix4.2
+		;;
+	m68*-apollo)
+		os=-domain
+		;;
+	i386-sun)
+		os=-sunos4.0.2
+		;;
+	m68000-sun)
+		os=-sunos3
+		# This also exists in the configure program, but was not the
+		# default.
+		# os=-sunos4
+		;;
+	m68*-cisco)
+		os=-aout
+		;;
+	mips*-cisco)
+		os=-elf
+		;;
+	mips*-*)
+		os=-elf
+		;;
+	or32-*)
+		os=-coff
+		;;
+	*-tti)	# must be before sparc entry or we get the wrong os.
+		os=-sysv3
+		;;
+	sparc-* | *-sun)
+		os=-sunos4.1.1
+		;;
+	*-be)
+		os=-beos
+		;;
+	*-ibm)
+		os=-aix
+		;;
+	*-wec)
+		os=-proelf
+		;;
+	*-winbond)
+		os=-proelf
+		;;
+	*-oki)
+		os=-proelf
+		;;
+	*-hp)
+		os=-hpux
+		;;
+	*-hitachi)
+		os=-hiux
+		;;
+	i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
+		os=-sysv
+		;;
+	*-cbm)
+		os=-amigaos
+		;;
+	*-dg)
+		os=-dgux
+		;;
+	*-dolphin)
+		os=-sysv3
+		;;
+	m68k-ccur)
+		os=-rtu
+		;;
+	m88k-omron*)
+		os=-luna
+		;;
+	*-next )
+		os=-nextstep
+		;;
+	*-sequent)
+		os=-ptx
+		;;
+	*-crds)
+		os=-unos
+		;;
+	*-ns)
+		os=-genix
+		;;
+	i370-*)
+		os=-mvs
+		;;
+	*-next)
+		os=-nextstep3
+		;;
+	*-gould)
+		os=-sysv
+		;;
+	*-highlevel)
+		os=-bsd
+		;;
+	*-encore)
+		os=-bsd
+		;;
+	*-sgi)
+		os=-irix
+		;;
+	*-siemens)
+		os=-sysv4
+		;;
+	*-masscomp)
+		os=-rtu
+		;;
+	f30[01]-fujitsu | f700-fujitsu)
+		os=-uxpv
+		;;
+	*-rom68k)
+		os=-coff
+		;;
+	*-*bug)
+		os=-coff
+		;;
+	*-apple)
+		os=-macos
+		;;
+	*-atari*)
+		os=-mint
+		;;
+	*)
+		os=-none
+		;;
+esac
+fi
+
+# Here we handle the case where we know the os, and the CPU type, but not the
+# manufacturer.  We pick the logical manufacturer.
+vendor=unknown	# used when $os matches none of the patterns below
+case $basic_machine in
+	*-unknown)	# only machine names whose vendor field is "unknown" are rewritten
+		case $os in	# map the operating system to a vendor name
+			-riscix*)
+				vendor=acorn
+				;;
+			-sunos*)
+				vendor=sun
+				;;
+			-aix*)
+				vendor=ibm
+				;;
+			-beos*)
+				vendor=be
+				;;
+			-hpux*)
+				vendor=hp
+				;;
+			-mpeix*)
+				vendor=hp
+				;;
+			-hiux*)
+				vendor=hitachi
+				;;
+			-unos*)
+				vendor=crds
+				;;
+			-dgux*)
+				vendor=dg
+				;;
+			-luna*)
+				vendor=omron
+				;;
+			-genix*)
+				vendor=ns
+				;;
+			-mvs* | -opened*)
+				vendor=ibm
+				;;
+			-os400*)
+				vendor=ibm
+				;;
+			-ptx*)
+				vendor=sequent
+				;;
+			-tpf*)
+				vendor=ibm
+				;;
+			-vxsim* | -vxworks* | -windiss*)
+				vendor=wrs
+				;;
+			-aux*)
+				vendor=apple
+				;;
+			-hms*)
+				vendor=hitachi
+				;;
+			-mpw* | -macos*)
+				vendor=apple
+				;;
+			-*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+				vendor=atari
+				;;
+			-vos*)
+				vendor=stratus
+				;;
+		esac
+		basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"`	# replaces the first "unknown" with $vendor
+		;;
+esac
+
+echo $basic_machine$os	# print the result: the machine name immediately followed by $os
+exit 0
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/deps/jemalloc.orig/configure.ac b/deps/jemalloc.orig/configure.ac
new file mode 100644
index 00000000..b58aa520
--- /dev/null
+++ b/deps/jemalloc.orig/configure.ac
@@ -0,0 +1,938 @@
+dnl Process this file with autoconf to produce a configure script.
+AC_INIT([Makefile.in])
+
+dnl ============================================================================
+dnl Custom macro definitions.
+
+dnl JE_CFLAGS_APPEND(cflag): append cflag to CFLAGS if a trivial program compiles with it, else restore CFLAGS (compile-only, so it works when cross-compiling).
+AC_DEFUN([JE_CFLAGS_APPEND],
+[
+AC_MSG_CHECKING([whether compiler supports $1])
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="$1"
+else
+  CFLAGS="${CFLAGS} $1"
+fi
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+[[
+]], [[
+    return 0;
+]])],
+              AC_MSG_RESULT([yes]),
+              AC_MSG_RESULT([no])
+              [CFLAGS="${TCFLAGS}"]
+)
+])
+
+dnl JE_COMPILABLE(label, hcode, mcode, rvar): set rvar to "yes" if hcode (file scope) plus mcode (body of main) compiles and links, else "no"; nothing is executed, so it works when cross-compiling.
+AC_DEFUN([JE_COMPILABLE],
+[
+AC_MSG_CHECKING([whether $1 is compilable])
+AC_LINK_IFELSE([AC_LANG_PROGRAM(
+[$2], [$3])],
+              AC_MSG_RESULT([yes])
+              [$4="yes"],
+              AC_MSG_RESULT([no])
+              [$4="no"]
+)
+])
+
+dnl ============================================================================
+
+srcroot=$srcdir
+if test "x${srcroot}" = "x." ; then
+  srcroot=""
+else
+  srcroot="${srcroot}/"
+fi
+AC_SUBST([srcroot])
+abs_srcroot="`cd \"${srcdir}\"; pwd`/"
+AC_SUBST([abs_srcroot])
+
+objroot=""
+AC_SUBST([objroot])
+abs_objroot="`pwd`/"
+AC_SUBST([abs_objroot])
+
+dnl Munge install path variables.
+if test "x$prefix" = "xNONE" ; then
+  prefix="/usr/local"
+fi
+if test "x$exec_prefix" = "xNONE" ; then
+  exec_prefix=$prefix
+fi
+PREFIX=$prefix
+AC_SUBST([PREFIX])
+BINDIR=`eval echo $bindir`
+BINDIR=`eval echo $BINDIR`
+AC_SUBST([BINDIR])
+INCLUDEDIR=`eval echo $includedir`
+INCLUDEDIR=`eval echo $INCLUDEDIR`
+AC_SUBST([INCLUDEDIR])
+LIBDIR=`eval echo $libdir`
+LIBDIR=`eval echo $LIBDIR`
+AC_SUBST([LIBDIR])
+DATADIR=`eval echo $datadir`
+DATADIR=`eval echo $DATADIR`
+AC_SUBST([DATADIR])
+MANDIR=`eval echo $mandir`
+MANDIR=`eval echo $MANDIR`
+AC_SUBST([MANDIR])
+
+dnl Support for building documentation.
+AC_PATH_PROG([XSLTPROC], [xsltproc], , [$PATH])
+AC_ARG_WITH([xslroot],
+  [AS_HELP_STRING([--with-xslroot=<path>], [XSL stylesheet root path])],
+if test "x$with_xslroot" = "xno" ; then
+  XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl"
+else
+  XSLROOT="${with_xslroot}"
+fi,
+  XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl"
+)
+AC_SUBST([XSLROOT])
+
+dnl If CFLAGS isn't defined, set CFLAGS to something reasonable.  Otherwise,
+dnl just prevent autoconf from molesting CFLAGS.
+CFLAGS=$CFLAGS
+AC_PROG_CC
+if test "x$CFLAGS" = "x" ; then
+  no_CFLAGS="yes"
+  if test "x$GCC" = "xyes" ; then
+    JE_CFLAGS_APPEND([-std=gnu99])
+    JE_CFLAGS_APPEND([-Wall])
+    JE_CFLAGS_APPEND([-pipe])
+    JE_CFLAGS_APPEND([-g3])
+  fi
+fi
+dnl Append EXTRA_CFLAGS to CFLAGS, if defined.
+if test "x$EXTRA_CFLAGS" != "x" ; then
+  JE_CFLAGS_APPEND([$EXTRA_CFLAGS])
+fi
+AC_PROG_CPP
+
+AC_CHECK_SIZEOF([void *])
+if test "x${ac_cv_sizeof_void_p}" = "x8" ; then
+  LG_SIZEOF_PTR=3
+elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then
+  LG_SIZEOF_PTR=2
+else
+  AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}])
+fi
+AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR])
+
+AC_CHECK_SIZEOF([int])
+if test "x${ac_cv_sizeof_int}" = "x8" ; then
+  LG_SIZEOF_INT=3
+elif test "x${ac_cv_sizeof_int}" = "x4" ; then
+  LG_SIZEOF_INT=2
+else
+  AC_MSG_ERROR([Unsupported int size: ${ac_cv_sizeof_int}])
+fi
+AC_DEFINE_UNQUOTED([LG_SIZEOF_INT], [$LG_SIZEOF_INT])
+
+AC_CHECK_SIZEOF([long])
+if test "x${ac_cv_sizeof_long}" = "x8" ; then
+  LG_SIZEOF_LONG=3
+elif test "x${ac_cv_sizeof_long}" = "x4" ; then
+  LG_SIZEOF_LONG=2
+else
+  AC_MSG_ERROR([Unsupported long size: ${ac_cv_sizeof_long}])
+fi
+AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG])
+
+AC_CANONICAL_HOST
+dnl CPU-specific settings.
+CPU_SPINWAIT=""	# stays empty unless one of the pause probes below succeeds
+case "${host_cpu}" in
+  i[[345]]86)	# no probe is made for these CPUs; CPU_SPINWAIT stays empty
+	;;
+  i686)
+	JE_COMPILABLE([__asm__], [], [[__asm__ volatile("pause"); return 0;]],
+	              [asm])
+	if test "x${asm}" = "xyes" ; then
+	    CPU_SPINWAIT='__asm__ volatile("pause")'
+	fi
+	;;
+  x86_64)
+	JE_COMPILABLE([__asm__ syntax], [],
+	              [[__asm__ volatile("pause"); return 0;]], [asm])
+	if test "x${asm}" = "xyes" ; then
+	    CPU_SPINWAIT='__asm__ volatile("pause")'
+	fi
+	;;
+  *)	# every other CPU: no spin-wait statement is configured
+	;;
+esac
+AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT])
+
+dnl Platform-specific settings.  abi and RPATH can probably be determined
+dnl programmatically, but doing so is error-prone, which makes it generally
+dnl not worth the trouble.
+dnl 
+dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the
+dnl definitions need to be seen before any headers are included, which is a pain
+dnl to make happen otherwise.
+case "${host}" in
+  *-*-darwin*)
+	CFLAGS="$CFLAGS -fno-common -no-cpp-precomp"
+	abi="macho"
+	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE])
+	RPATH=""
+	;;
+  *-*-freebsd*)
+	CFLAGS="$CFLAGS"
+	abi="elf"
+	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE])
+	RPATH="-Wl,-rpath,"
+	;;
+  *-*-linux*)
+	CFLAGS="$CFLAGS"
+	CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
+	abi="elf"
+	AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED])
+	RPATH="-Wl,-rpath,"
+	;;
+  *-*-netbsd*)
+	AC_MSG_CHECKING([ABI])
+        AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+[[#ifdef __ELF__
+/* ELF */
+#else
+#error aout
+#endif
+]])],
+                          [CFLAGS="$CFLAGS"; abi="elf"],
+                          [abi="aout"])
+	AC_MSG_RESULT([$abi])
+	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE])
+	RPATH="-Wl,-rpath,"
+	;;
+  *-*-solaris2*)
+	CFLAGS="$CFLAGS"
+	abi="elf"
+	RPATH="-Wl,-R,"
+	dnl Solaris needs this for sigwait().
+	CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS"
+	LIBS="$LIBS -lposix4 -lsocket -lnsl"
+	;;
+  *)
+	AC_MSG_RESULT([Unsupported operating system: ${host}])
+	abi="elf"
+	RPATH="-Wl,-rpath,"
+	;;
+esac
+AC_SUBST([abi])
+AC_SUBST([RPATH])
+
+JE_COMPILABLE([__attribute__ syntax],
+              [static __attribute__((unused)) void foo(void){}],
+              [],
+              [attribute])
+if test "x${attribute}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ])
+  if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then
+    JE_CFLAGS_APPEND([-fvisibility=hidden])
+  fi
+fi
+
+JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [
+#define _GNU_SOURCE
+#include <sys/mman.h>
+], [
+void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0);
+], [mremap_fixed])
+if test "x${mremap_fixed}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_MREMAP_FIXED])
+fi
+
+dnl Support optional additions to rpath.
+AC_ARG_WITH([rpath],
+  [AS_HELP_STRING([--with-rpath=<rpath>], [Colon-separated rpath (ELF systems only)])],
+if test "x$with_rpath" = "xno" ; then
+  RPATH_EXTRA=
+else
+  RPATH_EXTRA="`echo $with_rpath | tr \":\" \" \"`"
+fi,
+  RPATH_EXTRA=
+)
+AC_SUBST([RPATH_EXTRA])
+
+dnl Disable rules that do automatic regeneration of configure output by default.
+AC_ARG_ENABLE([autogen],
+  [AS_HELP_STRING([--enable-autogen], [Automatically regenerate configure output])],
+if test "x$enable_autogen" = "xno" ; then
+  enable_autogen="0"
+else
+  enable_autogen="1"
+fi
+,
+enable_autogen="0"
+)
+AC_SUBST([enable_autogen])
+
+AC_PROG_INSTALL
+AC_PROG_RANLIB
+AC_PATH_PROG([AR], [ar], , [$PATH])
+AC_PATH_PROG([LD], [ld], , [$PATH])
+AC_PATH_PROG([AUTOCONF], [autoconf], , [$PATH])
+
+dnl Do not prefix public APIs by default.
+AC_ARG_WITH([jemalloc_prefix],
+  [AS_HELP_STRING([--with-jemalloc-prefix=<prefix>], [Prefix to prepend to all public APIs])],
+  [JEMALLOC_PREFIX="$with_jemalloc_prefix"],
+  [if test "x$abi" != "xmacho" ; then
+  JEMALLOC_PREFIX=""
+else
+  JEMALLOC_PREFIX="je_"
+fi]
+)
+if test "x$JEMALLOC_PREFIX" != "x" ; then
+  JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"`
+  AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"])
+  AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"])
+  AC_DEFINE_UNQUOTED([JEMALLOC_P(string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix)], [${JEMALLOC_PREFIX}##string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix])
+fi
+
+dnl Do not mangle library-private APIs by default.
+AC_ARG_WITH([private_namespace],
+  [AS_HELP_STRING([--with-private-namespace=<prefix>], [Prefix to prepend to all library-private APIs])],
+  [JEMALLOC_PRIVATE_NAMESPACE="$with_private_namespace"],
+  [JEMALLOC_PRIVATE_NAMESPACE=""]
+)
+AC_DEFINE_UNQUOTED([JEMALLOC_PRIVATE_NAMESPACE], ["$JEMALLOC_PRIVATE_NAMESPACE"])
+if test "x$JEMALLOC_PRIVATE_NAMESPACE" != "x" ; then
+  AC_DEFINE_UNQUOTED([JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix)], [${JEMALLOC_PRIVATE_NAMESPACE}##string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix])
+else
+  AC_DEFINE_UNQUOTED([JEMALLOC_N(string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix)], [string_that_no_one_should_want_to_use_as_a_jemalloc_private_namespace_prefix])
+fi
+
+dnl Do not add suffix to installed files by default.
+AC_ARG_WITH([install_suffix],
+  [AS_HELP_STRING([--with-install-suffix=<suffix>], [Suffix to append to all installed files])],
+  [INSTALL_SUFFIX="$with_install_suffix"],
+  [INSTALL_SUFFIX=]
+)
+install_suffix="$INSTALL_SUFFIX"
+AC_SUBST([install_suffix])
+
+cfgoutputs_in="${srcroot}Makefile.in"
+cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/html.xsl.in"
+cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/manpages.xsl.in"
+cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/jemalloc.xml.in"
+cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc.h.in"
+cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal.h.in"
+cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/jemalloc_test.h.in"
+
+cfgoutputs_out="Makefile"
+cfgoutputs_out="${cfgoutputs_out} doc/html.xsl"
+cfgoutputs_out="${cfgoutputs_out} doc/manpages.xsl"
+cfgoutputs_out="${cfgoutputs_out} doc/jemalloc${install_suffix}.xml"
+cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc${install_suffix}.h"
+cfgoutputs_out="${cfgoutputs_out} include/jemalloc/internal/jemalloc_internal.h"
+cfgoutputs_out="${cfgoutputs_out} test/jemalloc_test.h"
+
+cfgoutputs_tup="Makefile"
+cfgoutputs_tup="${cfgoutputs_tup} doc/html.xsl:doc/html.xsl.in"
+cfgoutputs_tup="${cfgoutputs_tup} doc/manpages.xsl:doc/manpages.xsl.in"
+cfgoutputs_tup="${cfgoutputs_tup} doc/jemalloc${install_suffix}.xml:doc/jemalloc.xml.in"
+cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc${install_suffix}.h:include/jemalloc/jemalloc.h.in"
+cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h"
+cfgoutputs_tup="${cfgoutputs_tup} test/jemalloc_test.h:test/jemalloc_test.h.in"
+
+cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in"
+
+cfghdrs_out="include/jemalloc/jemalloc_defs${install_suffix}.h"
+
+cfghdrs_tup="include/jemalloc/jemalloc_defs${install_suffix}.h:include/jemalloc/jemalloc_defs.h.in"
+
+dnl Do not silence irrelevant compiler warnings by default, since enabling this
+dnl option incurs a performance penalty.
+AC_ARG_ENABLE([cc-silence],
+  [AS_HELP_STRING([--enable-cc-silence],
+                  [Silence irrelevant compiler warnings])],
+[if test "x$enable_cc_silence" = "xno" ; then
+  enable_cc_silence="0"
+else
+  enable_cc_silence="1"
+fi
+],
+[enable_cc_silence="0"]
+)
+if test "x$enable_cc_silence" = "x1" ; then
+  AC_DEFINE([JEMALLOC_CC_SILENCE])
+fi
+
+dnl Do not compile with debugging by default.
+AC_ARG_ENABLE([debug],
+  [AS_HELP_STRING([--enable-debug], [Build debugging code])],
+[if test "x$enable_debug" = "xno" ; then
+  enable_debug="0"
+else
+  enable_debug="1"
+fi
+],
+[enable_debug="0"]
+)
+if test "x$enable_debug" = "x1" ; then
+  AC_DEFINE([JEMALLOC_DEBUG], [ ])
+  AC_DEFINE([JEMALLOC_IVSALLOC], [ ])
+fi
+AC_SUBST([enable_debug])
+
+dnl Only optimize if not debugging.
+if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then
+  dnl Make sure that an optimization flag was not specified in EXTRA_CFLAGS.
+  optimize="no"
+  echo "$EXTRA_CFLAGS" | grep "\-O" >/dev/null || optimize="yes"
+  if test "x${optimize}" = "xyes" ; then
+    if test "x$GCC" = "xyes" ; then
+      JE_CFLAGS_APPEND([-O3])
+      JE_CFLAGS_APPEND([-funroll-loops])
+    else
+      JE_CFLAGS_APPEND([-O])
+    fi
+  fi
+fi
+
+dnl Do not enable statistics calculation by default.
+AC_ARG_ENABLE([stats],
+  [AS_HELP_STRING([--enable-stats], [Enable statistics calculation/reporting])],
+[if test "x$enable_stats" = "xno" ; then
+  enable_stats="0"
+else
+  enable_stats="1"
+fi
+],
+[enable_stats="0"]
+)
+if test "x$enable_stats" = "x1" ; then
+  AC_DEFINE([JEMALLOC_STATS], [ ])
+fi
+AC_SUBST([enable_stats])
+
+dnl Do not enable profiling by default.
+AC_ARG_ENABLE([prof],
+  [AS_HELP_STRING([--enable-prof], [Enable allocation profiling])],
+[if test "x$enable_prof" = "xno" ; then
+  enable_prof="0"
+else
+  enable_prof="1"
+fi
+],
+[enable_prof="0"]
+)
+if test "x$enable_prof" = "x1" ; then
+  backtrace_method=""
+else
+  backtrace_method="N/A"
+fi
+
+AC_ARG_ENABLE([prof-libunwind],
+  [AS_HELP_STRING([--enable-prof-libunwind], [Use libunwind for backtracing])],
+[if test "x$enable_prof_libunwind" = "xno" ; then
+  enable_prof_libunwind="0"
+else
+  enable_prof_libunwind="1"
+fi
+],
+[enable_prof_libunwind="0"]
+)
+AC_ARG_WITH([static_libunwind],
+  [AS_HELP_STRING([--with-static-libunwind=<libunwind.a>],
+  [Path to static libunwind library; use rather than dynamically linking])],
+if test "x$with_static_libunwind" = "xno" ; then
+  LUNWIND="-lunwind"
+else
+  if test ! -f "$with_static_libunwind" ; then
+    AC_MSG_ERROR([Static libunwind not found: $with_static_libunwind])
+  fi
+  LUNWIND="$with_static_libunwind"
+fi,
+  LUNWIND="-lunwind"
+)
+if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then
+  AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"])
+  if test "x$LUNWIND" = "x-lunwind" ; then
+    AC_CHECK_LIB([unwind], [backtrace], [LIBS="$LIBS $LUNWIND"],
+                 [enable_prof_libunwind="0"])
+  else
+    LIBS="$LIBS $LUNWIND"
+  fi
+  if test "x${enable_prof_libunwind}" = "x1" ; then
+    backtrace_method="libunwind"
+    AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ])
+  fi
+fi
+
+AC_ARG_ENABLE([prof-libgcc],
+  [AS_HELP_STRING([--disable-prof-libgcc],
+  [Do not use libgcc for backtracing])],
+[if test "x$enable_prof_libgcc" = "xno" ; then
+  enable_prof_libgcc="0"
+else
+  enable_prof_libgcc="1"
+fi
+],
+[enable_prof_libgcc="1"]
+)
+if test "x$backtrace_method" = "x" -a "x$enable_prof_libgcc" = "x1" \
+     -a "x$GCC" = "xyes" ; then
+  AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"])
+  AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"])
+  dnl The following is conservative, in that it only has entries for CPUs on
+  dnl which jemalloc has been tested.
+  AC_MSG_CHECKING([libgcc-based backtracing reliability on ${host_cpu}])
+  case "${host_cpu}" in
+    i[[3456]]86)
+      AC_MSG_RESULT([unreliable])
+      enable_prof_libgcc="0";
+      ;;
+    x86_64)
+      AC_MSG_RESULT([reliable])
+      ;;
+    *)
+      AC_MSG_RESULT([unreliable])
+      enable_prof_libgcc="0";
+      ;;
+  esac
+  if test "x${enable_prof_libgcc}" = "x1" ; then
+    backtrace_method="libgcc"
+    AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ])
+  fi
+else
+  enable_prof_libgcc="0"
+fi
+
+AC_ARG_ENABLE([prof-gcc],
+  [AS_HELP_STRING([--disable-prof-gcc],
+  [Do not use gcc intrinsics for backtracing])],
+[if test "x$enable_prof_gcc" = "xno" ; then
+  enable_prof_gcc="0"
+else
+  enable_prof_gcc="1"
+fi
+],
+[enable_prof_gcc="1"]
+)
+if test "x$backtrace_method" = "x" -a "x$enable_prof_gcc" = "x1" \
+     -a "x$GCC" = "xyes" ; then
+  backtrace_method="gcc intrinsics"
+  AC_DEFINE([JEMALLOC_PROF_GCC], [ ])
+else
+  enable_prof_gcc="0"
+fi
+
+if test "x$backtrace_method" = "x" ; then
+  backtrace_method="none (disabling profiling)"
+  enable_prof="0"
+fi
+AC_MSG_CHECKING([configured backtracing method])
+AC_MSG_RESULT([$backtrace_method])
+if test "x$enable_prof" = "x1" ; then
+  LIBS="$LIBS -lm"
+  AC_DEFINE([JEMALLOC_PROF], [ ])
+fi
+AC_SUBST([enable_prof])
+
+dnl Enable tiny allocations by default.
+AC_ARG_ENABLE([tiny],
+  [AS_HELP_STRING([--disable-tiny], [Disable tiny (sub-quantum) allocations])],
+[if test "x$enable_tiny" = "xno" ; then
+  enable_tiny="0"
+else
+  enable_tiny="1"
+fi
+],
+[enable_tiny="1"]
+)
+if test "x$enable_tiny" = "x1" ; then
+  AC_DEFINE([JEMALLOC_TINY], [ ])
+fi
+AC_SUBST([enable_tiny])
+
+dnl Enable thread-specific caching by default.
+AC_ARG_ENABLE([tcache],
+  [AS_HELP_STRING([--disable-tcache], [Disable per thread caches])],
+[if test "x$enable_tcache" = "xno" ; then
+  enable_tcache="0"
+else
+  enable_tcache="1"
+fi
+],
+[enable_tcache="1"]
+)
+if test "x$enable_tcache" = "x1" ; then
+  AC_DEFINE([JEMALLOC_TCACHE], [ ])
+fi
+AC_SUBST([enable_tcache])
+
+dnl Do not enable mmap()ped swap files by default.
+AC_ARG_ENABLE([swap],
+  [AS_HELP_STRING([--enable-swap], [Enable mmap()ped swap files])],
+[if test "x$enable_swap" = "xno" ; then
+  enable_swap="0"
+else
+  enable_swap="1"
+fi
+],
+[enable_swap="0"]
+)
+if test "x$enable_swap" = "x1" ; then
+  AC_DEFINE([JEMALLOC_SWAP], [ ])
+fi
+AC_SUBST([enable_swap])
+
+dnl Do not enable allocation from DSS by default.
+AC_ARG_ENABLE([dss],
+  [AS_HELP_STRING([--enable-dss], [Enable allocation from DSS])],
+[if test "x$enable_dss" = "xno" ; then
+  enable_dss="0"
+else
+  enable_dss="1"
+fi
+],
+[enable_dss="0"]
+)
+if test "x$enable_dss" = "x1" ; then
+  AC_DEFINE([JEMALLOC_DSS], [ ])
+fi
+AC_SUBST([enable_dss])
+
+dnl Do not support the junk/zero filling option by default.
+AC_ARG_ENABLE([fill],
+  [AS_HELP_STRING([--enable-fill], [Support junk/zero filling option])],
+[if test "x$enable_fill" = "xno" ; then
+  enable_fill="0"
+else
+  enable_fill="1"
+fi
+],
+[enable_fill="0"]
+)
+if test "x$enable_fill" = "x1" ; then
+  AC_DEFINE([JEMALLOC_FILL], [ ])
+fi
+AC_SUBST([enable_fill])
+
+dnl Do not support the xmalloc option by default; --enable-xmalloc (any value other than no) turns it on and defines JEMALLOC_XMALLOC.
+AC_ARG_ENABLE([xmalloc],
+  [AS_HELP_STRING([--enable-xmalloc], [Support xmalloc option])],
+[if test "x$enable_xmalloc" = "xno" ; then
+  enable_xmalloc="0"
+else
+  enable_xmalloc="1"
+fi
+],
+[enable_xmalloc="0"]
+)
+if test "x$enable_xmalloc" = "x1" ; then
+  AC_DEFINE([JEMALLOC_XMALLOC], [ ])
+fi
+AC_SUBST([enable_xmalloc])
+
+dnl Do not support the SYSV option by default; --enable-sysv (any value other than no) turns it on and defines JEMALLOC_SYSV.
+AC_ARG_ENABLE([sysv],
+  [AS_HELP_STRING([--enable-sysv], [Support SYSV semantics option])],
+[if test "x$enable_sysv" = "xno" ; then
+  enable_sysv="0"
+else
+  enable_sysv="1"
+fi
+],
+[enable_sysv="0"]
+)
+if test "x$enable_sysv" = "x1" ; then
+  AC_DEFINE([JEMALLOC_SYSV], [ ])
+fi
+AC_SUBST([enable_sysv])
+
+dnl Do not determine page shift at run time by default; --enable-dynamic-page-shift (any value other than no) defines DYNAMIC_PAGE_SHIFT.
+AC_ARG_ENABLE([dynamic_page_shift],
+  [AS_HELP_STRING([--enable-dynamic-page-shift],
+  [Determine page size at run time (don't trust configure result)])],
+[if test "x$enable_dynamic_page_shift" = "xno" ; then
+  enable_dynamic_page_shift="0"
+else
+  enable_dynamic_page_shift="1"
+fi
+],
+[enable_dynamic_page_shift="0"]
+)
+if test "x$enable_dynamic_page_shift" = "x1" ; then
+  AC_DEFINE([DYNAMIC_PAGE_SHIFT], [ ])
+fi
+AC_SUBST([enable_dynamic_page_shift])
+
+AC_MSG_CHECKING([STATIC_PAGE_SHIFT])
+AC_RUN_IFELSE([AC_LANG_PROGRAM(
+[[#include <stdio.h>
+#include <unistd.h>
+#include <strings.h>
+]], [[
+    long result;
+    FILE *f;
+
+    result = sysconf(_SC_PAGESIZE); /* page size in bytes */
+    if (result == -1) {
+	return 1;
+    }
+    f = fopen("conftest.out", "w");
+    if (f == NULL) {
+	return 1;
+    }
+    fprintf(f, "%d\n", ffs((int)result) - 1); /* ffs returns int: shift = lowest set bit - 1 */
+    fclose(f); /* f is a stdio stream: it must be released with fclose and not close(2) */
+
+    return 0;
+]])],
+              [STATIC_PAGE_SHIFT=`cat conftest.out`
+              AC_MSG_RESULT([$STATIC_PAGE_SHIFT])
+              AC_DEFINE_UNQUOTED([STATIC_PAGE_SHIFT], [$STATIC_PAGE_SHIFT])],
+              [AC_MSG_RESULT([error])])
+
+dnl ============================================================================
+dnl jemalloc configuration.
+dnl 
+
+dnl Set VERSION from git describe if the source directory has an embedded git repository, then read VERSION and split it into the major/minor/bugfix/nrev/gid fields (awk columns 1-5 after tr rewrites the . g - characters).
+if test -d "${srcroot}.git" ; then
+  git describe --long --abbrev=40 > ${srcroot}VERSION
+fi
+jemalloc_version=`cat ${srcroot}VERSION`
+jemalloc_version_major=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]1}'`
+jemalloc_version_minor=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]2}'`
+jemalloc_version_bugfix=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]3}'`
+jemalloc_version_nrev=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]4}'`
+jemalloc_version_gid=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]5}'`
+AC_SUBST([jemalloc_version])
+AC_SUBST([jemalloc_version_major])
+AC_SUBST([jemalloc_version_minor])
+AC_SUBST([jemalloc_version_bugfix])
+AC_SUBST([jemalloc_version_nrev])
+AC_SUBST([jemalloc_version_gid])
+
+dnl ============================================================================
+dnl Configure pthreads: pthread.h and libpthread are hard requirements, and -D_REENTRANT is always appended to CPPFLAGS.
+
+AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])])
+AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"],
+             [AC_MSG_ERROR([libpthread is missing])])
+
+CPPFLAGS="$CPPFLAGS -D_REENTRANT"
+
+dnl Enable lazy locking by default; when enabled, dlfcn.h and libdl become hard requirements and JEMALLOC_LAZY_LOCK is defined.
+AC_ARG_ENABLE([lazy_lock],
+  [AS_HELP_STRING([--disable-lazy-lock],
+  [Disable lazy locking (always lock, even when single-threaded)])],
+[if test "x$enable_lazy_lock" = "xno" ; then
+  enable_lazy_lock="0"
+else
+  enable_lazy_lock="1"
+fi
+],
+[enable_lazy_lock="1"]
+)
+if test "x$enable_lazy_lock" = "x1" ; then
+  AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])])
+  AC_CHECK_LIB([dl], [dlopen], [LIBS="$LIBS -ldl"],
+               [AC_MSG_ERROR([libdl is missing])])
+  AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ])
+fi
+AC_SUBST([enable_lazy_lock])
+
+AC_ARG_ENABLE([tls],
+  [AS_HELP_STRING([--disable-tls], [Disable thread-local storage (__thread keyword)])],
+[if test "x$enable_tls" = "xno" ; then
+  enable_tls="0"
+else
+  enable_tls="1"
+fi
+],
+[enable_tls="1"]
+)
+if test "x${enable_tls}" = "x1" ; then
+AC_MSG_CHECKING([for TLS])
+AC_RUN_IFELSE([AC_LANG_PROGRAM(
+[[
+    __thread int x; /* probe: does the toolchain accept and run the __thread keyword */
+]], [[
+    x = 42;
+
+    return 0;
+]])],
+              [AC_MSG_RESULT([yes])],
+              [AC_MSG_RESULT([no])
+              enable_tls="0"])
+fi
+AC_SUBST([enable_tls])
+if test "x${enable_tls}" = "x0" ; then
+  AC_DEFINE_UNQUOTED([NO_TLS], [ ])
+fi
+
+dnl ============================================================================
+dnl Check for ffsl(3), and abort configuration if not found.  This function exists on all
+dnl platforms that jemalloc currently has a chance of functioning on without
+dnl modification.
+
+AC_CHECK_FUNC([ffsl], [],
+	      [AC_MSG_ERROR([Cannot build without ffsl(3)])])
+
+dnl ============================================================================
+dnl Check for atomic(3) operations as provided on Darwin: probe that OSAtomicAdd32() and OSAtomicAdd64() compile, and define JEMALLOC_OSATOMIC if the probe reports yes.
+
+JE_COMPILABLE([Darwin OSAtomic*()], [
+#include <libkern/OSAtomic.h>
+#include <inttypes.h>
+], [
+	{
+		int32_t x32 = 0;
+		volatile int32_t *x32p = &x32;
+		OSAtomicAdd32(1, x32p);
+	}
+	{
+		int64_t x64 = 0;
+		volatile int64_t *x64p = &x64;
+		OSAtomicAdd64(1, x64p);
+	}
+], [osatomic])
+if test "x${osatomic}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_OSATOMIC])
+fi
+
+dnl ============================================================================
+dnl Check for spinlock(3) operations as provided on Darwin: probe that OSSpinLockLock() and OSSpinLockUnlock() compile, and define JEMALLOC_OSSPIN if the probe reports yes.
+
+JE_COMPILABLE([Darwin OSSpin*()], [
+#include <libkern/OSAtomic.h>
+#include <inttypes.h>
+], [
+	OSSpinLock lock = 0;
+	OSSpinLockLock(&lock);
+	OSSpinLockUnlock(&lock);
+], [osspin])
+if test "x${osspin}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_OSSPIN])
+fi
+
+dnl ============================================================================
+dnl Check for allocator-related functions that should be wrapped: define JEMALLOC_OVERRIDE_MEMALIGN / JEMALLOC_OVERRIDE_VALLOC when AC_CHECK_FUNC finds memalign / valloc.
+
+AC_CHECK_FUNC([memalign],
+	      [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN])])
+AC_CHECK_FUNC([valloc],
+	      [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC])])
+
+dnl ============================================================================
+dnl Darwin-related configuration: for the macho ABI, define JEMALLOC_IVSALLOC and JEMALLOC_ZONE, then set JEMALLOC_ZONE_VERSION to 6 if the probe below compiles and to 3 otherwise.
+
+if test "x${abi}" = "xmacho" ; then
+  AC_DEFINE([JEMALLOC_IVSALLOC])
+  AC_DEFINE([JEMALLOC_ZONE])
+
+  dnl The szone version jumped from 3 to 6 between the OS X 10.5.x and 10.6
+  dnl releases.  malloc_zone_t and malloc_introspection_t have new fields in
+  dnl 10.6, which is the only source-level indication of the change.
+  AC_MSG_CHECKING([malloc zone version])
+  AC_TRY_COMPILE([#include <stdlib.h>
+#include <malloc/malloc.h>], [
+	static malloc_zone_t zone;
+	static struct malloc_introspection_t zone_introspect;
+
+	zone.size = NULL;
+	zone.malloc = NULL;
+	zone.calloc = NULL;
+	zone.valloc = NULL;
+	zone.free = NULL;
+	zone.realloc = NULL;
+	zone.destroy = NULL;
+	zone.zone_name = "jemalloc_zone";
+	zone.batch_malloc = NULL;
+	zone.batch_free = NULL;
+	zone.introspect = &zone_introspect;
+	zone.version = 6;
+	zone.memalign = NULL;
+	zone.free_definite_size = NULL;
+
+	zone_introspect.enumerator = NULL;
+	zone_introspect.good_size = NULL;
+	zone_introspect.check = NULL;
+	zone_introspect.print = NULL;
+	zone_introspect.log = NULL;
+	zone_introspect.force_lock = NULL;
+	zone_introspect.force_unlock = NULL;
+	zone_introspect.statistics = NULL;
+	zone_introspect.zone_locked = NULL;
+], [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [6])
+    AC_MSG_RESULT([6])],
+   [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [3])
+   AC_MSG_RESULT([3])])
+fi
+
+dnl ============================================================================
+dnl Check for typedefs, structures, and compiler characteristics (currently only the stdbool.h check).
+AC_HEADER_STDBOOL
+
+dnl Process .in files: export the cfghdrs_in/cfghdrs_out lists and register the headers listed in cfghdrs_tup as config headers.
+AC_SUBST([cfghdrs_in])
+AC_SUBST([cfghdrs_out])
+AC_CONFIG_HEADERS([$cfghdrs_tup])
+
+dnl ============================================================================
+dnl Generate outputs: register the files listed in cfgoutputs_tup plus config.stamp, export the cfgoutputs lists, and run AC_OUTPUT.
+AC_CONFIG_FILES([$cfgoutputs_tup config.stamp])
+AC_SUBST([cfgoutputs_in])
+AC_SUBST([cfgoutputs_out])
+AC_OUTPUT
+
+dnl ============================================================================
+dnl Print out the results of configuration: toolchain flags, install directories, source/object roots, and the value of each enable_* feature switch.
+AC_MSG_RESULT([===============================================================================])
+AC_MSG_RESULT([jemalloc version   : $jemalloc_version])
+AC_MSG_RESULT([])
+AC_MSG_RESULT([CC                 : ${CC}])
+AC_MSG_RESULT([CPPFLAGS           : ${CPPFLAGS}])
+AC_MSG_RESULT([CFLAGS             : ${CFLAGS}])
+AC_MSG_RESULT([LDFLAGS            : ${LDFLAGS}])
+AC_MSG_RESULT([LIBS               : ${LIBS}])
+AC_MSG_RESULT([RPATH_EXTRA        : ${RPATH_EXTRA}])
+AC_MSG_RESULT([])
+AC_MSG_RESULT([XSLTPROC           : ${XSLTPROC}])
+AC_MSG_RESULT([XSLROOT            : ${XSLROOT}])
+AC_MSG_RESULT([])
+AC_MSG_RESULT([PREFIX             : ${PREFIX}])
+AC_MSG_RESULT([BINDIR             : ${BINDIR}])
+AC_MSG_RESULT([INCLUDEDIR         : ${INCLUDEDIR}])
+AC_MSG_RESULT([LIBDIR             : ${LIBDIR}])
+AC_MSG_RESULT([DATADIR            : ${DATADIR}])
+AC_MSG_RESULT([MANDIR             : ${MANDIR}])
+AC_MSG_RESULT([])
+AC_MSG_RESULT([srcroot            : ${srcroot}])
+AC_MSG_RESULT([abs_srcroot        : ${abs_srcroot}])
+AC_MSG_RESULT([objroot            : ${objroot}])
+AC_MSG_RESULT([abs_objroot        : ${abs_objroot}])
+AC_MSG_RESULT([])
+AC_MSG_RESULT([JEMALLOC_PREFIX    : ${JEMALLOC_PREFIX}])
+AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE])
+AC_MSG_RESULT([                   : ${JEMALLOC_PRIVATE_NAMESPACE}])
+AC_MSG_RESULT([install_suffix     : ${install_suffix}])
+AC_MSG_RESULT([autogen            : ${enable_autogen}])
+AC_MSG_RESULT([cc-silence         : ${enable_cc_silence}])
+AC_MSG_RESULT([debug              : ${enable_debug}])
+AC_MSG_RESULT([stats              : ${enable_stats}])
+AC_MSG_RESULT([prof               : ${enable_prof}])
+AC_MSG_RESULT([prof-libunwind     : ${enable_prof_libunwind}])
+AC_MSG_RESULT([prof-libgcc        : ${enable_prof_libgcc}])
+AC_MSG_RESULT([prof-gcc           : ${enable_prof_gcc}])
+AC_MSG_RESULT([tiny               : ${enable_tiny}])
+AC_MSG_RESULT([tcache             : ${enable_tcache}])
+AC_MSG_RESULT([fill               : ${enable_fill}])
+AC_MSG_RESULT([xmalloc            : ${enable_xmalloc}])
+AC_MSG_RESULT([sysv               : ${enable_sysv}])
+AC_MSG_RESULT([swap               : ${enable_swap}])
+AC_MSG_RESULT([dss                : ${enable_dss}])
+AC_MSG_RESULT([dynamic_page_shift : ${enable_dynamic_page_shift}])
+AC_MSG_RESULT([lazy_lock          : ${enable_lazy_lock}])
+AC_MSG_RESULT([tls                : ${enable_tls}])
+AC_MSG_RESULT([===============================================================================])
diff --git a/deps/jemalloc.orig/doc/html.xsl.in b/deps/jemalloc.orig/doc/html.xsl.in
new file mode 100644
index 00000000..a91d9746
--- /dev/null
+++ b/deps/jemalloc.orig/doc/html.xsl.in
@@ -0,0 +1,4 @@
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+  <xsl:import href="@XSLROOT@/html/docbook.xsl"/>
+  <xsl:import href="@abs_srcroot@doc/stylesheet.xsl"/>
+</xsl:stylesheet>
diff --git a/deps/jemalloc.orig/doc/jemalloc.xml.in b/deps/jemalloc.orig/doc/jemalloc.xml.in
new file mode 100644
index 00000000..7a32879a
--- /dev/null
+++ b/deps/jemalloc.orig/doc/jemalloc.xml.in
@@ -0,0 +1,2280 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<?xml-stylesheet type="text/xsl"
+        href="http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
+        "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd" [
+]>
+
+<refentry>
+  <refentryinfo>
+    <title>User Manual</title>
+    <productname>jemalloc</productname>
+    <releaseinfo role="version">@jemalloc_version@</releaseinfo>
+    <authorgroup>
+      <author>
+        <firstname>Jason</firstname>
+        <surname>Evans</surname>
+        <personblurb>Author</personblurb>
+      </author>
+    </authorgroup>
+  </refentryinfo>
+  <refmeta>
+    <refentrytitle>JEMALLOC</refentrytitle>
+    <manvolnum>3</manvolnum>
+  </refmeta>
+  <refnamediv>
+    <refdescriptor>jemalloc</refdescriptor>
+    <refname>jemalloc</refname>
+    <!-- Each refname causes a man page file to be created.  Only if this were
+         the system malloc(3) implementation would these files be appropriate.
+    <refname>malloc</refname>
+    <refname>calloc</refname>
+    <refname>posix_memalign</refname>
+    <refname>realloc</refname>
+    <refname>free</refname>
+    <refname>malloc_usable_size</refname>
+    <refname>malloc_stats_print</refname>
+    <refname>mallctl</refname>
+    <refname>mallctlnametomib</refname>
+    <refname>mallctlbymib</refname>
+    <refname>allocm</refname>
+    <refname>rallocm</refname>
+    <refname>sallocm</refname>
+    <refname>dallocm</refname>
+    -->
+    <refpurpose>general purpose memory allocation functions</refpurpose>
+  </refnamediv>
+  <refsect1 id="library">
+    <title>LIBRARY</title>
+    <para>This manual describes jemalloc @jemalloc_version@.  More information
+    can be found at the <ulink
+    url="http://www.canonware.com/jemalloc/">jemalloc website</ulink>.</para>
+  </refsect1>
+  <refsynopsisdiv>
+    <title>SYNOPSIS</title>
+    <funcsynopsis>
+      <funcsynopsisinfo>#include &lt;<filename class="headerfile">stdlib.h</filename>&gt;
+#include &lt;<filename class="headerfile">jemalloc/jemalloc.h</filename>&gt;</funcsynopsisinfo>
+      <refsect2>
+        <title>Standard API</title>
+        <funcprototype>
+          <funcdef>void *<function>malloc</function></funcdef>
+          <paramdef>size_t <parameter>size</parameter></paramdef>
+        </funcprototype>
+        <funcprototype>
+          <funcdef>void *<function>calloc</function></funcdef>
+          <paramdef>size_t <parameter>number</parameter></paramdef>
+          <paramdef>size_t <parameter>size</parameter></paramdef>
+        </funcprototype>
+        <funcprototype>
+          <funcdef>int <function>posix_memalign</function></funcdef>
+          <paramdef>void **<parameter>ptr</parameter></paramdef>
+          <paramdef>size_t <parameter>alignment</parameter></paramdef>
+          <paramdef>size_t <parameter>size</parameter></paramdef>
+        </funcprototype>
+        <funcprototype>
+          <funcdef>void *<function>realloc</function></funcdef>
+          <paramdef>void *<parameter>ptr</parameter></paramdef>
+          <paramdef>size_t <parameter>size</parameter></paramdef>
+        </funcprototype>
+        <funcprototype>
+          <funcdef>void <function>free</function></funcdef>
+          <paramdef>void *<parameter>ptr</parameter></paramdef>
+        </funcprototype>
+      </refsect2>
+      <refsect2>
+        <title>Non-standard API</title>
+        <funcprototype>
+          <funcdef>size_t <function>malloc_usable_size</function></funcdef>
+          <paramdef>const void *<parameter>ptr</parameter></paramdef>
+        </funcprototype>
+        <funcprototype>
+          <funcdef>void <function>malloc_stats_print</function></funcdef>
+          <paramdef>void <parameter>(*write_cb)</parameter>
+            <funcparams>void *, const char *</funcparams>
+          </paramdef>
+          <paramdef>void *<parameter>cbopaque</parameter></paramdef>
+          <paramdef>const char *<parameter>opts</parameter></paramdef>
+        </funcprototype>
+        <funcprototype>
+          <funcdef>int <function>mallctl</function></funcdef>
+          <paramdef>const char *<parameter>name</parameter></paramdef>
+          <paramdef>void *<parameter>oldp</parameter></paramdef>
+          <paramdef>size_t *<parameter>oldlenp</parameter></paramdef>
+          <paramdef>void *<parameter>newp</parameter></paramdef>
+          <paramdef>size_t <parameter>newlen</parameter></paramdef>
+        </funcprototype>
+        <funcprototype>
+          <funcdef>int <function>mallctlnametomib</function></funcdef>
+          <paramdef>const char *<parameter>name</parameter></paramdef>
+          <paramdef>size_t *<parameter>mibp</parameter></paramdef>
+          <paramdef>size_t *<parameter>miblenp</parameter></paramdef>
+        </funcprototype>
+        <funcprototype>
+          <funcdef>int <function>mallctlbymib</function></funcdef>
+          <paramdef>const size_t *<parameter>mib</parameter></paramdef>
+          <paramdef>size_t <parameter>miblen</parameter></paramdef>
+          <paramdef>void *<parameter>oldp</parameter></paramdef>
+          <paramdef>size_t *<parameter>oldlenp</parameter></paramdef>
+          <paramdef>void *<parameter>newp</parameter></paramdef>
+          <paramdef>size_t <parameter>newlen</parameter></paramdef>
+        </funcprototype>
+        <funcprototype>
+          <funcdef>void <function>(*malloc_message)</function></funcdef>
+          <paramdef>void *<parameter>cbopaque</parameter></paramdef>
+          <paramdef>const char *<parameter>s</parameter></paramdef>
+        </funcprototype>
+        <para><type>const char *</type><varname>malloc_conf</varname>;</para>
+      </refsect2>
+      <refsect2>
+      <title>Experimental API</title>
+        <funcprototype>
+          <funcdef>int <function>allocm</function></funcdef>
+          <paramdef>void **<parameter>ptr</parameter></paramdef>
+          <paramdef>size_t *<parameter>rsize</parameter></paramdef>
+          <paramdef>size_t <parameter>size</parameter></paramdef>
+          <paramdef>int <parameter>flags</parameter></paramdef>
+        </funcprototype>
+        <funcprototype>
+          <funcdef>int <function>rallocm</function></funcdef>
+          <paramdef>void **<parameter>ptr</parameter></paramdef>
+          <paramdef>size_t *<parameter>rsize</parameter></paramdef>
+          <paramdef>size_t <parameter>size</parameter></paramdef>
+          <paramdef>size_t <parameter>extra</parameter></paramdef>
+          <paramdef>int <parameter>flags</parameter></paramdef>
+        </funcprototype>
+        <funcprototype>
+          <funcdef>int <function>sallocm</function></funcdef>
+          <paramdef>const void *<parameter>ptr</parameter></paramdef>
+          <paramdef>size_t *<parameter>rsize</parameter></paramdef>
+          <paramdef>int <parameter>flags</parameter></paramdef>
+        </funcprototype>
+        <funcprototype>
+          <funcdef>int <function>dallocm</function></funcdef>
+          <paramdef>void *<parameter>ptr</parameter></paramdef>
+          <paramdef>int <parameter>flags</parameter></paramdef>
+        </funcprototype>
+      </refsect2>
+    </funcsynopsis>
+  </refsynopsisdiv>
+  <refsect1 id="description">
+    <title>DESCRIPTION</title>
+    <refsect2>
+      <title>Standard API</title>
+
+      <para>The <function>malloc<parameter/></function> function allocates
+      <parameter>size</parameter> bytes of uninitialized memory.  The allocated
+      space is suitably aligned (after possible pointer coercion) for storage
+      of any type of object.</para>
+
+      <para>The <function>calloc<parameter/></function> function allocates
+      space for <parameter>number</parameter> objects, each
+      <parameter>size</parameter> bytes in length.  The result is identical to
+      calling <function>malloc<parameter/></function> with an argument of
+      <parameter>number</parameter> * <parameter>size</parameter>, with the
+      exception that the allocated memory is explicitly initialized to zero
+      bytes.</para>
+
+      <para>The <function>posix_memalign<parameter/></function> function
+      allocates <parameter>size</parameter> bytes of memory such that the
+      allocation's base address is an even multiple of
+      <parameter>alignment</parameter>, and returns the allocation in the value
+      pointed to by <parameter>ptr</parameter>.  The requested
+      <parameter>alignment</parameter> must be a power of 2 at least as large
+      as <code language="C">sizeof(<type>void *</type>)</code>.</para>
+
+      <para>The <function>realloc<parameter/></function> function changes the
+      size of the previously allocated memory referenced by
+      <parameter>ptr</parameter> to <parameter>size</parameter> bytes.  The
+      contents of the memory are unchanged up to the lesser of the new and old
+      sizes.  If the new size is larger, the contents of the newly allocated
+      portion of the memory are undefined.  Upon success, the memory referenced
+      by <parameter>ptr</parameter> is freed and a pointer to the newly
+      allocated memory is returned.  Note that
+      <function>realloc<parameter/></function> may move the memory allocation,
+      resulting in a different return value than <parameter>ptr</parameter>.
+      If <parameter>ptr</parameter> is <constant>NULL</constant>, the
+      <function>realloc<parameter/></function> function behaves identically to
+      <function>malloc<parameter/></function> for the specified size.</para>
+
+      <para>The <function>free<parameter/></function> function causes the
+      allocated memory referenced by <parameter>ptr</parameter> to be made
+      available for future allocations.  If <parameter>ptr</parameter> is
+      <constant>NULL</constant>, no action occurs.</para>
+    </refsect2>
+    <refsect2>
+      <title>Non-standard API</title>
+
+      <para>The <function>malloc_usable_size<parameter/></function> function
+      returns the usable size of the allocation pointed to by
+      <parameter>ptr</parameter>.  The return value may be larger than the size
+      that was requested during allocation.  The
+      <function>malloc_usable_size<parameter/></function> function is not a
+      mechanism for in-place <function>realloc<parameter/></function>; rather
+      it is provided solely as a tool for introspection purposes.  Any
+      discrepancy between the requested allocation size and the size reported
+      by <function>malloc_usable_size<parameter/></function> should not be
+      depended on, since such behavior is entirely implementation-dependent.
+      </para>
+
+      <para>The <function>malloc_stats_print<parameter/></function> function
+      writes human-readable summary statistics via the
+      <parameter>write_cb</parameter> callback function pointer and
+      <parameter>cbopaque</parameter> data passed to
+      <parameter>write_cb</parameter>, or
+      <function>malloc_message<parameter/></function> if
+      <parameter>write_cb</parameter> is <constant>NULL</constant>.  This
+      function can be called repeatedly.  General information that never
+      changes during execution can be omitted by specifying "g" as a character
+      within the <parameter>opts</parameter> string.  Note that
+      <function>malloc_stats_print<parameter/></function> uses the
+      <function>mallctl*<parameter/></function> functions internally, so
+      inconsistent statistics can be reported if multiple threads use these
+      functions simultaneously.  If <option>--enable-stats</option> is
+      specified during configuration, &ldquo;m&rdquo; and &ldquo;a&rdquo; can
+      be specified to omit merged arena and per arena statistics, respectively;
+      &ldquo;b&rdquo; and &ldquo;l&rdquo; can be specified to omit per size
+      class statistics for bins and large objects, respectively.  Unrecognized
+      characters are silently ignored.  Note that thread caching may prevent
+      some statistics from being completely up to date, since extra locking
+      would be required to merge counters that track thread cache operations.
+      </para>
+
+      <para>The <function>mallctl<parameter/></function> function provides a
+      general interface for introspecting the memory allocator, as well as
+      setting modifiable parameters and triggering actions.  The
+      period-separated <parameter>name</parameter> argument specifies a
+      location in a tree-structured namespace; see the <xref
+      linkend="mallctl_namespace" xrefstyle="template:%t"/> section for
+      documentation on the tree contents.  To read a value, pass a pointer via
+      <parameter>oldp</parameter> to adequate space to contain the value, and a
+      pointer to its length via <parameter>oldlenp</parameter>; otherwise pass
+      <constant>NULL</constant> and <constant>NULL</constant>.  Similarly, to
+      write a value, pass a pointer to the value via
+      <parameter>newp</parameter>, and its length via
+      <parameter>newlen</parameter>; otherwise pass <constant>NULL</constant>
+      and <constant>0</constant>.</para>
+
+      <para>The <function>mallctlnametomib<parameter/></function> function
+      provides a way to avoid repeated name lookups for applications that
+      repeatedly query the same portion of the namespace, by translating a name
+      to a &ldquo;Management Information Base&rdquo; (MIB) that can be passed
+      repeatedly to <function>mallctlbymib<parameter/></function>.  Upon
+      successful return from <function>mallctlnametomib<parameter/></function>,
+      <parameter>mibp</parameter> contains an array of
+      <parameter>*miblenp</parameter> integers, where
+      <parameter>*miblenp</parameter> is the lesser of the number of components
+      in <parameter>name</parameter> and the input value of
+      <parameter>*miblenp</parameter>.  Thus it is possible to pass a
+      <parameter>*miblenp</parameter> that is smaller than the number of
+      period-separated name components, which results in a partial MIB that can
+      be used as the basis for constructing a complete MIB.  For name
+      components that are integers (e.g. the 2 in
+      <link
+      linkend="arenas.bin.i.size"><mallctl>arenas.bin.2.size</mallctl></link>),
+      the corresponding MIB component will always be that integer.  Therefore,
+      it is legitimate to construct code like the following: <programlisting
+      language="C"><![CDATA[
+unsigned nbins, i;
+
+size_t mib[4];
+size_t len, miblen;
+
+len = sizeof(nbins);
+mallctl("arenas.nbins", &nbins, &len, NULL, 0);
+
+miblen = 4;
+mallctlnametomib("arenas.bin.0.size", mib, &miblen);
+for (i = 0; i < nbins; i++) {
+	size_t bin_size;
+
+	mib[2] = i;
+	len = sizeof(bin_size);
+	mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0);
+	/* Do something with bin_size... */
+}]]></programlisting></para>
+    </refsect2>
+    <refsect2>
+      <title>Experimental API</title>
+      <para>The experimental API is subject to change or removal without regard
+      for backward compatibility.</para>
+
+      <para>The <function>allocm<parameter/></function>,
+      <function>rallocm<parameter/></function>,
+      <function>sallocm<parameter/></function>, and
+      <function>dallocm<parameter/></function> functions all have a
+      <parameter>flags</parameter> argument that can be used to specify
+      options.  The functions only check the options that are contextually
+      relevant.  Use bitwise or (<code language="C">|</code>) operations to
+      specify one or more of the following:
+        <variablelist>
+          <varlistentry>
+            <term><constant>ALLOCM_LG_ALIGN(<parameter>la</parameter>)
+            </constant></term>
+
+            <listitem><para>Align the memory allocation to start at an address
+            that is a multiple of <code language="C">(1 &lt;&lt;
+            <parameter>la</parameter>)</code>.  This macro does not validate
+            that <parameter>la</parameter> is within the valid
+            range.</para></listitem>
+          </varlistentry>
+          <varlistentry>
+            <term><constant>ALLOCM_ALIGN(<parameter>a</parameter>)
+            </constant></term>
+
+            <listitem><para>Align the memory allocation to start at an address
+            that is a multiple of <parameter>a</parameter>, where
+            <parameter>a</parameter> is a power of two.  This macro does not
+            validate that <parameter>a</parameter> is a power of 2.
+            </para></listitem>
+          </varlistentry>
+          <varlistentry>
+            <term><constant>ALLOCM_ZERO</constant></term>
+
+            <listitem><para>Initialize newly allocated memory to contain zero
+            bytes.  In the growing reallocation case, the real size prior to
+            reallocation defines the boundary between untouched bytes and those
+            that are initialized to contain zero bytes.  If this option is
+            absent, newly allocated memory is uninitialized.</para></listitem>
+          </varlistentry>
+          <varlistentry>
+            <term><constant>ALLOCM_NO_MOVE</constant></term>
+
+            <listitem><para>For reallocation, fail rather than moving the
+            object.  This constraint can apply to both growth and
+            shrinkage.</para></listitem>
+          </varlistentry>
+        </variablelist>
+      </para>
+
+      <para>The <function>allocm<parameter/></function> function allocates at
+      least <parameter>size</parameter> bytes of memory, sets
+      <parameter>*ptr</parameter> to the base address of the allocation, and
+      sets <parameter>*rsize</parameter> to the real size of the allocation if
+      <parameter>rsize</parameter> is not <constant>NULL</constant>.</para>
+
+      <para>The <function>rallocm<parameter/></function> function resizes the
+      allocation at <parameter>*ptr</parameter> to be at least
+      <parameter>size</parameter> bytes, sets <parameter>*ptr</parameter> to
+      the base address of the allocation if it moved, and sets
+      <parameter>*rsize</parameter> to the real size of the allocation if
+      <parameter>rsize</parameter> is not <constant>NULL</constant>.  If
+      <parameter>extra</parameter> is non-zero, an attempt is made to resize
+      the allocation to be at least <code
+      language="C">(<parameter>size</parameter> +
+      <parameter>extra</parameter>)</code> bytes, though inability to allocate
+      the extra byte(s) will not by itself result in failure.  Behavior is
+      undefined if <code language="C">(<parameter>size</parameter> +
+      <parameter>extra</parameter> &gt;
+      <constant>SIZE_T_MAX</constant>)</code>.</para>
+
+      <para>The <function>sallocm<parameter/></function> function sets
+      <parameter>*rsize</parameter> to the real size of the allocation.</para>
+
+      <para>The <function>dallocm<parameter/></function> function causes the
+      memory referenced by <parameter>ptr</parameter> to be made available for
+      future allocations.</para>
+    </refsect2>
+  </refsect1>
+  <refsect1 id="tuning">
+    <title>TUNING</title>
+    <para>Once, when the first call is made to one of the memory allocation
+    routines, the allocator initializes its internals based in part on various
+    options that can be specified at compile- or run-time.</para>
+
+    <para>The string pointed to by the global variable
+    <varname>malloc_conf</varname>, the &ldquo;name&rdquo; of the file
+    referenced by the symbolic link named <filename
+    class="symlink">/etc/malloc.conf</filename>, and the value of the
+    environment variable <envar>MALLOC_CONF</envar>, will be interpreted, in
+    that order, from left to right as options.</para>
+
+    <para>An options string is a comma-separated list of option:value pairs.
+    There is one key corresponding to each <link
+    linkend="opt.abort"><mallctl>opt.*</mallctl></link> mallctl (see the <xref
+    linkend="mallctl_namespace" xrefstyle="template:%t"/> section for options
+    documentation).  For example, <literal>abort:true,narenas:1</literal> sets
+    the <link linkend="opt.abort"><mallctl>opt.abort</mallctl></link> and <link
+    linkend="opt.narenas"><mallctl>opt.narenas</mallctl></link> options.  Some
+    options have boolean values (true/false), others have integer values (base
+    8, 10, or 16, depending on prefix), and yet others have raw string
+    values.</para>
+  </refsect1>
+  <refsect1 id="implementation_notes">
+    <title>IMPLEMENTATION NOTES</title>
+    <para>Traditionally, allocators have used
+    <citerefentry><refentrytitle>sbrk</refentrytitle>
+    <manvolnum>2</manvolnum></citerefentry> to obtain memory, which is
+    suboptimal for several reasons, including race conditions, increased
+    fragmentation, and artificial limitations on maximum usable memory.  If
+    <option>--enable-dss</option> is specified during configuration, this
+    allocator uses both <citerefentry><refentrytitle>sbrk</refentrytitle>
+    <manvolnum>2</manvolnum></citerefentry> and
+    <citerefentry><refentrytitle>mmap</refentrytitle>
+    <manvolnum>2</manvolnum></citerefentry>, in that order of preference;
+    otherwise only <citerefentry><refentrytitle>mmap</refentrytitle>
+    <manvolnum>2</manvolnum></citerefentry> is used.</para>
+
+    <para>This allocator uses multiple arenas in order to reduce lock
+    contention for threaded programs on multi-processor systems.  This works
+    well with regard to threading scalability, but incurs some costs.  There is
+    a small fixed per-arena overhead, and additionally, arenas manage memory
+    completely independently of each other, which means a small fixed increase
+    in overall memory fragmentation.  These overheads are not generally an
+    issue, given the number of arenas normally used.  Note that using
+    substantially more arenas than the default is not likely to improve
+    performance, mainly due to reduced cache performance.  However, it may make
+    sense to reduce the number of arenas if an application does not make much
+    use of the allocation functions.</para>
+
+    <para>In addition to multiple arenas, unless
+    <option>--disable-tcache</option> is specified during configuration, this
+    allocator supports thread-specific caching for small and large objects, in
+    order to make it possible to completely avoid synchronization for most
+    allocation requests.  Such caching allows very fast allocation in the
+    common case, but it increases memory usage and fragmentation, since a
+    bounded number of objects can remain allocated in each thread cache.</para>
+
+    <para>Memory is conceptually broken into equal-sized chunks, where the
+    chunk size is a power of two that is greater than the page size.  Chunks
+    are always aligned to multiples of the chunk size.  This alignment makes it
+    possible to find metadata for user objects very quickly.</para>
+
+    <para>User objects are broken into three categories according to size:
+    small, large, and huge.  Small objects are smaller than one page.  Large
+    objects are at least one page in size, but smaller than the chunk size.
+    Huge objects are a multiple of the chunk size.  Small and large objects
+    are managed by arenas; huge objects are managed separately in a single
+    data structure that is shared by all threads.  Huge objects are used by
+    applications infrequently enough that this single data structure is not a
+    scalability issue.</para>
+
+    <para>Each chunk that is managed by an arena tracks its contents as runs of
+    contiguous pages (unused, backing a set of small objects, or backing one
+    large object).  The combination of chunk alignment and chunk page maps
+    makes it possible to determine all metadata regarding small and large
+    allocations in constant time.</para>
+
+    <para>Small objects are managed in groups by page runs.  Each run maintains
+    a frontier and free list to track which regions are in use.  Unless
+    <option>--disable-tiny</option> is specified during configuration,
+    allocation requests that are no more than half the quantum (8 or 16,
+    depending on architecture) are rounded up to the nearest power of two that
+    is at least <code language="C">sizeof(<type>void *</type>)</code>.
+    Allocation requests that are more than half the quantum, but no more than
+    the minimum cacheline-multiple size class (see the <link
+    linkend="opt.lg_qspace_max"><mallctl>opt.lg_qspace_max</mallctl></link>
+    option) are rounded up to the nearest multiple of the quantum.  Allocation
+    requests that are more than the minimum cacheline-multiple size class, but
+    no more than the minimum subpage-multiple size class (see the <link
+    linkend="opt.lg_cspace_max"><mallctl>opt.lg_cspace_max</mallctl></link>
+    option) are rounded up to the nearest multiple of the cacheline size (64).
+    Allocation requests that are more than the minimum subpage-multiple size
+    class, but no more than the maximum subpage-multiple size class are rounded
+    up to the nearest multiple of the subpage size (256).  Allocation requests
+    that are more than the maximum subpage-multiple size class, but small
+    enough to fit in an arena-managed chunk (see the <link
+    linkend="opt.lg_chunk"><mallctl>opt.lg_chunk</mallctl></link> option), are
+    rounded up to the nearest run size.  Allocation requests that are too large
+    to fit in an arena-managed chunk are rounded up to the nearest multiple of
+    the chunk size.</para>
+
+    <para>Allocations are packed tightly together, which can be an issue for
+    multi-threaded applications.  If you need to ensure that allocations do not
+    suffer from cacheline sharing, round your allocation requests up to the
+    nearest multiple of the cacheline size, or specify cacheline alignment when
+    allocating.</para>
+
+    <para>Assuming 4 MiB chunks, 4 KiB pages, and a 16-byte quantum on a 64-bit
+    system, the size classes in each category are as shown in <xref
+    linkend="size_classes" xrefstyle="template:Table %n"/>.</para>
+
+    <table xml:id="size_classes" frame="all">
+      <title>Size classes</title>
+      <tgroup cols="3" align="left" colsep="1" rowsep="1">
+      <colspec colname="c1"/>
+      <colspec colname="c2"/>
+      <colspec colname="c3"/>
+      <thead>
+        <row>
+          <entry>Category</entry>
+          <entry>Subcategory</entry>
+          <entry>Size</entry>
+        </row>
+      </thead>
+      <tbody>
+        <row>
+          <entry morerows="3">Small</entry>
+          <entry>Tiny</entry>
+          <entry>[8]</entry>
+        </row>
+        <row>
+          <entry>Quantum-spaced</entry>
+          <entry>[16, 32, 48, ..., 128]</entry>
+        </row>
+        <row>
+          <entry>Cacheline-spaced</entry>
+          <entry>[192, 256, 320, ..., 512]</entry>
+        </row>
+        <row>
+          <entry>Subpage-spaced</entry>
+          <entry>[768, 1024, 1280, ..., 3840]</entry>
+        </row>
+        <row>
+          <entry namest="c1" nameend="c2">Large</entry>
+          <entry>[4 KiB, 8 KiB, 12 KiB, ..., 4072 KiB]</entry>
+        </row>
+        <row>
+          <entry namest="c1" nameend="c2">Huge</entry>
+          <entry>[4 MiB, 8 MiB, 12 MiB, ...]</entry>
+        </row>
+      </tbody>
+      </tgroup>
+    </table>
+  </refsect1>
+  <refsect1 id="mallctl_namespace">
+    <title>MALLCTL NAMESPACE</title>
+    <para>The following names are defined in the namespace accessible via the
+    <function>mallctl*<parameter/></function> functions.  Value types are
+    specified in parentheses, their readable/writable statuses are encoded as
+    <literal>rw</literal>, <literal>r-</literal>, <literal>-w</literal>, or
+    <literal>--</literal>, and required build configuration flags follow, if
+    any.  A name element encoded as <literal>&lt;i&gt;</literal> or
+    <literal>&lt;j&gt;</literal> indicates an integer component, where the
+    integer varies from 0 to some upper value that must be determined via
+    introspection.  In the case of <mallctl>stats.arenas.&lt;i&gt;.*</mallctl>,
+    <literal>&lt;i&gt;</literal> equal to <link
+    linkend="arenas.narenas"><mallctl>arenas.narenas</mallctl></link> can be
+    used to access the summation of statistics from all arenas.  Take special
+    note of the <link linkend="epoch"><mallctl>epoch</mallctl></link> mallctl,
+    which controls refreshing of cached dynamic statistics.</para>
+
+    <variablelist>
+      <varlistentry>
+        <term>
+          <mallctl>version</mallctl>
+          (<type>const char *</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Return the jemalloc version string.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="epoch">
+        <term>
+          <mallctl>epoch</mallctl>
+          (<type>uint64_t</type>)
+          <literal>rw</literal>
+        </term>
+        <listitem><para>If a value is passed in, refresh the data from which
+        the <function>mallctl*<parameter/></function> functions report values,
+        and increment the epoch.  Return the current epoch.  This is useful for
+        detecting whether another thread caused a refresh.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>config.debug</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para><option>--enable-debug</option> was specified during
+        build configuration.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>config.dss</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para><option>--enable-dss</option> was specified during
+        build configuration.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>config.dynamic_page_shift</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para><option>--enable-dynamic-page-shift</option> was
+        specified during build configuration.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>config.fill</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para><option>--enable-fill</option> was specified during
+        build configuration.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>config.lazy_lock</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para><option>--enable-lazy-lock</option> was specified
+        during build configuration.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>config.prof</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para><option>--enable-prof</option> was specified during
+        build configuration.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>config.prof_libgcc</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para><option>--disable-prof-libgcc</option> was not
+        specified during build configuration.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>config.prof_libunwind</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para><option>--enable-prof-libunwind</option> was specified
+        during build configuration.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>config.stats</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para><option>--enable-stats</option> was specified during
+        build configuration.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>config.swap</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para><option>--enable-swap</option> was specified during
+        build configuration.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>config.sysv</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para><option>--enable-sysv</option> was specified during
+        build configuration.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>config.tcache</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para><option>--disable-tcache</option> was not specified
+        during build configuration.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>config.tiny</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para><option>--disable-tiny</option> was not specified
+        during build configuration.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>config.tls</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para><option>--disable-tls</option> was not specified during
+        build configuration.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>config.xmalloc</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para><option>--enable-xmalloc</option> was specified during
+        build configuration.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.abort">
+        <term>
+          <mallctl>opt.abort</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Abort-on-warning enabled/disabled.  If true, most
+        warnings are fatal.  The process will call
+        <citerefentry><refentrytitle>abort</refentrytitle>
+        <manvolnum>3</manvolnum></citerefentry> in these cases.  This option is
+        disabled by default unless <option>--enable-debug</option> is
+        specified during configuration, in which case it is enabled by default.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.lg_qspace_max">
+        <term>
+          <mallctl>opt.lg_qspace_max</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Size (log base 2) of the maximum size class that is a
+        multiple of the quantum (8 or 16 bytes, depending on architecture).
+        Above this size, cacheline spacing is used for size classes.  The
+        default value is 128 bytes (2^7).</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.lg_cspace_max">
+        <term>
+          <mallctl>opt.lg_cspace_max</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Size (log base 2) of the maximum size class that is a
+        multiple of the cacheline size (64).  Above this size, subpage spacing
+        (256 bytes) is used for size classes.  The default value is 512 bytes
+        (2^9).</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.lg_chunk">
+        <term>
+          <mallctl>opt.lg_chunk</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Virtual memory chunk size (log base 2).  The default
+        chunk size is 4 MiB (2^22).</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.narenas">
+        <term>
+          <mallctl>opt.narenas</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Maximum number of arenas to use.  The default maximum
+        number of arenas is four times the number of CPUs, or one if there is a
+        single CPU.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.lg_dirty_mult">
+        <term>
+          <mallctl>opt.lg_dirty_mult</mallctl>
+          (<type>ssize_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Per-arena minimum ratio (log base 2) of active to dirty
+        pages.  Some dirty unused pages may be allowed to accumulate, within
+        the limit set by the ratio (or one chunk worth of dirty pages,
+        whichever is greater), before informing the kernel about some of those
+        pages via <citerefentry><refentrytitle>madvise</refentrytitle>
+        <manvolnum>2</manvolnum></citerefentry> or a similar system call.  This
+        provides the kernel with sufficient information to recycle dirty pages
+        if physical memory becomes scarce and the pages remain unused.  The
+        default minimum ratio is 32:1 (2^5:1); an option value of -1 will
+        disable dirty page purging.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.stats_print">
+        <term>
+          <mallctl>opt.stats_print</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Enable/disable statistics printing at exit.  If
+        enabled, the <function>malloc_stats_print<parameter/></function>
+        function is called at program exit via an
+        <citerefentry><refentrytitle>atexit</refentrytitle>
+        <manvolnum>3</manvolnum></citerefentry> function.  If
+        <option>--enable-stats</option> is specified during configuration, this
+        has the potential to cause deadlock for a multi-threaded process that
+        exits while one or more threads are executing in the memory allocation
+        functions.  Therefore, this option should only be used with care; it is
+        primarily intended as a performance tuning aid during application
+        development.  This option is disabled by default.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.junk">
+        <term>
+          <mallctl>opt.junk</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+          [<option>--enable-fill</option>]
+        </term>
+        <listitem><para>Junk filling enabled/disabled.  If enabled, each byte
+        of uninitialized allocated memory will be initialized to
+        <literal>0xa5</literal>.  All deallocated memory will be initialized to
+        <literal>0x5a</literal>.  This is intended for debugging and will
+        impact performance negatively.  This option is disabled by default
+        unless <option>--enable-debug</option> is specified during
+        configuration, in which case it is enabled by default.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.zero">
+        <term>
+          <mallctl>opt.zero</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+          [<option>--enable-fill</option>]
+        </term>
+        <listitem><para>Zero filling enabled/disabled.  If enabled, each byte
+        of uninitialized allocated memory will be initialized to 0.  Note that
+        this initialization only happens once for each byte, so
+        <function>realloc<parameter/></function> and
+        <function>rallocm<parameter/></function> calls do not zero memory that
+        was previously allocated.  This is intended for debugging and will
+        impact performance negatively.  This option is disabled by default.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.sysv">
+        <term>
+          <mallctl>opt.sysv</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+          [<option>--enable-sysv</option>]
+        </term>
+        <listitem><para>If enabled, attempting to allocate zero bytes will
+        return a <constant>NULL</constant> pointer instead of a valid pointer.
+        (The default behavior is to make a minimal allocation and return a
+        pointer to it.)  This option is provided for System V compatibility.
+        This option is incompatible with the <link
+        linkend="opt.xmalloc"><mallctl>opt.xmalloc</mallctl></link> option.
+        This option is disabled by default.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.xmalloc">
+        <term>
+          <mallctl>opt.xmalloc</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+          [<option>--enable-xmalloc</option>]
+        </term>
+        <listitem><para>Abort-on-out-of-memory enabled/disabled.  If enabled,
+        rather than returning failure for any allocation function, display a
+        diagnostic message on <constant>STDERR_FILENO</constant> and cause the
+        program to drop core (using
+        <citerefentry><refentrytitle>abort</refentrytitle>
+        <manvolnum>3</manvolnum></citerefentry>).  If an application is
+        designed to depend on this behavior, set the option at compile time by
+        including the following in the source code:
+        <programlisting language="C"><![CDATA[
+malloc_conf = "xmalloc:true";]]></programlisting>
+        This option is disabled by default.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.tcache">
+        <term>
+          <mallctl>opt.tcache</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+          [<option>--enable-tcache</option>]
+        </term>
+        <listitem><para>Thread-specific caching enabled/disabled.  When there
+        are multiple threads, each thread uses a thread-specific cache for
+        objects up to a certain size.  Thread-specific caching allows many
+        allocations to be satisfied without performing any thread
+        synchronization, at the cost of increased memory use.  See the
+        <link
+        linkend="opt.lg_tcache_gc_sweep"><mallctl>opt.lg_tcache_gc_sweep</mallctl></link>
+        and <link
+        linkend="opt.lg_tcache_max"><mallctl>opt.lg_tcache_max</mallctl></link>
+        options for related tuning information.  This option is enabled by
+        default.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.lg_tcache_gc_sweep">
+        <term>
+          <mallctl>opt.lg_tcache_gc_sweep</mallctl>
+          (<type>ssize_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-tcache</option>]
+        </term>
+        <listitem><para>Approximate interval (log base 2) between full
+        thread-specific cache garbage collection sweeps, counted in terms of
+        thread-specific cache allocation/deallocation events.  Garbage
+        collection is actually performed incrementally, one size class at a
+        time, in order to avoid large collection pauses.  The default sweep
+        interval is 8192 (2^13); setting this option to -1 will disable garbage
+        collection.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.lg_tcache_max">
+        <term>
+          <mallctl>opt.lg_tcache_max</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-tcache</option>]
+        </term>
+        <listitem><para>Maximum size class (log base 2) to cache in the
+        thread-specific cache.  At a minimum, all small size classes are
+        cached, and at a maximum all large size classes are cached.  The
+        default maximum is 32 KiB (2^15).</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.prof">
+        <term>
+          <mallctl>opt.prof</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+          [<option>--enable-prof</option>]
+        </term>
+        <listitem><para>Memory profiling enabled/disabled.  If enabled, profile
+        memory allocation activity, and use an
+        <citerefentry><refentrytitle>atexit</refentrytitle>
+        <manvolnum>3</manvolnum></citerefentry> function to dump final memory
+        usage to a file named according to the pattern
+        <filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.f.heap</filename>,
+        where <literal>&lt;prefix&gt;</literal> is controlled by the <link
+        linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
+        option.  See the <link
+        linkend="opt.lg_prof_bt_max"><mallctl>opt.lg_prof_bt_max</mallctl></link>
+        option for backtrace depth control.  See the <link
+        linkend="opt.prof_active"><mallctl>opt.prof_active</mallctl></link>
+        option for on-the-fly activation/deactivation.  See the <link
+        linkend="opt.lg_prof_sample"><mallctl>opt.lg_prof_sample</mallctl></link>
+        option for probabilistic sampling control.  See the <link
+        linkend="opt.prof_accum"><mallctl>opt.prof_accum</mallctl></link>
+        option for control of cumulative sample reporting.  See the <link
+        linkend="opt.lg_prof_tcmax"><mallctl>opt.lg_prof_tcmax</mallctl></link>
+        option for control of per-thread backtrace caching.  See the <link
+        linkend="opt.lg_prof_interval"><mallctl>opt.lg_prof_interval</mallctl></link>
+        option for information on interval-triggered profile dumping, and the
+        <link linkend="opt.prof_gdump"><mallctl>opt.prof_gdump</mallctl></link>
+        option for information on high-water-triggered profile dumping.
+        Profile output is compatible with the included <command>pprof</command>
+        Perl script, which originates from the <ulink
+        url="http://code.google.com/p/google-perftools/">google-perftools
+        package</ulink>.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.prof_prefix">
+        <term>
+          <mallctl>opt.prof_prefix</mallctl>
+          (<type>const char *</type>)
+          <literal>r-</literal>
+          [<option>--enable-prof</option>]
+        </term>
+        <listitem><para>Filename prefix for profile dumps.  If the prefix is
+        set to the empty string, no automatic dumps will occur; this is
+        primarily useful for disabling the automatic final heap dump (which
+        also disables leak reporting, if enabled).  The default prefix is
+        <filename>jeprof</filename>.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.lg_prof_bt_max">
+        <term>
+          <mallctl>opt.lg_prof_bt_max</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-prof</option>]
+        </term>
+        <listitem><para>Maximum backtrace depth (log base 2) when profiling
+        memory allocation activity.  The default is 128 (2^7).</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.prof_active">
+        <term>
+          <mallctl>opt.prof_active</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+          [<option>--enable-prof</option>]
+        </term>
+        <listitem><para>Profiling activated/deactivated.  This is a secondary
+        control mechanism that makes it possible to start the application with
+        profiling enabled (see the <link
+        linkend="opt.prof"><mallctl>opt.prof</mallctl></link> option) but
+        inactive, then toggle profiling at any time during program execution
+        with the <link
+        linkend="prof.active"><mallctl>prof.active</mallctl></link> mallctl.
+        This option is enabled by default.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.lg_prof_sample">
+        <term>
+          <mallctl>opt.lg_prof_sample</mallctl>
+          (<type>ssize_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-prof</option>]
+        </term>
+        <listitem><para>Average interval (log base 2) between allocation
+        samples, as measured in bytes of allocation activity.  Increasing the
+        sampling interval decreases profile fidelity, but also decreases the
+        computational overhead.  The default sample interval is 1 (2^0) (i.e.
+        all allocations are sampled).</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.prof_accum">
+        <term>
+          <mallctl>opt.prof_accum</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+          [<option>--enable-prof</option>]
+        </term>
+        <listitem><para>Reporting of cumulative object/byte counts in profile
+        dumps enabled/disabled.  If this option is enabled, every unique
+        backtrace must be stored for the duration of execution.  Depending on
+        the application, this can impose a large memory overhead, and the
+        cumulative counts are not always of interest.  See the
+        <link
+        linkend="opt.lg_prof_tcmax"><mallctl>opt.lg_prof_tcmax</mallctl></link>
+        option for control of per-thread backtrace caching, which has important
+        interactions.  This option is enabled by default.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.lg_prof_tcmax">
+        <term>
+          <mallctl>opt.lg_prof_tcmax</mallctl>
+          (<type>ssize_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-prof</option>]
+        </term>
+        <listitem><para>Maximum per-thread backtrace cache (log base 2) used
+        for heap profiling.  A backtrace can only be discarded if the
+        <link linkend="opt.prof_accum"><mallctl>opt.prof_accum</mallctl></link>
+        option is disabled, and no thread caches currently refer to the
+        backtrace.  Therefore, a backtrace cache limit should be imposed if the
+        intention is to limit how much memory is used by backtraces.  By
+        default, no limit is imposed (encoded as -1).
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.lg_prof_interval">
+        <term>
+          <mallctl>opt.lg_prof_interval</mallctl>
+          (<type>ssize_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-prof</option>]
+        </term>
+        <listitem><para>Average interval (log base 2) between memory profile
+        dumps, as measured in bytes of allocation activity.  The actual
+        interval between dumps may be sporadic because decentralized allocation
+        counters are used to avoid synchronization bottlenecks.  Profiles are
+        dumped to files named according to the pattern
+        <filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.i&lt;iseq&gt;.heap</filename>,
+        where <literal>&lt;prefix&gt;</literal> is controlled by the
+        <link
+        linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
+        option.  By default, interval-triggered profile dumping is disabled
+        (encoded as -1).
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.prof_gdump">
+        <term>
+          <mallctl>opt.prof_gdump</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+          [<option>--enable-prof</option>]
+        </term>
+        <listitem><para>Trigger a memory profile dump every time the total
+        virtual memory exceeds the previous maximum.  Profiles are dumped to
+        files named according to the pattern
+        <filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.u&lt;useq&gt;.heap</filename>,
+        where <literal>&lt;prefix&gt;</literal> is controlled by the <link
+        linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
+        option.  This option is disabled by default.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.prof_leak">
+        <term>
+          <mallctl>opt.prof_leak</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+          [<option>--enable-prof</option>]
+        </term>
+        <listitem><para>Leak reporting enabled/disabled.  If enabled, use an
+        <citerefentry><refentrytitle>atexit</refentrytitle>
+        <manvolnum>3</manvolnum></citerefentry> function to report memory leaks
+        detected by allocation sampling.  See the
+        <link
+        linkend="opt.lg_prof_bt_max"><mallctl>opt.lg_prof_bt_max</mallctl></link>
+        option for backtrace depth control.  See the
+        <link linkend="opt.prof"><mallctl>opt.prof</mallctl></link> option for
+        information on analyzing heap profile output.  This option is disabled
+        by default.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.overcommit">
+        <term>
+          <mallctl>opt.overcommit</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+          [<option>--enable-swap</option>]
+        </term>
+        <listitem><para>Over-commit enabled/disabled.  If enabled, over-commit
+        memory as a side effect of using anonymous
+        <citerefentry><refentrytitle>mmap</refentrytitle>
+        <manvolnum>2</manvolnum></citerefentry> or
+        <citerefentry><refentrytitle>sbrk</refentrytitle>
+        <manvolnum>2</manvolnum></citerefentry> for virtual memory allocation.
+        In order for overcommit to be disabled, the <link
+        linkend="swap.fds"><mallctl>swap.fds</mallctl></link> mallctl must have
+        been successfully written to.  This option is enabled by
+        default.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>tcache.flush</mallctl>
+          (<type>void</type>)
+          <literal>--</literal>
+          [<option>--enable-tcache</option>]
+        </term>
+        <listitem><para>Flush calling thread's tcache.  This interface releases
+        all cached objects and internal data structures associated with the
+        calling thread's thread-specific cache.  Ordinarily, this interface
+        need not be called, since automatic periodic incremental garbage
+        collection occurs, and the thread cache is automatically discarded when
+        a thread exits.  However, garbage collection is triggered by allocation
+        activity, so it is possible for a thread that stops
+        allocating/deallocating to retain its cache indefinitely, in which case
+        the developer may find manual flushing useful.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>thread.arena</mallctl>
+          (<type>unsigned</type>)
+          <literal>rw</literal>
+        </term>
+        <listitem><para>Get or set the arena associated with the calling
+        thread.  The arena index must be less than the maximum number of arenas
+        (see the <link
+        linkend="arenas.narenas"><mallctl>arenas.narenas</mallctl></link>
+        mallctl).  If the specified arena was not initialized beforehand (see
+        the <link
+        linkend="arenas.initialized"><mallctl>arenas.initialized</mallctl></link>
+        mallctl), it will be automatically initialized as a side effect of
+        calling this interface.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="thread.allocated">
+        <term>
+          <mallctl>thread.allocated</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Get the total number of bytes ever allocated by the
+        calling thread.  This counter has the potential to wrap around; it is
+        up to the application to appropriately interpret the counter in such
+        cases.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>thread.allocatedp</mallctl>
+          (<type>uint64_t *</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Get a pointer to the value that is returned by the
+        <link
+        linkend="thread.allocated"><mallctl>thread.allocated</mallctl></link>
+        mallctl.  This is useful for avoiding the overhead of repeated
+        <function>mallctl*<parameter/></function> calls.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="thread.deallocated">
+        <term>
+          <mallctl>thread.deallocated</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Get the total number of bytes ever deallocated by the
+        calling thread.  This counter has the potential to wrap around; it is
+        up to the application to appropriately interpret the counter in such
+        cases.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>thread.deallocatedp</mallctl>
+          (<type>uint64_t *</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Get a pointer to the value that is returned by the
+        <link
+        linkend="thread.deallocated"><mallctl>thread.deallocated</mallctl></link>
+        mallctl.  This is useful for avoiding the overhead of repeated
+        <function>mallctl*<parameter/></function> calls.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="arenas.narenas">
+        <term>
+          <mallctl>arenas.narenas</mallctl>
+          (<type>unsigned</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Maximum number of arenas.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="arenas.initialized">
+        <term>
+          <mallctl>arenas.initialized</mallctl>
+          (<type>bool *</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>An array of <link
+        linkend="arenas.narenas"><mallctl>arenas.narenas</mallctl></link>
+        booleans.  Each boolean indicates whether the corresponding arena is
+        initialized.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.quantum</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Quantum size.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.cacheline</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Assumed cacheline size.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.subpage</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Subpage size class interval.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.pagesize</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Page size.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.chunksize</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Chunk size.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.tspace_min</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Minimum tiny size class.  Tiny size classes are powers
+        of two.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.tspace_max</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Maximum tiny size class.  Tiny size classes are powers
+        of two.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.qspace_min</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Minimum quantum-spaced size class.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.qspace_max</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Maximum quantum-spaced size class.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.cspace_min</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Minimum cacheline-spaced size class.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.cspace_max</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Maximum cacheline-spaced size class.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.sspace_min</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Minimum subpage-spaced size class.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.sspace_max</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Maximum subpage-spaced size class.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.tcache_max</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-tcache</option>]
+        </term>
+        <listitem><para>Maximum thread-cached size class.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.ntbins</mallctl>
+          (<type>unsigned</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Number of tiny bin size classes.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.nqbins</mallctl>
+          (<type>unsigned</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Number of quantum-spaced bin size
+        classes.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.ncbins</mallctl>
+          (<type>unsigned</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Number of cacheline-spaced bin size
+        classes.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.nsbins</mallctl>
+          (<type>unsigned</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Number of subpage-spaced bin size
+        classes.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.nbins</mallctl>
+          (<type>unsigned</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Total number of bin size classes.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.nhbins</mallctl>
+          (<type>unsigned</type>)
+          <literal>r-</literal>
+          [<option>--enable-tcache</option>]
+        </term>
+        <listitem><para>Total number of thread cache bin size
+        classes.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="arenas.bin.i.size">
+        <term>
+          <mallctl>arenas.bin.&lt;i&gt;.size</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Maximum size supported by size class.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.bin.&lt;i&gt;.nregs</mallctl>
+          (<type>uint32_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Number of regions per page run.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.bin.&lt;i&gt;.run_size</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Number of bytes per page run.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.nlruns</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Total number of large size classes.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.lrun.&lt;i&gt;.size</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Maximum size supported by this large size
+        class.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>arenas.purge</mallctl>
+          (<type>unsigned</type>)
+          <literal>-w</literal>
+        </term>
+        <listitem><para>Purge unused dirty pages for the specified arena, or
+        for all arenas if none is specified.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="prof.active">
+        <term>
+          <mallctl>prof.active</mallctl>
+          (<type>bool</type>)
+          <literal>rw</literal>
+          [<option>--enable-prof</option>]
+        </term>
+        <listitem><para>Control whether sampling is currently active.  See the
+        <link
+        linkend="opt.prof_active"><mallctl>opt.prof_active</mallctl></link>
+        option for additional information.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>prof.dump</mallctl>
+          (<type>const char *</type>)
+          <literal>-w</literal>
+          [<option>--enable-prof</option>]
+        </term>
+        <listitem><para>Dump a memory profile to the specified file, or if NULL
+        is specified, to a file according to the pattern
+        <filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.m&lt;mseq&gt;.heap</filename>,
+        where <literal>&lt;prefix&gt;</literal> is controlled by the
+        <link
+        linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
+        option.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>prof.interval</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-prof</option>]
+        </term>
+        <listitem><para>Average number of bytes allocated between
+        interval-based profile dumps.  See the
+        <link
+        linkend="opt.lg_prof_interval"><mallctl>opt.lg_prof_interval</mallctl></link>
+        option for additional information.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="stats.cactive">
+        <term>
+          <mallctl>stats.cactive</mallctl>
+          (<type>size_t *</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Pointer to a counter that contains an approximate count
+        of the current number of bytes in active pages.  The estimate may be
+        high, but never low, because each arena rounds up to the nearest
+        multiple of the chunk size when computing its contribution to the
+        counter.  Note that the <link
+        linkend="epoch"><mallctl>epoch</mallctl></link> mallctl has no bearing
+        on this counter.  Furthermore, counter consistency is maintained via
+        atomic operations, so it is necessary to use an atomic operation in
+        order to guarantee a consistent read when dereferencing the pointer.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry id="stats.allocated">
+        <term>
+          <mallctl>stats.allocated</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Total number of bytes allocated by the
+        application.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="stats.active">
+        <term>
+          <mallctl>stats.active</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Total number of bytes in active pages allocated by the
+        application.  This is a multiple of the page size, and greater than or
+        equal to <link
+        linkend="stats.allocated"><mallctl>stats.allocated</mallctl></link>.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.mapped</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Total number of bytes in chunks mapped on behalf of the
+        application.  This is a multiple of the chunk size, and is at least as
+        large as <link
+        linkend="stats.active"><mallctl>stats.active</mallctl></link>.  This
+        does not include inactive chunks backed by swap files.  This does not
+        include inactive chunks embedded in the DSS.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.chunks.current</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Total number of chunks actively mapped on behalf of the
+        application.  This does not include inactive chunks backed by swap
+        files.  This does not include inactive chunks embedded in the DSS.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.chunks.total</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of chunks allocated.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.chunks.high</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Maximum number of active chunks at any time thus far.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.huge.allocated</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Number of bytes currently allocated by huge objects.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.huge.nmalloc</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of huge allocation requests.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.huge.ndalloc</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of huge deallocation requests.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.nthreads</mallctl>
+          (<type>unsigned</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Number of threads currently assigned to
+        arena.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.pactive</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Number of pages in active runs.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.pdirty</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para>Number of pages within unused runs that are potentially
+        dirty, and for which <function>madvise<parameter>...</parameter>
+        <parameter><constant>MADV_DONTNEED</constant></parameter></function> or
+        similar has not been called.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.mapped</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Number of mapped bytes.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.npurge</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Number of dirty page purge sweeps performed.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.nmadvise</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Number of <function>madvise<parameter>...</parameter>
+        <parameter><constant>MADV_DONTNEED</constant></parameter></function> or
+        similar calls made to purge dirty pages.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.npurged</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Number of pages purged.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.small.allocated</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Number of bytes currently allocated by small objects.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.small.nmalloc</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of allocation requests served by
+        small bins.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.small.ndalloc</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of small objects returned to bins.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.small.nrequests</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of small allocation requests.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.large.allocated</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Number of bytes currently allocated by large objects.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.large.nmalloc</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of large allocation requests served
+        directly by the arena.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.large.ndalloc</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of large deallocation requests served
+        directly by the arena.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.large.nrequests</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of large allocation requests.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.allocated</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Current number of bytes allocated by
+        bin.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nmalloc</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of allocations served by bin.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.ndalloc</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of allocations returned to bin.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nrequests</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of allocation
+        requests.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nfills</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option> <option>--enable-tcache</option>]
+        </term>
+        <listitem><para>Cumulative number of tcache fills.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nflushes</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option> <option>--enable-tcache</option>]
+        </term>
+        <listitem><para>Cumulative number of tcache flushes.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nruns</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of runs created.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.nreruns</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of times the current run from which
+        to allocate changed.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.highruns</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Maximum number of runs at any time thus far.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.curruns</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Current number of runs.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.lruns.&lt;j&gt;.nmalloc</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of allocation requests for this size
+        class served directly by the arena.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.lruns.&lt;j&gt;.ndalloc</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of deallocation requests for this
+        size class served directly by the arena.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.lruns.&lt;j&gt;.nrequests</mallctl>
+          (<type>uint64_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Cumulative number of allocation requests for this size
+        class.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.lruns.&lt;j&gt;.highruns</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Maximum number of runs at any time thus far for this
+        size class.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>stats.arenas.&lt;i&gt;.lruns.&lt;j&gt;.curruns</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option>]
+        </term>
+        <listitem><para>Current number of runs for this size class.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>swap.avail</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-stats</option> <option>--enable-swap</option>]
+        </term>
+        <listitem><para>Number of swap file bytes that are currently not
+        associated with any chunk (i.e. mapped, but otherwise completely
+        unmanaged).</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="swap.prezeroed">
+        <term>
+          <mallctl>swap.prezeroed</mallctl>
+          (<type>bool</type>)
+          <literal>rw</literal>
+          [<option>--enable-swap</option>]
+        </term>
+        <listitem><para>If true, the allocator assumes that the swap file(s)
+        contain nothing but nil bytes.  If this assumption is violated,
+        allocator behavior is undefined.  This value becomes read-only after
+        <link linkend="swap.fds"><mallctl>swap.fds</mallctl></link> is
+        successfully written to.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>swap.nfds</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-swap</option>]
+        </term>
+        <listitem><para>Number of file descriptors in use for swap.
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry id="swap.fds">
+        <term>
+          <mallctl>swap.fds</mallctl>
+          (<type>int *</type>)
+          <literal>rw</literal>
+          [<option>--enable-swap</option>]
+        </term>
+        <listitem><para>When written to, the files associated with the
+        specified file descriptors are contiguously mapped via
+        <citerefentry><refentrytitle>mmap</refentrytitle>
+        <manvolnum>2</manvolnum></citerefentry>.  The resulting virtual memory
+        region is preferred over anonymous
+        <citerefentry><refentrytitle>mmap</refentrytitle>
+        <manvolnum>2</manvolnum></citerefentry> and
+        <citerefentry><refentrytitle>sbrk</refentrytitle>
+        <manvolnum>2</manvolnum></citerefentry> memory.  Note that if a file's
+        size is not a multiple of the page size, it is automatically truncated
+        to the nearest page size multiple.  See the
+        <link linkend="swap.prezeroed"><mallctl>swap.prezeroed</mallctl></link>
+        mallctl for specifying that the files are pre-zeroed.</para></listitem>
+      </varlistentry>
+    </variablelist>
+  </refsect1>
+  <refsect1 id="debugging_malloc_problems">
+    <title>DEBUGGING MALLOC PROBLEMS</title>
+    <para>When debugging, it is a good idea to configure/build jemalloc with
+    the <option>--enable-debug</option> and <option>--enable-fill</option>
+    options, and recompile the program with suitable options and symbols for
+    debugger support.  When so configured, jemalloc incorporates a wide variety
+    of run-time assertions that catch application errors such as double-free,
+    write-after-free, etc.</para>
+
+    <para>Programs often accidentally depend on &ldquo;uninitialized&rdquo;
+    memory actually being filled with zero bytes.  Junk filling
+    (see the <link linkend="opt.junk"><mallctl>opt.junk</mallctl></link>
+    option) tends to expose such bugs in the form of obviously incorrect
+    results and/or coredumps.  Conversely, zero
+    filling (see the <link
+    linkend="opt.zero"><mallctl>opt.zero</mallctl></link> option) eliminates
+    the symptoms of such bugs.  Between these two options, it is usually
+    possible to quickly detect, diagnose, and eliminate such bugs.</para>
+
+    <para>This implementation does not provide much detail about the problems
+    it detects, because the performance impact for storing such information
+    would be prohibitive.  There are a number of allocator implementations
+    available on the Internet which focus on detecting and pinpointing problems
+    by trading performance for extra sanity checks and detailed
+    diagnostics.</para>
+  </refsect1>
+  <refsect1 id="diagnostic_messages">
+    <title>DIAGNOSTIC MESSAGES</title>
+    <para>If any of the memory allocation/deallocation functions detect an
+    error or warning condition, a message will be printed to file descriptor
+    <constant>STDERR_FILENO</constant>.  Errors will result in the process
+    dumping core.  If the <link
+    linkend="opt.abort"><mallctl>opt.abort</mallctl></link> option is set, most
+    warnings are treated as errors.</para>
+
+    <para>The <varname>malloc_message</varname> variable allows the programmer
+    to override the function which emits the text strings forming the errors
+    and warnings if for some reason the <constant>STDERR_FILENO</constant> file
+    descriptor is not suitable for this.
+    <function>malloc_message<parameter/></function> takes the
+    <parameter>cbopaque</parameter> pointer argument that is
+    <constant>NULL</constant> unless overridden by the arguments in a call to
+    <function>malloc_stats_print<parameter/></function>, followed by a string
+    pointer.  Please note that doing anything which tries to allocate memory in
+    this function is likely to result in a crash or deadlock.</para>
+
+    <para>All messages are prefixed by
+    &ldquo;<computeroutput>&lt;jemalloc&gt;: </computeroutput>&rdquo;.</para>
+  </refsect1>
+  <refsect1 id="return_values">
+    <title>RETURN VALUES</title>
+    <refsect2>
+      <title>Standard API</title>
+      <para>The <function>malloc<parameter/></function> and
+      <function>calloc<parameter/></function> functions return a pointer to the
+      allocated memory if successful; otherwise a <constant>NULL</constant>
+      pointer is returned and <varname>errno</varname> is set to
+      <errorname>ENOMEM</errorname>.</para>
+
+      <para>The <function>posix_memalign<parameter/></function> function
+      returns the value 0 if successful; otherwise it returns an error value.
+      The <function>posix_memalign<parameter/></function> function will fail
+      if:
+        <variablelist>
+          <varlistentry>
+            <term><errorname>EINVAL</errorname></term>
+
+            <listitem><para>The <parameter>alignment</parameter> parameter is
+            not a power of 2 at least as large as
+            <code language="C">sizeof(<type>void *</type>)</code>.
+            </para></listitem>
+          </varlistentry>
+          <varlistentry>
+            <term><errorname>ENOMEM</errorname></term>
+
+            <listitem><para>Memory allocation error.</para></listitem>
+          </varlistentry>
+        </variablelist>
+      </para>
+
+      <para>The <function>realloc<parameter/></function> function returns a
+      pointer, possibly identical to <parameter>ptr</parameter>, to the
+      allocated memory if successful; otherwise a <constant>NULL</constant>
+      pointer is returned, and <varname>errno</varname> is set to
+      <errorname>ENOMEM</errorname> if the error was the result of an
+      allocation failure.  The <function>realloc<parameter/></function>
+      function always leaves the original buffer intact when an error occurs.
+      </para>
+
+      <para>The <function>free<parameter/></function> function returns no
+      value.</para>
+    </refsect2>
+    <refsect2>
+      <title>Non-standard API</title>
+      <para>The <function>malloc_usable_size<parameter/></function> function
+      returns the usable size of the allocation pointed to by
+      <parameter>ptr</parameter>.  </para>
+
+      <para>The <function>mallctl<parameter/></function>,
+      <function>mallctlnametomib<parameter/></function>, and
+      <function>mallctlbymib<parameter/></function> functions return 0 on
+      success; otherwise they return an error value.  The functions will fail
+      if:
+        <variablelist>
+          <varlistentry>
+            <term><errorname>EINVAL</errorname></term>
+
+            <listitem><para><parameter>newp</parameter> is not
+            <constant>NULL</constant>, and <parameter>newlen</parameter> is too
+            large or too small.  Alternatively, <parameter>*oldlenp</parameter>
+            is too large or too small; in this case as much data as possible
+            are read despite the error.</para></listitem>
+          </varlistentry>
+          <varlistentry>
+            <term><errorname>ENOMEM</errorname></term>
+
+            <listitem><para><parameter>*oldlenp</parameter> is too short to
+            hold the requested value.</para></listitem>
+          </varlistentry>
+          <varlistentry>
+            <term><errorname>ENOENT</errorname></term>
+
+            <listitem><para><parameter>name</parameter> or
+            <parameter>mib</parameter> specifies an unknown/invalid
+            value.</para></listitem>
+          </varlistentry>
+          <varlistentry>
+            <term><errorname>EPERM</errorname></term>
+
+            <listitem><para>Attempt to read or write void value, or attempt to
+            write read-only value.</para></listitem>
+          </varlistentry>
+          <varlistentry>
+            <term><errorname>EAGAIN</errorname></term>
+
+            <listitem><para>A memory allocation failure
+            occurred.</para></listitem>
+          </varlistentry>
+          <varlistentry>
+            <term><errorname>EFAULT</errorname></term>
+
+            <listitem><para>An interface with side effects failed in some way
+            not directly related to <function>mallctl*<parameter/></function>
+            read/write processing.</para></listitem>
+          </varlistentry>
+        </variablelist>
+      </para>
+    </refsect2>
+    <refsect2>
+      <title>Experimental API</title>
+      <para>The <function>allocm<parameter/></function>,
+      <function>rallocm<parameter/></function>,
+      <function>sallocm<parameter/></function>, and
+      <function>dallocm<parameter/></function> functions return
+      <constant>ALLOCM_SUCCESS</constant> on success; otherwise they return an
+      error value.  The <function>allocm<parameter/></function> and
+      <function>rallocm<parameter/></function> functions will fail if:
+        <variablelist>
+          <varlistentry>
+            <term><errorname>ALLOCM_ERR_OOM</errorname></term>
+
+            <listitem><para>Out of memory.  Insufficient contiguous memory was
+            available to service the allocation request.  The
+            <function>allocm<parameter/></function> function additionally sets
+            <parameter>*ptr</parameter> to <constant>NULL</constant>, whereas
+            the <function>rallocm<parameter/></function> function leaves
+            <parameter>*ptr</parameter> unmodified.</para></listitem>
+          </varlistentry>
+        </variablelist>
+      The <function>rallocm<parameter/></function> function will also
+      fail if:
+        <variablelist>
+          <varlistentry>
+            <term><errorname>ALLOCM_ERR_NOT_MOVED</errorname></term>
+
+            <listitem><para><constant>ALLOCM_NO_MOVE</constant> was specified,
+            but the reallocation request could not be serviced without moving
+            the object.</para></listitem>
+          </varlistentry>
+        </variablelist>
+      </para>
+    </refsect2>
+  </refsect1>
+  <refsect1 id="environment">
+    <title>ENVIRONMENT</title>
+    <para>The following environment variable affects the execution of the
+    allocation functions:
+      <variablelist>
+        <varlistentry>
+          <term><envar>MALLOC_CONF</envar></term>
+
+          <listitem><para>If the environment variable
+          <envar>MALLOC_CONF</envar> is set, the characters it contains
+          will be interpreted as options.</para></listitem>
+        </varlistentry>
+      </variablelist>
+    </para>
+  </refsect1>
+  <refsect1 id="examples">
+    <title>EXAMPLES</title>
+    <para>To dump core whenever a problem occurs:
+      <screen>ln -s 'abort:true' /etc/malloc.conf</screen>
+    </para>
+    <para>To specify in the source a chunk size that is 16 MiB:
+      <programlisting language="C"><![CDATA[
+malloc_conf = "lg_chunk:24";]]></programlisting></para>
+  </refsect1>
+  <refsect1 id="see_also">
+    <title>SEE ALSO</title>
+    <para><citerefentry><refentrytitle>madvise</refentrytitle>
+    <manvolnum>2</manvolnum></citerefentry>,
+    <citerefentry><refentrytitle>mmap</refentrytitle>
+    <manvolnum>2</manvolnum></citerefentry>,
+    <citerefentry><refentrytitle>sbrk</refentrytitle>
+    <manvolnum>2</manvolnum></citerefentry>,
+    <citerefentry><refentrytitle>alloca</refentrytitle>
+    <manvolnum>3</manvolnum></citerefentry>,
+    <citerefentry><refentrytitle>atexit</refentrytitle>
+    <manvolnum>3</manvolnum></citerefentry>,
+    <citerefentry><refentrytitle>getpagesize</refentrytitle>
+    <manvolnum>3</manvolnum></citerefentry></para>
+  </refsect1>
+  <refsect1 id="standards">
+    <title>STANDARDS</title>
+    <para>The <function>malloc<parameter/></function>,
+    <function>calloc<parameter/></function>,
+    <function>realloc<parameter/></function>, and
+    <function>free<parameter/></function> functions conform to ISO/IEC
+    9899:1990 (&ldquo;ISO C90&rdquo;).</para>
+
+    <para>The <function>posix_memalign<parameter/></function> function conforms
+    to IEEE Std 1003.1-2001 (&ldquo;POSIX.1&rdquo;).</para>
+  </refsect1>
+</refentry>
diff --git a/deps/jemalloc.orig/doc/manpages.xsl.in b/deps/jemalloc.orig/doc/manpages.xsl.in
new file mode 100644
index 00000000..88b2626b
--- /dev/null
+++ b/deps/jemalloc.orig/doc/manpages.xsl.in
@@ -0,0 +1,4 @@
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+  <xsl:import href="@XSLROOT@/manpages/docbook.xsl"/>
+  <xsl:import href="@abs_srcroot@doc/stylesheet.xsl"/>
+</xsl:stylesheet>
diff --git a/deps/jemalloc.orig/doc/stylesheet.xsl b/deps/jemalloc.orig/doc/stylesheet.xsl
new file mode 100644
index 00000000..4e334a86
--- /dev/null
+++ b/deps/jemalloc.orig/doc/stylesheet.xsl
@@ -0,0 +1,7 @@
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+  <xsl:param name="funcsynopsis.style">ansi</xsl:param>
+  <xsl:param name="function.parens" select="1"/>
+  <xsl:template match="mallctl">
+    "<xsl:call-template name="inline.monoseq"/>"
+  </xsl:template>
+</xsl:stylesheet>
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/arena.h b/deps/jemalloc.orig/include/jemalloc/internal/arena.h
new file mode 100644
index 00000000..b80c118d
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/arena.h
@@ -0,0 +1,743 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/*
+ * Subpages are an artificially designated partitioning of pages.  Their only
+ * purpose is to support subpage-spaced size classes.
+ *
+ * There must be at least 4 subpages per page, due to the way size classes are
+ * handled.
+ */
+#define	LG_SUBPAGE		8
+#define	SUBPAGE			((size_t)(1U << LG_SUBPAGE))
+#define	SUBPAGE_MASK		(SUBPAGE - 1)
+
+/* Return the smallest subpage multiple that is >= s. */
+#define	SUBPAGE_CEILING(s)						\
+	(((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
+
+#ifdef JEMALLOC_TINY
+   /* Smallest size class to support. */
+#  define LG_TINY_MIN		LG_SIZEOF_PTR
+#  define TINY_MIN		(1U << LG_TINY_MIN)
+#endif
+
+/*
+ * Maximum size class that is a multiple of the quantum, but not (necessarily)
+ * a power of 2.  Above this size, allocations are rounded up to the nearest
+ * cacheline-spaced size class (see the bins table in struct arena_s).
+ */
+#define	LG_QSPACE_MAX_DEFAULT	7
+
+/*
+ * Maximum size class that is a multiple of the cacheline, but not (necessarily)
+ * a power of 2.  Above this size, allocations are rounded up to the nearest
+ * subpage-spaced size class (see the bins table in struct arena_s).
+ */
+#define	LG_CSPACE_MAX_DEFAULT	9
+
+/*
+ * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
+ * as small as possible such that this setting is still honored, without
+ * violating other constraints.  The goal is to make runs as small as possible
+ * without exceeding a per run external fragmentation threshold.
+ *
+ * We use binary fixed point math for overhead computations, where the binary
+ * point is implicitly RUN_BFP bits to the left.
+ *
+ * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
+ * honored for some/all object sizes, since when heap profiling is enabled
+ * there is one pointer of header overhead per object (plus a constant).  This
+ * constraint is relaxed (ignored) for runs that are so small that the
+ * per-region overhead is greater than:
+ *
+ *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
+ */
+#define	RUN_BFP			12
+/*                                    \/   Implicit binary fixed point. */
+#define	RUN_MAX_OVRHD		0x0000003dU
+#define	RUN_MAX_OVRHD_RELAX	0x00001800U
+
+/* Maximum number of regions in one run. */
+#define	LG_RUN_MAXREGS		11
+#define	RUN_MAXREGS		(1U << LG_RUN_MAXREGS)
+
+/*
+ * The minimum ratio of active:dirty pages per arena is computed as:
+ *
+ *   (nactive >> opt_lg_dirty_mult) >= ndirty
+ *
+ * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32
+ * times as many active pages as dirty pages.
+ */
+#define	LG_DIRTY_MULT_DEFAULT	5
+
+typedef struct arena_chunk_map_s arena_chunk_map_t;
+typedef struct arena_chunk_s arena_chunk_t;
+typedef struct arena_run_s arena_run_t;
+typedef struct arena_bin_info_s arena_bin_info_t;
+typedef struct arena_bin_s arena_bin_t;
+typedef struct arena_s arena_t;
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+/* Each element of the chunk map corresponds to one page within the chunk. */
+struct arena_chunk_map_s {
+	union {
+		/*
+		 * Linkage for run trees.  There are two disjoint uses:
+		 *
+		 * 1) arena_t's runs_avail_{clean,dirty} trees.
+		 * 2) arena_run_t conceptually uses this linkage for in-use
+		 *    non-full runs, rather than directly embedding linkage.
+		 */
+		rb_node(arena_chunk_map_t)	rb_link;
+		/*
+		 * List of runs currently in purgatory.  arena_chunk_purge()
+		 * temporarily allocates runs that contain dirty pages while
+		 * purging, so that other threads cannot use the runs while the
+		 * purging thread is operating without the arena lock held.
+		 */
+		ql_elm(arena_chunk_map_t)	ql_link;
+	}				u;
+
+#ifdef JEMALLOC_PROF
+	/* Profile counters, used for large object runs. */
+	prof_ctx_t			*prof_ctx;
+#endif
+
+	/*
+	 * Run address (or size) and various flags are stored together.  The bit
+	 * layout looks like (assuming 32-bit system):
+	 *
+	 *   ???????? ???????? ????---- ----dula
+	 *
+	 * ? : Unallocated: Run address for first/last pages, unset for internal
+	 *                  pages.
+	 *     Small: Run page offset.
+	 *     Large: Run size for first page, unset for trailing pages.
+	 * - : Unused.
+	 * d : dirty?
+	 * u : unzeroed?
+	 * l : large?
+	 * a : allocated?
+	 *
+	 * Following are example bit patterns for the three types of runs.
+	 *
+	 * p : run page offset
+	 * s : run size
+	 * c : (binind+1) for size class (used only if prof_promote is true)
+	 * x : don't care
+	 * - : 0
+	 * + : 1
+	 * [DULA] : bit set
+	 * [dula] : bit unset
+	 *
+	 *   Unallocated (clean):
+	 *     ssssssss ssssssss ssss---- ----du-a
+	 *     xxxxxxxx xxxxxxxx xxxx---- -----Uxx
+	 *     ssssssss ssssssss ssss---- ----dU-a
+	 *
+	 *   Unallocated (dirty):
+	 *     ssssssss ssssssss ssss---- ----D--a
+	 *     xxxxxxxx xxxxxxxx xxxx---- ----xxxx
+	 *     ssssssss ssssssss ssss---- ----D--a
+	 *
+	 *   Small:
+	 *     pppppppp pppppppp pppp---- ----d--A
+	 *     pppppppp pppppppp pppp---- -------A
+	 *     pppppppp pppppppp pppp---- ----d--A
+	 *
+	 *   Large:
+	 *     ssssssss ssssssss ssss---- ----D-LA
+	 *     xxxxxxxx xxxxxxxx xxxx---- ----xxxx
+	 *     -------- -------- -------- ----D-LA
+	 *
+	 *   Large (sampled, size <= PAGE_SIZE):
+	 *     ssssssss ssssssss sssscccc ccccD-LA
+	 *
+	 *   Large (not sampled, size == PAGE_SIZE):
+	 *     ssssssss ssssssss ssss---- ----D-LA
+	 */
+	size_t				bits;
+#ifdef JEMALLOC_PROF
+#define	CHUNK_MAP_CLASS_SHIFT	4
+#define	CHUNK_MAP_CLASS_MASK	((size_t)0xff0U)
+#endif
+#define	CHUNK_MAP_FLAGS_MASK	((size_t)0xfU)
+#define	CHUNK_MAP_DIRTY		((size_t)0x8U)
+#define	CHUNK_MAP_UNZEROED	((size_t)0x4U)
+#define	CHUNK_MAP_LARGE		((size_t)0x2U)
+#define	CHUNK_MAP_ALLOCATED	((size_t)0x1U)
+#define	CHUNK_MAP_KEY		CHUNK_MAP_ALLOCATED
+};
+typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
+typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
+
+/* Arena chunk header. */
+struct arena_chunk_s {
+	/* Arena that owns the chunk. */
+	arena_t		*arena;
+
+	/* Linkage for the arena's chunks_dirty list. */
+	ql_elm(arena_chunk_t) link_dirty;
+
+	/*
+	 * True if the chunk is currently in the chunks_dirty list, due to
+	 * having at some point contained one or more dirty pages.  Removal
+	 * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible.
+	 */
+	bool		dirtied;
+
+	/* Number of dirty pages. */
+	size_t		ndirty;
+
+	/*
+	 * Map of pages within chunk that keeps track of free/large/small.  The
+	 * first map_bias entries are omitted, since the chunk header does not
+	 * need to be tracked in the map.  This omission saves a header page
+	 * for common chunk sizes (e.g. 4 MiB).
+	 */
+	arena_chunk_map_t map[1]; /* Dynamically sized. */
+};
+typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
+
+struct arena_run_s {
+#ifdef JEMALLOC_DEBUG
+	uint32_t	magic;
+#  define ARENA_RUN_MAGIC 0x384adf93
+#endif
+
+	/* Bin this run is associated with. */
+	arena_bin_t	*bin;
+
+	/* Index of next region that has never been allocated, or nregs. */
+	uint32_t	nextind;
+
+	/* Number of free regions in run. */
+	unsigned	nfree;
+};
+
+/*
+ * Read-only information associated with each element of arena_t's bins array
+ * is stored separately, partly to reduce memory usage (only one copy, rather
+ * than one per arena), but mainly to avoid false cacheline sharing.
+ */
+struct arena_bin_info_s {
+	/* Size of regions in a run for this bin's size class. */
+	size_t		reg_size;
+
+	/* Total size of a run for this bin's size class. */
+	size_t		run_size;
+
+	/* Total number of regions in a run for this bin's size class. */
+	uint32_t	nregs;
+
+	/*
+	 * Offset of first bitmap_t element in a run header for this bin's size
+	 * class.
+	 */
+	uint32_t	bitmap_offset;
+
+	/*
+	 * Metadata used to manipulate bitmaps for runs associated with this
+	 * bin.
+	 */
+	bitmap_info_t	bitmap_info;
+
+#ifdef JEMALLOC_PROF
+	/*
+	 * Offset of first (prof_ctx_t *) in a run header for this bin's size
+	 * class, or 0 if (opt_prof == false).
+	 */
+	uint32_t	ctx0_offset;
+#endif
+
+	/* Offset of first region in a run for this bin's size class. */
+	uint32_t	reg0_offset;
+};
+
+struct arena_bin_s {
+	/*
+	 * All operations on runcur, runs, and stats require that lock be
+	 * locked.  Run allocation/deallocation are protected by the arena lock,
+	 * which may be acquired while holding one or more bin locks, but not
+	 * vice versa.
+	 */
+	malloc_mutex_t	lock;
+
+	/*
+	 * Current run being used to service allocations of this bin's size
+	 * class.
+	 */
+	arena_run_t	*runcur;
+
+	/*
+	 * Tree of non-full runs.  This tree is used when looking for an
+	 * existing run when runcur is no longer usable.  We choose the
+	 * non-full run that is lowest in memory; this policy tends to keep
+	 * objects packed well, and it can also help reduce the number of
+	 * almost-empty chunks.
+	 */
+	arena_run_tree_t runs;
+
+#ifdef JEMALLOC_STATS
+	/* Bin statistics. */
+	malloc_bin_stats_t stats;
+#endif
+};
+
+struct arena_s {
+#ifdef JEMALLOC_DEBUG
+	uint32_t		magic;
+#  define ARENA_MAGIC 0x947d3d24
+#endif
+
+	/* This arena's index within the arenas array. */
+	unsigned		ind;
+
+	/*
+	 * Number of threads currently assigned to this arena.  This field is
+	 * protected by arenas_lock.
+	 */
+	unsigned		nthreads;
+
+	/*
+	 * There are three classes of arena operations from a locking
+	 * perspective:
+	 * 1) Thread assignment (modifies nthreads) is protected by
+	 *    arenas_lock.
+	 * 2) Bin-related operations are protected by bin locks.
+	 * 3) Chunk- and run-related operations are protected by this mutex.
+	 */
+	malloc_mutex_t		lock;
+
+#ifdef JEMALLOC_STATS
+	arena_stats_t		stats;
+#  ifdef JEMALLOC_TCACHE
+	/*
+	 * List of tcaches for extant threads associated with this arena.
+	 * Stats from these are merged incrementally, and at exit.
+	 */
+	ql_head(tcache_t)	tcache_ql;
+#  endif
+#endif
+
+#ifdef JEMALLOC_PROF
+	uint64_t		prof_accumbytes;
+#endif
+
+	/* List of dirty-page-containing chunks this arena manages. */
+	ql_head(arena_chunk_t)	chunks_dirty;
+
+	/*
+	 * In order to avoid rapid chunk allocation/deallocation when an arena
+	 * oscillates right on the cusp of needing a new chunk, cache the most
+	 * recently freed chunk.  The spare is left in the arena's chunk trees
+	 * until it is deleted.
+	 *
+	 * There is one spare chunk per arena, rather than one spare total, in
+	 * order to avoid interactions between multiple threads that could make
+	 * a single spare inadequate.
+	 */
+	arena_chunk_t		*spare;
+
+	/* Number of pages in active runs. */
+	size_t			nactive;
+
+	/*
+	 * Current count of pages within unused runs that are potentially
+	 * dirty, and for which madvise(... MADV_DONTNEED) has not been called.
+	 * By tracking this, we can institute a limit on how much dirty unused
+	 * memory is mapped for each arena.
+	 */
+	size_t			ndirty;
+
+	/*
+	 * Approximate number of pages being purged.  It is possible for
+	 * multiple threads to purge dirty pages concurrently, and they use
+	 * npurgatory to indicate the total number of pages all threads are
+	 * attempting to purge.
+	 */
+	size_t			npurgatory;
+
+	/*
+	 * Size/address-ordered trees of this arena's available runs.  The trees
+	 * are used for first-best-fit run allocation.  The dirty tree contains
+	 * runs with dirty pages (i.e. very likely to have been touched and
+	 * therefore have associated physical pages), whereas the clean tree
+	 * contains runs with pages that either have no associated physical
+	 * pages, or have pages that the kernel may recycle at any time due to
+	 * previous madvise(2) calls.  The dirty tree is used in preference to
+	 * the clean tree for allocations, because using dirty pages reduces
+	 * the amount of dirty purging necessary to keep the active:dirty page
+	 * ratio below the purge threshold.
+	 */
+	arena_avail_tree_t	runs_avail_clean;
+	arena_avail_tree_t	runs_avail_dirty;
+
+	/*
+	 * bins is used to store trees of free regions of the following sizes,
+	 * assuming a 64-bit system with 16-byte quantum, 4 KiB page size, and
+	 * default MALLOC_CONF.
+	 *
+	 *   bins[i] |   size |
+	 *   --------+--------+
+	 *        0  |      8 |
+	 *   --------+--------+
+	 *        1  |     16 |
+	 *        2  |     32 |
+	 *        3  |     48 |
+	 *           :        :
+	 *        6  |     96 |
+	 *        7  |    112 |
+	 *        8  |    128 |
+	 *   --------+--------+
+	 *        9  |    192 |
+	 *       10  |    256 |
+	 *       11  |    320 |
+	 *       12  |    384 |
+	 *       13  |    448 |
+	 *       14  |    512 |
+	 *   --------+--------+
+	 *       15  |    768 |
+	 *       16  |   1024 |
+	 *       17  |   1280 |
+	 *           :        :
+	 *       25  |   3328 |
+	 *       26  |   3584 |
+	 *       27  |   3840 |
+	 *   --------+--------+
+	 */
+	arena_bin_t		bins[1]; /* Dynamically sized. */
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern size_t	opt_lg_qspace_max;
+extern size_t	opt_lg_cspace_max;
+extern ssize_t	opt_lg_dirty_mult;
+/*
+ * small_size2bin is a compact lookup table that rounds request sizes up to
+ * size classes.  In order to reduce cache footprint, the table is compressed,
+ * and all accesses are via the SMALL_SIZE2BIN macro.
+ */
+extern uint8_t const	*small_size2bin;
+#define	SMALL_SIZE2BIN(s)	(small_size2bin[(s-1) >> LG_TINY_MIN])
+
+extern arena_bin_info_t	*arena_bin_info;
+
+/* Various bin-related settings. */
+#ifdef JEMALLOC_TINY		/* Number of (2^n)-spaced tiny bins. */
+#  define		ntbins	((unsigned)(LG_QUANTUM - LG_TINY_MIN))
+#else
+#  define		ntbins	0
+#endif
+extern unsigned		nqbins; /* Number of quantum-spaced bins. */
+extern unsigned		ncbins; /* Number of cacheline-spaced bins. */
+extern unsigned		nsbins; /* Number of subpage-spaced bins. */
+extern unsigned		nbins;
+#ifdef JEMALLOC_TINY
+#  define		tspace_max	((size_t)(QUANTUM >> 1))
+#endif
+#define			qspace_min	QUANTUM
+extern size_t		qspace_max;
+extern size_t		cspace_min;
+extern size_t		cspace_max;
+extern size_t		sspace_min;
+extern size_t		sspace_max;
+#define			small_maxclass	sspace_max
+
+#define			nlclasses (chunk_npages - map_bias)
+
+void	arena_purge_all(arena_t *arena);
+#ifdef JEMALLOC_PROF
+void	arena_prof_accum(arena_t *arena, uint64_t accumbytes);
+#endif
+#ifdef JEMALLOC_TCACHE
+void	arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
+    size_t binind
+#  ifdef JEMALLOC_PROF
+    , uint64_t prof_accumbytes
+#  endif
+    );
+#endif
+void	*arena_malloc_small(arena_t *arena, size_t size, bool zero);
+void	*arena_malloc_large(arena_t *arena, size_t size, bool zero);
+void	*arena_malloc(size_t size, bool zero);
+void	*arena_palloc(arena_t *arena, size_t size, size_t alloc_size,
+    size_t alignment, bool zero);
+size_t	arena_salloc(const void *ptr);
+#ifdef JEMALLOC_PROF
+void	arena_prof_promoted(const void *ptr, size_t size);
+size_t	arena_salloc_demote(const void *ptr);
+#endif
+void	arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+    arena_chunk_map_t *mapelm);
+void	arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
+#ifdef JEMALLOC_STATS
+void	arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
+    arena_stats_t *astats, malloc_bin_stats_t *bstats,
+    malloc_large_stats_t *lstats);
+#endif
+void	*arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
+    size_t extra, bool zero);
+void	*arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
+    size_t alignment, bool zero);
+bool	arena_new(arena_t *arena, unsigned ind);
+bool	arena_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+size_t	arena_bin_index(arena_t *arena, arena_bin_t *bin);
+unsigned	arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
+    const void *ptr);
+#  ifdef JEMALLOC_PROF
+prof_ctx_t	*arena_prof_ctx_get(const void *ptr);
+void	arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
+#  endif
+void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
+JEMALLOC_INLINE size_t
+arena_bin_index(arena_t *arena, arena_bin_t *bin)
+{
+	size_t binind = bin - arena->bins;
+	assert(binind < nbins);
+	return (binind);
+}
+
+JEMALLOC_INLINE unsigned
+arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
+{
+	unsigned shift, diff, regind;
+	size_t size;
+
+	dassert(run->magic == ARENA_RUN_MAGIC);
+	/*
+	 * Freeing a pointer lower than region zero can cause assertion
+	 * failure.
+	 */
+	assert((uintptr_t)ptr >= (uintptr_t)run +
+	    (uintptr_t)bin_info->reg0_offset);
+
+	/*
+	 * Avoid doing division with a variable divisor if possible.  Using
+	 * actual division here can reduce allocator throughput by over 20%!
+	 */
+	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run -
+	    bin_info->reg0_offset);
+
+	/* Rescale (factor powers of 2 out of the numerator and denominator). */
+	size = bin_info->reg_size;
+	shift = ffs(size) - 1;
+	diff >>= shift;
+	size >>= shift;
+
+	if (size == 1) {
+		/* The divisor was a power of 2. */
+		regind = diff;
+	} else {
+		/*
+		 * To divide by a number D that is not a power of two we
+		 * multiply by (2^21 / D) and then right shift by 21 positions.
+		 *
+		 *   X / D
+		 *
+		 * becomes
+		 *
+		 *   (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
+		 *
+		 * We can omit the first three elements, because we never
+		 * divide by 0, and 1 and 2 are both powers of two, which are
+		 * handled above.
+		 */
+#define	SIZE_INV_SHIFT	((sizeof(unsigned) << 3) - LG_RUN_MAXREGS)
+#define	SIZE_INV(s)	(((1U << SIZE_INV_SHIFT) / (s)) + 1)
+		static const unsigned size_invs[] = {
+		    SIZE_INV(3),
+		    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
+		    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
+		    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
+		    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
+		    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
+		    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
+		    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
+		};
+
+		if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
+			regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
+		else
+			regind = diff / size;
+#undef SIZE_INV
+#undef SIZE_INV_SHIFT
+	}
+	assert(diff == regind * size);
+	assert(regind < bin_info->nregs);
+
+	return (regind);
+}
+
+#ifdef JEMALLOC_PROF
+JEMALLOC_INLINE prof_ctx_t *
+arena_prof_ctx_get(const void *ptr)
+{
+	prof_ctx_t *ret;
+	arena_chunk_t *chunk;
+	size_t pageind, mapbits;
+	/* Return the profiling context stored for arena allocation ptr. */
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+	/* Find ptr's chunk, its page index in the chunk, and the map bits. */
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	mapbits = chunk->map[pageind-map_bias].bits;
+	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
+	if ((mapbits & CHUNK_MAP_LARGE) == 0) {	/* Small allocation. */
+		if (prof_promote)
+			ret = (prof_ctx_t *)(uintptr_t)1U;	/* Constant sentinel (1). */
+		else {
+			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+			    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
+			    PAGE_SHIFT));
+			size_t binind = arena_bin_index(chunk->arena, run->bin);
+			arena_bin_info_t *bin_info = &arena_bin_info[binind];
+			unsigned regind;
+			/* Read slot regind of the run's ctx pointer array. */
+			dassert(run->magic == ARENA_RUN_MAGIC);
+			regind = arena_run_regind(run, bin_info, ptr);
+			ret = *(prof_ctx_t **)((uintptr_t)run +
+			    bin_info->ctx0_offset + (regind *
+			    sizeof(prof_ctx_t *)));
+		}
+	} else	/* Otherwise the ctx is kept in the page map entry. */
+		ret = chunk->map[pageind-map_bias].prof_ctx;
+
+	return (ret);
+}
+
+JEMALLOC_INLINE void
+arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
+{
+	arena_chunk_t *chunk;
+	size_t pageind, mapbits;
+	/* Record ctx as the profiling context of arena allocation ptr. */
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+	/* Find ptr's chunk, its page index in the chunk, and the map bits. */
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	mapbits = chunk->map[pageind-map_bias].bits;
+	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
+	if ((mapbits & CHUNK_MAP_LARGE) == 0) {	/* Small allocation. */
+		if (prof_promote == false) {
+			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+			    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
+			    PAGE_SHIFT));
+			arena_bin_t *bin = run->bin;
+			size_t binind;
+			arena_bin_info_t *bin_info;
+			unsigned regind;
+
+			dassert(run->magic == ARENA_RUN_MAGIC);
+			binind = arena_bin_index(chunk->arena, bin);
+			bin_info = &arena_bin_info[binind];
+			regind = arena_run_regind(run, bin_info, ptr);
+			/* Store into slot regind of the run's ctx pointer array. */
+			*((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset
+			    + (regind * sizeof(prof_ctx_t *)))) = ctx;
+		} else	/* Nothing is stored; ctx must be the sentinel 1. */
+			assert((uintptr_t)ctx == (uintptr_t)1U);
+	} else	/* Otherwise the ctx is kept in the page map entry. */
+		chunk->map[pageind-map_bias].prof_ctx = ctx;
+}
+#endif
+
+JEMALLOC_INLINE void
+arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
+{
+	size_t pageind;
+	arena_chunk_map_t *mapelm;
+	/* Free ptr, an allocation inside chunk, which belongs to arena. */
+	assert(arena != NULL);
+	dassert(arena->magic == ARENA_MAGIC);
+	assert(chunk->arena == arena);
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+	/* The page map entry for ptr's page tells small from large. */
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	mapelm = &chunk->map[pageind-map_bias];
+	assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
+	if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
+		/* Small allocation. */
+#ifdef JEMALLOC_TCACHE
+		tcache_t *tcache;
+		/* Hand off to the thread cache when tcache_get() yields one. */
+		if ((tcache = tcache_get()) != NULL)
+			tcache_dalloc_small(tcache, ptr);
+		else {
+#endif
+			arena_run_t *run;
+			arena_bin_t *bin;
+			/* Otherwise free into the run's bin under bin->lock. */
+			run = (arena_run_t *)((uintptr_t)chunk +
+			    (uintptr_t)((pageind - (mapelm->bits >>
+			    PAGE_SHIFT)) << PAGE_SHIFT));
+			dassert(run->magic == ARENA_RUN_MAGIC);
+			bin = run->bin;
+#ifdef JEMALLOC_DEBUG
+			{	/* ptr must sit on a region boundary within the run. */
+				size_t binind = arena_bin_index(arena, bin);
+				arena_bin_info_t *bin_info =
+				    &arena_bin_info[binind];
+				assert(((uintptr_t)ptr - ((uintptr_t)run +
+				    (uintptr_t)bin_info->reg0_offset)) %
+				    bin_info->reg_size == 0);
+			}
+#endif
+			malloc_mutex_lock(&bin->lock);
+			arena_dalloc_bin(arena, chunk, ptr, mapelm);
+			malloc_mutex_unlock(&bin->lock);
+#ifdef JEMALLOC_TCACHE
+		}
+#endif
+	} else {
+#ifdef JEMALLOC_TCACHE
+		size_t size = mapelm->bits & ~PAGE_MASK;
+		/* Large allocation; size is the map bits above the page mask. */
+		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+		if (size <= tcache_maxclass) {
+			tcache_t *tcache;
+
+			if ((tcache = tcache_get()) != NULL)
+				tcache_dalloc_large(tcache, ptr, size);
+			else {
+				malloc_mutex_lock(&arena->lock);
+				arena_dalloc_large(arena, chunk, ptr);
+				malloc_mutex_unlock(&arena->lock);
+			}
+		} else {	/* size > tcache_maxclass: bypass the tcache. */
+			malloc_mutex_lock(&arena->lock);
+			arena_dalloc_large(arena, chunk, ptr);
+			malloc_mutex_unlock(&arena->lock);
+		}
+#else
+		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+		malloc_mutex_lock(&arena->lock);
+		arena_dalloc_large(arena, chunk, ptr);
+		malloc_mutex_unlock(&arena->lock);
+#endif
+	}
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/atomic.h b/deps/jemalloc.orig/include/jemalloc/internal/atomic.h
new file mode 100644
index 00000000..9a298623
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/atomic.h
@@ -0,0 +1,169 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#define	atomic_read_uint64(p)	atomic_add_uint64(p, 0)
+#define	atomic_read_uint32(p)	atomic_add_uint32(p, 0)
+
+#if (LG_SIZEOF_PTR == 3)	/* size_t ops forward to the 64-bit primitives. */
+#  define atomic_read_z(p)						\
+    ((size_t)atomic_add_uint64((uint64_t *)(p), (uint64_t)0))
+#  define atomic_add_z(p, x)						\
+    ((size_t)atomic_add_uint64((uint64_t *)(p), (uint64_t)(x)))
+#  define atomic_sub_z(p, x)						\
+    ((size_t)atomic_sub_uint64((uint64_t *)(p), (uint64_t)(x)))
+#elif (LG_SIZEOF_PTR == 2)	/* size_t ops forward to the 32-bit primitives. */
+#  define atomic_read_z(p)						\
+    ((size_t)atomic_add_uint32((uint32_t *)(p), (uint32_t)0))
+#  define atomic_add_z(p, x)						\
+    ((size_t)atomic_add_uint32((uint32_t *)(p), (uint32_t)(x)))
+#  define atomic_sub_z(p, x)						\
+    ((size_t)atomic_sub_uint32((uint32_t *)(p), (uint32_t)(x)))
+#endif
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+uint64_t	atomic_add_uint64(uint64_t *p, uint64_t x);
+uint64_t	atomic_sub_uint64(uint64_t *p, uint64_t x);
+uint32_t	atomic_add_uint32(uint32_t *p, uint32_t x);
+uint32_t	atomic_sub_uint32(uint32_t *p, uint32_t x);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
+/******************************************************************************/
+/* 64-bit operations. */
+#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
+JEMALLOC_INLINE uint64_t
+atomic_add_uint64(uint64_t *p, uint64_t x)
+{
+	/* Atomically add x to *p; the builtin yields the updated value. */
+	return (__sync_add_and_fetch(p, x));
+}
+
+JEMALLOC_INLINE uint64_t
+atomic_sub_uint64(uint64_t *p, uint64_t x)
+{
+	/* Atomically subtract x from *p; the builtin yields the updated value. */
+	return (__sync_sub_and_fetch(p, x));
+}
+#elif (defined(JEMALLOC_OSATOMIC))
+JEMALLOC_INLINE uint64_t
+atomic_add_uint64(uint64_t *p, uint64_t x)
+{
+	/* Atomically add x to *p via OSAtomicAdd64() and return its result. */
+	return (OSAtomicAdd64((int64_t)x, (int64_t *)p));
+}
+
+JEMALLOC_INLINE uint64_t
+atomic_sub_uint64(uint64_t *p, uint64_t x)
+{
+	/* Subtraction is expressed as an atomic add of the negated amount. */
+	return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p));
+}
+#elif (defined(__amd64__) || defined(__x86_64__))
+JEMALLOC_INLINE uint64_t
+atomic_add_uint64(uint64_t *p, uint64_t x)
+{
+	uint64_t t = x;	/* xaddq leaves the pre-add *p in t. */
+
+	asm volatile (
+	    "lock; xaddq %0, %1;"
+	    : "+r" (t), "=m" (*p) /* Outputs. */
+	    : "m" (*p) /* Inputs. */
+	    );
+	return (t + x);	/* Post-add value, as the __sync builtin path returns. */
+}
+
+JEMALLOC_INLINE uint64_t
+atomic_sub_uint64(uint64_t *p, uint64_t x)
+{
+	uint64_t t;
+
+	t = x = (uint64_t)0 - x;	/* Subtract by adding the unsigned negation. */
+	asm volatile (
+	    "lock; xaddq %0, %1;"
+	    : "+r" (t), "=m" (*p) /* Outputs. */
+	    : "m" (*p) /* Inputs. */
+	    );
+	return (t + x);	/* xaddq left the old *p in t; return the new value. */
+}
+#else
+#  if (LG_SIZEOF_PTR == 3)
+#    error "Missing implementation for 64-bit atomic operations"
+#  endif
+#endif
+
+/******************************************************************************/
+/* 32-bit operations. */
+#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
+JEMALLOC_INLINE uint32_t
+atomic_add_uint32(uint32_t *p, uint32_t x)
+{
+	/* Atomically add x to *p; the builtin yields the updated value. */
+	return (__sync_add_and_fetch(p, x));
+}
+
+JEMALLOC_INLINE uint32_t
+atomic_sub_uint32(uint32_t *p, uint32_t x)
+{
+	/* Atomically subtract x from *p; the builtin yields the updated value. */
+	return (__sync_sub_and_fetch(p, x));
+}
+#elif (defined(JEMALLOC_OSATOMIC))
+JEMALLOC_INLINE uint32_t
+atomic_add_uint32(uint32_t *p, uint32_t x)
+{
+	/* Atomically add x to *p via OSAtomicAdd32() and return its result. */
+	return (OSAtomicAdd32((int32_t)x, (int32_t *)p));
+}
+
+JEMALLOC_INLINE uint32_t
+atomic_sub_uint32(uint32_t *p, uint32_t x)
+{
+	/* Subtraction is expressed as an atomic add of the negated amount. */
+	return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p));
+}
+#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
+JEMALLOC_INLINE uint32_t
+atomic_add_uint32(uint32_t *p, uint32_t x)
+{
+	uint32_t t = x;	/* xaddl leaves the pre-add *p in t. */
+
+	asm volatile (
+	    "lock; xaddl %0, %1;"
+	    : "+r" (t), "=m" (*p) /* Outputs. */
+	    : "m" (*p) /* Inputs. */
+	    );
+	return (t + x);	/* Post-add value, as the __sync builtin path returns. */
+}
+
+JEMALLOC_INLINE uint32_t
+atomic_sub_uint32(uint32_t *p, uint32_t x)
+{
+	uint32_t t;
+
+	t = x = (uint32_t)0 - x;	/* Subtract by adding the unsigned negation. */
+	asm volatile (
+	    "lock; xaddl %0, %1;"
+	    : "+r" (t), "=m" (*p) /* Outputs. */
+	    : "m" (*p) /* Inputs. */
+	    );
+	return (t + x);	/* xaddl left the old *p in t; return the new value. */
+}
+#else
+#  error "Missing implementation for 32-bit atomic operations"
+#endif
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/base.h b/deps/jemalloc.orig/include/jemalloc/internal/base.h
new file mode 100644
index 00000000..e353f309
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/base.h
@@ -0,0 +1,24 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern malloc_mutex_t	base_mtx;
+
+void	*base_alloc(size_t size);
+extent_node_t *base_node_alloc(void);
+void	base_node_dealloc(extent_node_t *node);
+bool	base_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/bitmap.h b/deps/jemalloc.orig/include/jemalloc/internal/bitmap.h
new file mode 100644
index 00000000..605ebac5
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/bitmap.h
@@ -0,0 +1,184 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */
+#define	LG_BITMAP_MAXBITS	LG_RUN_MAXREGS
+
+typedef struct bitmap_level_s bitmap_level_t;
+typedef struct bitmap_info_s bitmap_info_t;
+typedef unsigned long bitmap_t;
+#define	LG_SIZEOF_BITMAP	LG_SIZEOF_LONG
+
+/* Number of bits per group. */
+#define	LG_BITMAP_GROUP_NBITS		(LG_SIZEOF_BITMAP + 3)
+#define	BITMAP_GROUP_NBITS		(ZU(1) << LG_BITMAP_GROUP_NBITS)
+#define	BITMAP_GROUP_NBITS_MASK		(BITMAP_GROUP_NBITS-1)
+
+/* Maximum number of levels possible (expansion is fully parenthesized). */
+#define	BITMAP_MAX_LEVELS						\
+    ((LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP)				\
+    + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP))
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct bitmap_level_s {
+	/* Offset of this level's groups within the array of groups. */
+	size_t group_offset;
+};
+
+struct bitmap_info_s {
+	/* Logical number of bits in bitmap (stored at bottom level). */
+	size_t nbits;
+
+	/* Number of levels necessary for nbits. */
+	unsigned nlevels;
+
+	/*
+	 * Only the first (nlevels+1) elements are used, and levels are ordered
+	 * bottom to top (e.g. the bottom level is stored in levels[0]).
+	 */
+	bitmap_level_t levels[BITMAP_MAX_LEVELS+1];
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+void	bitmap_info_init(bitmap_info_t *binfo, size_t nbits);
+size_t	bitmap_info_ngroups(const bitmap_info_t *binfo);
+size_t	bitmap_size(size_t nbits);
+void	bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+bool	bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo);
+bool	bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
+void	bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
+size_t	bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo);
+void	bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_))
+JEMALLOC_INLINE bool
+bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo)
+{
+	/* Index of the root group: one before levels[nlevels]'s offset. */
+	unsigned root = binfo->levels[binfo->nlevels].group_offset - 1;
+	/* Full iff the root group has no bits left set. */
+	return (bitmap[root] == 0);
+}
+
+JEMALLOC_INLINE bool
+bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
+{
+	/* Storage is inverted: a cleared group bit means "logically set". */
+	bitmap_t mask = 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+	bitmap_t group;
+
+	assert(bit < binfo->nbits);
+	group = bitmap[bit >> LG_BITMAP_GROUP_NBITS];
+	return ((group & mask) == 0);
+}
+
+JEMALLOC_INLINE void
+bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
+{
+	size_t goff;
+	bitmap_t *gp;
+	bitmap_t g;
+	/* Mark an unset bit as set, i.e. clear its stored group bit. */
+	assert(bit < binfo->nbits);
+	assert(bitmap_get(bitmap, binfo, bit) == false);
+	goff = bit >> LG_BITMAP_GROUP_NBITS;
+	gp = &bitmap[goff];
+	g = *gp;
+	assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)));
+	g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+	*gp = g;
+	assert(bitmap_get(bitmap, binfo, bit));
+	/* Propagate group state transitions up the tree. */
+	if (g == 0) {	/* This group just became full. */
+		unsigned i;
+		for (i = 1; i < binfo->nlevels; i++) {
+			bit = goff;
+			goff = bit >> LG_BITMAP_GROUP_NBITS;
+			gp = &bitmap[binfo->levels[i].group_offset + goff];
+			g = *gp;
+			assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)));
+			g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+			*gp = g;
+			if (g != 0)	/* Parent still has a non-full child. */
+				break;
+		}
+	}
+}
+
+/* sfu: set first unset; returns the index of the bit that was set. */
+JEMALLOC_INLINE size_t
+bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo)
+{
+	size_t bit;
+	bitmap_t g;
+	unsigned i;
+
+	assert(bitmap_full(bitmap, binfo) == false);
+	/* Start at the top level and follow the lowest set group bit down. */
+	i = binfo->nlevels - 1;
+	g = bitmap[binfo->levels[i].group_offset];
+	bit = ffsl(g) - 1;
+	while (i > 0) {
+		i--;
+		g = bitmap[binfo->levels[i].group_offset + bit];
+		bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1);
+	}
+	/* bit now indexes a bottom-level bit; mark it set and report it. */
+	bitmap_set(bitmap, binfo, bit);
+	return (bit);
+}
+
+JEMALLOC_INLINE void
+bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
+{
+	size_t goff;
+	bitmap_t *gp;
+	bitmap_t g;
+	bool propagate;
+	/* Mark a set bit as unset, i.e. raise its stored group bit again. */
+	assert(bit < binfo->nbits);
+	assert(bitmap_get(bitmap, binfo, bit));
+	goff = bit >> LG_BITMAP_GROUP_NBITS;
+	gp = &bitmap[goff];
+	g = *gp;
+	propagate = (g == 0);	/* Group was full before this change. */
+	assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0);
+	g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+	*gp = g;
+	assert(bitmap_get(bitmap, binfo, bit) == false);
+	/* Propagate group state transitions up the tree. */
+	if (propagate) {
+		unsigned i;
+		for (i = 1; i < binfo->nlevels; i++) {
+			bit = goff;
+			goff = bit >> LG_BITMAP_GROUP_NBITS;
+			gp = &bitmap[binfo->levels[i].group_offset + goff];
+			g = *gp;
+			propagate = (g == 0);	/* Parent was full as well. */
+			assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))
+			    == 0);
+			g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+			*gp = g;
+			if (propagate == false)	/* Parent was not full; stop. */
+				break;
+		}
+	}
+}
+
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/chunk.h b/deps/jemalloc.orig/include/jemalloc/internal/chunk.h
new file mode 100644
index 00000000..54b6a3ec
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/chunk.h
@@ -0,0 +1,65 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/*
+ * Size and alignment of memory chunks that are allocated by the OS's virtual
+ * memory system.
+ */
+#define	LG_CHUNK_DEFAULT	22
+
+/* Return the chunk address for allocation address a. */
+#define	CHUNK_ADDR2BASE(a)						\
+	((void *)((uintptr_t)(a) & ~chunksize_mask))
+
+/* Return the chunk offset of address a. */
+#define	CHUNK_ADDR2OFFSET(a)						\
+	((size_t)((uintptr_t)(a) & chunksize_mask))
+
+/* Return the smallest chunk multiple that is >= s. */
+#define	CHUNK_CEILING(s)						\
+	(((s) + chunksize_mask) & ~chunksize_mask)
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern size_t		opt_lg_chunk;
+#ifdef JEMALLOC_SWAP
+extern bool		opt_overcommit;
+#endif
+
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+/* Protects stats_chunks; currently not used for any other purpose. */
+extern malloc_mutex_t	chunks_mtx;
+/* Chunk statistics. */
+extern chunk_stats_t	stats_chunks;
+#endif
+
+#ifdef JEMALLOC_IVSALLOC
+extern rtree_t		*chunks_rtree;
+#endif
+
+extern size_t		chunksize;
+extern size_t		chunksize_mask; /* (chunksize - 1). */
+extern size_t		chunk_npages;
+extern size_t		map_bias; /* Number of arena chunk header pages. */
+extern size_t		arena_maxclass; /* Max size class for arenas. */
+
+void	*chunk_alloc(size_t size, bool base, bool *zero);
+void	chunk_dealloc(void *chunk, size_t size, bool unmap);
+bool	chunk_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+
+#include "jemalloc/internal/chunk_swap.h"
+#include "jemalloc/internal/chunk_dss.h"
+#include "jemalloc/internal/chunk_mmap.h"
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/chunk_dss.h b/deps/jemalloc.orig/include/jemalloc/internal/chunk_dss.h
new file mode 100644
index 00000000..6f005222
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/chunk_dss.h
@@ -0,0 +1,30 @@
+#ifdef JEMALLOC_DSS
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+/*
+ * Protects sbrk() calls.  This avoids malloc races among threads, though it
+ * does not protect against races with threads that call sbrk() directly.
+ */
+extern malloc_mutex_t	dss_mtx;
+
+void	*chunk_alloc_dss(size_t size, bool *zero);
+bool	chunk_in_dss(void *chunk);
+bool	chunk_dealloc_dss(void *chunk, size_t size);
+bool	chunk_dss_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+#endif /* JEMALLOC_DSS */
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/chunk_mmap.h b/deps/jemalloc.orig/include/jemalloc/internal/chunk_mmap.h
new file mode 100644
index 00000000..07b50a4d
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/chunk_mmap.h
@@ -0,0 +1,23 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+void	*chunk_alloc_mmap(size_t size);
+void	*chunk_alloc_mmap_noreserve(size_t size);
+void	chunk_dealloc_mmap(void *chunk, size_t size);
+
+bool	chunk_mmap_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/chunk_swap.h b/deps/jemalloc.orig/include/jemalloc/internal/chunk_swap.h
new file mode 100644
index 00000000..9faa739f
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/chunk_swap.h
@@ -0,0 +1,34 @@
+#ifdef JEMALLOC_SWAP
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern malloc_mutex_t	swap_mtx;
+extern bool		swap_enabled;
+extern bool		swap_prezeroed;
+extern size_t		swap_nfds;
+extern int		*swap_fds;
+#ifdef JEMALLOC_STATS
+extern size_t		swap_avail;
+#endif
+
+void	*chunk_alloc_swap(size_t size, bool *zero);
+bool	chunk_in_swap(void *chunk);
+bool	chunk_dealloc_swap(void *chunk, size_t size);
+bool	chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed);
+bool	chunk_swap_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+#endif /* JEMALLOC_SWAP */
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/ckh.h b/deps/jemalloc.orig/include/jemalloc/internal/ckh.h
new file mode 100644
index 00000000..3e4ad4c8
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/ckh.h
@@ -0,0 +1,95 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct ckh_s ckh_t;
+typedef struct ckhc_s ckhc_t;
+
+/* Typedefs to allow easy function pointer passing. */
+typedef void ckh_hash_t (const void *, unsigned, size_t *, size_t *);
+typedef bool ckh_keycomp_t (const void *, const void *);
+
+/* Maintain counters used to get an idea of performance. */
+/* #define	CKH_COUNT */
+/* Print counter values in ckh_delete() (requires CKH_COUNT). */
+/* #define	CKH_VERBOSE */
+
+/*
+ * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket.  Try to fit
+ * one bucket per L1 cache line.
+ */
+#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1)
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+/* Hash table cell. */
+struct ckhc_s {
+	const void	*key;
+	const void	*data;
+};
+
+struct ckh_s {
+#ifdef JEMALLOC_DEBUG
+#define	CKH_MAGIC	0x3af2489d
+	uint32_t	magic;
+#endif
+
+#ifdef CKH_COUNT
+	/* Counters used to get an idea of performance. */
+	uint64_t	ngrows;
+	uint64_t	nshrinks;
+	uint64_t	nshrinkfails;
+	uint64_t	ninserts;
+	uint64_t	nrelocs;
+#endif
+
+	/* Used for pseudo-random number generation. */
+#define	CKH_A		1103515241
+#define	CKH_C		12347
+	uint32_t	prn_state;
+
+	/* Total number of items. */
+	size_t		count;
+
+	/*
+	 * Minimum and current number of hash table buckets.  There are
+	 * 2^LG_CKH_BUCKET_CELLS cells per bucket.
+	 */
+	unsigned	lg_minbuckets;
+	unsigned	lg_curbuckets;
+
+	/* Hash and comparison functions. */
+	ckh_hash_t	*hash;
+	ckh_keycomp_t	*keycomp;
+
+	/* Hash table with 2^lg_curbuckets buckets. */
+	ckhc_t		*tab;
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+bool	ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
+    ckh_keycomp_t *keycomp);
+void	ckh_delete(ckh_t *ckh);
+size_t	ckh_count(ckh_t *ckh);
+bool	ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data);
+bool	ckh_insert(ckh_t *ckh, const void *key, const void *data);
+bool	ckh_remove(ckh_t *ckh, const void *searchkey, void **key,
+    void **data);
+bool	ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data);
+void	ckh_string_hash(const void *key, unsigned minbits, size_t *hash1,
+    size_t *hash2);
+bool	ckh_string_keycomp(const void *k1, const void *k2);
+void	ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1,
+    size_t *hash2);
+bool	ckh_pointer_keycomp(const void *k1, const void *k2);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/ctl.h b/deps/jemalloc.orig/include/jemalloc/internal/ctl.h
new file mode 100644
index 00000000..f1f5eb70
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/ctl.h
@@ -0,0 +1,118 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct ctl_node_s ctl_node_t;
+typedef struct ctl_arena_stats_s ctl_arena_stats_t;
+typedef struct ctl_stats_s ctl_stats_t;
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct ctl_node_s {
+	bool			named;
+	union {
+		struct {
+			const char	*name;
+			/* If (nchildren == 0), this is a terminal node. */
+			unsigned	nchildren;
+			const	ctl_node_t *children;
+		} named;
+		struct {
+			const ctl_node_t *(*index)(const size_t *, size_t,
+			    size_t);
+		} indexed;
+	} u;
+	int	(*ctl)(const size_t *, size_t, void *, size_t *, void *,
+	    size_t);
+};
+
+struct ctl_arena_stats_s {
+	bool			initialized;
+	unsigned		nthreads;
+	size_t			pactive;
+	size_t			pdirty;
+#ifdef JEMALLOC_STATS
+	arena_stats_t		astats;
+
+	/* Aggregate stats for small size classes, based on bin stats. */
+	size_t			allocated_small;
+	uint64_t		nmalloc_small;
+	uint64_t		ndalloc_small;
+	uint64_t		nrequests_small;
+
+	malloc_bin_stats_t	*bstats;	/* nbins elements. */
+	malloc_large_stats_t	*lstats;	/* nlclasses elements. */
+#endif
+};
+
+struct ctl_stats_s {
+#ifdef JEMALLOC_STATS
+	size_t			allocated;
+	size_t			active;
+	size_t			mapped;
+	struct {
+		size_t		current;	/* stats_chunks.curchunks */
+		uint64_t	total;		/* stats_chunks.nchunks */
+		size_t		high;		/* stats_chunks.highchunks */
+	} chunks;
+	struct {
+		size_t		allocated;	/* huge_allocated */
+		uint64_t	nmalloc;	/* huge_nmalloc */
+		uint64_t	ndalloc;	/* huge_ndalloc */
+	} huge;
+#endif
+	ctl_arena_stats_t	*arenas;	/* (narenas + 1) elements. */
+#ifdef JEMALLOC_SWAP
+	size_t			swap_avail;
+#endif
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+int	ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp,
+    size_t newlen);
+int	ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp);
+
+int	ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen);
+bool	ctl_boot(void);
+/* Call mallctl(); on failure report name and abort() (name is used twice). */
+#define	xmallctl(name, oldp, oldlenp, newp, newlen) do {		\
+	if (JEMALLOC_P(mallctl)(name, oldp, oldlenp, newp, newlen)	\
+	    != 0) {							\
+		malloc_write("<jemalloc>: Failure in xmallctl(\"");	\
+		malloc_write(name);					\
+		malloc_write("\", ...)\n");				\
+		abort();						\
+	}								\
+} while (0)
+/* Call mallctlnametomib(); on failure report name and abort(). */
+#define	xmallctlnametomib(name, mibp, miblenp) do {			\
+	if (JEMALLOC_P(mallctlnametomib)(name, mibp, miblenp) != 0) {	\
+		malloc_write(						\
+		    "<jemalloc>: Failure in xmallctlnametomib(\"");	\
+		malloc_write(name);					\
+		malloc_write("\", ...)\n");				\
+		abort();						\
+	}								\
+} while (0)
+/* Call mallctlbymib(); on failure write a fixed message and abort(). */
+#define	xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do {	\
+	if (JEMALLOC_P(mallctlbymib)(mib, miblen, oldp, oldlenp, newp,	\
+	    newlen) != 0) {						\
+		malloc_write(						\
+		    "<jemalloc>: Failure in xmallctlbymib()\n");	\
+		abort();						\
+	}								\
+} while (0)
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/extent.h b/deps/jemalloc.orig/include/jemalloc/internal/extent.h
new file mode 100644
index 00000000..6fe9702b
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/extent.h
@@ -0,0 +1,49 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct extent_node_s extent_node_t;
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+/* Tree of extents. */
+struct extent_node_s {
+#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS))
+	/* Linkage for the size/address-ordered tree. */
+	rb_node(extent_node_t)	link_szad;
+#endif
+
+	/* Linkage for the address-ordered tree. */
+	rb_node(extent_node_t)	link_ad;
+
+#ifdef JEMALLOC_PROF
+	/* Profile counters, used for huge objects. */
+	prof_ctx_t		*prof_ctx;
+#endif
+
+	/* Pointer to the extent that this tree node is responsible for. */
+	void			*addr;
+
+	/* Total region size. */
+	size_t			size;
+};
+typedef rb_tree(extent_node_t) extent_tree_t;
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS))
+rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t)
+#endif
+
+rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t)
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/hash.h b/deps/jemalloc.orig/include/jemalloc/internal/hash.h
new file mode 100644
index 00000000..8a46ce30
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/hash.h
@@ -0,0 +1,70 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+uint64_t	hash(const void *key, size_t len, uint64_t seed);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_))
+/*
+ * The following hash function is based on MurmurHash64A(), placed into the
+ * public domain by Austin Appleby.  See http://murmurhash.googlepages.com/ for
+ * details.
+ */
+JEMALLOC_INLINE uint64_t
+hash(const void *key, size_t len, uint64_t seed)
+{
+	const uint64_t m = 0xc6a4a7935bd1e995LLU;
+	const int r = 47;
+	uint64_t h = seed ^ (len * m);
+	const uint64_t *data = (const uint64_t *)key;
+	const uint64_t *end = data + (len/8);
+	const unsigned char *data2;
+	/* key is read as 64-bit words, so it must be 8-byte aligned. */
+	assert(((uintptr_t)key & 0x7) == 0);
+	/* Mix in each complete 8-byte word. */
+	while(data != end) {
+		uint64_t k = *data++;
+
+		k *= m;
+		k ^= k >> r;
+		k *= m;
+
+		h ^= k;
+		h *= m;
+	}
+	/* Mix in the trailing (len & 7) bytes; every case falls through. */
+	data2 = (const unsigned char *)data;
+	switch(len & 7) {
+		case 7: h ^= ((uint64_t)(data2[6])) << 48;	/* Fall through. */
+		case 6: h ^= ((uint64_t)(data2[5])) << 40;	/* Fall through. */
+		case 5: h ^= ((uint64_t)(data2[4])) << 32;	/* Fall through. */
+		case 4: h ^= ((uint64_t)(data2[3])) << 24;	/* Fall through. */
+		case 3: h ^= ((uint64_t)(data2[2])) << 16;	/* Fall through. */
+		case 2: h ^= ((uint64_t)(data2[1])) << 8;	/* Fall through. */
+		case 1: h ^= ((uint64_t)(data2[0]));
+			h *= m;
+	}
+	/* Final mixing of the accumulated state. */
+	h ^= h >> r;
+	h *= m;
+	h ^= h >> r;
+
+	return (h);
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/huge.h b/deps/jemalloc.orig/include/jemalloc/internal/huge.h
new file mode 100644
index 00000000..66544cf8
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/huge.h
@@ -0,0 +1,41 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#ifdef JEMALLOC_STATS
+/* Huge allocation statistics. */
+extern uint64_t		huge_nmalloc;
+extern uint64_t		huge_ndalloc;
+extern size_t		huge_allocated;
+#endif
+
+/* Protects chunk-related data structures. */
+extern malloc_mutex_t	huge_mtx;
+
+void	*huge_malloc(size_t size, bool zero);
+void	*huge_palloc(size_t size, size_t alignment, bool zero);
+void	*huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
+    size_t extra);
+void	*huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
+    size_t alignment, bool zero);
+void	huge_dalloc(void *ptr, bool unmap);
+size_t	huge_salloc(const void *ptr);
+#ifdef JEMALLOC_PROF
+prof_ctx_t	*huge_prof_ctx_get(const void *ptr);
+void	huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
+#endif
+bool	huge_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/jemalloc_internal.h.in b/deps/jemalloc.orig/include/jemalloc/internal/jemalloc_internal.h.in
new file mode 100644
index 00000000..a44f0978
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/jemalloc_internal.h.in
@@ -0,0 +1,788 @@
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <sys/uio.h>
+
+#include <errno.h>
+#include <limits.h>
+#ifndef SIZE_T_MAX
+#  define SIZE_T_MAX	SIZE_MAX
+#endif
+#include <pthread.h>
+#include <sched.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stddef.h>
+#ifndef offsetof
+#  define offsetof(type, member)	((size_t)&(((type *)NULL)->member))
+#endif
+#include <inttypes.h>
+#include <string.h>
+#include <strings.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <math.h>
+
+#define	JEMALLOC_MANGLE
+#include "../jemalloc@install_suffix@.h"
+
+#include "jemalloc/internal/private_namespace.h"
+
+#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN))
+#include <libkern/OSAtomic.h>
+#endif
+
+#ifdef JEMALLOC_ZONE
+#include <mach/mach_error.h>
+#include <mach/mach_init.h>
+#include <mach/vm_map.h>
+#include <malloc/malloc.h>
+#endif
+
+#ifdef JEMALLOC_LAZY_LOCK
+#include <dlfcn.h>
+#endif
+
+#define	RB_COMPACT
+#include "jemalloc/internal/rb.h"
+#include "jemalloc/internal/qr.h"
+#include "jemalloc/internal/ql.h"
+
+extern void	(*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
+
+/*
+ * Define a custom assert() in order to reduce the chances of deadlock during
+ * assertion failure.
+ */
+#ifndef assert
+#  ifdef JEMALLOC_DEBUG
+#    define assert(e) do {						\
+	if (!(e)) {							\
+		char line_buf[UMAX2S_BUFSIZE];				\
+		malloc_write("<jemalloc>: ");				\
+		malloc_write(__FILE__);					\
+		malloc_write(":");					\
+		malloc_write(u2s(__LINE__, 10, line_buf));		\
+		malloc_write(": Failed assertion: ");			\
+		malloc_write("\"");					\
+		malloc_write(#e);					\
+		malloc_write("\"\n");					\
+		abort();						\
+	}								\
+} while (0)
+#  else
+#    define assert(e)
+#  endif
+#endif
+
+#ifdef JEMALLOC_DEBUG
+#  define dassert(e) assert(e)
+#else
+#  define dassert(e)
+#endif
+
+/*
+ * jemalloc can conceptually be broken into components (arena, tcache, etc.),
+ * but there are circular dependencies that cannot be broken without
+ * substantial performance degradation.  In order to reduce the effect on
+ * visual code flow, read the header files in multiple passes, with one of the
+ * following cpp variables defined during each pass:
+ *
+ *   JEMALLOC_H_TYPES   : Preprocessor-defined constants and pseudo-opaque data
+ *                        types.
+ *   JEMALLOC_H_STRUCTS : Data structures.
+ *   JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes.
+ *   JEMALLOC_H_INLINES : Inline functions.
+ */
+/******************************************************************************/
+#define JEMALLOC_H_TYPES
+
+#define	ALLOCM_LG_ALIGN_MASK	((int)0x3f)
+
+#define	ZU(z)	((size_t)z)
+
+#ifndef __DECONST
+#  define	__DECONST(type, var)	((type)(uintptr_t)(const void *)(var))
+#endif
+
+#ifdef JEMALLOC_DEBUG
+   /* Disable inlining to make debugging easier. */
+#  define JEMALLOC_INLINE
+#  define inline
+#else
+#  define JEMALLOC_ENABLE_INLINE
+#  define JEMALLOC_INLINE static inline
+#endif
+
+/* Size of stack-allocated buffer passed to buferror(). */
+#define	BUFERROR_BUF		64
+
+/* Minimum alignment of allocations is 2^LG_QUANTUM bytes. */
+#ifdef __i386__
+#  define LG_QUANTUM		4
+#endif
+#ifdef __ia64__
+#  define LG_QUANTUM		4
+#endif
+#ifdef __alpha__
+#  define LG_QUANTUM		4
+#endif
+#ifdef __sparc64__
+#  define LG_QUANTUM		4
+#endif
+#if (defined(__amd64__) || defined(__x86_64__))
+#  define LG_QUANTUM		4
+#endif
+#ifdef __arm__
+#  define LG_QUANTUM		3
+#endif
+#ifdef __mips__
+#  define LG_QUANTUM		3
+#endif
+#ifdef __powerpc__
+#  define LG_QUANTUM		4
+#endif
+#ifdef __s390x__
+#  define LG_QUANTUM		4
+#endif
+
+#define	QUANTUM			((size_t)(1U << LG_QUANTUM))
+#define	QUANTUM_MASK		(QUANTUM - 1)
+
+/* Return the smallest quantum multiple that is >= a. */
+#define	QUANTUM_CEILING(a)						\
+	(((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
+
+#define	LONG			((size_t)(1U << LG_SIZEOF_LONG))
+#define	LONG_MASK		(LONG - 1)
+
+/* Return the smallest long multiple that is >= a. */
+#define	LONG_CEILING(a)						\
+	(((a) + LONG_MASK) & ~LONG_MASK)
+
+#define	SIZEOF_PTR		(1U << LG_SIZEOF_PTR)
+#define	PTR_MASK		(SIZEOF_PTR - 1)
+
+/* Return the smallest (void *) multiple that is >= a. */
+#define	PTR_CEILING(a)						\
+	(((a) + PTR_MASK) & ~PTR_MASK)
+
+/*
+ * Maximum size of L1 cache line.  This is used to avoid cache line aliasing.
+ * In addition, this controls the spacing of cacheline-spaced size classes.
+ */
+#define	LG_CACHELINE		6
+#define	CACHELINE		((size_t)(1U << LG_CACHELINE))
+#define	CACHELINE_MASK		(CACHELINE - 1)
+
+/* Return the smallest cacheline multiple that is >= s. */
+#define	CACHELINE_CEILING(s)						\
+	(((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
+
+/*
+ * Page size.  STATIC_PAGE_SHIFT is determined by the configure script.  If
+ * DYNAMIC_PAGE_SHIFT is enabled, only use the STATIC_PAGE_* macros where
+ * compile-time values are required for the purposes of defining data
+ * structures.
+ */
+#define	STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT))
+#define	STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1))
+
+#ifdef PAGE_SHIFT
+#  undef PAGE_SHIFT
+#endif
+#ifdef PAGE_SIZE
+#  undef PAGE_SIZE
+#endif
+#ifdef PAGE_MASK
+#  undef PAGE_MASK
+#endif
+
+#ifdef DYNAMIC_PAGE_SHIFT
+#  define PAGE_SHIFT	lg_pagesize
+#  define PAGE_SIZE	pagesize
+#  define PAGE_MASK	pagesize_mask
+#else
+#  define PAGE_SHIFT	STATIC_PAGE_SHIFT
+#  define PAGE_SIZE	STATIC_PAGE_SIZE
+#  define PAGE_MASK	STATIC_PAGE_MASK
+#endif
+
+/* Return the smallest pagesize multiple that is >= s. */
+#define	PAGE_CEILING(s)							\
+	(((s) + PAGE_MASK) & ~PAGE_MASK)
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/prn.h"
+#include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/stats.h"
+#include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mb.h"
+#include "jemalloc/internal/extent.h"
+#include "jemalloc/internal/arena.h"
+#include "jemalloc/internal/bitmap.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/chunk.h"
+#include "jemalloc/internal/huge.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/tcache.h"
+#include "jemalloc/internal/hash.h"
+#ifdef JEMALLOC_ZONE
+#include "jemalloc/internal/zone.h"
+#endif
+#include "jemalloc/internal/prof.h"
+
+#undef JEMALLOC_H_TYPES
+/******************************************************************************/
+#define JEMALLOC_H_STRUCTS
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/prn.h"
+#include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/stats.h"
+#include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mb.h"
+#include "jemalloc/internal/bitmap.h"
+#include "jemalloc/internal/extent.h"
+#include "jemalloc/internal/arena.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/chunk.h"
+#include "jemalloc/internal/huge.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/tcache.h"
+#include "jemalloc/internal/hash.h"
+#ifdef JEMALLOC_ZONE
+#include "jemalloc/internal/zone.h"
+#endif
+#include "jemalloc/internal/prof.h"
+
+#ifdef JEMALLOC_STATS
+typedef struct {
+	uint64_t	allocated;
+	uint64_t	deallocated;
+} thread_allocated_t;
+#endif
+
+#undef JEMALLOC_H_STRUCTS
+/******************************************************************************/
+#define JEMALLOC_H_EXTERNS
+
+extern bool	opt_abort;
+#ifdef JEMALLOC_FILL
+extern bool	opt_junk;
+#endif
+#ifdef JEMALLOC_SYSV
+extern bool	opt_sysv;
+#endif
+#ifdef JEMALLOC_XMALLOC
+extern bool	opt_xmalloc;
+#endif
+#ifdef JEMALLOC_FILL
+extern bool	opt_zero;
+#endif
+extern size_t	opt_narenas;
+
+#ifdef DYNAMIC_PAGE_SHIFT
+extern size_t		pagesize;
+extern size_t		pagesize_mask;
+extern size_t		lg_pagesize;
+#endif
+
+/* Number of CPUs. */
+extern unsigned		ncpus;
+
+extern malloc_mutex_t	arenas_lock; /* Protects arenas initialization. */
+extern pthread_key_t	arenas_tsd;
+#ifndef NO_TLS
+/*
+ * Map of pthread_self() --> arenas[???], used for selecting an arena to use
+ * for allocations.
+ */
+extern __thread arena_t	*arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
+#  define ARENA_GET()	arenas_tls
+#  define ARENA_SET(v)	do {						\
+	arenas_tls = (v);						\
+	pthread_setspecific(arenas_tsd, (void *)(v));			\
+} while (0)
+#else
+#  define ARENA_GET()	((arena_t *)pthread_getspecific(arenas_tsd))
+#  define ARENA_SET(v)	do {						\
+	pthread_setspecific(arenas_tsd, (void *)(v));			\
+} while (0)
+#endif
+
+/*
+ * Arenas that are used to service external requests.  Not all elements of the
+ * arenas array are necessarily used; arenas are created lazily as needed.
+ */
+extern arena_t		**arenas;
+extern unsigned		narenas;
+
+#ifdef JEMALLOC_STATS
+#  ifndef NO_TLS
+extern __thread thread_allocated_t	thread_allocated_tls;
+#    define ALLOCATED_GET() (thread_allocated_tls.allocated)
+#    define ALLOCATEDP_GET() (&thread_allocated_tls.allocated)
+#    define DEALLOCATED_GET() (thread_allocated_tls.deallocated)
+#    define DEALLOCATEDP_GET() (&thread_allocated_tls.deallocated)
+#    define ALLOCATED_ADD(a, d) do {					\
+	thread_allocated_tls.allocated += (a);				\
+	thread_allocated_tls.deallocated += (d);			\
+} while (0) /* Args parenthesized, matching the NO_TLS variant. */
+#  else
+extern pthread_key_t	thread_allocated_tsd;
+thread_allocated_t	*thread_allocated_get_hard(void);
+
+#    define ALLOCATED_GET() (thread_allocated_get()->allocated)
+#    define ALLOCATEDP_GET() (&thread_allocated_get()->allocated)
+#    define DEALLOCATED_GET() (thread_allocated_get()->deallocated)
+#    define DEALLOCATEDP_GET() (&thread_allocated_get()->deallocated)
+#    define ALLOCATED_ADD(a, d) do {					\
+	thread_allocated_t *thread_allocated = thread_allocated_get();	\
+	thread_allocated->allocated += (a);				\
+	thread_allocated->deallocated += (d);				\
+} while (0)
+#  endif
+#endif
+
+arena_t	*arenas_extend(unsigned ind);
+arena_t	*choose_arena_hard(void);
+int	buferror(int errnum, char *buf, size_t buflen);
+void	jemalloc_prefork(void);
+void	jemalloc_postfork(void);
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/prn.h"
+#include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/stats.h"
+#include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mb.h"
+#include "jemalloc/internal/bitmap.h"
+#include "jemalloc/internal/extent.h"
+#include "jemalloc/internal/arena.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/chunk.h"
+#include "jemalloc/internal/huge.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/tcache.h"
+#include "jemalloc/internal/hash.h"
+#ifdef JEMALLOC_ZONE
+#include "jemalloc/internal/zone.h"
+#endif
+#include "jemalloc/internal/prof.h"
+
+#undef JEMALLOC_H_EXTERNS
+/******************************************************************************/
+#define JEMALLOC_H_INLINES
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/prn.h"
+#include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/stats.h"
+#include "jemalloc/internal/ctl.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/mb.h"
+#include "jemalloc/internal/extent.h"
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/chunk.h"
+#include "jemalloc/internal/huge.h"
+
+#ifndef JEMALLOC_ENABLE_INLINE
+size_t	pow2_ceil(size_t x);
+size_t	s2u(size_t size);
+size_t	sa2u(size_t size, size_t alignment, size_t *run_size_p);
+void	malloc_write(const char *s);
+arena_t	*choose_arena(void);
+#  if (defined(JEMALLOC_STATS) && defined(NO_TLS))
+thread_allocated_t	*thread_allocated_get(void);
+#  endif
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
+/* Compute the smallest power of 2 that is >= x (yields 0 for x == 0). */
+JEMALLOC_INLINE size_t
+pow2_ceil(size_t x)
+{
+	/* Smear the top set bit of (x - 1) into all lower bits, then add 1. */
+	x--;
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+#if (LG_SIZEOF_PTR == 3)
+	x |= x >> 32;
+#endif
+	x++;
+	return (x);
+}
+
+/* Return the usable size that a request for size bytes would be given. */
+JEMALLOC_INLINE size_t
+s2u(size_t size)
+{
+	size_t usize;
+
+	if (size <= small_maxclass)
+		usize = arena_bin_info[SMALL_SIZE2BIN(size)].reg_size;
+	else if (size <= arena_maxclass)
+		usize = PAGE_CEILING(size);
+	else
+		usize = CHUNK_CEILING(size);
+	return (usize);
+}
+
+/*
+ * Compute the usable size for a size/alignment request; 0 signals overflow.
+ * *run_size_p (if non-NULL) is written only on the page-aligned (else) path.
+ */
+JEMALLOC_INLINE size_t
+sa2u(size_t size, size_t alignment, size_t *run_size_p)
+{
+	size_t usize;
+
+	/*
+	 * Round size up to the nearest multiple of alignment.
+	 *
+	 * This done, we can take advantage of the fact that for each small
+	 * size class, every object is aligned at the smallest power of two
+	 * that is non-zero in the base two representation of the size.  For
+	 * example:
+	 *
+	 *   Size |   Base 2 | Minimum alignment
+	 *   -----+----------+------------------
+	 *     96 |  1100000 |  32
+	 *    160 | 10100000 |  32
+	 *    192 | 11000000 |  64
+	 *
+	 * Depending on runtime settings, it is possible that arena_malloc()
+	 * will further round up to a power of two, but that never causes
+	 * correctness issues.
+	 */
+	usize = (size + (alignment - 1)) & (-alignment);
+	/*
+	 * (usize < size) protects against the combination of maximal
+	 * alignment and size greater than maximal alignment.
+	 */
+	if (usize < size) {
+		/* size_t overflow. */
+		return (0);
+	}
+
+	if (usize <= arena_maxclass && alignment <= PAGE_SIZE) {
+		if (usize <= small_maxclass)
+			return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size);
+		return (PAGE_CEILING(usize));
+	} else {
+		size_t run_size;
+
+		/*
+		 * We can't achieve subpage alignment, so round up alignment
+		 * permanently; it makes later calculations simpler.
+		 */
+		alignment = PAGE_CEILING(alignment);
+		usize = PAGE_CEILING(size);
+		/*
+		 * (usize < size) protects against very large sizes within
+		 * PAGE_SIZE of SIZE_T_MAX.
+		 *
+		 * (usize + alignment < usize) protects against the
+		 * combination of maximal alignment and usize large enough
+		 * to cause overflow.  This is similar to the first overflow
+		 * check above, but it needs to be repeated due to the new
+		 * usize value, which may now be *equal* to maximal
+		 * alignment, whereas before we only detected overflow if the
+		 * original size was *greater* than maximal alignment.
+		 */
+		if (usize < size || usize + alignment < usize) {
+			/* size_t overflow. */
+			return (0);
+		}
+
+		/*
+		 * Calculate the size of the over-size run that arena_palloc()
+		 * would need to allocate in order to guarantee the alignment.
+		 */
+		if (usize >= alignment)
+			run_size = usize + alignment - PAGE_SIZE;
+		else {
+			/*
+			 * It is possible that (alignment << 1) will cause
+			 * overflow, but it doesn't matter because we also
+			 * subtract PAGE_SIZE, which in the case of overflow
+			 * leaves us with a very large run_size.  That causes
+			 * the first conditional below to fail, which means
+			 * that the bogus run_size value never gets used for
+			 * anything important.
+			 */
+			run_size = (alignment << 1) - PAGE_SIZE;
+		}
+		if (run_size_p != NULL)
+			*run_size_p = run_size;
+
+		if (run_size <= arena_maxclass)
+			return (PAGE_CEILING(usize));
+		return (CHUNK_CEILING(usize));
+	}
+}
+
+/*
+ * Write s via the JEMALLOC_P(malloc_message) function pointer, passing NULL
+ * for its wcbopaque argument, so callers need not spell out that call.
+ */
+JEMALLOC_INLINE void
+malloc_write(const char *s)
+{
+
+	JEMALLOC_P(malloc_message)(NULL, s);
+}
+
+/*
+ * Return the calling thread's arena as reported by ARENA_GET(), deferring to
+ * choose_arena_hard() (slow path) when that yields NULL.
+ */
+JEMALLOC_INLINE arena_t *
+choose_arena(void)
+{
+	arena_t *arena = ARENA_GET();
+
+	if (arena != NULL)
+		return (arena);
+
+	/* Nothing cached yet; the slow path must produce an arena. */
+	arena = choose_arena_hard();
+	assert(arena != NULL);
+	return (arena);
+}
+
+#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
+JEMALLOC_INLINE thread_allocated_t *
+thread_allocated_get(void)
+{
+	void *tsd = pthread_getspecific(thread_allocated_tsd);
+
+	/* No value stored under the key yet: take the slow path. */
+	if (tsd != NULL)
+		return ((thread_allocated_t *)tsd);
+	return (thread_allocated_get_hard());
+}
+#endif
+#endif
+
+#include "jemalloc/internal/bitmap.h"
+#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/tcache.h"
+#include "jemalloc/internal/arena.h"
+#include "jemalloc/internal/hash.h"
+#ifdef JEMALLOC_ZONE
+#include "jemalloc/internal/zone.h"
+#endif
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void	*imalloc(size_t size);
+void	*icalloc(size_t size);
+void	*ipalloc(size_t usize, size_t alignment, bool zero);
+size_t	isalloc(const void *ptr);
+#  ifdef JEMALLOC_IVSALLOC
+size_t	ivsalloc(const void *ptr);
+#  endif
+void	idalloc(void *ptr);
+void	*iralloc(void *ptr, size_t size, size_t extra, size_t alignment,
+    bool zero, bool no_move);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
+/* Allocate size (asserted nonzero) bytes, passing zero=false downstream. */
+JEMALLOC_INLINE void *
+imalloc(size_t size)
+{
+
+	assert(size != 0);
+
+	/* Sizes above arena_maxclass are served by huge_malloc(). */
+	return ((size > arena_maxclass) ? huge_malloc(size, false) :
+	    arena_malloc(size, false));
+}
+
+/* Allocate size bytes with zero=true; huge above arena_maxclass. */
+JEMALLOC_INLINE void *
+icalloc(size_t size)
+{
+
+	if (size > arena_maxclass)
+		return (huge_malloc(size, true));
+	return (arena_malloc(size, true));
+}
+
+/* Aligned allocation; usize is asserted nonzero and already sa2u()-rounded. */
+JEMALLOC_INLINE void *
+ipalloc(size_t usize, size_t alignment, bool zero)
+{
+	void *ret;
+
+	assert(usize != 0);
+	assert(usize == sa2u(usize, alignment, NULL));
+
+	if (usize <= arena_maxclass && alignment <= PAGE_SIZE)
+		ret = arena_malloc(usize, zero);
+	else {
+		size_t run_size
+#ifdef JEMALLOC_CC_SILENCE
+		    = 0
+#endif
+		    ;
+
+		/*
+		 * The caller has already run sa2u() once (see the assertion
+		 * above), but requiring every caller to pass run_size in
+		 * would be burdensome, and run_size only matters on this
+		 * path.  Call sa2u() again purely for its run_size output;
+		 * the returned size is deliberately ignored.
+		 */
+		sa2u(usize, alignment, &run_size);
+		if (run_size <= arena_maxclass) {
+			ret = arena_palloc(choose_arena(), usize, run_size,
+			    alignment, zero);
+		} else if (alignment <= chunksize)
+			ret = huge_malloc(usize, zero);
+		else
+			ret = huge_palloc(usize, alignment, zero);
+	}
+
+	assert(((uintptr_t)ret & (alignment - 1)) == 0);
+	return (ret);
+}
+
+/*
+ * Return the size that the arena or huge layer reports for ptr (non-NULL).
+ */
+JEMALLOC_INLINE size_t
+isalloc(const void *ptr)
+{
+	arena_chunk_t *base;
+
+	assert(ptr != NULL);
+
+	base = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	/* A pointer equal to its own chunk base is handled as huge. */
+	if (base == ptr)
+		return (huge_salloc(ptr));
+
+	/* Otherwise ptr is a region inside an arena chunk. */
+	dassert(base->arena->magic == ARENA_MAGIC);
+#ifdef JEMALLOC_PROF
+	return (arena_salloc_demote(ptr));
+#else
+	return (arena_salloc(ptr));
+#endif
+}
+
+#ifdef JEMALLOC_IVSALLOC
+JEMALLOC_INLINE size_t
+ivsalloc(const void *ptr)
+{
+	uintptr_t base = (uintptr_t)CHUNK_ADDR2BASE(ptr);
+
+	/* A chunk base absent from chunks_rtree yields a size of 0. */
+	if (rtree_get(chunks_rtree, base) != NULL)
+		return (isalloc(ptr));
+	return (0);
+}
+#endif
+
+/* Free ptr (non-NULL) via huge_dalloc() or the arena owning its chunk. */
+JEMALLOC_INLINE void
+idalloc(void *ptr)
+{
+	arena_chunk_t *base = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+
+	assert(ptr != NULL);
+
+	if (base == ptr)
+		huge_dalloc(ptr, true);
+	else
+		arena_dalloc(base->arena, base, ptr);
+}
+
+/*
+ * Reallocate ptr (non-NULL) to size (nonzero) bytes, trying size + extra
+ * first where a new aligned object is needed.  alignment == 0 skips the check.
+ */
+JEMALLOC_INLINE void *
+iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
+    bool no_move)
+{
+	void *ret;
+	size_t oldsize;
+
+	assert(ptr != NULL);
+	assert(size != 0);
+
+	oldsize = isalloc(ptr);
+
+	if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
+	    != 0) {
+		size_t usize, copysize;
+
+		/* Alignment is inadequate; allocate new space and copy. */
+		if (no_move)
+			return (NULL);
+		usize = sa2u(size + extra, alignment, NULL);
+		if (usize == 0)
+			return (NULL);
+		ret = ipalloc(usize, alignment, zero);
+		if (ret == NULL) {
+			if (extra == 0)
+				return (NULL);
+			/* Try again, without extra this time. */
+			usize = sa2u(size, alignment, NULL);
+			if (usize == 0)
+				return (NULL);
+			ret = ipalloc(usize, alignment, zero);
+			if (ret == NULL)
+				return (NULL);
+		}
+		/*
+		 * Copy at most size bytes (not size+extra); the caller has no
+		 * expectation that the extra bytes are reliably preserved.
+		 */
+		copysize = (size < oldsize) ? size : oldsize;
+		memcpy(ret, ptr, copysize);
+		idalloc(ptr);
+		return (ret);
+	}
+
+	if (no_move) {
+		if (size <= arena_maxclass) {
+			return (arena_ralloc_no_move(ptr, oldsize, size,
+			    extra, zero));
+		} else {
+			return (huge_ralloc_no_move(ptr, oldsize, size,
+			    extra));
+		}
+	} else {
+		if (size + extra <= arena_maxclass) {
+			return (arena_ralloc(ptr, oldsize, size, extra,
+			    alignment, zero));
+		} else {
+			return (huge_ralloc(ptr, oldsize, size, extra,
+			    alignment, zero));
+		}
+	}
+}
+#endif
+
+#include "jemalloc/internal/prof.h"
+
+#undef JEMALLOC_H_INLINES
+/******************************************************************************/
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/mb.h b/deps/jemalloc.orig/include/jemalloc/internal/mb.h
new file mode 100644
index 00000000..dc9f2a54
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/mb.h
@@ -0,0 +1,108 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void	mb_write(void);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_))
+#ifdef __i386__
+/*
+ * According to the Intel Architecture Software Developer's Manual, current
+ * processors execute instructions in order from the perspective of other
+ * processors in a multiprocessor system, but 1) Intel reserves the right to
+ * change that, and 2) the compiler's optimizer could re-order instructions if
+ * there weren't some form of barrier.  Therefore, even if running on an
+ * architecture that does not need memory barriers (everything through at least
+ * i686), an "optimizer barrier" is necessary.
+ */
+JEMALLOC_INLINE void
+mb_write(void)
+{
+
+#  if 0
+	/* This is a true memory barrier (compiled out by the #if 0). */
+	asm volatile ("pusha;"
+	    "xor  %%eax,%%eax;"
+	    "cpuid;"
+	    "popa;"
+	    : /* Outputs. */
+	    : /* Inputs. */
+	    : "memory" /* Clobbers. */
+	    );
+#else
+	/*
+	 * This is hopefully enough to keep the compiler from reordering
+	 * instructions around this one (this is the branch that is built).
+	 */
+	asm volatile ("nop;"
+	    : /* Outputs. */
+	    : /* Inputs. */
+	    : "memory" /* Clobbers. */
+	    );
+#endif
+}
+#elif (defined(__amd64__) || defined(__x86_64__))
+JEMALLOC_INLINE void
+mb_write(void)
+{
+	/* Store fence; the "memory" clobber also restrains the compiler. */
+	asm volatile ("sfence"
+	    : /* Outputs. */
+	    : /* Inputs. */
+	    : "memory" /* Clobbers. */
+	    );
+}
+#elif defined(__powerpc__)
+JEMALLOC_INLINE void
+mb_write(void)
+{
+	/* "memory" clobber: the compiler must not reorder around this. */
+	asm volatile ("eieio"
+	    : /* Outputs. */
+	    : /* Inputs. */
+	    : "memory" /* Clobbers. */
+	    );
+}
+#elif defined(__sparc64__)
+JEMALLOC_INLINE void
+mb_write(void)
+{
+	/* "memory" clobber: the compiler must not reorder around this. */
+	asm volatile ("membar #StoreStore"
+	    : /* Outputs. */
+	    : /* Inputs. */
+	    : "memory" /* Clobbers. */
+	    );
+}
+#else
+/*
+ * Much slower than a real barrier, but mutex unlock semantics make it work.
+ */
+JEMALLOC_INLINE void
+mb_write(void)
+{
+	malloc_mutex_t mtx;
+
+	malloc_mutex_init(&mtx);
+	malloc_mutex_lock(&mtx);
+	malloc_mutex_unlock(&mtx);
+	malloc_mutex_destroy(&mtx);	/* Release the scratch mutex. */
+}
+#endif
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/mutex.h b/deps/jemalloc.orig/include/jemalloc/internal/mutex.h
new file mode 100644
index 00000000..62947ced
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/mutex.h
@@ -0,0 +1,86 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#ifdef JEMALLOC_OSSPIN
+typedef OSSpinLock malloc_mutex_t;
+#else
+typedef pthread_mutex_t malloc_mutex_t;
+#endif
+
+#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
+#  define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
+#else
+#  define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+#endif
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#ifdef JEMALLOC_LAZY_LOCK
+extern bool isthreaded;
+#else
+#  define isthreaded true
+#endif
+
+bool	malloc_mutex_init(malloc_mutex_t *mutex);
+void	malloc_mutex_destroy(malloc_mutex_t *mutex);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void	malloc_mutex_lock(malloc_mutex_t *mutex);
+bool	malloc_mutex_trylock(malloc_mutex_t *mutex);
+void	malloc_mutex_unlock(malloc_mutex_t *mutex);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_))
+JEMALLOC_INLINE void
+malloc_mutex_lock(malloc_mutex_t *mutex)
+{
+	/* Locking is skipped entirely while isthreaded is false. */
+	if (!isthreaded)
+		return;
+#ifdef JEMALLOC_OSSPIN
+	OSSpinLockLock(mutex);
+#else
+	pthread_mutex_lock(mutex);
+#endif
+}
+
+/* Try to acquire mutex; returns false on success, true on failure. */
+JEMALLOC_INLINE bool
+malloc_mutex_trylock(malloc_mutex_t *mutex)
+{
+	/* While isthreaded is false, report success without locking. */
+	if (!isthreaded)
+		return (false);
+#ifdef JEMALLOC_OSSPIN
+	return (OSSpinLockTry(mutex) == false);
+#else
+	return (pthread_mutex_trylock(mutex) != 0);
+#endif
+}
+
+JEMALLOC_INLINE void
+malloc_mutex_unlock(malloc_mutex_t *mutex)
+{
+	/* Unlocking is skipped entirely while isthreaded is false. */
+	if (!isthreaded)
+		return;
+#ifdef JEMALLOC_OSSPIN
+	OSSpinLockUnlock(mutex);
+#else
+	pthread_mutex_unlock(mutex);
+#endif
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/private_namespace.h b/deps/jemalloc.orig/include/jemalloc/internal/private_namespace.h
new file mode 100644
index 00000000..d4f5f96d
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/private_namespace.h
@@ -0,0 +1,195 @@
+#define	arena_bin_index JEMALLOC_N(arena_bin_index)
+#define	arena_boot JEMALLOC_N(arena_boot)
+#define	arena_dalloc JEMALLOC_N(arena_dalloc)
+#define	arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin)
+#define	arena_dalloc_large JEMALLOC_N(arena_dalloc_large)
+#define	arena_malloc JEMALLOC_N(arena_malloc)
+#define	arena_malloc_large JEMALLOC_N(arena_malloc_large)
+#define	arena_malloc_small JEMALLOC_N(arena_malloc_small)
+#define	arena_new JEMALLOC_N(arena_new)
+#define	arena_palloc JEMALLOC_N(arena_palloc)
+#define	arena_prof_accum JEMALLOC_N(arena_prof_accum)
+#define	arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get)
+#define	arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set)
+#define	arena_prof_promoted JEMALLOC_N(arena_prof_promoted)
+#define	arena_purge_all JEMALLOC_N(arena_purge_all)
+#define	arena_ralloc JEMALLOC_N(arena_ralloc)
+#define	arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move)
+#define	arena_run_regind JEMALLOC_N(arena_run_regind)
+#define	arena_salloc JEMALLOC_N(arena_salloc)
+#define	arena_salloc_demote JEMALLOC_N(arena_salloc_demote)
+#define	arena_stats_merge JEMALLOC_N(arena_stats_merge)
+#define	arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small)
+#define	arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index)
+#define	arenas_extend JEMALLOC_N(arenas_extend)
+#define	arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index)
+#define	atomic_add_uint32 JEMALLOC_N(atomic_add_uint32)
+#define	atomic_add_uint64 JEMALLOC_N(atomic_add_uint64)
+#define	atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32)
+#define	atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64)
+#define	base_alloc JEMALLOC_N(base_alloc)
+#define	base_boot JEMALLOC_N(base_boot)
+#define	base_node_alloc JEMALLOC_N(base_node_alloc)
+#define	base_node_dealloc JEMALLOC_N(base_node_dealloc)
+#define	bitmap_full JEMALLOC_N(bitmap_full)
+#define	bitmap_get JEMALLOC_N(bitmap_get)
+#define	bitmap_info_init JEMALLOC_N(bitmap_info_init)
+#define	bitmap_info_ngroups JEMALLOC_N(bitmap_info_ngroups)
+#define	bitmap_init JEMALLOC_N(bitmap_init)
+#define	bitmap_set JEMALLOC_N(bitmap_set)
+#define	bitmap_sfu JEMALLOC_N(bitmap_sfu)
+#define	bitmap_size JEMALLOC_N(bitmap_size)
+#define	bitmap_unset JEMALLOC_N(bitmap_unset)
+#define	bt_init JEMALLOC_N(bt_init)
+#define	buferror JEMALLOC_N(buferror)
+#define	choose_arena JEMALLOC_N(choose_arena)
+#define	choose_arena_hard JEMALLOC_N(choose_arena_hard)
+#define	chunk_alloc JEMALLOC_N(chunk_alloc)
+#define	chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss)
+#define	chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap)
+#define	chunk_alloc_mmap_noreserve JEMALLOC_N(chunk_alloc_mmap_noreserve)
+#define	chunk_alloc_swap JEMALLOC_N(chunk_alloc_swap)
+#define	chunk_boot JEMALLOC_N(chunk_boot)
+#define	chunk_dealloc JEMALLOC_N(chunk_dealloc)
+#define	chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss)
+#define	chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap)
+#define	chunk_dealloc_swap JEMALLOC_N(chunk_dealloc_swap)
+#define	chunk_dss_boot JEMALLOC_N(chunk_dss_boot)
+#define	chunk_in_dss JEMALLOC_N(chunk_in_dss)
+#define	chunk_in_swap JEMALLOC_N(chunk_in_swap)
+#define	chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot)
+#define	chunk_swap_boot JEMALLOC_N(chunk_swap_boot)
+#define	chunk_swap_enable JEMALLOC_N(chunk_swap_enable)
+#define	ckh_bucket_search JEMALLOC_N(ckh_bucket_search)
+#define	ckh_count JEMALLOC_N(ckh_count)
+#define	ckh_delete JEMALLOC_N(ckh_delete)
+#define	ckh_evict_reloc_insert JEMALLOC_N(ckh_evict_reloc_insert)
+#define	ckh_insert JEMALLOC_N(ckh_insert)
+#define	ckh_isearch JEMALLOC_N(ckh_isearch)
+#define	ckh_iter JEMALLOC_N(ckh_iter)
+#define	ckh_new JEMALLOC_N(ckh_new)
+#define	ckh_pointer_hash JEMALLOC_N(ckh_pointer_hash)
+#define	ckh_pointer_keycomp JEMALLOC_N(ckh_pointer_keycomp)
+#define	ckh_rebuild JEMALLOC_N(ckh_rebuild)
+#define	ckh_remove JEMALLOC_N(ckh_remove)
+#define	ckh_search JEMALLOC_N(ckh_search)
+#define	ckh_string_hash JEMALLOC_N(ckh_string_hash)
+#define	ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp)
+#define	ckh_try_bucket_insert JEMALLOC_N(ckh_try_bucket_insert)
+#define	ckh_try_insert JEMALLOC_N(ckh_try_insert)
+#define	create_zone JEMALLOC_N(create_zone)
+#define	ctl_boot JEMALLOC_N(ctl_boot)
+#define	ctl_bymib JEMALLOC_N(ctl_bymib)
+#define	ctl_byname JEMALLOC_N(ctl_byname)
+#define	ctl_nametomib JEMALLOC_N(ctl_nametomib)
+#define	extent_tree_ad_first JEMALLOC_N(extent_tree_ad_first)
+#define	extent_tree_ad_insert JEMALLOC_N(extent_tree_ad_insert)
+#define	extent_tree_ad_iter JEMALLOC_N(extent_tree_ad_iter)
+#define	extent_tree_ad_iter_recurse JEMALLOC_N(extent_tree_ad_iter_recurse)
+#define	extent_tree_ad_iter_start JEMALLOC_N(extent_tree_ad_iter_start)
+#define	extent_tree_ad_last JEMALLOC_N(extent_tree_ad_last)
+#define	extent_tree_ad_new JEMALLOC_N(extent_tree_ad_new)
+#define	extent_tree_ad_next JEMALLOC_N(extent_tree_ad_next)
+#define	extent_tree_ad_nsearch JEMALLOC_N(extent_tree_ad_nsearch)
+#define	extent_tree_ad_prev JEMALLOC_N(extent_tree_ad_prev)
+#define	extent_tree_ad_psearch JEMALLOC_N(extent_tree_ad_psearch)
+#define	extent_tree_ad_remove JEMALLOC_N(extent_tree_ad_remove)
+#define	extent_tree_ad_reverse_iter JEMALLOC_N(extent_tree_ad_reverse_iter)
+#define	extent_tree_ad_reverse_iter_recurse JEMALLOC_N(extent_tree_ad_reverse_iter_recurse)
+#define	extent_tree_ad_reverse_iter_start JEMALLOC_N(extent_tree_ad_reverse_iter_start)
+#define	extent_tree_ad_search JEMALLOC_N(extent_tree_ad_search)
+#define	extent_tree_szad_first JEMALLOC_N(extent_tree_szad_first)
+#define	extent_tree_szad_insert JEMALLOC_N(extent_tree_szad_insert)
+#define	extent_tree_szad_iter JEMALLOC_N(extent_tree_szad_iter)
+#define	extent_tree_szad_iter_recurse JEMALLOC_N(extent_tree_szad_iter_recurse)
+#define	extent_tree_szad_iter_start JEMALLOC_N(extent_tree_szad_iter_start)
+#define	extent_tree_szad_last JEMALLOC_N(extent_tree_szad_last)
+#define	extent_tree_szad_new JEMALLOC_N(extent_tree_szad_new)
+#define	extent_tree_szad_next JEMALLOC_N(extent_tree_szad_next)
+#define	extent_tree_szad_nsearch JEMALLOC_N(extent_tree_szad_nsearch)
+#define	extent_tree_szad_prev JEMALLOC_N(extent_tree_szad_prev)
+#define	extent_tree_szad_psearch JEMALLOC_N(extent_tree_szad_psearch)
+#define	extent_tree_szad_remove JEMALLOC_N(extent_tree_szad_remove)
+#define	extent_tree_szad_reverse_iter JEMALLOC_N(extent_tree_szad_reverse_iter)
+#define	extent_tree_szad_reverse_iter_recurse JEMALLOC_N(extent_tree_szad_reverse_iter_recurse)
+#define	extent_tree_szad_reverse_iter_start JEMALLOC_N(extent_tree_szad_reverse_iter_start)
+#define	extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search)
+#define	hash JEMALLOC_N(hash)
+#define	huge_boot JEMALLOC_N(huge_boot)
+#define	huge_dalloc JEMALLOC_N(huge_dalloc)
+#define	huge_malloc JEMALLOC_N(huge_malloc)
+#define	huge_palloc JEMALLOC_N(huge_palloc)
+#define	huge_prof_ctx_get JEMALLOC_N(huge_prof_ctx_get)
+#define	huge_prof_ctx_set JEMALLOC_N(huge_prof_ctx_set)
+#define	huge_ralloc JEMALLOC_N(huge_ralloc)
+#define	huge_ralloc_no_move JEMALLOC_N(huge_ralloc_no_move)
+#define	huge_salloc JEMALLOC_N(huge_salloc)
+#define	iallocm JEMALLOC_N(iallocm)
+#define	icalloc JEMALLOC_N(icalloc)
+#define	idalloc JEMALLOC_N(idalloc)
+#define	imalloc JEMALLOC_N(imalloc)
+#define	ipalloc JEMALLOC_N(ipalloc)
+#define	iralloc JEMALLOC_N(iralloc)
+#define	isalloc JEMALLOC_N(isalloc)
+#define	ivsalloc JEMALLOC_N(ivsalloc)
+#define	jemalloc_darwin_init JEMALLOC_N(jemalloc_darwin_init)
+#define	jemalloc_postfork JEMALLOC_N(jemalloc_postfork)
+#define	jemalloc_prefork JEMALLOC_N(jemalloc_prefork)
+#define	malloc_cprintf JEMALLOC_N(malloc_cprintf)
+#define	malloc_mutex_destroy JEMALLOC_N(malloc_mutex_destroy)
+#define	malloc_mutex_init JEMALLOC_N(malloc_mutex_init)
+#define	malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock)
+#define	malloc_mutex_trylock JEMALLOC_N(malloc_mutex_trylock)
+#define	malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock)
+#define	malloc_printf JEMALLOC_N(malloc_printf)
+#define	malloc_write JEMALLOC_N(malloc_write)
+#define	mb_write JEMALLOC_N(mb_write)
+#define	pow2_ceil JEMALLOC_N(pow2_ceil)
+#define	prof_backtrace JEMALLOC_N(prof_backtrace)
+#define	prof_boot0 JEMALLOC_N(prof_boot0)
+#define	prof_boot1 JEMALLOC_N(prof_boot1)
+#define	prof_boot2 JEMALLOC_N(prof_boot2)
+#define	prof_ctx_get JEMALLOC_N(prof_ctx_get)
+#define	prof_ctx_set JEMALLOC_N(prof_ctx_set)
+#define	prof_free JEMALLOC_N(prof_free)
+#define	prof_gdump JEMALLOC_N(prof_gdump)
+#define	prof_idump JEMALLOC_N(prof_idump)
+#define	prof_lookup JEMALLOC_N(prof_lookup)
+#define	prof_malloc JEMALLOC_N(prof_malloc)
+#define	prof_mdump JEMALLOC_N(prof_mdump)
+#define	prof_realloc JEMALLOC_N(prof_realloc)
+#define	prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update)
+#define	prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update)
+#define	prof_tdata_init JEMALLOC_N(prof_tdata_init)
+#define	pthread_create JEMALLOC_N(pthread_create)
+#define	rtree_get JEMALLOC_N(rtree_get)
+#define	rtree_get_locked JEMALLOC_N(rtree_get_locked)
+#define	rtree_new JEMALLOC_N(rtree_new)
+#define	rtree_set JEMALLOC_N(rtree_set)
+#define	s2u JEMALLOC_N(s2u)
+#define	sa2u JEMALLOC_N(sa2u)
+#define	stats_arenas_i_bins_j_index JEMALLOC_N(stats_arenas_i_bins_j_index)
+#define	stats_arenas_i_index JEMALLOC_N(stats_arenas_i_index)
+#define	stats_arenas_i_lruns_j_index JEMALLOC_N(stats_arenas_i_lruns_j_index)
+#define	stats_cactive_add JEMALLOC_N(stats_cactive_add)
+#define	stats_cactive_get JEMALLOC_N(stats_cactive_get)
+#define	stats_cactive_sub JEMALLOC_N(stats_cactive_sub)
+#define	stats_print JEMALLOC_N(stats_print)
+#define	szone2ozone JEMALLOC_N(szone2ozone)
+#define	tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy)
+#define	tcache_alloc_large JEMALLOC_N(tcache_alloc_large)
+#define	tcache_alloc_small JEMALLOC_N(tcache_alloc_small)
+#define	tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard)
+#define	tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large)
+#define	tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small)
+#define	tcache_boot JEMALLOC_N(tcache_boot)
+#define	tcache_create JEMALLOC_N(tcache_create)
+#define	tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large)
+#define	tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small)
+#define	tcache_destroy JEMALLOC_N(tcache_destroy)
+#define	tcache_event JEMALLOC_N(tcache_event)
+#define	tcache_get JEMALLOC_N(tcache_get)
+#define	tcache_stats_merge JEMALLOC_N(tcache_stats_merge)
+#define	thread_allocated_get JEMALLOC_N(thread_allocated_get)
+#define	thread_allocated_get_hard JEMALLOC_N(thread_allocated_get_hard)
+#define	u2s JEMALLOC_N(u2s)
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/prn.h b/deps/jemalloc.orig/include/jemalloc/internal/prn.h
new file mode 100644
index 00000000..0709d708
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/prn.h
@@ -0,0 +1,60 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/*
+ * Simple linear congruential pseudo-random number generator:
+ *
+ *   prn(y) = (a*x + c) % m
+ *
+ * where the following constants ensure maximal period:
+ *
+ *   a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4.
+ *   c == Odd number (relatively prime to 2^n).
+ *   m == 2^32
+ *
+ * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
+ *
+ * This choice of m has the disadvantage that the quality of the bits is
+ * proportional to bit position.  For example, the lowest bit has a cycle of 2,
+ * the next has a cycle of 4, etc.  For this reason, we prefer to use the upper
+ * bits.
+ *
+ * Macro parameters:
+ *   uint32_t r          : Result (lvalue); upper lg_range bits of new state.
+ *   unsigned lg_range   : (0..32], number of bits to return.
+ *   uint32_t state      : Seed value (lvalue); replaced by the new state.
+ *   const uint32_t a, c : See above discussion.
+ */
+#define prn32(r, lg_range, state, a, c) do {				\
+	assert(lg_range > 0);						\
+	assert(lg_range <= 32);						\
+									\
+	r = (state * (a)) + (c);					\
+	state = r;							\
+	r >>= (32 - lg_range);						\
+} while (false)
+
+/* Same as prn32(), but with uint64_t r/state and lg_range in (0..64]. */
+#define prn64(r, lg_range, state, a, c) do {				\
+	assert(lg_range > 0);						\
+	assert(lg_range <= 64);						\
+									\
+	r = (state * (a)) + (c);					\
+	state = r;							\
+	r >>= (64 - lg_range);						\
+} while (false)
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/prof.h b/deps/jemalloc.orig/include/jemalloc/internal/prof.h
new file mode 100644
index 00000000..e9064ba6
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/prof.h
@@ -0,0 +1,547 @@
+#ifdef JEMALLOC_PROF
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct prof_bt_s prof_bt_t;
+typedef struct prof_cnt_s prof_cnt_t;
+typedef struct prof_thr_cnt_s prof_thr_cnt_t;
+typedef struct prof_ctx_s prof_ctx_t;
+typedef struct prof_tdata_s prof_tdata_t;
+
+/* Option defaults. */
+#define	PROF_PREFIX_DEFAULT		"jeprof"
+#define	LG_PROF_BT_MAX_DEFAULT		7
+#define	LG_PROF_SAMPLE_DEFAULT		0
+#define	LG_PROF_INTERVAL_DEFAULT	-1
+#define	LG_PROF_TCMAX_DEFAULT		-1
+
+/*
+ * Hard limit on stack backtrace depth.  The __builtin_return_address() based
+ * prof_backtrace() necessarily has a hard-coded number of frame handlers.
+ * NOTE(review): PROF_BT_MAX is (1U << LG_PROF_BT_MAX); verify shift < 32.
+ */
+#if (defined(JEMALLOC_PROF_LIBGCC) || defined(JEMALLOC_PROF_LIBUNWIND))
+#  define LG_PROF_BT_MAX	((ZU(1) << (LG_SIZEOF_PTR+3)) - 1)
+#else
+#  define LG_PROF_BT_MAX	7 /* >= LG_PROF_BT_MAX_DEFAULT */
+#endif
+#define	PROF_BT_MAX		(1U << LG_PROF_BT_MAX)
+
+/* Initial hash table size. */
+#define	PROF_CKH_MINITEMS	64
+
+/* Size of memory buffer to use when writing dump files. */
+#define	PROF_DUMP_BUF_SIZE	65536
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct prof_bt_s {
+	/* Backtrace, stored as len program counters. */
+	void		**vec;
+	unsigned	len;
+};
+
+#ifdef JEMALLOC_PROF_LIBGCC
+/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
+typedef struct {
+	prof_bt_t	*bt;
+	unsigned	nignore;
+	unsigned	max;
+} prof_unwind_data_t;
+#endif
+
+struct prof_cnt_s {
+	/*
+	 * Profiling counters.  An allocation/deallocation pair can operate on
+	 * different prof_thr_cnt_t objects that are linked into the same
+	 * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go
+	 * negative.  In principle it is possible for the *bytes counters to
+	 * overflow/underflow, but a general solution would require something
+	 * like 128-bit counters; this implementation doesn't bother to solve
+	 * that problem.
+	 */
+	int64_t		curobjs;	/* Signed: may go negative. */
+	int64_t		curbytes;	/* Signed: may go negative. */
+	uint64_t	accumobjs;	/* Updated only if opt_prof_accum. */
+	uint64_t	accumbytes;	/* Updated only if opt_prof_accum. */
+};
+
+struct prof_thr_cnt_s {
+	/* Linkage into prof_ctx_t's cnts_ql. */
+	ql_elm(prof_thr_cnt_t)	cnts_link;
+
+	/* Linkage into thread's LRU. */
+	ql_elm(prof_thr_cnt_t)	lru_link;
+
+	/*
+	 * Associated context.  If a thread frees an object that it did not
+	 * allocate, it is possible that the context is not cached in the
+	 * thread's hash table, in which case it must be able to look up the
+	 * context, insert a new prof_thr_cnt_t into the thread's hash table,
+	 * and link it into the prof_ctx_t's cnts_ql.
+	 */
+	prof_ctx_t		*ctx;
+
+	/*
+	 * Threads use memory barriers to update the counters.  Since there is
+	 * only ever one writer, the only challenge is for the reader to get a
+	 * consistent read of the counters.
+	 *
+	 * The writer uses this series of operations:
+	 *
+	 * 1) Increment epoch to an odd number.
+	 * 2) Update counters.
+	 * 3) Increment epoch to an even number.
+	 *
+	 * The reader must assure 1) that the epoch is even while it reads the
+	 * counters, and 2) that the epoch doesn't change between the time it
+	 * starts and finishes reading the counters.
+	 */
+	unsigned		epoch;
+
+	/* Profiling counters. */
+	prof_cnt_t		cnts;
+};
+
+struct prof_ctx_s {
+	/* Associated backtrace. */
+	prof_bt_t		*bt;
+
+	/* Protects cnt_merged and cnts_ql. */
+	malloc_mutex_t		lock;
+
+	/* Temporary storage for summation during dump. */
+	prof_cnt_t		cnt_summed;
+
+	/* When threads exit, they merge their stats into cnt_merged. */
+	prof_cnt_t		cnt_merged;
+
+	/*
+	 * List of profile counters, one for each thread that has allocated in
+	 * this context.
+	 */
+	ql_head(prof_thr_cnt_t)	cnts_ql;
+};
+
+struct prof_tdata_s {
+	/*
+	 * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *).  Each thread keeps a
+	 * cache of backtraces, with associated thread-specific prof_thr_cnt_t
+	 * objects.  Other threads may read the prof_thr_cnt_t contents, but no
+	 * others will ever write them.
+	 *
+	 * Upon thread exit, the thread must merge all the prof_thr_cnt_t
+	 * counter data into the associated prof_ctx_t objects, and unlink/free
+	 * the prof_thr_cnt_t objects.
+	 */
+	ckh_t			bt2cnt;
+
+	/* LRU for contents of bt2cnt. */
+	ql_head(prof_thr_cnt_t)	lru_ql;
+
+	/* Backtrace vector, used for calls to prof_backtrace(). */
+	void			**vec;
+
+	/* Sampling state. */
+	uint64_t		prn_state;	/* State passed to prn64(). */
+	uint64_t		threshold;	/* Sample interval; 0 == unset. */
+	uint64_t		accum;		/* Bytes counted so far. */
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern bool	opt_prof;
+/*
+ * Even if opt_prof is true, sampling can be temporarily disabled by setting
+ * opt_prof_active to false.  No locking is used when updating opt_prof_active,
+ * so there are no guarantees regarding how long it will take for all threads
+ * to notice state changes.
+ */
+extern bool	opt_prof_active;
+extern size_t	opt_lg_prof_bt_max;   /* lg(maximum backtrace depth). */
+extern size_t	opt_lg_prof_sample;   /* lg(mean bytes between samples). */
+extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
+extern bool	opt_prof_gdump;       /* High-water memory dumping. */
+extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
+extern bool	opt_prof_accum;       /* Report cumulative bytes. */
+extern ssize_t	opt_lg_prof_tcmax;    /* lg(max per thread backtrace cache) */
+extern char	opt_prof_prefix[PATH_MAX + 1];
+
+/*
+ * Profile dump interval, measured in bytes allocated.  Each arena triggers a
+ * profile dump when it reaches this threshold.  The effect is that the
+ * interval between profile dumps averages prof_interval, though the actual
+ * interval between dumps will tend to be sporadic, and the interval will be a
+ * maximum of approximately (prof_interval * narenas).
+ */
+extern uint64_t	prof_interval;
+
+/*
+ * If true, promote small sampled objects to large objects, since small run
+ * headers do not have embedded profile context pointers.
+ */
+extern bool	prof_promote;
+
+/* (1U << opt_lg_prof_bt_max). */
+extern unsigned	prof_bt_max;
+
+/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
+#ifndef NO_TLS
+extern __thread prof_tdata_t	*prof_tdata_tls
+    JEMALLOC_ATTR(tls_model("initial-exec"));
+#  define PROF_TCACHE_GET()	prof_tdata_tls
+#  define PROF_TCACHE_SET(v)	do {					\
+	prof_tdata_tls = (v);						\
+	pthread_setspecific(prof_tdata_tsd, (void *)(v));		\
+} while (0)
+#else
+#  define PROF_TCACHE_GET()						\
+	((prof_tdata_t *)pthread_getspecific(prof_tdata_tsd))
+#  define PROF_TCACHE_SET(v)	do {					\
+	pthread_setspecific(prof_tdata_tsd, (void *)(v));		\
+} while (0)
+#endif
+/*
+ * Same contents as prof_tdata_tls, but initialized such that the TSD destructor
+ * is called when a thread exits, so that prof_tdata_tls contents can be merged,
+ * unlinked, and deallocated.
+ */
+extern pthread_key_t	prof_tdata_tsd;
+
+void	bt_init(prof_bt_t *bt, void **vec);
+void	prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max);
+prof_thr_cnt_t	*prof_lookup(prof_bt_t *bt);
+void	prof_idump(void);
+bool	prof_mdump(const char *filename);
+void	prof_gdump(void);
+prof_tdata_t	*prof_tdata_init(void);
+void	prof_boot0(void);
+void	prof_boot1(void);
+bool	prof_boot2(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+/* ret: NULL if prof_tdata_init() fails, 1 if unsampled, else prof_lookup(). */
+#define	PROF_ALLOC_PREP(nignore, size, ret) do {			\
+	prof_tdata_t *prof_tdata;					\
+	prof_bt_t bt;							\
+	/* size must be an lvalue: &size seeds the per-thread prng below. */\
+	assert(size == s2u(size));					\
+									\
+	prof_tdata = PROF_TCACHE_GET();					\
+	if (prof_tdata == NULL) {					\
+		prof_tdata = prof_tdata_init();				\
+		if (prof_tdata == NULL) {				\
+			ret = NULL;					\
+			break;						\
+		}							\
+	}								\
+									\
+	if (opt_prof_active == false) {					\
+		/* Sampling is currently inactive, so avoid sampling. */\
+		ret = (prof_thr_cnt_t *)(uintptr_t)1U;			\
+	} else if (opt_lg_prof_sample == 0) {				\
+		/* Don't bother with sampling logic, since sampling   */\
+		/* interval is 1.                                     */\
+		bt_init(&bt, prof_tdata->vec);				\
+		prof_backtrace(&bt, nignore, prof_bt_max);		\
+		ret = prof_lookup(&bt);					\
+	} else {							\
+		if (prof_tdata->threshold == 0) {			\
+			/* Initialize.  Seed the prng differently for */\
+			/* each thread.                               */\
+			prof_tdata->prn_state =				\
+			    (uint64_t)(uintptr_t)&size;			\
+			prof_sample_threshold_update(prof_tdata);	\
+		}							\
+									\
+		/* Determine whether to capture a backtrace based on  */\
+		/* whether size is enough for prof_accum to reach     */\
+		/* prof_tdata->threshold.  However, delay updating    */\
+		/* these variables until prof_{m,re}alloc(), because  */\
+		/* we don't know for sure that the allocation will    */\
+		/* succeed.                                           */\
+		/*                                                    */\
+		/* Use subtraction rather than addition to avoid      */\
+		/* potential integer overflow.                        */\
+		if (size >= prof_tdata->threshold -			\
+		    prof_tdata->accum) {				\
+			bt_init(&bt, prof_tdata->vec);			\
+			prof_backtrace(&bt, nignore, prof_bt_max);	\
+			ret = prof_lookup(&bt);				\
+		} else							\
+			ret = (prof_thr_cnt_t *)(uintptr_t)1U;		\
+	}								\
+} while (0)
+/* Out-of-line prototypes for builds that do not inline the definitions. */
+#ifndef JEMALLOC_ENABLE_INLINE
+void	prof_sample_threshold_update(prof_tdata_t *prof_tdata);
+prof_ctx_t	*prof_ctx_get(const void *ptr);
+void	prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
+bool	prof_sample_accum_update(size_t size);
+void	prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
+void	prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
+    size_t old_size, prof_ctx_t *old_ctx);
+void	prof_free(const void *ptr, size_t size);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
+JEMALLOC_INLINE void
+prof_sample_threshold_update(prof_tdata_t *prof_tdata)
+{
+	uint64_t r;
+	double u;
+
+	/*
+	 * Compute sample threshold as a geometrically distributed random
+	 * variable with mean (2^opt_lg_prof_sample).
+	 *
+	 *                         __        __
+	 *                         |  log(u)  |                     1
+	 * prof_tdata->threshold = | -------- |, where p = -------------------
+	 *                         | log(1-p) |             opt_lg_prof_sample
+	 *                                                 2
+	 *
+	 * For more information on the math, see Luc Devroye, "Non-Uniform
+	 * Random Variate Generation", Springer-Verlag, New York, 1986, pp 500
+	 * (http://cg.scs.carleton.ca/~luc/rnbookindex.html).
+	 *
+	 * r holds 53 random bits, so u = r / 2^53 lies in [0, 1).
+	 * NOTE(review): r == 0 gives log(0); the cast of the resulting
+	 * quotient to uint64_t is unguarded -- confirm this is acceptable.
+	 */
+	prn64(r, 53, prof_tdata->prn_state,
+	    (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU);
+	u = (double)r * (1.0/9007199254740992.0L);
+	prof_tdata->threshold = (uint64_t)(log(u) /
+	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
+	    + (uint64_t)1U;
+}
+
+JEMALLOC_INLINE prof_ctx_t *
+prof_ctx_get(const void *ptr)
+{
+	arena_chunk_t *chunk;
+
+	assert(ptr != NULL);
+
+	/*
+	 * Dispatch on where ptr lives: a pointer that coincides with its
+	 * chunk base is handled by the huge allocator, anything else is a
+	 * region inside an arena chunk.
+	 */
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	if (chunk == ptr)
+		return (huge_prof_ctx_get(ptr));
+
+	dassert(chunk->arena->magic == ARENA_MAGIC);
+	return (arena_prof_ctx_get(ptr));
+}
+
+JEMALLOC_INLINE void
+prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
+{
+	arena_chunk_t *chunk;
+
+	assert(ptr != NULL);
+
+	/* A pointer equal to its chunk base goes to the huge allocator. */
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	if (chunk == ptr) {
+		huge_prof_ctx_set(ptr, ctx);
+		return;
+	}
+	dassert(chunk->arena->magic == ARENA_MAGIC);
+	arena_prof_ctx_set(ptr, ctx);
+}
+
+JEMALLOC_INLINE bool
+prof_sample_accum_update(size_t size)
+{
+	prof_tdata_t *tdata;
+
+	/* Sampling logic is unnecessary if the interval is 1. */
+	assert(opt_lg_prof_sample != 0);
+
+	tdata = PROF_TCACHE_GET();
+	assert(tdata != NULL);
+
+	/* Compare via subtraction to avoid integer overflow. */
+	if (size < tdata->threshold - tdata->accum) {
+		/* Threshold not reached: accumulate and report "no sample". */
+		tdata->accum += size;
+		return (true);
+	}
+	/* Threshold crossed: carry the excess into the next interval(s). */
+	tdata->accum -= (tdata->threshold - size);
+	prof_sample_threshold_update(tdata);
+	while (tdata->accum >= tdata->threshold) {
+		tdata->accum -= tdata->threshold;
+		prof_sample_threshold_update(tdata);
+	}
+	return (false);
+}
+
+JEMALLOC_INLINE void
+prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
+{
+	/* cnt comes from PROF_ALLOC_PREP(); values <= 1 mean "not sampled". */
+	assert(ptr != NULL);
+	assert(size == isalloc(ptr));
+
+	if (opt_lg_prof_sample != 0) {
+		if (prof_sample_accum_update(size)) {
+			/*
+			 * Don't sample.  For malloc()-like allocation, it is
+			 * always possible to tell in advance how large an
+			 * object's usable size will be, so there should never
+			 * be a difference between the size passed to
+			 * PROF_ALLOC_PREP() and prof_malloc().
+			 */
+			assert((uintptr_t)cnt == (uintptr_t)1U);
+		}
+	}
+
+	if ((uintptr_t)cnt > (uintptr_t)1U) {
+		prof_ctx_set(ptr, cnt->ctx);
+
+		cnt->epoch++;	/* Begin counter update. */
+		/*********/
+		mb_write();
+		/*********/
+		cnt->cnts.curobjs++;
+		cnt->cnts.curbytes += size;
+		if (opt_prof_accum) {
+			cnt->cnts.accumobjs++;
+			cnt->cnts.accumbytes += size;
+		}
+		/*********/
+		mb_write();
+		/*********/
+		cnt->epoch++;	/* End counter update. */
+		/*********/
+		mb_write();
+		/*********/
+	} else
+		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
+}
+
+JEMALLOC_INLINE void
+prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
+    size_t old_size, prof_ctx_t *old_ctx)
+{
+	prof_thr_cnt_t *told_cnt;
+	/* ptr may be NULL (no new object); cnt must then be <= 1. */
+	assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
+
+	if (ptr != NULL) {
+		assert(size == isalloc(ptr));
+		if (opt_lg_prof_sample != 0) {
+			if (prof_sample_accum_update(size)) {
+				/*
+				 * Don't sample.  The size passed to
+				 * PROF_ALLOC_PREP() was larger than what
+				 * actually got allocated, so a backtrace was
+				 * captured for this allocation, even though
+				 * its actual size was insufficient to cross
+				 * the sample threshold.
+				 */
+				cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
+			}
+		}
+	}
+
+	if ((uintptr_t)old_ctx > (uintptr_t)1U) {
+		told_cnt = prof_lookup(old_ctx->bt);
+		if (told_cnt == NULL) {
+			/*
+			 * It's too late to propagate OOM for this realloc(),
+			 * so operate directly on old_ctx->cnt_merged.
+			 */
+			malloc_mutex_lock(&old_ctx->lock);
+			old_ctx->cnt_merged.curobjs--;
+			old_ctx->cnt_merged.curbytes -= old_size;
+			malloc_mutex_unlock(&old_ctx->lock);
+			told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
+		}
+	} else
+		told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
+
+	if ((uintptr_t)told_cnt > (uintptr_t)1U)
+		told_cnt->epoch++;
+	if ((uintptr_t)cnt > (uintptr_t)1U) {
+		prof_ctx_set(ptr, cnt->ctx);
+		cnt->epoch++;
+	} else if (ptr != NULL)	/* prof_ctx_set() requires a non-NULL ptr. */
+		prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U);
+	/*********/
+	mb_write();
+	/*********/
+	if ((uintptr_t)told_cnt > (uintptr_t)1U) {
+		told_cnt->cnts.curobjs--;
+		told_cnt->cnts.curbytes -= old_size;
+	}
+	if ((uintptr_t)cnt > (uintptr_t)1U) {
+		cnt->cnts.curobjs++;
+		cnt->cnts.curbytes += size;
+		if (opt_prof_accum) {
+			cnt->cnts.accumobjs++;
+			cnt->cnts.accumbytes += size;
+		}
+	}
+	/*********/
+	mb_write();
+	/*********/
+	if ((uintptr_t)told_cnt > (uintptr_t)1U)
+		told_cnt->epoch++;
+	if ((uintptr_t)cnt > (uintptr_t)1U)
+		cnt->epoch++;
+	/*********/
+	mb_write(); /* Not strictly necessary. */
+}
+
+JEMALLOC_INLINE void
+prof_free(const void *ptr, size_t size)
+{
+	prof_ctx_t *ctx = prof_ctx_get(ptr);
+	/* Contexts <= 1 carry no counters, so there is nothing to undo. */
+	if ((uintptr_t)ctx > (uintptr_t)1) {
+		assert(size == isalloc(ptr));
+		prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt);
+
+		if (tcnt != NULL) {
+			tcnt->epoch++;
+			/*********/
+			mb_write();
+			/*********/
+			tcnt->cnts.curobjs--;
+			tcnt->cnts.curbytes -= size;
+			/*********/
+			mb_write();
+			/*********/
+			tcnt->epoch++;
+			/*********/
+			mb_write();
+			/*********/
+		} else {
+			/*
+			 * OOM during free() cannot be propagated, so operate
+			 * directly on ctx->cnt_merged.
+			 */
+			malloc_mutex_lock(&ctx->lock);
+			ctx->cnt_merged.curobjs--;
+			ctx->cnt_merged.curbytes -= size;
+			malloc_mutex_unlock(&ctx->lock);
+		}
+	}
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+#endif /* JEMALLOC_PROF */
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/ql.h b/deps/jemalloc.orig/include/jemalloc/internal/ql.h
new file mode 100644
index 00000000..a9ed2393
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/ql.h
@@ -0,0 +1,83 @@
+/*
+ * List definitions: a head pointer (NULL when empty) onto a qr element ring.
+ */
+#define ql_head(a_type)							\
+struct {								\
+	a_type *qlh_first;						\
+}
+
+#define ql_head_initializer(a_head) {NULL}
+
+#define ql_elm(a_type)	qr(a_type)
+
+/* List functions.  ql_first() is also assigned to by the macros below. */
+#define ql_new(a_head) do {						\
+	(a_head)->qlh_first = NULL;					\
+} while (0)
+
+#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field)
+
+#define ql_first(a_head) ((a_head)->qlh_first)
+/* ql_last/ql_next/ql_prev yield NULL at the list ends (the qr ring wraps). */
+#define ql_last(a_head, a_field)					\
+	((ql_first(a_head) != NULL)					\
+	    ? qr_prev(ql_first(a_head), a_field) : NULL)
+
+#define ql_next(a_head, a_elm, a_field)					\
+	((ql_last(a_head, a_field) != (a_elm))				\
+	    ? qr_next((a_elm), a_field)	: NULL)
+
+#define ql_prev(a_head, a_elm, a_field)					\
+	((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field)	\
+				       : NULL)
+/* Insert a_elm before a_qlelm; a_elm becomes the head if a_qlelm was. */
+#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do {		\
+	qr_before_insert((a_qlelm), (a_elm), a_field);			\
+	if (ql_first(a_head) == (a_qlelm)) {				\
+		ql_first(a_head) = (a_elm);				\
+	}								\
+} while (0)
+/* Insert a_elm after a_qlelm; the head is left untouched. */
+#define ql_after_insert(a_qlelm, a_elm, a_field)			\
+	qr_after_insert((a_qlelm), (a_elm), a_field)
+/* Make a_elm the new first element of the list. */
+#define ql_head_insert(a_head, a_elm, a_field) do {			\
+	if (ql_first(a_head) != NULL) {					\
+		qr_before_insert(ql_first(a_head), (a_elm), a_field);	\
+	}								\
+	ql_first(a_head) = (a_elm);					\
+} while (0)
+/* Append a_elm (assumed self-linked): its qr_next is the head afterwards. */
+#define ql_tail_insert(a_head, a_elm, a_field) do {			\
+	if (ql_first(a_head) != NULL) {					\
+		qr_before_insert(ql_first(a_head), (a_elm), a_field);	\
+	}								\
+	ql_first(a_head) = qr_next((a_elm), a_field);			\
+} while (0)
+/* Unlink a_elm; the head advances past it, or becomes NULL if it was alone. */
+#define ql_remove(a_head, a_elm, a_field) do {				\
+	if (ql_first(a_head) == (a_elm)) {				\
+		ql_first(a_head) = qr_next(ql_first(a_head), a_field);	\
+	}								\
+	if (ql_first(a_head) != (a_elm)) {				\
+		qr_remove((a_elm), a_field);				\
+	} else {							\
+		ql_first(a_head) = NULL;				\
+	}								\
+} while (0)
+/* Remove the first element; list must be non-empty (ql_remove derefs it). */
+#define ql_head_remove(a_head, a_type, a_field) do {			\
+	a_type *t = ql_first(a_head);					\
+	ql_remove((a_head), t, a_field);				\
+} while (0)
+/* Remove the last element; list must be non-empty (ql_remove derefs it). */
+#define ql_tail_remove(a_head, a_type, a_field) do {			\
+	a_type *t = ql_last(a_head, a_field);				\
+	ql_remove((a_head), t, a_field);				\
+} while (0)
+
+#define ql_foreach(a_var, a_head, a_field)				\
+	qr_foreach((a_var), ql_first(a_head), a_field)
+
+#define ql_reverse_foreach(a_var, a_head, a_field)			\
+	qr_reverse_foreach((a_var), ql_first(a_head), a_field)
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/qr.h b/deps/jemalloc.orig/include/jemalloc/internal/qr.h
new file mode 100644
index 00000000..fe22352f
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/qr.h
@@ -0,0 +1,67 @@
+/* Ring definitions. */
+#define qr(a_type)							\
+struct {								\
+	a_type	*qre_next;						\
+	a_type	*qre_prev;						\
+}
+
+/* Ring functions. */
+#define qr_new(a_qr, a_field) do {					\
+	(a_qr)->a_field.qre_next = (a_qr);				\
+	(a_qr)->a_field.qre_prev = (a_qr);				\
+} while (0)
+
+#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next)
+
+#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev)
+
+#define qr_before_insert(a_qrelm, a_qr, a_field) do {			\
+	(a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev;		\
+	(a_qr)->a_field.qre_next = (a_qrelm);				\
+	(a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr);		\
+	(a_qrelm)->a_field.qre_prev = (a_qr);				\
+} while (0)
+
+#define qr_after_insert(a_qrelm, a_qr, a_field)				\
+    do									\
+    {									\
+	(a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next;		\
+	(a_qr)->a_field.qre_prev = (a_qrelm);				\
+	(a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr);		\
+	(a_qrelm)->a_field.qre_next = (a_qr);				\
+    } while (0)
+
+#define qr_meld(a_qr_a, a_qr_b, a_field) do {				\
+	void *t;							\
+	(a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b);	\
+	(a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a);	\
+	t = (a_qr_a)->a_field.qre_prev;					\
+	(a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev;	\
+	(a_qr_b)->a_field.qre_prev = t;					\
+} while (0)
+
+/* qr_meld() and qr_split() are functionally equivalent, so there's no need to
+ * have two copies of the code. */
+#define qr_split(a_qr_a, a_qr_b, a_field)				\
+	qr_meld((a_qr_a), (a_qr_b), a_field)
+
+#define qr_remove(a_qr, a_field) do {					\
+	(a_qr)->a_field.qre_prev->a_field.qre_next			\
+	    = (a_qr)->a_field.qre_next;					\
+	(a_qr)->a_field.qre_next->a_field.qre_prev			\
+	    = (a_qr)->a_field.qre_prev;					\
+	(a_qr)->a_field.qre_next = (a_qr);				\
+	(a_qr)->a_field.qre_prev = (a_qr);				\
+} while (0)
+
+#define qr_foreach(var, a_qr, a_field)					\
+	for ((var) = (a_qr);						\
+	    (var) != NULL;						\
+	    (var) = (((var)->a_field.qre_next != (a_qr))		\
+	    ? (var)->a_field.qre_next : NULL))
+
+#define qr_reverse_foreach(var, a_qr, a_field)				\
+	for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL;	\
+	    (var) != NULL;						\
+	    (var) = (((var) != (a_qr))					\
+	    ? (var)->a_field.qre_prev : NULL))
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/rb.h b/deps/jemalloc.orig/include/jemalloc/internal/rb.h
new file mode 100644
index 00000000..ee9b009d
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/rb.h
@@ -0,0 +1,973 @@
+/*-
+ *******************************************************************************
+ *
+ * cpp macro implementation of left-leaning 2-3 red-black trees.  Parent
+ * pointers are not used, and color bits are stored in the least significant
+ * bit of right-child pointers (if RB_COMPACT is defined), thus making node
+ * linkage as compact as is possible for red-black trees.
+ *
+ * Usage:
+ *
+ *   #include <stdint.h>
+ *   #include <stdbool.h>
+ *   #define NDEBUG // (Optional, see assert(3).)
+ *   #include <assert.h>
+ *   #define RB_COMPACT // (Optional, embed color bits in right-child pointers.)
+ *   #include <rb.h>
+ *   ...
+ *
+ *******************************************************************************
+ */
+
+#ifndef RB_H_
+#define	RB_H_
+
+#if 0
+__FBSDID("$FreeBSD: head/lib/libc/stdlib/rb.h 204493 2010-02-28 22:57:13Z jasone $");
+#endif
+
+#ifdef RB_COMPACT
+/* Node structure. */
+#define	rb_node(a_type)							\
+struct {								\
+    a_type *rbn_left;							\
+    a_type *rbn_right_red;						\
+}
+#else
+#define	rb_node(a_type)							\
+struct {								\
+    a_type *rbn_left;							\
+    a_type *rbn_right;							\
+    bool rbn_red;							\
+}
+#endif
+
+/* Root structure. */
+#define	rb_tree(a_type)							\
+struct {								\
+    a_type *rbt_root;							\
+    a_type rbt_nil;							\
+}
+
+/* Left accessors. */
+#define	rbtn_left_get(a_type, a_field, a_node)				\
+    ((a_node)->a_field.rbn_left)
+#define	rbtn_left_set(a_type, a_field, a_node, a_left) do {		\
+    (a_node)->a_field.rbn_left = a_left;				\
+} while (0)
+
+#ifdef RB_COMPACT
+/* Right accessors. */
+#define	rbtn_right_get(a_type, a_field, a_node)				\
+    ((a_type *) (((uintptr_t) (a_node)->a_field.rbn_right_red)		\
+      & ~((uintptr_t)1)))	/* Mask off the color bit kept in the LSB. */
+#define	rbtn_right_set(a_type, a_field, a_node, a_right) do {		\
+    (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t)(a_right))\
+      | (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1)));	\
+} while (0) /* Replaces the pointer bits; the LSB color bit is preserved. */
+
+/* Color accessors. */
+#define	rbtn_red_get(a_type, a_field, a_node)				\
+    ((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red)		\
+      & ((size_t)1)))
+#define	rbtn_color_set(a_type, a_field, a_node, a_red) do {		\
+    (a_node)->a_field.rbn_right_red = (a_type *) ((((uintptr_t)		\
+      (a_node)->a_field.rbn_right_red) & ~((uintptr_t)1))		\
+      | ((uintptr_t)(bool)(a_red)));					\
+} while (0) /* (bool) normalizes a_red to 0/1 so only the LSB is written. */
+#define	rbtn_red_set(a_type, a_field, a_node) do {			\
+    (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t)		\
+      (a_node)->a_field.rbn_right_red) | ((size_t)1));			\
+} while (0)
+#define	rbtn_black_set(a_type, a_field, a_node) do {			\
+    (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t)		\
+      (a_node)->a_field.rbn_right_red) & ~((uintptr_t)1));		\
+} while (0) /* Clears the LSB color bit; the pointer bits are untouched. */
+#else
+/* Right accessors. */
+#define	rbtn_right_get(a_type, a_field, a_node)				\
+    ((a_node)->a_field.rbn_right)
+#define	rbtn_right_set(a_type, a_field, a_node, a_right) do {		\
+    (a_node)->a_field.rbn_right = a_right;				\
+} while (0)
+
+/* Color accessors. */
+#define	rbtn_red_get(a_type, a_field, a_node)				\
+    ((a_node)->a_field.rbn_red)
+#define	rbtn_color_set(a_type, a_field, a_node, a_red) do {		\
+    (a_node)->a_field.rbn_red = (a_red);				\
+} while (0)
+#define	rbtn_red_set(a_type, a_field, a_node) do {			\
+    (a_node)->a_field.rbn_red = true;					\
+} while (0)
+#define	rbtn_black_set(a_type, a_field, a_node) do {			\
+    (a_node)->a_field.rbn_red = false;					\
+} while (0)
+#endif
+
+/* Node initializer. */
+#define	rbt_node_new(a_type, a_field, a_rbt, a_node) do {		\
+    rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil);	\
+    rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil);	\
+    rbtn_red_set(a_type, a_field, (a_node));				\
+} while (0)
+
+/* Tree initializer. */
+#define	rb_new(a_type, a_field, a_rbt) do {				\
+    (a_rbt)->rbt_root = &(a_rbt)->rbt_nil;				\
+    rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil);		\
+    rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil);			\
+} while (0)
+
+/* Internal utility macros. */
+#define	rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do {		\
+    (r_node) = (a_root);						\
+    if ((r_node) != &(a_rbt)->rbt_nil) {				\
+	for (;								\
+	  rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\
+	  (r_node) = rbtn_left_get(a_type, a_field, (r_node))) {	\
+	}								\
+    }									\
+} while (0)
+
+#define	rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do {		\
+    (r_node) = (a_root);						\
+    if ((r_node) != &(a_rbt)->rbt_nil) {				\
+	for (; rbtn_right_get(a_type, a_field, (r_node)) !=		\
+	  &(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field,	\
+	  (r_node))) {							\
+	}								\
+    }									\
+} while (0)
+
+#define	rbtn_rotate_left(a_type, a_field, a_node, r_node) do {		\
+    (r_node) = rbtn_right_get(a_type, a_field, (a_node));		\
+    rbtn_right_set(a_type, a_field, (a_node),				\
+      rbtn_left_get(a_type, a_field, (r_node)));			\
+    rbtn_left_set(a_type, a_field, (r_node), (a_node));			\
+} while (0)
+
+#define	rbtn_rotate_right(a_type, a_field, a_node, r_node) do {		\
+    (r_node) = rbtn_left_get(a_type, a_field, (a_node));		\
+    rbtn_left_set(a_type, a_field, (a_node),				\
+      rbtn_right_get(a_type, a_field, (r_node)));			\
+    rbtn_right_set(a_type, a_field, (r_node), (a_node));		\
+} while (0)
+
+/*
+ * The rb_proto() macro generates function prototypes that correspond to the
+ * functions generated by an equivalently parameterized call to rb_gen().
+ */
+
+#define	rb_proto(a_attr, a_prefix, a_rbt_type, a_type)			\
+a_attr void								\
+a_prefix##new(a_rbt_type *rbtree);					\
+a_attr a_type *								\
+a_prefix##first(a_rbt_type *rbtree);					\
+a_attr a_type *								\
+a_prefix##last(a_rbt_type *rbtree);					\
+a_attr a_type *								\
+a_prefix##next(a_rbt_type *rbtree, a_type *node);			\
+a_attr a_type *								\
+a_prefix##prev(a_rbt_type *rbtree, a_type *node);			\
+a_attr a_type *								\
+a_prefix##search(a_rbt_type *rbtree, a_type *key);			\
+a_attr a_type *								\
+a_prefix##nsearch(a_rbt_type *rbtree, a_type *key);			\
+a_attr a_type *								\
+a_prefix##psearch(a_rbt_type *rbtree, a_type *key);			\
+a_attr void								\
+a_prefix##insert(a_rbt_type *rbtree, a_type *node);			\
+a_attr void								\
+a_prefix##remove(a_rbt_type *rbtree, a_type *node);			\
+a_attr a_type *								\
+a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)(	\
+  a_rbt_type *, a_type *, void *), void *arg);				\
+a_attr a_type *								\
+a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start,		\
+  a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg);
+
+/*
+ * The rb_gen() macro generates a type-specific red-black tree implementation,
+ * based on the above cpp macros.
+ *
+ * Arguments:
+ *
+ *   a_attr    : Function attribute for generated functions (ex: static).
+ *   a_prefix  : Prefix for generated functions (ex: ex_).
+ *   a_rbt_type: Type for red-black tree data structure (ex: ex_t).
+ *   a_type    : Type for red-black tree node data structure (ex: ex_node_t).
+ *   a_field   : Name of red-black tree node linkage (ex: ex_link).
+ *   a_cmp     : Node comparison function name, with the following prototype:
+ *                 int (*a_cmp)(a_type *a_node, a_type *a_other);
+ *                                       ^^^^^^
+ *                                    or a_key
+ *               Interpretation of comparison function return values:
+ *                 -1 : a_node <  a_other
+ *                  0 : a_node == a_other
+ *                  1 : a_node >  a_other
+ *               In all cases, the a_node or a_key macro argument is the first
+ *               argument to the comparison function, which makes it possible
+ *               to write comparison functions that treat the first argument
+ *               specially.
+ *
+ * Assuming the following setup:
+ *
+ *   typedef struct ex_node_s ex_node_t;
+ *   struct ex_node_s {
+ *       rb_node(ex_node_t) ex_link;
+ *   };
+ *   typedef rb_tree(ex_node_t) ex_t;
+ *   rb_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp)
+ *
+ * The following API is generated:
+ *
+ *   static void
+ *   ex_new(ex_t *extree);
+ *       Description: Initialize a red-black tree structure.
+ *       Args:
+ *         extree: Pointer to an uninitialized red-black tree object.
+ *
+ *   static ex_node_t *
+ *   ex_first(ex_t *extree);
+ *   static ex_node_t *
+ *   ex_last(ex_t *extree);
+ *       Description: Get the first/last node in extree.
+ *       Args:
+ *         extree: Pointer to an initialized red-black tree object.
+ *       Ret: First/last node in extree, or NULL if extree is empty.
+ *
+ *   static ex_node_t *
+ *   ex_next(ex_t *extree, ex_node_t *node);
+ *   static ex_node_t *
+ *   ex_prev(ex_t *extree, ex_node_t *node);
+ *       Description: Get node's successor/predecessor.
+ *       Args:
+ *         extree: Pointer to an initialized red-black tree object.
+ *         node : A node in extree.
+ *       Ret: node's successor/predecessor in extree, or NULL if node is
+ *            last/first.
+ *
+ *   static ex_node_t *
+ *   ex_search(ex_t *extree, ex_node_t *key);
+ *       Description: Search for node that matches key.
+ *       Args:
+ *         extree: Pointer to an initialized red-black tree object.
+ *         key  : Search key.
+ *       Ret: Node in extree that matches key, or NULL if no match.
+ *
+ *   static ex_node_t *
+ *   ex_nsearch(ex_t *extree, ex_node_t *key);
+ *   static ex_node_t *
+ *   ex_psearch(ex_t *extree, ex_node_t *key);
+ *       Description: Search for node that matches key.  If no match is found,
+ *                    return what would be key's successor/predecessor, were
+ *                    key in extree.
+ *       Args:
+ *         extree: Pointer to an initialized red-black tree object.
+ *         key   : Search key.
+ *       Ret: Node in extree that matches key, or if no match, hypothetical
+ *            node's successor/predecessor (NULL if no successor/predecessor).
+ *
+ *   static void
+ *   ex_insert(ex_t *extree, ex_node_t *node);
+ *       Description: Insert node into extree.
+ *       Args:
+ *         extree: Pointer to an initialized red-black tree object.
+ *         node  : Node to be inserted into extree.
+ *
+ *   static void
+ *   ex_remove(ex_t *extree, ex_node_t *node);
+ *       Description: Remove node from extree.
+ *       Args:
+ *         extree: Pointer to an initialized red-black tree object.
+ *         node  : Node in extree to be removed.
+ *
+ *   static ex_node_t *
+ *   ex_iter(ex_t *extree, ex_node_t *start, ex_node_t *(*cb)(ex_t *,
+ *     ex_node_t *, void *), void *arg);
+ *   static ex_node_t *
+ *   ex_reverse_iter(ex_t *extree, ex_node_t *start, ex_node_t *(*cb)(ex_t *,
+ *     ex_node_t *, void *), void *arg);
+ *       Description: Iterate forward/backward over extree, starting at node.
+ *                    If extree is modified, iteration must be immediately
+ *                    terminated by the callback function that causes the
+ *                    modification.
+ *       Args:
+ *         extree: Pointer to an initialized red-black tree object.
+ *         start : Node at which to start iteration, or NULL to start at
+ *                 first/last node.
+ *         cb    : Callback function, which is called for each node during
+ *                 iteration.  Under normal circumstances the callback function
+ *                 should return NULL, which causes iteration to continue.  If a
+ *                 callback function returns non-NULL, iteration is immediately
+ *                 terminated and the non-NULL return value is returned by the
+ *                 iterator.  This is useful for re-starting iteration after
+ *                 modifying extree.
+ *         arg   : Opaque pointer passed to cb().
+ *       Ret: NULL if iteration completed, or the non-NULL callback return value
+ *            that caused termination of the iteration.
+ */
+#define	rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp)	\
+a_attr void								\
+a_prefix##new(a_rbt_type *rbtree) {					\
+    rb_new(a_type, a_field, rbtree);					\
+}									\
+a_attr a_type *								\
+a_prefix##first(a_rbt_type *rbtree) {					\
+    a_type *ret;							\
+    rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret);		\
+    if (ret == &rbtree->rbt_nil) {					\
+	ret = NULL;							\
+    }									\
+    return (ret);							\
+}									\
+a_attr a_type *								\
+a_prefix##last(a_rbt_type *rbtree) {					\
+    a_type *ret;							\
+    rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret);		\
+    if (ret == &rbtree->rbt_nil) {					\
+	ret = NULL;							\
+    }									\
+    return (ret);							\
+}									\
+a_attr a_type *								\
+a_prefix##next(a_rbt_type *rbtree, a_type *node) {			\
+    a_type *ret;							\
+    if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) {	\
+	rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type,	\
+	  a_field, node), ret);						\
+    } else {								\
+	a_type *tnode = rbtree->rbt_root;				\
+	assert(tnode != &rbtree->rbt_nil);				\
+	ret = &rbtree->rbt_nil;						\
+	while (true) {							\
+	    int cmp = (a_cmp)(node, tnode);				\
+	    if (cmp < 0) {						\
+		ret = tnode;						\
+		tnode = rbtn_left_get(a_type, a_field, tnode);		\
+	    } else if (cmp > 0) {					\
+		tnode = rbtn_right_get(a_type, a_field, tnode);		\
+	    } else {							\
+		break;							\
+	    }								\
+	    assert(tnode != &rbtree->rbt_nil);				\
+	}								\
+    }									\
+    if (ret == &rbtree->rbt_nil) {					\
+	ret = (NULL);							\
+    }									\
+    return (ret);							\
+}									\
+a_attr a_type *								\
+a_prefix##prev(a_rbt_type *rbtree, a_type *node) {			\
+    a_type *ret;							\
+    if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) {	\
+	rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type,	\
+	  a_field, node), ret);						\
+    } else {								\
+	a_type *tnode = rbtree->rbt_root;				\
+	assert(tnode != &rbtree->rbt_nil);				\
+	ret = &rbtree->rbt_nil;						\
+	while (true) {							\
+	    int cmp = (a_cmp)(node, tnode);				\
+	    if (cmp < 0) {						\
+		tnode = rbtn_left_get(a_type, a_field, tnode);		\
+	    } else if (cmp > 0) {					\
+		ret = tnode;						\
+		tnode = rbtn_right_get(a_type, a_field, tnode);		\
+	    } else {							\
+		break;							\
+	    }								\
+	    assert(tnode != &rbtree->rbt_nil);				\
+	}								\
+    }									\
+    if (ret == &rbtree->rbt_nil) {					\
+	ret = (NULL);							\
+    }									\
+    return (ret);							\
+}									\
+a_attr a_type *								\
+a_prefix##search(a_rbt_type *rbtree, a_type *key) {			\
+    a_type *ret;							\
+    int cmp;								\
+    ret = rbtree->rbt_root;						\
+    while (ret != &rbtree->rbt_nil					\
+      && (cmp = (a_cmp)(key, ret)) != 0) {				\
+	if (cmp < 0) {							\
+	    ret = rbtn_left_get(a_type, a_field, ret);			\
+	} else {							\
+	    ret = rbtn_right_get(a_type, a_field, ret);			\
+	}								\
+    }									\
+    if (ret == &rbtree->rbt_nil) {					\
+	ret = (NULL);							\
+    }									\
+    return (ret);							\
+}									\
+a_attr a_type *								\
+a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) {			\
+    a_type *ret;							\
+    a_type *tnode = rbtree->rbt_root;					\
+    ret = &rbtree->rbt_nil;						\
+    while (tnode != &rbtree->rbt_nil) {					\
+	int cmp = (a_cmp)(key, tnode);					\
+	if (cmp < 0) {							\
+	    ret = tnode;						\
+	    tnode = rbtn_left_get(a_type, a_field, tnode);		\
+	} else if (cmp > 0) {						\
+	    tnode = rbtn_right_get(a_type, a_field, tnode);		\
+	} else {							\
+	    ret = tnode;						\
+	    break;							\
+	}								\
+    }									\
+    if (ret == &rbtree->rbt_nil) {					\
+	ret = (NULL);							\
+    }									\
+    return (ret);							\
+}									\
+a_attr a_type *								\
+a_prefix##psearch(a_rbt_type *rbtree, a_type *key) {			\
+    a_type *ret;							\
+    a_type *tnode = rbtree->rbt_root;					\
+    ret = &rbtree->rbt_nil;						\
+    while (tnode != &rbtree->rbt_nil) {					\
+	int cmp = (a_cmp)(key, tnode);					\
+	if (cmp < 0) {							\
+	    tnode = rbtn_left_get(a_type, a_field, tnode);		\
+	} else if (cmp > 0) {						\
+	    ret = tnode;						\
+	    tnode = rbtn_right_get(a_type, a_field, tnode);		\
+	} else {							\
+	    ret = tnode;						\
+	    break;							\
+	}								\
+    }									\
+    if (ret == &rbtree->rbt_nil) {					\
+	ret = (NULL);							\
+    }									\
+    return (ret);							\
+}									\
+a_attr void								\
+a_prefix##insert(a_rbt_type *rbtree, a_type *node) {			\
+    struct {								\
+	a_type *node;							\
+	int cmp;							\
+    } path[sizeof(void *) << 4], *pathp;				\
+    rbt_node_new(a_type, a_field, rbtree, node);			\
+    /* Wind. */								\
+    path->node = rbtree->rbt_root;					\
+    for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) {	\
+	int cmp = pathp->cmp = a_cmp(node, pathp->node);		\
+	assert(cmp != 0);						\
+	if (cmp < 0) {							\
+	    pathp[1].node = rbtn_left_get(a_type, a_field,		\
+	      pathp->node);						\
+	} else {							\
+	    pathp[1].node = rbtn_right_get(a_type, a_field,		\
+	      pathp->node);						\
+	}								\
+    }									\
+    pathp->node = node;							\
+    /* Unwind. */							\
+    for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) {	\
+	a_type *cnode = pathp->node;					\
+	if (pathp->cmp < 0) {						\
+	    a_type *left = pathp[1].node;				\
+	    rbtn_left_set(a_type, a_field, cnode, left);		\
+	    if (rbtn_red_get(a_type, a_field, left)) {			\
+		a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
+		if (rbtn_red_get(a_type, a_field, leftleft)) {		\
+		    /* Fix up 4-node. */				\
+		    a_type *tnode;					\
+		    rbtn_black_set(a_type, a_field, leftleft);		\
+		    rbtn_rotate_right(a_type, a_field, cnode, tnode);	\
+		    cnode = tnode;					\
+		}							\
+	    } else {							\
+		return;							\
+	    }								\
+	} else {							\
+	    a_type *right = pathp[1].node;				\
+	    rbtn_right_set(a_type, a_field, cnode, right);		\
+	    if (rbtn_red_get(a_type, a_field, right)) {			\
+		a_type *left = rbtn_left_get(a_type, a_field, cnode);	\
+		if (rbtn_red_get(a_type, a_field, left)) {		\
+		    /* Split 4-node. */					\
+		    rbtn_black_set(a_type, a_field, left);		\
+		    rbtn_black_set(a_type, a_field, right);		\
+		    rbtn_red_set(a_type, a_field, cnode);		\
+		} else {						\
+		    /* Lean left. */					\
+		    a_type *tnode;					\
+		    bool tred = rbtn_red_get(a_type, a_field, cnode);	\
+		    rbtn_rotate_left(a_type, a_field, cnode, tnode);	\
+		    rbtn_color_set(a_type, a_field, tnode, tred);	\
+		    rbtn_red_set(a_type, a_field, cnode);		\
+		    cnode = tnode;					\
+		}							\
+	    } else {							\
+		return;							\
+	    }								\
+	}								\
+	pathp->node = cnode;						\
+    }									\
+    /* Set root, and make it black. */					\
+    rbtree->rbt_root = path->node;					\
+    rbtn_black_set(a_type, a_field, rbtree->rbt_root);			\
+}									\
+a_attr void								\
+a_prefix##remove(a_rbt_type *rbtree, a_type *node) {			\
+    struct {								\
+	a_type *node;							\
+	int cmp;							\
+    } *pathp, *nodep, path[sizeof(void *) << 4];			\
+    /* Wind. */								\
+    nodep = NULL; /* Silence compiler warning. */			\
+    path->node = rbtree->rbt_root;					\
+    for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) {	\
+	int cmp = pathp->cmp = a_cmp(node, pathp->node);		\
+	if (cmp < 0) {							\
+	    pathp[1].node = rbtn_left_get(a_type, a_field,		\
+	      pathp->node);						\
+	} else {							\
+	    pathp[1].node = rbtn_right_get(a_type, a_field,		\
+	      pathp->node);						\
+	    if (cmp == 0) {						\
+	        /* Find node's successor, in preparation for swap. */	\
+		pathp->cmp = 1;						\
+		nodep = pathp;						\
+		for (pathp++; pathp->node != &rbtree->rbt_nil;		\
+		  pathp++) {						\
+		    pathp->cmp = -1;					\
+		    pathp[1].node = rbtn_left_get(a_type, a_field,	\
+		      pathp->node);					\
+		}							\
+		break;							\
+	    }								\
+	}								\
+    }									\
+    assert(nodep->node == node);					\
+    pathp--;								\
+    if (pathp->node != node) {						\
+	/* Swap node with its successor. */				\
+	bool tred = rbtn_red_get(a_type, a_field, pathp->node);		\
+	rbtn_color_set(a_type, a_field, pathp->node,			\
+	  rbtn_red_get(a_type, a_field, node));				\
+	rbtn_left_set(a_type, a_field, pathp->node,			\
+	  rbtn_left_get(a_type, a_field, node));			\
+	/* If node's successor is its right child, the following code */\
+	/* will do the wrong thing for the right child pointer.       */\
+	/* However, it doesn't matter, because the pointer will be    */\
+	/* properly set when the successor is pruned.                 */\
+	rbtn_right_set(a_type, a_field, pathp->node,			\
+	  rbtn_right_get(a_type, a_field, node));			\
+	rbtn_color_set(a_type, a_field, node, tred);			\
+	/* The pruned leaf node's child pointers are never accessed   */\
+	/* again, so don't bother setting them to nil.                */\
+	nodep->node = pathp->node;					\
+	pathp->node = node;						\
+	if (nodep == path) {						\
+	    rbtree->rbt_root = nodep->node;				\
+	} else {							\
+	    if (nodep[-1].cmp < 0) {					\
+		rbtn_left_set(a_type, a_field, nodep[-1].node,		\
+		  nodep->node);						\
+	    } else {							\
+		rbtn_right_set(a_type, a_field, nodep[-1].node,		\
+		  nodep->node);						\
+	    }								\
+	}								\
+    } else {								\
+	a_type *left = rbtn_left_get(a_type, a_field, node);		\
+	if (left != &rbtree->rbt_nil) {					\
+	    /* node has no successor, but it has a left child.        */\
+	    /* Splice node out, without losing the left child.        */\
+	    assert(rbtn_red_get(a_type, a_field, node) == false);	\
+	    assert(rbtn_red_get(a_type, a_field, left));		\
+	    rbtn_black_set(a_type, a_field, left);			\
+	    if (pathp == path) {					\
+		rbtree->rbt_root = left;				\
+	    } else {							\
+		if (pathp[-1].cmp < 0) {				\
+		    rbtn_left_set(a_type, a_field, pathp[-1].node,	\
+		      left);						\
+		} else {						\
+		    rbtn_right_set(a_type, a_field, pathp[-1].node,	\
+		      left);						\
+		}							\
+	    }								\
+	    return;							\
+	} else if (pathp == path) {					\
+	    /* The tree only contained one node. */			\
+	    rbtree->rbt_root = &rbtree->rbt_nil;			\
+	    return;							\
+	}								\
+    }									\
+    if (rbtn_red_get(a_type, a_field, pathp->node)) {			\
+	/* Prune red node, which requires no fixup. */			\
+	assert(pathp[-1].cmp < 0);					\
+	rbtn_left_set(a_type, a_field, pathp[-1].node,			\
+	  &rbtree->rbt_nil);						\
+	return;								\
+    }									\
+    /* The node to be pruned is black, so unwind until balance is     */\
+    /* restored.                                                      */\
+    pathp->node = &rbtree->rbt_nil;					\
+    for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) {	\
+	assert(pathp->cmp != 0);					\
+	if (pathp->cmp < 0) {						\
+	    rbtn_left_set(a_type, a_field, pathp->node,			\
+	      pathp[1].node);						\
+	    assert(rbtn_red_get(a_type, a_field, pathp[1].node)		\
+	      == false);						\
+	    if (rbtn_red_get(a_type, a_field, pathp->node)) {		\
+		a_type *right = rbtn_right_get(a_type, a_field,		\
+		  pathp->node);						\
+		a_type *rightleft = rbtn_left_get(a_type, a_field,	\
+		  right);						\
+		a_type *tnode;						\
+		if (rbtn_red_get(a_type, a_field, rightleft)) {		\
+		    /* In the following diagrams, ||, //, and \\      */\
+		    /* indicate the path to the removed node.         */\
+		    /*                                                */\
+		    /*      ||                                        */\
+		    /*    pathp(r)                                    */\
+		    /*  //        \                                   */\
+		    /* (b)        (b)                                 */\
+		    /*           /                                    */\
+		    /*          (r)                                   */\
+		    /*                                                */\
+		    rbtn_black_set(a_type, a_field, pathp->node);	\
+		    rbtn_rotate_right(a_type, a_field, right, tnode);	\
+		    rbtn_right_set(a_type, a_field, pathp->node, tnode);\
+		    rbtn_rotate_left(a_type, a_field, pathp->node,	\
+		      tnode);						\
+		} else {						\
+		    /*      ||                                        */\
+		    /*    pathp(r)                                    */\
+		    /*  //        \                                   */\
+		    /* (b)        (b)                                 */\
+		    /*           /                                    */\
+		    /*          (b)                                   */\
+		    /*                                                */\
+		    rbtn_rotate_left(a_type, a_field, pathp->node,	\
+		      tnode);						\
+		}							\
+		/* Balance restored, but rotation modified subtree    */\
+		/* root.                                              */\
+		assert((uintptr_t)pathp > (uintptr_t)path);		\
+		if (pathp[-1].cmp < 0) {				\
+		    rbtn_left_set(a_type, a_field, pathp[-1].node,	\
+		      tnode);						\
+		} else {						\
+		    rbtn_right_set(a_type, a_field, pathp[-1].node,	\
+		      tnode);						\
+		}							\
+		return;							\
+	    } else {							\
+		a_type *right = rbtn_right_get(a_type, a_field,		\
+		  pathp->node);						\
+		a_type *rightleft = rbtn_left_get(a_type, a_field,	\
+		  right);						\
+		if (rbtn_red_get(a_type, a_field, rightleft)) {		\
+		    /*      ||                                        */\
+		    /*    pathp(b)                                    */\
+		    /*  //        \                                   */\
+		    /* (b)        (b)                                 */\
+		    /*           /                                    */\
+		    /*          (r)                                   */\
+		    a_type *tnode;					\
+		    rbtn_black_set(a_type, a_field, rightleft);		\
+		    rbtn_rotate_right(a_type, a_field, right, tnode);	\
+		    rbtn_right_set(a_type, a_field, pathp->node, tnode);\
+		    rbtn_rotate_left(a_type, a_field, pathp->node,	\
+		      tnode);						\
+		    /* Balance restored, but rotation modified        */\
+		    /* subtree root, which may actually be the tree   */\
+		    /* root.                                          */\
+		    if (pathp == path) {				\
+			/* Set root. */					\
+			rbtree->rbt_root = tnode;			\
+		    } else {						\
+			if (pathp[-1].cmp < 0) {			\
+			    rbtn_left_set(a_type, a_field,		\
+			      pathp[-1].node, tnode);			\
+			} else {					\
+			    rbtn_right_set(a_type, a_field,		\
+			      pathp[-1].node, tnode);			\
+			}						\
+		    }							\
+		    return;						\
+		} else {						\
+		    /*      ||                                        */\
+		    /*    pathp(b)                                    */\
+		    /*  //        \                                   */\
+		    /* (b)        (b)                                 */\
+		    /*           /                                    */\
+		    /*          (b)                                   */\
+		    a_type *tnode;					\
+		    rbtn_red_set(a_type, a_field, pathp->node);		\
+		    rbtn_rotate_left(a_type, a_field, pathp->node,	\
+		      tnode);						\
+		    pathp->node = tnode;				\
+		}							\
+	    }								\
+	} else {							\
+	    a_type *left;						\
+	    rbtn_right_set(a_type, a_field, pathp->node,		\
+	      pathp[1].node);						\
+	    left = rbtn_left_get(a_type, a_field, pathp->node);		\
+	    if (rbtn_red_get(a_type, a_field, left)) {			\
+		a_type *tnode;						\
+		a_type *leftright = rbtn_right_get(a_type, a_field,	\
+		  left);						\
+		a_type *leftrightleft = rbtn_left_get(a_type, a_field,	\
+		  leftright);						\
+		if (rbtn_red_get(a_type, a_field, leftrightleft)) {	\
+		    /*      ||                                        */\
+		    /*    pathp(b)                                    */\
+		    /*   /        \\                                  */\
+		    /* (r)        (b)                                 */\
+		    /*   \                                            */\
+		    /*   (b)                                          */\
+		    /*   /                                            */\
+		    /* (r)                                            */\
+		    a_type *unode;					\
+		    rbtn_black_set(a_type, a_field, leftrightleft);	\
+		    rbtn_rotate_right(a_type, a_field, pathp->node,	\
+		      unode);						\
+		    rbtn_rotate_right(a_type, a_field, pathp->node,	\
+		      tnode);						\
+		    rbtn_right_set(a_type, a_field, unode, tnode);	\
+		    rbtn_rotate_left(a_type, a_field, unode, tnode);	\
+		} else {						\
+		    /*      ||                                        */\
+		    /*    pathp(b)                                    */\
+		    /*   /        \\                                  */\
+		    /* (r)        (b)                                 */\
+		    /*   \                                            */\
+		    /*   (b)                                          */\
+		    /*   /                                            */\
+		    /* (b)                                            */\
+		    assert(leftright != &rbtree->rbt_nil);		\
+		    rbtn_red_set(a_type, a_field, leftright);		\
+		    rbtn_rotate_right(a_type, a_field, pathp->node,	\
+		      tnode);						\
+		    rbtn_black_set(a_type, a_field, tnode);		\
+		}							\
+		/* Balance restored, but rotation modified subtree    */\
+		/* root, which may actually be the tree root.         */\
+		if (pathp == path) {					\
+		    /* Set root. */					\
+		    rbtree->rbt_root = tnode;				\
+		} else {						\
+		    if (pathp[-1].cmp < 0) {				\
+			rbtn_left_set(a_type, a_field, pathp[-1].node,	\
+			  tnode);					\
+		    } else {						\
+			rbtn_right_set(a_type, a_field, pathp[-1].node,	\
+			  tnode);					\
+		    }							\
+		}							\
+		return;							\
+	    } else if (rbtn_red_get(a_type, a_field, pathp->node)) {	\
+		a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
+		if (rbtn_red_get(a_type, a_field, leftleft)) {		\
+		    /*        ||                                      */\
+		    /*      pathp(r)                                  */\
+		    /*     /        \\                                */\
+		    /*   (b)        (b)                               */\
+		    /*   /                                            */\
+		    /* (r)                                            */\
+		    a_type *tnode;					\
+		    rbtn_black_set(a_type, a_field, pathp->node);	\
+		    rbtn_red_set(a_type, a_field, left);		\
+		    rbtn_black_set(a_type, a_field, leftleft);		\
+		    rbtn_rotate_right(a_type, a_field, pathp->node,	\
+		      tnode);						\
+		    /* Balance restored, but rotation modified        */\
+		    /* subtree root.                                  */\
+		    assert((uintptr_t)pathp > (uintptr_t)path);		\
+		    if (pathp[-1].cmp < 0) {				\
+			rbtn_left_set(a_type, a_field, pathp[-1].node,	\
+			  tnode);					\
+		    } else {						\
+			rbtn_right_set(a_type, a_field, pathp[-1].node,	\
+			  tnode);					\
+		    }							\
+		    return;						\
+		} else {						\
+		    /*        ||                                      */\
+		    /*      pathp(r)                                  */\
+		    /*     /        \\                                */\
+		    /*   (b)        (b)                               */\
+		    /*   /                                            */\
+		    /* (b)                                            */\
+		    rbtn_red_set(a_type, a_field, left);		\
+		    rbtn_black_set(a_type, a_field, pathp->node);	\
+		    /* Balance restored. */				\
+		    return;						\
+		}							\
+	    } else {							\
+		a_type *leftleft = rbtn_left_get(a_type, a_field, left);\
+		if (rbtn_red_get(a_type, a_field, leftleft)) {		\
+		    /*               ||                               */\
+		    /*             pathp(b)                           */\
+		    /*            /        \\                         */\
+		    /*          (b)        (b)                        */\
+		    /*          /                                     */\
+		    /*        (r)                                     */\
+		    a_type *tnode;					\
+		    rbtn_black_set(a_type, a_field, leftleft);		\
+		    rbtn_rotate_right(a_type, a_field, pathp->node,	\
+		      tnode);						\
+		    /* Balance restored, but rotation modified        */\
+		    /* subtree root, which may actually be the tree   */\
+		    /* root.                                          */\
+		    if (pathp == path) {				\
+			/* Set root. */					\
+			rbtree->rbt_root = tnode;			\
+		    } else {						\
+			if (pathp[-1].cmp < 0) {			\
+			    rbtn_left_set(a_type, a_field,		\
+			      pathp[-1].node, tnode);			\
+			} else {					\
+			    rbtn_right_set(a_type, a_field,		\
+			      pathp[-1].node, tnode);			\
+			}						\
+		    }							\
+		    return;						\
+		} else {						\
+		    /*               ||                               */\
+		    /*             pathp(b)                           */\
+		    /*            /        \\                         */\
+		    /*          (b)        (b)                        */\
+		    /*          /                                     */\
+		    /*        (b)                                     */\
+		    rbtn_red_set(a_type, a_field, left);		\
+		}							\
+	    }								\
+	}								\
+    }									\
+    /* Set root. */							\
+    rbtree->rbt_root = path->node;					\
+    assert(rbtn_red_get(a_type, a_field, rbtree->rbt_root) == false);	\
+}									\
+a_attr a_type *								\
+a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node,		\
+  a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) {		\
+    if (node == &rbtree->rbt_nil) {					\
+	return (&rbtree->rbt_nil);					\
+    } else {								\
+	a_type *ret;							\
+	if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type,	\
+	  a_field, node), cb, arg)) != &rbtree->rbt_nil			\
+	  || (ret = cb(rbtree, node, arg)) != NULL) {			\
+	    return (ret);						\
+	}								\
+	return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type,	\
+	  a_field, node), cb, arg));					\
+    }									\
+}									\
+a_attr a_type *								\
+a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node,	\
+  a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) {		\
+    int cmp = a_cmp(start, node);					\
+    if (cmp < 0) {							\
+	a_type *ret;							\
+	if ((ret = a_prefix##iter_start(rbtree, start,			\
+	  rbtn_left_get(a_type, a_field, node), cb, arg)) !=		\
+	  &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) {	\
+	    return (ret);						\
+	}								\
+	return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type,	\
+	  a_field, node), cb, arg));					\
+    } else if (cmp > 0) {						\
+	return (a_prefix##iter_start(rbtree, start,			\
+	  rbtn_right_get(a_type, a_field, node), cb, arg));		\
+    } else {								\
+	a_type *ret;							\
+	if ((ret = cb(rbtree, node, arg)) != NULL) {			\
+	    return (ret);						\
+	}								\
+	return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type,	\
+	  a_field, node), cb, arg));					\
+    }									\
+}									\
+a_attr a_type *								\
+a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)(	\
+  a_rbt_type *, a_type *, void *), void *arg) {				\
+    a_type *ret;							\
+    if (start != NULL) {						\
+	ret = a_prefix##iter_start(rbtree, start, rbtree->rbt_root,	\
+	  cb, arg);							\
+    } else {								\
+	ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\
+    }									\
+    if (ret == &rbtree->rbt_nil) {					\
+	ret = NULL;							\
+    }									\
+    return (ret);							\
+}									\
+a_attr a_type *								\
+a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node,	\
+  a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) {		\
+    if (node == &rbtree->rbt_nil) {					\
+	return (&rbtree->rbt_nil);					\
+    } else {								\
+	a_type *ret;							\
+	if ((ret = a_prefix##reverse_iter_recurse(rbtree,		\
+	  rbtn_right_get(a_type, a_field, node), cb, arg)) !=		\
+	  &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) {	\
+	    return (ret);						\
+	}								\
+	return (a_prefix##reverse_iter_recurse(rbtree,			\
+	  rbtn_left_get(a_type, a_field, node), cb, arg));		\
+    }									\
+}									\
+a_attr a_type *								\
+a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start,		\
+  a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *),		\
+  void *arg) {								\
+    int cmp = a_cmp(start, node);					\
+    if (cmp > 0) {							\
+	a_type *ret;							\
+	if ((ret = a_prefix##reverse_iter_start(rbtree, start,		\
+	  rbtn_right_get(a_type, a_field, node), cb, arg)) !=		\
+	  &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) {	\
+	    return (ret);						\
+	}								\
+	return (a_prefix##reverse_iter_recurse(rbtree,			\
+	  rbtn_left_get(a_type, a_field, node), cb, arg));		\
+    } else if (cmp < 0) {						\
+	return (a_prefix##reverse_iter_start(rbtree, start,		\
+	  rbtn_left_get(a_type, a_field, node), cb, arg));		\
+    } else {								\
+	a_type *ret;							\
+	if ((ret = cb(rbtree, node, arg)) != NULL) {			\
+	    return (ret);						\
+	}								\
+	return (a_prefix##reverse_iter_recurse(rbtree,			\
+	  rbtn_left_get(a_type, a_field, node), cb, arg));		\
+    }									\
+}									\
+a_attr a_type *								\
+a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start,		\
+  a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) {		\
+    a_type *ret;							\
+    if (start != NULL) {						\
+	ret = a_prefix##reverse_iter_start(rbtree, start,		\
+	  rbtree->rbt_root, cb, arg);					\
+    } else {								\
+	ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root,	\
+	  cb, arg);							\
+    }									\
+    if (ret == &rbtree->rbt_nil) {					\
+	ret = NULL;							\
+    }									\
+    return (ret);							\
+}
+
+#endif /* RB_H_ */
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/rtree.h b/deps/jemalloc.orig/include/jemalloc/internal/rtree.h
new file mode 100644
index 00000000..95d6355a
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/rtree.h
@@ -0,0 +1,161 @@
+/*
+ * This radix tree implementation is tailored to the singular purpose of
+ * tracking which chunks are currently owned by jemalloc.  This functionality
+ * is mandatory for OS X, where jemalloc must be able to respond to object
+ * ownership queries.
+ *
+ *******************************************************************************
+ */
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct rtree_s rtree_t;
+
+/*
+ * Size of each radix tree node (must be a power of 2).  This impacts tree
+ * depth.
+ */
+#if (LG_SIZEOF_PTR == 2)
+#  define RTREE_NODESIZE (1U << 14)
+#else
+#  define RTREE_NODESIZE CACHELINE
+#endif
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+struct rtree_s {
+	malloc_mutex_t	mutex;	/* Serializes rtree_set() updates. */
+	void		**root;	/* Top-level node; lookups start here. */
+	unsigned	height;	/* Number of levels, leaf included. */
+	unsigned	level2bits[1]; /* Bits per level; dynamically sized. */
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+rtree_t	*rtree_new(unsigned bits);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+#ifndef JEMALLOC_DEBUG
+void	*rtree_get_locked(rtree_t *rtree, uintptr_t key);
+#endif
+void	*rtree_get(rtree_t *rtree, uintptr_t key);
+bool	rtree_set(rtree_t *rtree, uintptr_t key, void *val);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_))
+#define	RTREE_GET_GENERATE(f)						\
+/* Define lookup function f(); the key's low-order bits are ignored. */	\
+JEMALLOC_INLINE void *							\
+f(rtree_t *rtree, uintptr_t key)					\
+{									\
+	void *ret;							\
+	uintptr_t subkey;						\
+	unsigned i, lshift, height, bits;				\
+	void **node, **child;						\
+									\
+	RTREE_LOCK(&rtree->mutex);					\
+	for (i = lshift = 0, height = rtree->height, node = rtree->root;\
+	    i < height - 1;						\
+	    i++, lshift += bits, node = child) {			\
+		bits = rtree->level2bits[i];				\
+		subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \
+		    3)) - bits);					\
+		child = (void**)node[subkey];				\
+		if (child == NULL) {					\
+			RTREE_UNLOCK(&rtree->mutex);			\
+			return (NULL);					\
+		}							\
+	}								\
+									\
+	/*								\
+	 * node is now the leaf level, whose slots hold the stored	\
+	 * values rather than child node pointers.			\
+	 */								\
+	bits = rtree->level2bits[i];					\
+	subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) -	\
+	    bits);							\
+	ret = node[subkey];						\
+	RTREE_UNLOCK(&rtree->mutex);					\
+									\
+	RTREE_GET_VALIDATE						\
+	return (ret);							\
+}
+
+#ifdef JEMALLOC_DEBUG
+#  define RTREE_LOCK(l)		malloc_mutex_lock(l)
+#  define RTREE_UNLOCK(l)	malloc_mutex_unlock(l)
+#  define RTREE_GET_VALIDATE
+RTREE_GET_GENERATE(rtree_get_locked)
+#  undef RTREE_LOCK
+#  undef RTREE_UNLOCK
+#  undef RTREE_GET_VALIDATE
+#endif
+
+#define	RTREE_LOCK(l)
+#define	RTREE_UNLOCK(l)
+#ifdef JEMALLOC_DEBUG
+   /*
+    * Suppose that it were possible for a jemalloc-allocated chunk to be
+    * munmap()ped, followed by a different allocator in another thread re-using
+    * overlapping virtual memory, all without invalidating the cached rtree
+    * value.  The result would be a false positive (the rtree would claim that
+    * jemalloc owns memory that it had actually discarded).  This scenario
+    * seems impossible, but the following assertion is a prudent sanity check.
+    */
+#  define RTREE_GET_VALIDATE						\
+	assert(rtree_get_locked(rtree, key) == ret);
+#else
+#  define RTREE_GET_VALIDATE
+#endif
+RTREE_GET_GENERATE(rtree_get)
+#undef RTREE_LOCK
+#undef RTREE_UNLOCK
+#undef RTREE_GET_VALIDATE
+
+JEMALLOC_INLINE bool
+rtree_set(rtree_t *rtree, uintptr_t key, void *val)
+{
+	unsigned level, shift, depth, nbits;
+	uintptr_t idx;
+	void **cur, **next;
+	size_t nbytes;
+
+	malloc_mutex_lock(&rtree->mutex);
+	depth = rtree->height;
+	cur = rtree->root;
+	for (level = shift = 0; level < depth - 1; level++) {
+		nbits = rtree->level2bits[level];
+		idx = (key << shift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - nbits);
+		next = (void**)cur[idx];
+		if (next == NULL) {
+			/* Missing child: allocate and zero a node for it. */
+			nbytes = sizeof(void *) << rtree->level2bits[level+1];
+			next = (void**)base_alloc(nbytes);
+			if (next == NULL) {
+				malloc_mutex_unlock(&rtree->mutex);
+				return (true);
+			}
+			memset(next, 0, nbytes);
+			cur[idx] = next;
+		}
+		shift += nbits;
+		cur = next;
+	}
+	/* cur is now the leaf, whose slots hold values, not node pointers. */
+	nbits = rtree->level2bits[level];
+	idx = (key << shift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - nbits);
+	cur[idx] = val;
+	malloc_mutex_unlock(&rtree->mutex);
+	return (false);
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/stats.h b/deps/jemalloc.orig/include/jemalloc/internal/stats.h
new file mode 100644
index 00000000..2a9b31d9
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/stats.h
@@ -0,0 +1,207 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#define	UMAX2S_BUFSIZE	65
+
+#ifdef JEMALLOC_STATS
+typedef struct tcache_bin_stats_s tcache_bin_stats_t;
+typedef struct malloc_bin_stats_s malloc_bin_stats_t;
+typedef struct malloc_large_stats_s malloc_large_stats_t;
+typedef struct arena_stats_s arena_stats_t;
+#endif
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+typedef struct chunk_stats_s chunk_stats_t;
+#endif
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#ifdef JEMALLOC_STATS
+
+#ifdef JEMALLOC_TCACHE
+struct tcache_bin_stats_s {
+	/*
+	 * Number of allocation requests that corresponded to the size of this
+	 * bin.
+	 */
+	uint64_t	nrequests;
+};
+#endif
+
+struct malloc_bin_stats_s {
+	/*
+	 * Current number of bytes allocated, including objects currently
+	 * cached by tcache.
+	 */
+	size_t		allocated;
+
+	/*
+	 * Total number of allocation/deallocation requests served directly by
+	 * the bin.  Note that tcache may allocate an object, then recycle it
+	 * many times, resulting in many increments to nrequests, but only one
+	 * each to nmalloc and ndalloc.
+	 */
+	uint64_t	nmalloc;
+	uint64_t	ndalloc;
+
+	/*
+	 * Number of allocation requests that correspond to the size of this
+	 * bin.  This includes requests served by tcache, though tcache only
+	 * periodically merges into this counter.
+	 */
+	uint64_t	nrequests;
+
+#ifdef JEMALLOC_TCACHE
+	/* Number of tcache fills from this bin. */
+	uint64_t	nfills;
+
+	/* Number of tcache flushes to this bin. */
+	uint64_t	nflushes;
+#endif
+
+	/* Total number of runs created for this bin's size class. */
+	uint64_t	nruns;
+
+	/*
+	 * Total number of runs reused by extracting them from the runs tree for
+	 * this bin's size class.
+	 */
+	uint64_t	reruns;
+
+	/* High-water mark for this bin. */
+	size_t		highruns;
+
+	/* Current number of runs in this bin. */
+	size_t		curruns;
+};
+
+struct malloc_large_stats_s {
+	/*
+	 * Total number of allocation/deallocation requests served directly by
+	 * the arena.  Note that tcache may allocate an object, then recycle it
+	 * many times, resulting in many increments to nrequests, but only one
+	 * each to nmalloc and ndalloc.
+	 */
+	uint64_t	nmalloc;
+	uint64_t	ndalloc;
+
+	/*
+	 * Number of allocation requests that correspond to this size class.
+	 * This includes requests served by tcache, though tcache only
+	 * periodically merges into this counter.
+	 */
+	uint64_t	nrequests;
+
+	/* High-water mark for this size class. */
+	size_t		highruns;
+
+	/* Current number of runs of this size class. */
+	size_t		curruns;
+};
+
+struct arena_stats_s {
+	/* Number of bytes currently mapped. */
+	size_t		mapped;
+
+	/*
+	 * Total number of purge sweeps, total number of madvise calls made,
+	 * and total pages purged in order to keep dirty unused memory under
+	 * control.
+	 */
+	uint64_t	npurge;
+	uint64_t	nmadvise;
+	uint64_t	purged;
+
+	/* Per-size-category statistics. */
+	size_t		allocated_large;
+	uint64_t	nmalloc_large;
+	uint64_t	ndalloc_large;
+	uint64_t	nrequests_large;
+
+	/*
+	 * One element for each possible size class, including sizes that
+	 * overlap with bin size classes.  This is necessary because ipalloc()
+	 * sometimes has to use such large objects in order to assure proper
+	 * alignment.
+	 */
+	malloc_large_stats_t	*lstats;
+};
+#endif /* JEMALLOC_STATS */
+
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+struct chunk_stats_s {
+#  ifdef JEMALLOC_STATS
+	/* Number of chunks that were allocated. */
+	uint64_t	nchunks;
+#  endif
+
+	/* High-water mark for number of chunks allocated. */
+	size_t		highchunks;
+
+	/*
+	 * Current number of chunks allocated.  This value isn't maintained for
+	 * any other purpose, so keep track of it in order to be able to set
+	 * highchunks.
+	 */
+	size_t		curchunks;
+};
+#endif /* JEMALLOC_STATS || JEMALLOC_PROF */
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern bool	opt_stats_print;
+
+#ifdef JEMALLOC_STATS
+extern size_t	stats_cactive;
+#endif
+
+char	*u2s(uint64_t x, unsigned base, char *s);
+#ifdef JEMALLOC_STATS
+void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque,
+    const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4));
+void	malloc_printf(const char *format, ...)
+    JEMALLOC_ATTR(format(printf, 1, 2));
+#endif
+void	stats_print(void (*write)(void *, const char *), void *cbopaque,
+    const char *opts);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+#ifdef JEMALLOC_STATS
+
+#ifndef JEMALLOC_ENABLE_INLINE
+size_t	stats_cactive_get(void);
+void	stats_cactive_add(size_t size);
+void	stats_cactive_sub(size_t size);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_))
+JEMALLOC_INLINE size_t
+stats_cactive_get(void)
+{
+	/* Current value of stats_cactive, read with atomic_read_z(). */
+	return (atomic_read_z(&stats_cactive));
+}
+
+JEMALLOC_INLINE void
+stats_cactive_add(size_t size)
+{
+	/* Add size to stats_cactive through atomic_add_z(). */
+	atomic_add_z(&stats_cactive, size);
+}
+
+JEMALLOC_INLINE void
+stats_cactive_sub(size_t size)
+{
+	/* Subtract size from stats_cactive through atomic_sub_z(). */
+	atomic_sub_z(&stats_cactive, size);
+}
+#endif
+
+#endif /* JEMALLOC_STATS */
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/tcache.h b/deps/jemalloc.orig/include/jemalloc/internal/tcache.h
new file mode 100644
index 00000000..da3c68c5
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/tcache.h
@@ -0,0 +1,431 @@
+#ifdef JEMALLOC_TCACHE
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+typedef struct tcache_bin_info_s tcache_bin_info_t;
+typedef struct tcache_bin_s tcache_bin_t;
+typedef struct tcache_s tcache_t;
+
+/*
+ * Absolute maximum number of cache slots for each small bin in the thread
+ * cache.  This is an additional constraint beyond that imposed as: twice the
+ * number of regions per run for this size class.
+ *
+ * This constant must be an even number.
+ */
+#define	TCACHE_NSLOTS_SMALL_MAX		200
+
+/* Number of cache slots for large size classes. */
+#define	TCACHE_NSLOTS_LARGE		20
+
+/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */
+#define	LG_TCACHE_MAXCLASS_DEFAULT	15
+
+/*
+ * (1U << opt_lg_tcache_gc_sweep) is the approximate number of allocation
+ * events between full GC sweeps (-1: disabled).  Integer rounding may cause
+ * the actual number to be slightly higher, since GC is performed
+ * incrementally.
+ */
+#define	LG_TCACHE_GC_SWEEP_DEFAULT	13
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+/*
+ * Read-only information associated with each element of tcache_t's tbins array
+ * is stored separately, mainly to reduce memory usage.
+ */
+struct tcache_bin_info_s {
+	unsigned	ncached_max;	/* Upper limit on ncached. */
+};
+
+struct tcache_bin_s {
+#  ifdef JEMALLOC_STATS
+	tcache_bin_stats_t tstats;
+#  endif
+	int		low_water;	/* Min # cached since last GC. */
+	unsigned	lg_fill_div;	/* Fill (ncached_max >> lg_fill_div). */
+	unsigned	ncached;	/* # of cached objects. */
+	void		**avail;	/* Stack of available objects. */
+};
+
+struct tcache_s {
+#  ifdef JEMALLOC_STATS
+	ql_elm(tcache_t) link;		/* Used for aggregating stats. */
+#  endif
+#  ifdef JEMALLOC_PROF
+	uint64_t	prof_accumbytes;/* Cleared after arena_prof_accum() */
+#  endif
+	arena_t		*arena;		/* This thread's arena. */
+	unsigned	ev_cnt;		/* Event count since incremental GC. */
+	unsigned	next_gc_bin;	/* Next bin to GC. */
+	tcache_bin_t	tbins[1];	/* Dynamically sized. */
+	/*
+	 * The pointer stacks associated with tbins follow as a contiguous
+	 * array.  During tcache initialization, the avail pointer in each
+	 * element of tbins is initialized to point to the proper offset within
+	 * this array.
+	 */
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+extern bool	opt_tcache;
+extern ssize_t	opt_lg_tcache_max;
+extern ssize_t	opt_lg_tcache_gc_sweep;
+
+extern tcache_bin_info_t	*tcache_bin_info;
+
+/* Map of thread-specific caches. */
+#ifndef NO_TLS
+extern __thread tcache_t	*tcache_tls
+    JEMALLOC_ATTR(tls_model("initial-exec"));
+#  define TCACHE_GET()	tcache_tls
+#  define TCACHE_SET(v)	do {						\
+	tcache_tls = (tcache_t *)(v);					\
+	pthread_setspecific(tcache_tsd, (void *)(v));			\
+} while (0)
+#else
+#  define TCACHE_GET()	((tcache_t *)pthread_getspecific(tcache_tsd))
+#  define TCACHE_SET(v)	do {						\
+	pthread_setspecific(tcache_tsd, (void *)(v));			\
+} while (0)
+#endif
+extern pthread_key_t		tcache_tsd;
+
+/*
+ * Number of tcache bins.  There are nbins small-object bins, plus 0 or more
+ * large-object bins.
+ */
+extern size_t			nhbins;
+
+/* Maximum cached size class. */
+extern size_t			tcache_maxclass;
+
+/* Number of tcache allocation/deallocation events between incremental GCs. */
+extern unsigned			tcache_gc_incr;
+
+void	tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+    , tcache_t *tcache
+#endif
+    );
+void	tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+    , tcache_t *tcache
+#endif
+    );
+tcache_t *tcache_create(arena_t *arena);
+void	*tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin,
+    size_t binind);
+void	tcache_destroy(tcache_t *tcache);
+#ifdef JEMALLOC_STATS
+void	tcache_stats_merge(tcache_t *tcache, arena_t *arena);
+#endif
+bool	tcache_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+void	tcache_event(tcache_t *tcache);
+tcache_t *tcache_get(void);
+void	*tcache_alloc_easy(tcache_bin_t *tbin);
+void	*tcache_alloc_small(tcache_t *tcache, size_t size, bool zero);
+void	*tcache_alloc_large(tcache_t *tcache, size_t size, bool zero);
+void	tcache_dalloc_small(tcache_t *tcache, void *ptr);
+void	tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_))
+JEMALLOC_INLINE tcache_t *
+tcache_get(void)
+{
+	tcache_t *tcache;
+
+	if ((isthreaded & opt_tcache) == false)
+		return (NULL);
+
+	/*
+	 * Values 0, 1 and 2 are states rather than usable caches: 0 (NULL)
+	 * means no cache exists yet, so one is created on demand; 1 and 2
+	 * both make this function return NULL.
+	 */
+	tcache = TCACHE_GET();
+	if ((uintptr_t)tcache > (uintptr_t)2)
+		return (tcache);
+	if (tcache == NULL) {
+		/* A NULL result from tcache_create() is passed through. */
+		return (tcache_create(choose_arena()));
+	}
+	if (tcache == (void *)(uintptr_t)1) {
+		/*
+		 * Record that an allocator function ran after
+		 * tcache_thread_cleanup() had already been called.
+		 */
+		TCACHE_SET((uintptr_t)2);
+	}
+	return (NULL);
+}
+
+JEMALLOC_INLINE void
+tcache_event(tcache_t *tcache)
+{
+
+	if (tcache_gc_incr == 0)
+		return;
+	/* Count the event; every tcache_gc_incr-th one GCs a single bin. */
+	tcache->ev_cnt++;
+	assert(tcache->ev_cnt <= tcache_gc_incr);
+	if (tcache->ev_cnt == tcache_gc_incr) {
+		size_t binind = tcache->next_gc_bin;
+		tcache_bin_t *tbin = &tcache->tbins[binind];
+		tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];
+
+		if (tbin->low_water > 0) {
+			/*
+			 * Flush (ceiling) 3/4 of the objects below the low
+			 * water mark.
+			 */
+			if (binind < nbins) {
+				tcache_bin_flush_small(tbin, binind,
+				    tbin->ncached - tbin->low_water +
+				    (tbin->low_water >> 2)
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+				    , tcache
+#endif
+				    );
+			} else {
+				tcache_bin_flush_large(tbin, binind,
+				    tbin->ncached - tbin->low_water +
+				    (tbin->low_water >> 2)
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+				    , tcache
+#endif
+				    );
+			}
+			/*
+			 * Reduce fill count by 2X.  Limit lg_fill_div such that
+			 * the fill count is always at least 1.
+			 */
+			if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1))
+			    >= 1)
+				tbin->lg_fill_div++;
+		} else if (tbin->low_water < 0) {
+			/*
+			 * The bin ran empty since the last GC pass: increase
+			 * fill count by 2X, keeping lg_fill_div above 0.
+			 */
+			if (tbin->lg_fill_div > 1)
+				tbin->lg_fill_div--;
+		}
+		tbin->low_water = tbin->ncached;
+		/* Move on to the next bin (wrapping) and restart the count. */
+		tcache->next_gc_bin++;
+		if (tcache->next_gc_bin == nhbins)
+			tcache->next_gc_bin = 0;
+		tcache->ev_cnt = 0;
+	}
+}
+
+JEMALLOC_INLINE void *
+tcache_alloc_easy(tcache_bin_t *tbin)
+{
+	unsigned top;
+
+	if (tbin->ncached == 0) {
+		/* Nothing cached: flag the miss via a negative low_water. */
+		tbin->low_water = -1;
+		return (NULL);
+	}
+	top = --tbin->ncached;
+	if ((int)top < tbin->low_water)
+		tbin->low_water = top;
+	return (tbin->avail[top]);
+}
+
+JEMALLOC_INLINE void *
+tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
+{
+	size_t binind;
+	tcache_bin_t *tbin;
+	void *obj;
+
+	binind = SMALL_SIZE2BIN(size);
+	assert(binind < nbins);
+	tbin = &tcache->tbins[binind];
+	/* Pop from the cached stack, else use tcache_alloc_small_hard(). */
+	obj = tcache_alloc_easy(tbin);
+	if (obj == NULL)
+		obj = tcache_alloc_small_hard(tcache, tbin, binind);
+	if (obj == NULL)
+		return (NULL);
+	assert(arena_salloc(obj) == arena_bin_info[binind].reg_size);
+
+	/* An explicit zero request wins over the junk/zero fill options. */
+	if (zero)
+		memset(obj, 0, size);
+#ifdef JEMALLOC_FILL
+	else if (opt_junk)
+		memset(obj, 0xa5, size);
+	else if (opt_zero)
+		memset(obj, 0, size);
+#endif
+
+#ifdef JEMALLOC_STATS
+	tbin->tstats.nrequests++;
+#endif
+#ifdef JEMALLOC_PROF
+	tcache->prof_accumbytes += arena_bin_info[binind].reg_size;
+#endif
+	tcache_event(tcache);
+	return (obj);
+}
+
+JEMALLOC_INLINE void *
+tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
+{
+	void *ret;
+	size_t binind;
+	tcache_bin_t *tbin;
+
+	size = PAGE_CEILING(size);
+	assert(size <= tcache_maxclass);
+	binind = nbins + (size >> PAGE_SHIFT) - 1; /* Past the small bins. */
+	assert(binind < nhbins);
+	tbin = &tcache->tbins[binind];
+	ret = tcache_alloc_easy(tbin);
+	if (ret == NULL) {
+		/*
+		 * Only allocate one large object at a time, because it's quite
+		 * expensive to create one and not use it.
+		 */
+		ret = arena_malloc_large(tcache->arena, size, zero);
+		if (ret == NULL)
+			return (NULL);
+	} else {
+#ifdef JEMALLOC_PROF
+		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
+		size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
+		    PAGE_SHIFT);
+		chunk->map[pageind-map_bias].bits &= ~CHUNK_MAP_CLASS_MASK;
+#endif
+		if (zero == false) {
+#ifdef JEMALLOC_FILL
+			if (opt_junk)
+				memset(ret, 0xa5, size);
+			else if (opt_zero)
+				memset(ret, 0, size);
+#endif
+		} else
+			memset(ret, 0, size);
+		/* Hits alone update the tstats/prof counters below. */
+#ifdef JEMALLOC_STATS
+		tbin->tstats.nrequests++;
+#endif
+#ifdef JEMALLOC_PROF
+		tcache->prof_accumbytes += size;
+#endif
+	}
+
+	tcache_event(tcache);
+	return (ret);
+}
+
+JEMALLOC_INLINE void
+tcache_dalloc_small(tcache_t *tcache, void *ptr)
+{
+	arena_t *arena;
+	arena_chunk_t *chunk;
+	arena_run_t *run;
+	arena_bin_t *bin;
+	tcache_bin_t *tbin;
+	tcache_bin_info_t *tbin_info;
+	size_t pageind, binind;
+	arena_chunk_map_t *mapelm;
+
+	assert(arena_salloc(ptr) <= small_maxclass);
+	/* Walk chunk -> page map -> run header to find ptr's bin index. */
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	arena = chunk->arena;
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	mapelm = &chunk->map[pageind-map_bias];
+	run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
+	    (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
+	dassert(run->magic == ARENA_RUN_MAGIC);
+	bin = run->bin;
+	binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
+	    sizeof(arena_bin_t);
+	assert(binind < nbins);
+
+#ifdef JEMALLOC_FILL
+	if (opt_junk)
+		memset(ptr, 0x5a, arena_bin_info[binind].reg_size);
+#endif
+	/* A full bin is flushed with rem = ncached_max / 2 before the push. */
+	tbin = &tcache->tbins[binind];
+	tbin_info = &tcache_bin_info[binind];
+	if (tbin->ncached == tbin_info->ncached_max) {
+		tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >>
+		    1)
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+		    , tcache
+#endif
+		    );
+	}
+	assert(tbin->ncached < tbin_info->ncached_max);
+	tbin->avail[tbin->ncached] = ptr;
+	tbin->ncached++;
+
+	tcache_event(tcache);
+}
+
+JEMALLOC_INLINE void
+tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
+{
+	size_t binind;
+	tcache_bin_t *tbin;
+	tcache_bin_info_t *tbin_info;
+
+	assert((size & PAGE_MASK) == 0);
+	assert(arena_salloc(ptr) > small_maxclass);
+	assert(arena_salloc(ptr) <= tcache_maxclass);
+
+	/*
+	 * Push ptr back onto its thread-cache bin.  The bin index follows
+	 * from size alone (large bins come after the nbins small bins, one
+	 * per page count), so no chunk or page map lookup is needed.
+	 */
+	binind = nbins + (size >> PAGE_SHIFT) - 1;
+
+#ifdef JEMALLOC_FILL
+	if (opt_junk)
+		memset(ptr, 0x5a, size);
+#endif
+
+	tbin = &tcache->tbins[binind];
+	tbin_info = &tcache_bin_info[binind];
+	if (tbin->ncached == tbin_info->ncached_max) {
+		tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >>
+		    1)
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+		    , tcache
+#endif
+		    );
+	}
+	assert(tbin->ncached < tbin_info->ncached_max);
+	tbin->avail[tbin->ncached] = ptr;
+	tbin->ncached++;
+
+	tcache_event(tcache);
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+#endif /* JEMALLOC_TCACHE */
diff --git a/deps/jemalloc.orig/include/jemalloc/internal/zone.h b/deps/jemalloc.orig/include/jemalloc/internal/zone.h
new file mode 100644
index 00000000..859b529d
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/internal/zone.h
@@ -0,0 +1,23 @@
+#ifndef JEMALLOC_ZONE
+#  error "This source file is for zones on Darwin (OS X)."
+#endif
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+malloc_zone_t *create_zone(void);
+void	szone2ozone(malloc_zone_t *zone);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/deps/jemalloc.orig/include/jemalloc/jemalloc.h.in b/deps/jemalloc.orig/include/jemalloc/jemalloc.h.in
new file mode 100644
index 00000000..580a5ec5
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/jemalloc.h.in
@@ -0,0 +1,66 @@
+#ifndef JEMALLOC_H_
+#define	JEMALLOC_H_
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <limits.h>
+#include <strings.h>
+
+#define	JEMALLOC_VERSION "@jemalloc_version@"
+#define	JEMALLOC_VERSION_MAJOR @jemalloc_version_major@
+#define	JEMALLOC_VERSION_MINOR @jemalloc_version_minor@
+#define	JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@
+#define	JEMALLOC_VERSION_NREV @jemalloc_version_nrev@
+#define	JEMALLOC_VERSION_GID "@jemalloc_version_gid@"
+
+#include "jemalloc_defs@install_suffix@.h"
+#ifndef JEMALLOC_P
+#  define JEMALLOC_P(s) s
+#endif
+
+#define	ALLOCM_LG_ALIGN(la)	(la)
+#if LG_SIZEOF_PTR == 2
+#define	ALLOCM_ALIGN(a)	(ffs(a)-1)	/* Bit index of a's lowest set bit. */
+#else	/* Argument is parenthesized so that e.g. (x | y) expands safely. */
+#define	ALLOCM_ALIGN(a)	(((a) < (size_t)INT_MAX) ? ffs(a)-1 : ffs((a)>>32)+31)
+#endif
+#define	ALLOCM_ZERO	((int)0x40)
+#define	ALLOCM_NO_MOVE	((int)0x80)
+
+#define	ALLOCM_SUCCESS		0
+#define	ALLOCM_ERR_OOM		1
+#define	ALLOCM_ERR_NOT_MOVED	2
+
+extern const char	*JEMALLOC_P(malloc_conf);
+extern void		(*JEMALLOC_P(malloc_message))(void *, const char *);
+
+void	*JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc);
+void	*JEMALLOC_P(calloc)(size_t num, size_t size) JEMALLOC_ATTR(malloc);
+int	JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
+    JEMALLOC_ATTR(nonnull(1));
+void	*JEMALLOC_P(realloc)(void *ptr, size_t size);
+void	JEMALLOC_P(free)(void *ptr);
+
+size_t	JEMALLOC_P(malloc_usable_size)(const void *ptr);
+void	JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *),
+    void *cbopaque, const char *opts);
+int	JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen);
+int	JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp,
+    size_t *miblenp);
+int	JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen);
+
+int	JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
+    JEMALLOC_ATTR(nonnull(1));
+int	JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size,
+    size_t extra, int flags) JEMALLOC_ATTR(nonnull(1));
+int	JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags)
+    JEMALLOC_ATTR(nonnull(1));
+int	JEMALLOC_P(dallocm)(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1));
+
+#ifdef __cplusplus
+}	/* extern "C" */
+#endif
+#endif /* JEMALLOC_H_ */
diff --git a/deps/jemalloc.orig/include/jemalloc/jemalloc_defs.h.in b/deps/jemalloc.orig/include/jemalloc/jemalloc_defs.h.in
new file mode 100644
index 00000000..9ac7e1c2
--- /dev/null
+++ b/deps/jemalloc.orig/include/jemalloc/jemalloc_defs.h.in
@@ -0,0 +1,167 @@
+#ifndef JEMALLOC_DEFS_H_
+#define	JEMALLOC_DEFS_H_
+
+/*
+ * If JEMALLOC_PREFIX is defined, it will cause all public APIs to be prefixed.
+ * This makes it possible, with some care, to use multiple allocators
+ * simultaneously.
+ *
+ * In many cases it is more convenient to manually prefix allocator function
+ * calls than to let macros do it automatically, particularly when using
+ * multiple allocators simultaneously.  Define JEMALLOC_MANGLE before
+ * #include'ing jemalloc.h in order to cause name mangling that corresponds to
+ * the API prefixing.
+ */
+#undef JEMALLOC_PREFIX
+#undef JEMALLOC_CPREFIX
+#if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE))
+#undef JEMALLOC_P
+#endif
+
+/*
+ * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
+ * For shared libraries, symbol visibility mechanisms prevent these symbols
+ * from being exported, but for static libraries, naming collisions are a real
+ * possibility.
+ */
+#undef JEMALLOC_PRIVATE_NAMESPACE
+#undef JEMALLOC_N
+
+/*
+ * Hyper-threaded CPUs may need a special instruction inside spin loops in
+ * order to yield to another virtual CPU.
+ */
+#undef CPU_SPINWAIT
+
+/*
+ * Defined if OSAtomic*() functions are available, as provided by Darwin, and
+ * documented in the atomic(3) manual page.
+ */
+#undef JEMALLOC_OSATOMIC
+
+/*
+ * Defined if OSSpin*() functions are available, as provided by Darwin, and
+ * documented in the spinlock(3) manual page.
+ */
+#undef JEMALLOC_OSSPIN
+
+/* Defined if __attribute__((...)) syntax is supported. */
+#undef JEMALLOC_HAVE_ATTR
+#ifdef JEMALLOC_HAVE_ATTR
+#  define JEMALLOC_ATTR(s) __attribute__((s))
+#else
+#  define JEMALLOC_ATTR(s)
+#endif
+
+/* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */
+#undef JEMALLOC_CC_SILENCE
+
+/*
+ * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
+ * inline functions.
+ */
+#undef JEMALLOC_DEBUG
+
+/* JEMALLOC_STATS enables statistics calculation. */
+#undef JEMALLOC_STATS
+
+/* JEMALLOC_PROF enables allocation profiling. */
+#undef JEMALLOC_PROF
+
+/* Use libunwind for profile backtracing if defined. */
+#undef JEMALLOC_PROF_LIBUNWIND
+
+/* Use libgcc for profile backtracing if defined. */
+#undef JEMALLOC_PROF_LIBGCC
+
+/* Use gcc intrinsics for profile backtracing if defined. */
+#undef JEMALLOC_PROF_GCC
+
+/*
+ * JEMALLOC_TINY enables support for tiny objects, which are smaller than one
+ * quantum.
+ */
+#undef JEMALLOC_TINY
+
+/*
+ * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects.
+ * This makes it possible to allocate/deallocate objects without any locking
+ * when the cache is in the steady state.
+ */
+#undef JEMALLOC_TCACHE
+
+/*
+ * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
+ * segment (DSS).
+ */
+#undef JEMALLOC_DSS
+
+/* JEMALLOC_SWAP enables mmap()ed swap file support. */
+#undef JEMALLOC_SWAP
+
+/* Support memory filling (junk/zero). */
+#undef JEMALLOC_FILL
+
+/* Support optional abort() on OOM. */
+#undef JEMALLOC_XMALLOC
+
+/* Support SYSV semantics. */
+#undef JEMALLOC_SYSV
+
+/* Support lazy locking (avoid locking unless a second thread is launched). */
+#undef JEMALLOC_LAZY_LOCK
+
+/* Determine page size at run time if defined. */
+#undef DYNAMIC_PAGE_SHIFT
+
+/* One page is 2^STATIC_PAGE_SHIFT bytes. */
+#undef STATIC_PAGE_SHIFT
+
+/* TLS is used to map arenas and magazine caches to threads. */
+#undef NO_TLS
+
+/*
+ * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside
+ * within jemalloc-owned chunks before dereferencing them.
+ */
+#undef JEMALLOC_IVSALLOC
+
+/*
+ * Define overrides for non-standard allocator-related functions if they
+ * are present on the system.
+ */
+#undef JEMALLOC_OVERRIDE_MEMALIGN
+#undef JEMALLOC_OVERRIDE_VALLOC
+
+/*
+ * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
+ */
+#undef JEMALLOC_ZONE
+#undef JEMALLOC_ZONE_VERSION
+
+/* If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). */
+#undef JEMALLOC_MREMAP_FIXED
+
+/*
+ * Methods for purging unused pages differ between operating systems.
+ *
+ *   madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages,
+ *                                 such that new pages will be demand-zeroed if
+ *                                 the address region is later touched.
+ *   madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being
+ *                             unused, such that they will be discarded rather
+ *                             than swapped out.
+ */
+#undef JEMALLOC_PURGE_MADVISE_DONTNEED
+#undef JEMALLOC_PURGE_MADVISE_FREE
+
+/* sizeof(void *) == 2^LG_SIZEOF_PTR. */
+#undef LG_SIZEOF_PTR
+
+/* sizeof(int) == 2^LG_SIZEOF_INT. */
+#undef LG_SIZEOF_INT
+
+/* sizeof(long) == 2^LG_SIZEOF_LONG. */
+#undef LG_SIZEOF_LONG
+
+#endif /* JEMALLOC_DEFS_H_ */
diff --git a/deps/jemalloc.orig/install-sh b/deps/jemalloc.orig/install-sh
new file mode 100755
index 00000000..ebc66913
--- /dev/null
+++ b/deps/jemalloc.orig/install-sh
@@ -0,0 +1,250 @@
+#! /bin/sh
+#
+# install - install a program, script, or datafile
+# This comes from X11R5 (mit/util/scripts/install.sh).
+#
+# Copyright 1991 by the Massachusetts Institute of Technology
+#
+# Permission to use, copy, modify, distribute, and sell this software and its
+# documentation for any purpose is hereby granted without fee, provided that
+# the above copyright notice appear in all copies and that both that
+# copyright notice and this permission notice appear in supporting
+# documentation, and that the name of M.I.T. not be used in advertising or
+# publicity pertaining to distribution of the software without specific,
+# written prior permission.  M.I.T. makes no representations about the
+# suitability of this software for any purpose.  It is provided "as is"
+# without express or implied warranty.
+#
+# Calling this script install-sh is preferred over install.sh, to prevent
+# `make' implicit rules from creating a file called install from it
+# when there is no Makefile.
+#
+# This script is compatible with the BSD install script, but was written
+# from scratch.  It can only install one file at a time, a restriction
+# shared with many OS's install programs.
+
+
+# set DOITPROG to echo to test this script
+
+# Don't use :- since 4.3BSD and earlier shells don't like it.
+doit="${DOITPROG-}"
+
+
+# put in absolute paths if you don't have them in your path; or use env. vars.
+
+mvprog="${MVPROG-mv}"
+cpprog="${CPPROG-cp}"
+chmodprog="${CHMODPROG-chmod}"
+chownprog="${CHOWNPROG-chown}"
+chgrpprog="${CHGRPPROG-chgrp}"
+stripprog="${STRIPPROG-strip}"
+rmprog="${RMPROG-rm}"
+mkdirprog="${MKDIRPROG-mkdir}"
+
+transformbasename=""
+transformarg=""		# set by -t=...; tested when choosing the final name
+instcmd="$mvprog"
+chmodcmd="$chmodprog 0755"
+chowncmd=""
+chgrpcmd=""
+stripcmd=""
+rmcmd="$rmprog -f"
+mvcmd="$mvprog"
+src=""
+dst=""
+dir_arg=""
+
+while [ x"$1" != x ]; do
+    case $1 in
+	-c) instcmd="$cpprog"
+	    shift
+	    continue;;
+
+	-d) dir_arg=true
+	    shift
+	    continue;;
+
+	-m) chmodcmd="$chmodprog $2"
+	    shift
+	    shift
+	    continue;;
+
+	-o) chowncmd="$chownprog $2"
+	    shift
+	    shift
+	    continue;;
+
+	-g) chgrpcmd="$chgrpprog $2"
+	    shift
+	    shift
+	    continue;;
+
+	-s) stripcmd="$stripprog"
+	    shift
+	    continue;;
+
+	-t=*) transformarg=`echo $1 | sed 's/-t=//'`
+	    shift
+	    continue;;
+
+	-b=*) transformbasename=`echo $1 | sed 's/-b=//'`
+	    shift
+	    continue;;
+
+	*)  if [ x"$src" = x ]
+	    then
+		src=$1
+	    else
+		# this colon is to work around a 386BSD /bin/sh bug
+		:
+		dst=$1
+	    fi
+	    shift
+	    continue;;
+    esac
+done
+
+if [ x"$src" = x ]
+then
+	echo "install:	no input file specified"
+	exit 1
+else
+	true
+fi
+
+if [ x"$dir_arg" != x ]; then
+	dst=$src
+	src=""
+	
+	if [ -d $dst ]; then
+		instcmd=:
+	else
+		instcmd=mkdir
+	fi
+else
+
+# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
+# might cause directories to be created, which would be especially bad 
+# if $src (and thus $dsttmp) contains '*'.
+
+	if [ -f $src -o -d $src ]
+	then
+		true
+	else
+		echo "install:  $src does not exist"
+		exit 1
+	fi
+	
+	if [ x"$dst" = x ]
+	then
+		echo "install:	no destination specified"
+		exit 1
+	else
+		true
+	fi
+
+# If destination is a directory, append the input filename; if your system
+# does not like double slashes in filenames, you may need to add some logic
+
+	if [ -d $dst ]
+	then
+		dst="$dst"/`basename $src`
+	else
+		true
+	fi
+fi
+
+## this sed command emulates the dirname command
+dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
+
+# Make sure that the destination directory exists.
+#  this part is taken from Noah Friedman's mkinstalldirs script
+
+# Skip lots of stat calls in the usual case.
+if [ ! -d "$dstdir" ]; then
+defaultIFS='	
+'
+IFS="${IFS-${defaultIFS}}"
+
+oIFS="${IFS}"
+# Some sh's can't handle IFS=/ for some reason.
+IFS='%'
+set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'`
+IFS="${oIFS}"
+
+pathcomp=''
+
+while [ $# -ne 0 ] ; do
+	pathcomp="${pathcomp}${1}"
+	shift
+
+	if [ ! -d "${pathcomp}" ] ;
+        then
+		$mkdirprog "${pathcomp}"
+	else
+		true
+	fi
+
+	pathcomp="${pathcomp}/"
+done
+fi
+
+if [ x"$dir_arg" != x ]
+then
+	$doit $instcmd $dst &&
+
+	if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi &&
+	if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi &&
+	if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi &&
+	if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi
+else
+
+# If we're going to rename the final executable, determine the name now.
+
+	if [ x"$transformarg" = x ] 
+	then
+		dstfile=`basename $dst`
+	else
+		dstfile=`basename $dst $transformbasename | 
+			sed $transformarg`$transformbasename
+	fi
+
+# don't allow the sed command to completely eliminate the filename
+
+	if [ x"$dstfile" = x ] 
+	then
+		dstfile=`basename $dst`
+	else
+		true
+	fi
+
+# Make a temp file name in the proper directory.
+
+	dsttmp=$dstdir/#inst.$$#
+
+# Move or copy the file name to the temp name
+
+	$doit $instcmd $src $dsttmp &&
+
+	trap "rm -f ${dsttmp}" 0 &&
+
+# and set any options; do chmod last to preserve setuid bits
+
+# If any of these fail, we abort the whole thing.  If we want to
+# ignore errors from any of these, just make sure not to ignore
+# errors from the above "$doit $instcmd $src $dsttmp" command.
+
+	if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi &&
+	if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi &&
+	if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi &&
+	if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi &&
+
+# Now rename the file to the real destination.
+
+	$doit $rmcmd -f $dstdir/$dstfile &&
+	$doit $mvcmd $dsttmp $dstdir/$dstfile 
+
+fi &&
+
+
+exit 0
diff --git a/deps/jemalloc.orig/src/arena.c b/deps/jemalloc.orig/src/arena.c
new file mode 100644
index 00000000..d166ca1e
--- /dev/null
+++ b/deps/jemalloc.orig/src/arena.c
@@ -0,0 +1,2704 @@
+#define	JEMALLOC_ARENA_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+size_t	opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT;
+size_t	opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT;
+ssize_t		opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
+uint8_t const	*small_size2bin;
+arena_bin_info_t	*arena_bin_info;
+
+/* Various bin-related settings. */
+unsigned	nqbins;
+unsigned	ncbins;
+unsigned	nsbins;
+unsigned	nbins;
+size_t		qspace_max;
+size_t		cspace_min;
+size_t		cspace_max;
+size_t		sspace_min;
+size_t		sspace_max;
+
+size_t		lg_mspace;
+size_t		mspace_mask;
+
+/*
+ * const_small_size2bin is a static constant lookup table that in the common
+ * case can be used as-is for small_size2bin.
+ */
+#if (LG_TINY_MIN == 2)
+#define	S2B_4(i)	i,
+#define	S2B_8(i)	S2B_4(i) S2B_4(i)
+#elif (LG_TINY_MIN == 3)
+#define	S2B_8(i)	i,
+#else
+#  error "Unsupported LG_TINY_MIN"
+#endif
+#define	S2B_16(i)	S2B_8(i) S2B_8(i)
+#define	S2B_32(i)	S2B_16(i) S2B_16(i)
+#define	S2B_64(i)	S2B_32(i) S2B_32(i)
+#define	S2B_128(i)	S2B_64(i) S2B_64(i)
+#define	S2B_256(i)	S2B_128(i) S2B_128(i)
+/*
+ * The number of elements in const_small_size2bin is dependent on the
+ * definition for SUBPAGE.
+ */
+static JEMALLOC_ATTR(aligned(CACHELINE))
+    const uint8_t	const_small_size2bin[] = {
+#if (LG_QUANTUM == 4)
+/* 16-byte quantum **********************/
+#  ifdef JEMALLOC_TINY
+#    if (LG_TINY_MIN == 2)
+       S2B_4(0)			/*    4 */
+       S2B_4(1)			/*    8 */
+       S2B_8(2)			/*   16 */
+#      define S2B_QMIN 2
+#    elif (LG_TINY_MIN == 3)
+       S2B_8(0)			/*    8 */
+       S2B_8(1)			/*   16 */
+#      define S2B_QMIN 1
+#    else
+#      error "Unsupported LG_TINY_MIN"
+#    endif
+#  else
+	S2B_16(0)		/*   16 */
+#    define S2B_QMIN 0
+#  endif
+	S2B_16(S2B_QMIN + 1)	/*   32 */
+	S2B_16(S2B_QMIN + 2)	/*   48 */
+	S2B_16(S2B_QMIN + 3)	/*   64 */
+	S2B_16(S2B_QMIN + 4)	/*   80 */
+	S2B_16(S2B_QMIN + 5)	/*   96 */
+	S2B_16(S2B_QMIN + 6)	/*  112 */
+	S2B_16(S2B_QMIN + 7)	/*  128 */
+#  define S2B_CMIN (S2B_QMIN + 8)
+#else
+/* 8-byte quantum ***********************/
+#  ifdef JEMALLOC_TINY
+#    if (LG_TINY_MIN == 2)
+       S2B_4(0)			/*    4 */
+       S2B_4(1)			/*    8 */
+#      define S2B_QMIN 1
+#    else
+#      error "Unsupported LG_TINY_MIN"
+#    endif
+#  else
+	S2B_8(0)		/*    8 */
+#    define S2B_QMIN 0
+#  endif
+	S2B_8(S2B_QMIN + 1)	/*   16 */
+	S2B_8(S2B_QMIN + 2)	/*   24 */
+	S2B_8(S2B_QMIN + 3)	/*   32 */
+	S2B_8(S2B_QMIN + 4)	/*   40 */
+	S2B_8(S2B_QMIN + 5)	/*   48 */
+	S2B_8(S2B_QMIN + 6)	/*   56 */
+	S2B_8(S2B_QMIN + 7)	/*   64 */
+	S2B_8(S2B_QMIN + 8)	/*   72 */
+	S2B_8(S2B_QMIN + 9)	/*   80 */
+	S2B_8(S2B_QMIN + 10)	/*   88 */
+	S2B_8(S2B_QMIN + 11)	/*   96 */
+	S2B_8(S2B_QMIN + 12)	/*  104 */
+	S2B_8(S2B_QMIN + 13)	/*  112 */
+	S2B_8(S2B_QMIN + 14)	/*  120 */
+	S2B_8(S2B_QMIN + 15)	/*  128 */
+#  define S2B_CMIN (S2B_QMIN + 16)
+#endif
+/****************************************/
+	S2B_64(S2B_CMIN + 0)	/*  192 */
+	S2B_64(S2B_CMIN + 1)	/*  256 */
+	S2B_64(S2B_CMIN + 2)	/*  320 */
+	S2B_64(S2B_CMIN + 3)	/*  384 */
+	S2B_64(S2B_CMIN + 4)	/*  448 */
+	S2B_64(S2B_CMIN + 5)	/*  512 */
+#  define S2B_SMIN (S2B_CMIN + 6)
+	S2B_256(S2B_SMIN + 0)	/*  768 */
+	S2B_256(S2B_SMIN + 1)	/* 1024 */
+	S2B_256(S2B_SMIN + 2)	/* 1280 */
+	S2B_256(S2B_SMIN + 3)	/* 1536 */
+	S2B_256(S2B_SMIN + 4)	/* 1792 */
+	S2B_256(S2B_SMIN + 5)	/* 2048 */
+	S2B_256(S2B_SMIN + 6)	/* 2304 */
+	S2B_256(S2B_SMIN + 7)	/* 2560 */
+	S2B_256(S2B_SMIN + 8)	/* 2816 */
+	S2B_256(S2B_SMIN + 9)	/* 3072 */
+	S2B_256(S2B_SMIN + 10)	/* 3328 */
+	S2B_256(S2B_SMIN + 11)	/* 3584 */
+	S2B_256(S2B_SMIN + 12)	/* 3840 */
+#if (STATIC_PAGE_SHIFT == 13)
+	S2B_256(S2B_SMIN + 13)	/* 4096 */
+	S2B_256(S2B_SMIN + 14)	/* 4352 */
+	S2B_256(S2B_SMIN + 15)	/* 4608 */
+	S2B_256(S2B_SMIN + 16)	/* 4864 */
+	S2B_256(S2B_SMIN + 17)	/* 5120 */
+	S2B_256(S2B_SMIN + 18)	/* 5376 */
+	S2B_256(S2B_SMIN + 19)	/* 5632 */
+	S2B_256(S2B_SMIN + 20)	/* 5888 */
+	S2B_256(S2B_SMIN + 21)	/* 6144 */
+	S2B_256(S2B_SMIN + 22)	/* 6400 */
+	S2B_256(S2B_SMIN + 23)	/* 6656 */
+	S2B_256(S2B_SMIN + 24)	/* 6912 */
+	S2B_256(S2B_SMIN + 25)	/* 7168 */
+	S2B_256(S2B_SMIN + 26)	/* 7424 */
+	S2B_256(S2B_SMIN + 27)	/* 7680 */
+	S2B_256(S2B_SMIN + 28)	/* 7936 */
+#endif
+};
+#undef S2B_1
+#undef S2B_2
+#undef S2B_4
+#undef S2B_8
+#undef S2B_16
+#undef S2B_32
+#undef S2B_64
+#undef S2B_128
+#undef S2B_256
+#undef S2B_QMIN
+#undef S2B_CMIN
+#undef S2B_SMIN
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
+    bool large, bool zero);
+static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
+static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
+static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
+    bool zero);
+static void	arena_purge(arena_t *arena, bool all);
+static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
+static void	arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
+    arena_run_t *run, size_t oldsize, size_t newsize);
+static void	arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
+    arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
+static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
+static void	*arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
+static void	arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
+    arena_bin_t *bin);
+static void	arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk,
+    arena_run_t *run, arena_bin_t *bin);
+static void	arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk,
+    arena_run_t *run, arena_bin_t *bin);
+static void	arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
+    void *ptr, size_t oldsize, size_t size);
+static bool	arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
+    void *ptr, size_t oldsize, size_t size, size_t extra, bool zero);
+static bool	arena_ralloc_large(void *ptr, size_t oldsize, size_t size,
+    size_t extra, bool zero);
+static bool	small_size2bin_init(void);
+#ifdef JEMALLOC_DEBUG
+static void	small_size2bin_validate(void);
+#endif
+static bool	small_size2bin_init_hard(void);
+static size_t	bin_info_run_size_calc(arena_bin_info_t *bin_info,
+    size_t min_run_size);
+static bool	bin_info_init(void);
+
+/******************************************************************************/
+
+static inline int
+arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
+{
+	uintptr_t a_mapelm = (uintptr_t)a;
+	uintptr_t b_mapelm = (uintptr_t)b;
+	/* Three-way comparison of the two map elements by address. */
+	assert(a != NULL);
+	assert(b != NULL);
+	/* Yields -1, 0 or 1 without risking overflow from a subtraction. */
+	return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm));
+}
+
+/* Generate red-black tree functions. */
+rb_gen(static JEMALLOC_ATTR(unused), arena_run_tree_, arena_run_tree_t,
+    arena_chunk_map_t, u.rb_link, arena_run_comp)
+
+static inline int
+arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
+{
+	int ret;
+	size_t a_size = a->bits & ~PAGE_MASK;
+	size_t b_size = b->bits & ~PAGE_MASK;
+	/* Order by size (bits & ~PAGE_MASK); ties are broken by address. */
+	assert((a->bits & CHUNK_MAP_KEY) == CHUNK_MAP_KEY || (a->bits &
+	    CHUNK_MAP_DIRTY) == (b->bits & CHUNK_MAP_DIRTY));
+
+	ret = (a_size > b_size) - (a_size < b_size);
+	if (ret == 0) {
+		uintptr_t a_mapelm, b_mapelm;
+
+		if ((a->bits & CHUNK_MAP_KEY) != CHUNK_MAP_KEY)
+			a_mapelm = (uintptr_t)a;
+		else {
+			/*
+			 * Treat keys as though they are lower than anything
+			 * else.
+			 */
+			a_mapelm = 0;
+		}
+		b_mapelm = (uintptr_t)b;
+
+		ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm);
+	}
+
+	return (ret);
+}
+
+/* Generate red-black tree functions. */
+rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t,
+    arena_chunk_map_t, u.rb_link, arena_avail_comp)
+
+static inline void *
+arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info)
+{
+	void *ret;
+	unsigned regind;
+	bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
+	    (uintptr_t)bin_info->bitmap_offset);
+	/* Return the address of the region picked by bitmap_sfu(). */
+	dassert(run->magic == ARENA_RUN_MAGIC);
+	assert(run->nfree > 0);
+	assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false);
+	/* Region regind lives at run + reg0_offset + reg_size * regind. */
+	regind = bitmap_sfu(bitmap, &bin_info->bitmap_info);
+	ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset +
+	    (uintptr_t)(bin_info->reg_size * regind));
+	run->nfree--;
+	if (regind == run->nextind)
+		run->nextind++;
+	assert(regind < run->nextind);
+	return (ret);
+}
+
+static inline void
+arena_run_reg_dalloc(arena_run_t *run, void *ptr)
+{
+	arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
+	size_t binind = arena_bin_index(chunk->arena, run->bin);
+	arena_bin_info_t *bin_info = &arena_bin_info[binind];
+	unsigned regind = arena_run_regind(run, bin_info, ptr);
+	bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
+	    (uintptr_t)bin_info->bitmap_offset);
+	/* Release ptr's region: unset its bitmap entry and bump run->nfree. */
+	assert(run->nfree < bin_info->nregs);
+	/* Freeing an interior pointer can cause assertion failure. */
+	assert(((uintptr_t)ptr - ((uintptr_t)run +
+	    (uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_size
+	    == 0);
+	assert((uintptr_t)ptr >= (uintptr_t)run +
+	    (uintptr_t)bin_info->reg0_offset);
+	/* Freeing an unallocated pointer can cause assertion failure. */
+	assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind));
+
+	bitmap_unset(bitmap, &bin_info->bitmap_info, regind);
+	run->nfree++;
+}
+
+#ifdef JEMALLOC_DEBUG
+static inline void
+arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind)
+{
+	size_t i;
+	size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << PAGE_SHIFT));
+	/* Assert that every size_t word of page run_ind in chunk is zero. */
+	for (i = 0; i < PAGE_SIZE / sizeof(size_t); i++)
+		assert(p[i] == 0);
+}
+#endif
+
+static void
+arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
+    bool zero)
+{
+	arena_chunk_t *chunk;
+	size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i;
+	size_t flag_dirty;
+	arena_avail_tree_t *runs_avail;
+#ifdef JEMALLOC_STATS
+	size_t cactive_diff;
+#endif
+	/* Mark the head of run allocated (size bytes); re-insert the rest. */
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
+	old_ndirty = chunk->ndirty;
+	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
+	    >> PAGE_SHIFT);
+	flag_dirty = chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY;
+	runs_avail = (flag_dirty != 0) ? &arena->runs_avail_dirty :
+	    &arena->runs_avail_clean;
+	total_pages = (chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) >>
+	    PAGE_SHIFT;
+	assert((chunk->map[run_ind+total_pages-1-map_bias].bits &
+	    CHUNK_MAP_DIRTY) == flag_dirty);
+	need_pages = (size >> PAGE_SHIFT);
+	assert(need_pages > 0);
+	assert(need_pages <= total_pages);
+	rem_pages = total_pages - need_pages;
+	/* The whole run leaves runs_avail; a remainder is re-inserted below. */
+	arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]);
+#ifdef JEMALLOC_STATS
+	/* Update stats_cactive if nactive is crossing a chunk multiple. */
+	cactive_diff = CHUNK_CEILING((arena->nactive + need_pages) <<
+	    PAGE_SHIFT) - CHUNK_CEILING(arena->nactive << PAGE_SHIFT);
+	if (cactive_diff != 0)
+		stats_cactive_add(cactive_diff);
+#endif
+	arena->nactive += need_pages;
+
+	/* Keep track of trailing unused pages for later use. */
+	if (rem_pages > 0) {
+		if (flag_dirty != 0) {
+			chunk->map[run_ind+need_pages-map_bias].bits =
+			    (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY;
+			chunk->map[run_ind+total_pages-1-map_bias].bits =
+			    (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY;
+		} else {
+			chunk->map[run_ind+need_pages-map_bias].bits =
+			    (rem_pages << PAGE_SHIFT) |
+			    (chunk->map[run_ind+need_pages-map_bias].bits &
+			    CHUNK_MAP_UNZEROED);
+			chunk->map[run_ind+total_pages-1-map_bias].bits =
+			    (rem_pages << PAGE_SHIFT) |
+			    (chunk->map[run_ind+total_pages-1-map_bias].bits &
+			    CHUNK_MAP_UNZEROED);
+		}
+		arena_avail_tree_insert(runs_avail,
+		    &chunk->map[run_ind+need_pages-map_bias]);
+	}
+
+	/* Update dirty page accounting. */
+	if (flag_dirty != 0) {
+		chunk->ndirty -= need_pages;
+		arena->ndirty -= need_pages;
+	}
+
+	/*
+	 * Update the page map separately for large vs. small runs, since it is
+	 * possible to avoid iteration for large mallocs.
+	 */
+	if (large) {
+		if (zero) {
+			if (flag_dirty == 0) {
+				/*
+				 * The run is clean, so some pages may be
+				 * zeroed (i.e. never before touched).
+				 */
+				for (i = 0; i < need_pages; i++) {
+					if ((chunk->map[run_ind+i-map_bias].bits
+					    & CHUNK_MAP_UNZEROED) != 0) {
+						memset((void *)((uintptr_t)
+						    chunk + ((run_ind+i) <<
+						    PAGE_SHIFT)), 0,
+						    PAGE_SIZE);
+					}
+#ifdef JEMALLOC_DEBUG
+					else {
+						arena_chunk_validate_zeroed(
+						    chunk, run_ind+i);
+					}
+#endif
+				}
+			} else {
+				/*
+				 * The run is dirty, so all pages must be
+				 * zeroed.
+				 */
+				memset((void *)((uintptr_t)chunk + (run_ind <<
+				    PAGE_SHIFT)), 0, (need_pages <<
+				    PAGE_SHIFT));
+			}
+		}
+
+		/*
+		 * Set the last element first, in case the run only contains one
+		 * page (i.e. both statements set the same element).
+		 */
+		chunk->map[run_ind+need_pages-1-map_bias].bits =
+		    CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | flag_dirty;
+		chunk->map[run_ind-map_bias].bits = size | flag_dirty |
+		    CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+	} else {
+		assert(zero == false);
+		/*
+		 * Propagate the dirty and unzeroed flags to the allocated
+		 * small run, so that arena_dalloc_bin_run() has the ability to
+		 * conditionally trim clean pages.
+		 */
+		chunk->map[run_ind-map_bias].bits =
+		    (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) |
+		    CHUNK_MAP_ALLOCATED | flag_dirty;
+#ifdef JEMALLOC_DEBUG
+		/*
+		 * The first page will always be dirtied during small run
+		 * initialization, so a validation failure here would not
+		 * actually cause an observable failure.
+		 */
+		if (flag_dirty == 0 &&
+		    (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED)
+		    == 0)
+			arena_chunk_validate_zeroed(chunk, run_ind);
+#endif
+		for (i = 1; i < need_pages - 1; i++) {
+			chunk->map[run_ind+i-map_bias].bits = (i << PAGE_SHIFT)
+			    | (chunk->map[run_ind+i-map_bias].bits &
+			    CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED;
+#ifdef JEMALLOC_DEBUG
+			if (flag_dirty == 0 &&
+			    (chunk->map[run_ind+i-map_bias].bits &
+			    CHUNK_MAP_UNZEROED) == 0)
+				arena_chunk_validate_zeroed(chunk, run_ind+i);
+#endif
+		}
+		chunk->map[run_ind+need_pages-1-map_bias].bits = ((need_pages
+		    - 1) << PAGE_SHIFT) |
+		    (chunk->map[run_ind+need_pages-1-map_bias].bits &
+		    CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty;
+#ifdef JEMALLOC_DEBUG
+		if (flag_dirty == 0 &&
+		    (chunk->map[run_ind+need_pages-1-map_bias].bits &
+		    CHUNK_MAP_UNZEROED) == 0) {
+			arena_chunk_validate_zeroed(chunk,
+			    run_ind+need_pages-1);
+		}
+#endif
+	}
+}
+
+static arena_chunk_t *
+arena_chunk_alloc(arena_t *arena)
+{
+	arena_chunk_t *chunk;
+	size_t i;
+	/* Reuse arena->spare if present; otherwise obtain a fresh chunk. */
+	if (arena->spare != NULL) {
+		arena_avail_tree_t *runs_avail;
+
+		chunk = arena->spare;
+		arena->spare = NULL;
+
+		/* Insert the run into the appropriate runs_avail_* tree. */
+		if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0)
+			runs_avail = &arena->runs_avail_clean;
+		else
+			runs_avail = &arena->runs_avail_dirty;
+		assert((chunk->map[0].bits & ~PAGE_MASK) == arena_maxclass);
+		assert((chunk->map[chunk_npages-1-map_bias].bits & ~PAGE_MASK)
+		    == arena_maxclass);
+		assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) ==
+		    (chunk->map[chunk_npages-1-map_bias].bits &
+		    CHUNK_MAP_DIRTY));
+		arena_avail_tree_insert(runs_avail, &chunk->map[0]);
+	} else {
+		bool zero;
+		size_t unzeroed;
+		/* arena->lock is released around the chunk_alloc() call. */
+		zero = false;
+		malloc_mutex_unlock(&arena->lock);
+		chunk = (arena_chunk_t *)chunk_alloc(chunksize, false, &zero);
+		malloc_mutex_lock(&arena->lock);
+		if (chunk == NULL)
+			return (NULL);
+#ifdef JEMALLOC_STATS
+		arena->stats.mapped += chunksize;
+#endif
+
+		chunk->arena = arena;
+		ql_elm_new(chunk, link_dirty);
+		chunk->dirtied = false;
+
+		/*
+		 * Claim that no pages are in use, since the header is merely
+		 * overhead.
+		 */
+		chunk->ndirty = 0;
+
+		/*
+		 * Initialize the map to contain one maximal free untouched run.
+		 * Mark the pages as zeroed iff chunk_alloc() returned a zeroed
+		 * chunk.
+		 */
+		unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED;
+		chunk->map[0].bits = arena_maxclass | unzeroed;
+		/*
+		 * There is no need to initialize the internal page map entries
+		 * unless the chunk is not zeroed.
+		 */
+		if (zero == false) {
+			for (i = map_bias+1; i < chunk_npages-1; i++)
+				chunk->map[i-map_bias].bits = unzeroed;
+		}
+#ifdef JEMALLOC_DEBUG
+		else {
+			for (i = map_bias+1; i < chunk_npages-1; i++)
+				assert(chunk->map[i-map_bias].bits == unzeroed);
+		}
+#endif
+		chunk->map[chunk_npages-1-map_bias].bits = arena_maxclass |
+		    unzeroed;
+
+		/* Insert the run into the runs_avail_clean tree. */
+		arena_avail_tree_insert(&arena->runs_avail_clean,
+		    &chunk->map[0]);
+	}
+
+	return (chunk);
+}
+
+static void
+arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
+{
+	arena_avail_tree_t *runs_avail;
+	/* Retire chunk: it becomes arena->spare; any old spare is freed. */
+	/*
+	 * Remove run from the appropriate runs_avail_* tree, so that the arena
+	 * does not use it.
+	 */
+	if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0)
+		runs_avail = &arena->runs_avail_clean;
+	else
+		runs_avail = &arena->runs_avail_dirty;
+	arena_avail_tree_remove(runs_avail, &chunk->map[0]);
+
+	if (arena->spare != NULL) {
+		arena_chunk_t *spare = arena->spare;
+		/* Swap in chunk, then free the old spare with the lock dropped. */
+		arena->spare = chunk;
+		if (spare->dirtied) {
+			ql_remove(&chunk->arena->chunks_dirty, spare,
+			    link_dirty);
+			arena->ndirty -= spare->ndirty;
+		}
+		malloc_mutex_unlock(&arena->lock);
+		chunk_dealloc((void *)spare, chunksize, true);
+		malloc_mutex_lock(&arena->lock);
+#ifdef JEMALLOC_STATS
+		arena->stats.mapped -= chunksize;
+#endif
+	} else
+		arena->spare = chunk;
+}
+
+static arena_run_t *
+arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
+{
+	arena_chunk_t *chunk;
+	arena_run_t *run;
+	arena_chunk_map_t *mapelm, key;
+	/*
+	 * Carve a run of size bytes (a page multiple, at most arena_maxclass)
+	 * out of an available run via arena_run_split().  Returns the run, or
+	 * NULL if no available run fits and no new chunk could be allocated.
+	 * The callers in view (arena_malloc_large(), arena_palloc(),
+	 * arena_bin_nonfull_run_get()) hold arena->lock across the call.
+	 */
+	assert(size <= arena_maxclass);
+	assert((size & PAGE_MASK) == 0);
+
+	/*
+	 * Search the arena's chunks for the lowest best fit, trying dirty runs
+	 * before clean ones.
+	 */
+	key.bits = size | CHUNK_MAP_KEY;
+	mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key);
+	if (mapelm != NULL) {
+		arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
+		size_t pageind = (((uintptr_t)mapelm -
+		    (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
+		    + map_bias;
+		/*
+		 * pageind is the map element's index plus map_bias; the run
+		 * starts pageind pages into its chunk.
+		 */
+		run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
+		    PAGE_SHIFT));
+		arena_run_split(arena, run, size, large, zero);
+		return (run);
+	}
+	mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key);
+	if (mapelm != NULL) {
+		arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
+		size_t pageind = (((uintptr_t)mapelm -
+		    (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
+		    + map_bias;
+
+		run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
+		    PAGE_SHIFT));
+		arena_run_split(arena, run, size, large, zero);
+		return (run);
+	}
+
+	/*
+	 * No usable runs.  Create a new chunk from which to allocate the run.
+	 * The run starts at page map_bias of the chunk returned by
+	 * arena_chunk_alloc().
+	 */
+	chunk = arena_chunk_alloc(arena);
+	if (chunk != NULL) {
+		run = (arena_run_t *)((uintptr_t)chunk + (map_bias <<
+		    PAGE_SHIFT));
+		arena_run_split(arena, run, size, large, zero);
+		return (run);
+	}
+
+	/*
+	 * arena_chunk_alloc() failed, but another thread may have made
+	 * sufficient memory available while this one dropped arena->lock in
+	 * arena_chunk_alloc(), so search one more time.
+	 */
+	mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key);
+	if (mapelm != NULL) {
+		arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
+		size_t pageind = (((uintptr_t)mapelm -
+		    (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
+		    + map_bias;
+
+		run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
+		    PAGE_SHIFT));
+		arena_run_split(arena, run, size, large, zero);
+		return (run);
+	}
+	mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key);
+	if (mapelm != NULL) {
+		arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
+		size_t pageind = (((uintptr_t)mapelm -
+		    (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
+		    + map_bias;
+
+		run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
+		    PAGE_SHIFT));
+		arena_run_split(arena, run, size, large, zero);
+		return (run);
+	}
+
+	return (NULL);
+}
+
+static inline void
+arena_maybe_purge(arena_t *arena)
+{
+
+	/*
+	 * Nothing to do if purging is disabled (negative opt_lg_dirty_mult),
+	 * or if every dirty page is already counted in arena->npurgatory.
+	 */
+	if (opt_lg_dirty_mult < 0 || arena->ndirty <= arena->npurgatory)
+		return;
+
+	/*
+	 * Enforce opt_lg_dirty_mult: purge once the unclaimed dirty pages
+	 * exceed both chunk_npages and (nactive >> opt_lg_dirty_mult).
+	 */
+	if ((arena->ndirty - arena->npurgatory) > chunk_npages &&
+	    (arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty -
+	    arena->npurgatory))
+		arena_purge(arena, false);
+}
+
+static inline void
+arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
+{
+	ql_head(arena_chunk_map_t) mapelms;
+	arena_chunk_map_t *mapelm;
+	size_t pageind, flag_unzeroed;
+#ifdef JEMALLOC_DEBUG
+	size_t ndirty;
+#endif
+#ifdef JEMALLOC_STATS
+	size_t nmadvise;
+#endif
+	/*
+	 * Purge every free dirty run in chunk, in three phases:
+	 * 1) with arena->lock held (it is unlocked further down, so it is
+	 *    held on entry), mark each free dirty run as an allocated large
+	 *    run and queue its map element on mapelms;
+	 * 2) with arena->lock dropped, madvise() each queued run;
+	 * 3) with arena->lock re-taken, free the runs via arena_run_dalloc().
+	 */
+	ql_new(&mapelms);
+
+	flag_unzeroed =
+#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
+   /*
+    * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous
+    * mappings, but not for file-backed mappings.
+    */
+#  ifdef JEMALLOC_SWAP
+	    swap_enabled ? CHUNK_MAP_UNZEROED :
+#  endif
+	    0;
+#else
+	    CHUNK_MAP_UNZEROED;
+#endif
+
+	/*
+	 * If chunk is the spare, temporarily re-allocate it, 1) so that its
+	 * run is reinserted into runs_avail_dirty, and 2) so that it cannot be
+	 * completely discarded by another thread while arena->lock is dropped
+	 * by this thread.  Note that the arena_run_dalloc() call will
+	 * implicitly deallocate the chunk, so no explicit action is required
+	 * in this function to deallocate the chunk.
+	 *
+	 * Note that once a chunk contains dirty pages, it cannot again contain
+	 * a single run unless 1) it is a dirty run, or 2) this function purges
+	 * dirty pages and causes the transition to a single clean run.  Thus
+	 * (chunk == arena->spare) is possible, but it is not possible for
+	 * this function to be called on the spare unless it contains a dirty
+	 * run.
+	 */
+	if (chunk == arena->spare) {
+		assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) != 0);
+		arena_chunk_alloc(arena);
+	}
+
+	/*
+	 * Temporarily allocate all free dirty runs within chunk.  The loop
+	 * walks the chunk run by run, from page map_bias to chunk_npages.
+	 */
+	for (pageind = map_bias; pageind < chunk_npages;) {
+		mapelm = &chunk->map[pageind-map_bias];
+		if ((mapelm->bits & CHUNK_MAP_ALLOCATED) == 0) {
+			size_t npages;
+
+			npages = mapelm->bits >> PAGE_SHIFT;
+			assert(pageind + npages <= chunk_npages);
+			if (mapelm->bits & CHUNK_MAP_DIRTY) {
+				size_t i;
+#ifdef JEMALLOC_STATS
+				size_t cactive_diff;
+#endif
+
+				arena_avail_tree_remove(
+				    &arena->runs_avail_dirty, mapelm);
+
+				mapelm->bits = (npages << PAGE_SHIFT) |
+				    flag_unzeroed | CHUNK_MAP_LARGE |
+				    CHUNK_MAP_ALLOCATED;
+				/*
+				 * Update internal elements in the page map, so
+				 * that CHUNK_MAP_UNZEROED is properly set.
+				 */
+				for (i = 1; i < npages - 1; i++) {
+					chunk->map[pageind+i-map_bias].bits =
+					    flag_unzeroed;
+				}
+				if (npages > 1) {
+					chunk->map[
+					    pageind+npages-1-map_bias].bits =
+					    flag_unzeroed | CHUNK_MAP_LARGE |
+					    CHUNK_MAP_ALLOCATED;
+				}
+
+#ifdef JEMALLOC_STATS
+				/*
+				 * Update stats_cactive if nactive is crossing a
+				 * chunk multiple.
+				 */
+				cactive_diff = CHUNK_CEILING((arena->nactive +
+				    npages) << PAGE_SHIFT) -
+				    CHUNK_CEILING(arena->nactive << PAGE_SHIFT);
+				if (cactive_diff != 0)
+					stats_cactive_add(cactive_diff);
+#endif
+				arena->nactive += npages;
+				/* Append to list for later processing. */
+				ql_elm_new(mapelm, u.ql_link);
+				ql_tail_insert(&mapelms, mapelm, u.ql_link);
+			}
+
+			pageind += npages;
+		} else {
+			/* Skip allocated run. */
+			if (mapelm->bits & CHUNK_MAP_LARGE)
+				pageind += mapelm->bits >> PAGE_SHIFT;
+			else {
+				arena_run_t *run = (arena_run_t *)((uintptr_t)
+				    chunk + (uintptr_t)(pageind << PAGE_SHIFT));
+				/*
+				 * Small run: its length is the run_size of the
+				 * bin that owns it.
+				 */
+				assert((mapelm->bits >> PAGE_SHIFT) == 0);
+				dassert(run->magic == ARENA_RUN_MAGIC);
+				size_t binind = arena_bin_index(arena,
+				    run->bin);
+				arena_bin_info_t *bin_info =
+				    &arena_bin_info[binind];
+				pageind += bin_info->run_size >> PAGE_SHIFT;
+			}
+		}
+	}
+	assert(pageind == chunk_npages);
+
+#ifdef JEMALLOC_DEBUG
+	ndirty = chunk->ndirty;
+#endif
+#ifdef JEMALLOC_STATS
+	arena->stats.purged += chunk->ndirty;
+#endif
+	arena->ndirty -= chunk->ndirty;
+	chunk->ndirty = 0;
+	ql_remove(&arena->chunks_dirty, chunk, link_dirty);
+	chunk->dirtied = false;
+	/*
+	 * Drop arena->lock for the madvise() calls.  The runs on mapelms were
+	 * marked allocated and taken off runs_avail_dirty above.
+	 */
+	malloc_mutex_unlock(&arena->lock);
+#ifdef JEMALLOC_STATS
+	nmadvise = 0;
+#endif
+	ql_foreach(mapelm, &mapelms, u.ql_link) {
+		size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+		    sizeof(arena_chunk_map_t)) + map_bias;
+		size_t npages = mapelm->bits >> PAGE_SHIFT;
+
+		assert(pageind + npages <= chunk_npages);
+#ifdef JEMALLOC_DEBUG
+		assert(ndirty >= npages);
+		ndirty -= npages;
+#endif
+
+#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
+		madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)),
+		    (npages << PAGE_SHIFT), MADV_DONTNEED);
+#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
+		madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)),
+		    (npages << PAGE_SHIFT), MADV_FREE);
+#else
+#  error "No method defined for purging unused dirty pages."
+#endif
+
+#ifdef JEMALLOC_STATS
+		nmadvise++;
+#endif
+	}
+#ifdef JEMALLOC_DEBUG
+	assert(ndirty == 0);
+#endif
+	malloc_mutex_lock(&arena->lock);
+#ifdef JEMALLOC_STATS
+	arena->stats.nmadvise += nmadvise;
+#endif
+
+	/*
+	 * Deallocate runs.  Each one is released as clean (dirty == false) and
+	 * unlinked from mapelms before arena_run_dalloc() is called on it.
+	 */
+	for (mapelm = ql_first(&mapelms); mapelm != NULL;
+	    mapelm = ql_first(&mapelms)) {
+		size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+		    sizeof(arena_chunk_map_t)) + map_bias;
+		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+		    (uintptr_t)(pageind << PAGE_SHIFT));
+
+		ql_remove(&mapelms, mapelm, u.ql_link);
+		arena_run_dalloc(arena, run, false);
+	}
+}
+
+/*
+ * Purge dirty pages from arena, one chunk at a time, until the number of dirty
+ * pages not already claimed by purging threads (arena->ndirty -
+ * arena->npurgatory) is back within (arena->nactive >> opt_lg_dirty_mult).  If
+ * all is true that threshold is ignored and every unclaimed dirty page is
+ * targeted.  arena->lock must be held by the caller; arena_chunk_purge()
+ * drops and re-acquires it.
+ */
+static void
+arena_purge(arena_t *arena, bool all)
+{
+	arena_chunk_t *chunk;
+	size_t npurgatory;
+#ifdef JEMALLOC_DEBUG
+	size_t ndirty = 0;
+
+	/* Cross-check arena->ndirty against the per-chunk dirty counts. */
+	ql_foreach(chunk, &arena->chunks_dirty, link_dirty) {
+		assert(chunk->dirtied);
+		ndirty += chunk->ndirty;
+	}
+	assert(ndirty == arena->ndirty);
+#endif
+	/*
+	 * Test all first.  arena_purge_all() can get here while
+	 * opt_lg_dirty_mult is negative (purging disabled), and shifting by a
+	 * negative count is undefined behavior, so the threshold expression
+	 * must not be evaluated in that case.
+	 */
+	assert(all || arena->ndirty > arena->npurgatory);
+	assert(all || arena->ndirty - arena->npurgatory > chunk_npages);
+	assert(all || (arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty -
+	    arena->npurgatory));
+
+#ifdef JEMALLOC_STATS
+	arena->stats.npurge++;
+#endif
+
+	/*
+	 * Compute the minimum number of pages that this thread should try to
+	 * purge, and add the result to arena->npurgatory.  This will keep
+	 * multiple threads from racing to reduce ndirty below the threshold.
+	 */
+	npurgatory = arena->ndirty - arena->npurgatory;
+	if (all == false) {
+		assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult);
+		npurgatory -= arena->nactive >> opt_lg_dirty_mult;
+	}
+	arena->npurgatory += npurgatory;
+
+	while (npurgatory > 0) {
+		/* Get next chunk with dirty pages. */
+		chunk = ql_first(&arena->chunks_dirty);
+		if (chunk == NULL) {
+			/*
+			 * This thread was unable to purge as many pages as
+			 * originally intended, due to races with other threads
+			 * that either did some of the purging work, or re-used
+			 * dirty pages.
+			 */
+			arena->npurgatory -= npurgatory;
+			return;
+		}
+		/* Unlink chunks that no longer hold any dirty pages. */
+		while (chunk->ndirty == 0) {
+			ql_remove(&arena->chunks_dirty, chunk, link_dirty);
+			chunk->dirtied = false;
+			chunk = ql_first(&arena->chunks_dirty);
+			if (chunk == NULL) {
+				/* Same logic as for above. */
+				arena->npurgatory -= npurgatory;
+				return;
+			}
+		}
+
+		if (chunk->ndirty > npurgatory) {
+			/*
+			 * This thread will, at a minimum, purge all the dirty
+			 * pages in chunk, so set npurgatory to reflect this
+			 * thread's commitment to purge the pages.  This tends
+			 * to reduce the chances of the following scenario:
+			 *
+			 * 1) This thread sets arena->npurgatory such that
+			 *    (arena->ndirty - arena->npurgatory) is at the
+			 *    threshold.
+			 * 2) This thread drops arena->lock.
+			 * 3) Another thread causes one or more pages to be
+			 *    dirtied, and immediately determines that it must
+			 *    purge dirty pages.
+			 *
+			 * If this scenario *does* play out, that's okay,
+			 * because all of the purging work being done really
+			 * needs to happen.
+			 */
+			arena->npurgatory += chunk->ndirty - npurgatory;
+			npurgatory = chunk->ndirty;
+		}
+
+		/*
+		 * The chunk's pages are about to be purged, so they no longer
+		 * count as pending, either arena-wide or for this thread.
+		 */
+		arena->npurgatory -= chunk->ndirty;
+		npurgatory -= chunk->ndirty;
+		arena_chunk_purge(arena, chunk);
+	}
+}
+
+void
+arena_purge_all(arena_t *arena)
+{
+	/*
+	 * Run an unconditional (all == true) purge pass over arena, holding
+	 * arena->lock around the call to arena_purge().
+	 */
+	malloc_mutex_lock(&arena->lock);
+	arena_purge(arena, true);
+	malloc_mutex_unlock(&arena->lock);
+}
+
+static void
+arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
+{
+	arena_chunk_t *chunk;
+	size_t size, run_ind, run_pages, flag_dirty;
+	arena_avail_tree_t *runs_avail;
+#ifdef JEMALLOC_STATS
+	size_t cactive_diff;
+#endif
+	/*
+	 * Return run to the arena: mark its pages unallocated in the chunk
+	 * map, coalesce it with free neighbors of the same dirty state, and
+	 * insert the result into runs_avail_clean or runs_avail_dirty.  dirty
+	 * is the caller's claim that it dirtied the run.  The chunk is handed
+	 * to arena_chunk_dealloc() if it ends up entirely free, and a purge
+	 * may be triggered if the run is dirty.
+	 */
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
+	run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk)
+	    >> PAGE_SHIFT);
+	assert(run_ind >= map_bias);
+	assert(run_ind < chunk_npages);
+	if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_LARGE) != 0) {
+		size = chunk->map[run_ind-map_bias].bits & ~PAGE_MASK;
+		assert(size == PAGE_SIZE ||
+		    (chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits &
+		    ~PAGE_MASK) == 0);
+		assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits &
+		    CHUNK_MAP_LARGE) != 0);
+		assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits &
+		    CHUNK_MAP_ALLOCATED) != 0);
+	} else {
+		size_t binind = arena_bin_index(arena, run->bin);
+		arena_bin_info_t *bin_info = &arena_bin_info[binind];
+		size = bin_info->run_size;
+	}
+	run_pages = (size >> PAGE_SHIFT);
+#ifdef JEMALLOC_STATS
+	/* Update stats_cactive if nactive is crossing a chunk multiple. */
+	cactive_diff = CHUNK_CEILING(arena->nactive << PAGE_SHIFT) -
+	    CHUNK_CEILING((arena->nactive - run_pages) << PAGE_SHIFT);
+	if (cactive_diff != 0)
+		stats_cactive_sub(cactive_diff);
+#endif
+	arena->nactive -= run_pages;
+
+	/*
+	 * The run is dirty if the caller claims to have dirtied it, as well as
+	 * if it was already dirty before being allocated.
+	 */
+	if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) != 0)
+		dirty = true;
+	flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0;
+	runs_avail = dirty ? &arena->runs_avail_dirty :
+	    &arena->runs_avail_clean;
+
+	/*
+	 * Mark pages as unallocated in the chunk map.  Only the first and last
+	 * map elements of the run are rewritten; a clean run keeps each
+	 * element's CHUNK_MAP_UNZEROED flag.
+	 */
+	if (dirty) {
+		chunk->map[run_ind-map_bias].bits = size | CHUNK_MAP_DIRTY;
+		chunk->map[run_ind+run_pages-1-map_bias].bits = size |
+		    CHUNK_MAP_DIRTY;
+
+		chunk->ndirty += run_pages;
+		arena->ndirty += run_pages;
+	} else {
+		chunk->map[run_ind-map_bias].bits = size |
+		    (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED);
+		chunk->map[run_ind+run_pages-1-map_bias].bits = size |
+		    (chunk->map[run_ind+run_pages-1-map_bias].bits &
+		    CHUNK_MAP_UNZEROED);
+	}
+
+	/*
+	 * Try to coalesce forward: the successor must be unallocated and have
+	 * the same dirty state as this run.
+	 */
+	if (run_ind + run_pages < chunk_npages &&
+	    (chunk->map[run_ind+run_pages-map_bias].bits & CHUNK_MAP_ALLOCATED)
+	    == 0 && (chunk->map[run_ind+run_pages-map_bias].bits &
+	    CHUNK_MAP_DIRTY) == flag_dirty) {
+		size_t nrun_size = chunk->map[run_ind+run_pages-map_bias].bits &
+		    ~PAGE_MASK;
+		size_t nrun_pages = nrun_size >> PAGE_SHIFT;
+
+		/*
+		 * Remove successor from runs_avail; the coalesced run is
+		 * inserted later.
+		 */
+		assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits
+		    & ~PAGE_MASK) == nrun_size);
+		assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits
+		    & CHUNK_MAP_ALLOCATED) == 0);
+		assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits
+		    & CHUNK_MAP_DIRTY) == flag_dirty);
+		arena_avail_tree_remove(runs_avail,
+		    &chunk->map[run_ind+run_pages-map_bias]);
+
+		size += nrun_size;
+		run_pages += nrun_pages;
+
+		chunk->map[run_ind-map_bias].bits = size |
+		    (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK);
+		chunk->map[run_ind+run_pages-1-map_bias].bits = size |
+		    (chunk->map[run_ind+run_pages-1-map_bias].bits &
+		    CHUNK_MAP_FLAGS_MASK);
+	}
+
+	/*
+	 * Try to coalesce backward, under the same conditions.  run_ind moves
+	 * back to the predecessor's first page.
+	 */
+	if (run_ind > map_bias && (chunk->map[run_ind-1-map_bias].bits &
+	    CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[run_ind-1-map_bias].bits &
+	    CHUNK_MAP_DIRTY) == flag_dirty) {
+		size_t prun_size = chunk->map[run_ind-1-map_bias].bits &
+		    ~PAGE_MASK;
+		size_t prun_pages = prun_size >> PAGE_SHIFT;
+
+		run_ind -= prun_pages;
+
+		/*
+		 * Remove predecessor from runs_avail; the coalesced run is
+		 * inserted later.
+		 */
+		assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK)
+		    == prun_size);
+		assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_ALLOCATED)
+		    == 0);
+		assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY)
+		    == flag_dirty);
+		arena_avail_tree_remove(runs_avail,
+		    &chunk->map[run_ind-map_bias]);
+
+		size += prun_size;
+		run_pages += prun_pages;
+
+		chunk->map[run_ind-map_bias].bits = size |
+		    (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK);
+		chunk->map[run_ind+run_pages-1-map_bias].bits = size |
+		    (chunk->map[run_ind+run_pages-1-map_bias].bits &
+		    CHUNK_MAP_FLAGS_MASK);
+	}
+
+	/* Insert into runs_avail, now that coalescing is complete. */
+	assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) ==
+	    (chunk->map[run_ind+run_pages-1-map_bias].bits & ~PAGE_MASK));
+	assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) ==
+	    (chunk->map[run_ind+run_pages-1-map_bias].bits & CHUNK_MAP_DIRTY));
+	arena_avail_tree_insert(runs_avail, &chunk->map[run_ind-map_bias]);
+
+	if (dirty) {
+		/*
+		 * Insert into chunks_dirty before potentially calling
+		 * arena_chunk_dealloc(), so that chunks_dirty and
+		 * arena->ndirty are consistent.
+		 */
+		if (chunk->dirtied == false) {
+			ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty);
+			chunk->dirtied = true;
+		}
+	}
+
+	/*
+	 * Deallocate chunk if it is now completely unused.  The bit
+	 * manipulation checks whether the first run is unallocated and extends
+	 * to the end of the chunk.
+	 */
+	if ((chunk->map[0].bits & (~PAGE_MASK | CHUNK_MAP_ALLOCATED)) ==
+	    arena_maxclass)
+		arena_chunk_dealloc(arena, chunk);
+
+	/*
+	 * It is okay to do dirty page processing here even if the chunk was
+	 * deallocated above, since in that case it is the spare.  Waiting
+	 * until after possible chunk deallocation to do dirty processing
+	 * allows for an old spare to be fully deallocated, thus decreasing the
+	 * chances of spuriously crossing the dirty page purging threshold.
+	 */
+	if (dirty)
+		arena_maybe_purge(arena);
+}
+
+static void
+arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
+    size_t oldsize, size_t newsize)
+{
+	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
+	size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT;
+	size_t flag_dirty = chunk->map[pageind-map_bias].bits & CHUNK_MAP_DIRTY;
+	/*
+	 * Shrink the large run at run from oldsize to newsize bytes by
+	 * releasing its leading (oldsize - newsize) bytes.  The surviving run
+	 * starts head_npages pages after run.
+	 */
+	assert(oldsize > newsize);
+
+	/*
+	 * Update the chunk map so that arena_run_dalloc() can treat the
+	 * leading run as separately allocated.  Set the last element of each
+	 * run first, in case of single-page runs.
+	 */
+	assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0);
+	assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0);
+	chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty |
+	    (chunk->map[pageind+head_npages-1-map_bias].bits &
+	    CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+	chunk->map[pageind-map_bias].bits = (oldsize - newsize)
+	    | flag_dirty | (chunk->map[pageind-map_bias].bits &
+	    CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+
+#ifdef JEMALLOC_DEBUG
+	{
+		size_t tail_npages = newsize >> PAGE_SHIFT;
+		assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias]
+		    .bits & ~PAGE_MASK) == 0);
+		assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias]
+		    .bits & CHUNK_MAP_DIRTY) == flag_dirty);
+		assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias]
+		    .bits & CHUNK_MAP_LARGE) != 0);
+		assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias]
+		    .bits & CHUNK_MAP_ALLOCATED) != 0);
+	}
+#endif
+	chunk->map[pageind+head_npages-map_bias].bits = newsize | flag_dirty |
+	    (chunk->map[pageind+head_npages-map_bias].bits &
+	    CHUNK_MAP_FLAGS_MASK) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+	/*
+	 * Release the head.  dirty is false here, so the head is treated as
+	 * dirty only if its map entry already carries CHUNK_MAP_DIRTY.
+	 */
+	arena_run_dalloc(arena, run, false);
+}
+
+static void
+arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
+    size_t oldsize, size_t newsize, bool dirty)
+{
+	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
+	size_t head_npages = newsize >> PAGE_SHIFT;
+	size_t tail_npages = (oldsize - newsize) >> PAGE_SHIFT;
+	size_t flag_dirty = chunk->map[pageind-map_bias].bits &
+	    CHUNK_MAP_DIRTY;
+	/*
+	 * Shrink the large run at run from oldsize to newsize bytes by
+	 * releasing its trailing (oldsize - newsize) bytes.  dirty is passed
+	 * through to arena_run_dalloc() for the released part.
+	 */
+	assert(oldsize > newsize);
+
+	/*
+	 * Update the chunk map so that arena_run_dalloc() can treat the
+	 * trailing run as separately allocated.  Set the last element of each
+	 * run first, in case of single-page runs.
+	 */
+	assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0);
+	assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0);
+	chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty |
+	    (chunk->map[pageind+head_npages-1-map_bias].bits &
+	    CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+	chunk->map[pageind-map_bias].bits = newsize | flag_dirty |
+	    (chunk->map[pageind-map_bias].bits & CHUNK_MAP_UNZEROED) |
+	    CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+
+	assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits &
+	    ~PAGE_MASK) == 0);
+	assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits &
+	    CHUNK_MAP_LARGE) != 0);
+	assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits &
+	    CHUNK_MAP_ALLOCATED) != 0);
+	chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits =
+	    flag_dirty |
+	    (chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits &
+	    CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+	chunk->map[pageind+head_npages-map_bias].bits = (oldsize - newsize) |
+	    flag_dirty | (chunk->map[pageind+head_npages-map_bias].bits &
+	    CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+	/* The tail begins newsize bytes past the start of run. */
+	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
+	    dirty);
+}
+
+static arena_run_t *
+arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
+{
+	arena_chunk_map_t *mapelm;
+	arena_run_t *run;
+	size_t binind;
+	arena_bin_info_t *bin_info;
+	/*
+	 * Return a run with free regions for bin: the first run in bin->runs
+	 * if there is one (it is removed from the tree), else a newly
+	 * allocated and initialized run.  Returns NULL if neither is
+	 * available.  bin->lock is released while the new run is allocated
+	 * under arena->lock, and re-acquired before returning.
+	 */
+	/* Look for a usable run. */
+	mapelm = arena_run_tree_first(&bin->runs);
+	if (mapelm != NULL) {
+		arena_chunk_t *chunk;
+		size_t pageind;
+
+		/* run is guaranteed to have available space. */
+		arena_run_tree_remove(&bin->runs, mapelm);
+
+		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm);
+		pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+		    sizeof(arena_chunk_map_t))) + map_bias;
+		run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
+		    (mapelm->bits >> PAGE_SHIFT))
+		    << PAGE_SHIFT));
+#ifdef JEMALLOC_STATS
+		bin->stats.reruns++;
+#endif
+		return (run);
+	}
+	/* No existing runs have any space available. */
+
+	binind = arena_bin_index(arena, bin);
+	bin_info = &arena_bin_info[binind];
+
+	/* Allocate a new run. */
+	malloc_mutex_unlock(&bin->lock);
+	/******************************/
+	malloc_mutex_lock(&arena->lock);
+	run = arena_run_alloc(arena, bin_info->run_size, false, false);
+	if (run != NULL) {
+		bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
+		    (uintptr_t)bin_info->bitmap_offset);
+
+		/* Initialize run internals. */
+		run->bin = bin;
+		run->nextind = 0;
+		run->nfree = bin_info->nregs;
+		bitmap_init(bitmap, &bin_info->bitmap_info);
+#ifdef JEMALLOC_DEBUG
+		run->magic = ARENA_RUN_MAGIC;
+#endif
+	}
+	malloc_mutex_unlock(&arena->lock);
+	/********************************/
+	malloc_mutex_lock(&bin->lock);
+	if (run != NULL) {
+#ifdef JEMALLOC_STATS
+		bin->stats.nruns++;
+		bin->stats.curruns++;
+		if (bin->stats.curruns > bin->stats.highruns)
+			bin->stats.highruns = bin->stats.curruns;
+#endif
+		return (run);
+	}
+
+	/*
+	 * arena_run_alloc() failed, but another thread may have made
+	 * sufficient memory available while this one dropped bin->lock above,
+	 * so search one more time.
+	 */
+	mapelm = arena_run_tree_first(&bin->runs);
+	if (mapelm != NULL) {
+		arena_chunk_t *chunk;
+		size_t pageind;
+
+		/* run is guaranteed to have available space. */
+		arena_run_tree_remove(&bin->runs, mapelm);
+
+		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm);
+		pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+		    sizeof(arena_chunk_map_t))) + map_bias;
+		run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
+		    (mapelm->bits >> PAGE_SHIFT))
+		    << PAGE_SHIFT));
+#ifdef JEMALLOC_STATS
+		bin->stats.reruns++;
+#endif
+		return (run);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Re-fill bin->runcur, then call arena_run_reg_alloc().  Returns NULL if no
+ * run could be obtained.  bin->lock is released and re-acquired inside
+ * arena_bin_nonfull_run_get(), hence the re-check of bin->runcur afterwards.
+ */
+static void *
+arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
+{
+	void *ret;
+	size_t binind;
+	arena_bin_info_t *bin_info;
+	arena_run_t *run;
+
+	binind = arena_bin_index(arena, bin);
+	bin_info = &arena_bin_info[binind];
+	bin->runcur = NULL;
+	run = arena_bin_nonfull_run_get(arena, bin);
+	if (bin->runcur != NULL && bin->runcur->nfree > 0) {
+		/*
+		 * Another thread updated runcur while this one ran without the
+		 * bin lock in arena_bin_nonfull_run_get().
+		 */
+		dassert(bin->runcur->magic == ARENA_RUN_MAGIC);
+		assert(bin->runcur->nfree > 0);
+		ret = arena_run_reg_alloc(bin->runcur, bin_info);
+		if (run != NULL) {
+			arena_chunk_t *chunk;
+
+			/*
+			 * arena_run_alloc() may have allocated run, or it may
+			 * have pulled run from the bin's run tree.  Therefore
+			 * it is unsafe to make any assumptions about how run
+			 * has previously been used, and arena_bin_lower_run()
+			 * must be called, as if a region were just deallocated
+			 * from the run.
+			 */
+			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
+			if (run->nfree == bin_info->nregs)
+				arena_dalloc_bin_run(arena, chunk, run, bin);
+			else
+				arena_bin_lower_run(arena, chunk, run, bin);
+		}
+		return (ret);
+	}
+
+	if (run == NULL)
+		return (NULL);
+
+	bin->runcur = run;
+
+	dassert(bin->runcur->magic == ARENA_RUN_MAGIC);
+	assert(bin->runcur->nfree > 0);
+
+	return (arena_run_reg_alloc(bin->runcur, bin_info));
+}
+
+#ifdef JEMALLOC_PROF
+void
+arena_prof_accum(arena_t *arena, uint64_t accumbytes)
+{
+
+	/* Nothing is accumulated while prof_interval is zero. */
+	if (prof_interval == 0)
+		return;
+
+	arena->prof_accumbytes += accumbytes;
+	if (arena->prof_accumbytes < prof_interval)
+		return;
+
+	/* A full interval has accumulated: dump, then carry the excess over. */
+	prof_idump();
+	arena->prof_accumbytes -= prof_interval;
+}
+#endif
+
+#ifdef JEMALLOC_TCACHE
+/*
+ * Refill tbin, which must be empty on entry, with regions from arena's bin
+ * binind.  Up to (ncached_max >> tbin->lg_fill_div) regions are requested;
+ * fewer are cached if an allocation fails.  On return, tbin->ncached holds the
+ * number of regions obtained and they occupy tbin->avail[0 .. ncached-1].
+ * With JEMALLOC_PROF, prof_accumbytes is first added to the arena's profiling
+ * accumulator under arena->lock.
+ */
+void
+arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind
+#  ifdef JEMALLOC_PROF
+    , uint64_t prof_accumbytes
+#  endif
+    )
+{
+	unsigned i, nfill;
+	arena_bin_t *bin;
+	arena_run_t *run;
+	void *ptr;
+
+	assert(tbin->ncached == 0);
+
+#ifdef JEMALLOC_PROF
+	malloc_mutex_lock(&arena->lock);
+	arena_prof_accum(arena, prof_accumbytes);
+	malloc_mutex_unlock(&arena->lock);
+#endif
+	bin = &arena->bins[binind];
+	malloc_mutex_lock(&bin->lock);
+	for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >>
+	    tbin->lg_fill_div); i < nfill; i++) {
+		if ((run = bin->runcur) != NULL && run->nfree > 0)
+			ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]);
+		else
+			ptr = arena_bin_malloc_hard(arena, bin);
+		if (ptr == NULL) {
+			/*
+			 * OOM.  The fill proceeds from the top of avail
+			 * downward, so the i regions obtained so far sit in
+			 * avail[nfill-i .. nfill-1].  Move them to the base of
+			 * avail so that they line up with ncached == i, the
+			 * same layout a complete fill produces.
+			 */
+			if (i > 0) {
+				memmove(tbin->avail, &tbin->avail[nfill - i],
+				    i * sizeof(tbin->avail[0]));
+			}
+			break;
+		}
+		/* Insert such that low regions get used first. */
+		tbin->avail[nfill - 1 - i] = ptr;
+	}
+#ifdef JEMALLOC_STATS
+	bin->stats.allocated += i * arena_bin_info[binind].reg_size;
+	bin->stats.nmalloc += i;
+	bin->stats.nrequests += tbin->tstats.nrequests;
+	bin->stats.nfills++;
+	tbin->tstats.nrequests = 0;
+#endif
+	malloc_mutex_unlock(&bin->lock);
+	tbin->ncached = i;
+}
+#endif
+
+void *
+arena_malloc_small(arena_t *arena, size_t size, bool zero)
+{
+	void *ret;
+	arena_bin_t *bin;
+	arena_run_t *run;
+	size_t binind;
+	/*
+	 * Allocate one region from the bin that serves size; size is then
+	 * replaced by that bin's reg_size.  Returns NULL on failure.  The
+	 * region is zeroed if zero is true; otherwise it is junk- or
+	 * zero-filled only as opt_junk/opt_zero dictate (JEMALLOC_FILL).
+	 */
+	binind = SMALL_SIZE2BIN(size);
+	assert(binind < nbins);
+	bin = &arena->bins[binind];
+	size = arena_bin_info[binind].reg_size;
+
+	malloc_mutex_lock(&bin->lock);
+	if ((run = bin->runcur) != NULL && run->nfree > 0)
+		ret = arena_run_reg_alloc(run, &arena_bin_info[binind]);
+	else
+		ret = arena_bin_malloc_hard(arena, bin);
+
+	if (ret == NULL) {
+		malloc_mutex_unlock(&bin->lock);
+		return (NULL);
+	}
+
+#ifdef JEMALLOC_STATS
+	bin->stats.allocated += size;
+	bin->stats.nmalloc++;
+	bin->stats.nrequests++;
+#endif
+	malloc_mutex_unlock(&bin->lock);
+#ifdef JEMALLOC_PROF
+	if (isthreaded == false) {
+		malloc_mutex_lock(&arena->lock);
+		arena_prof_accum(arena, size);
+		malloc_mutex_unlock(&arena->lock);
+	}
+#endif
+
+	if (zero == false) {
+#ifdef JEMALLOC_FILL
+		if (opt_junk)
+			memset(ret, 0xa5, size);
+		else if (opt_zero)
+			memset(ret, 0, size);
+#endif
+	} else
+		memset(ret, 0, size);
+
+	return (ret);
+}
+
+void *
+arena_malloc_large(arena_t *arena, size_t size, bool zero)
+{
+	void *ret;
+
+	/*
+	 * Large allocation: size is rounded up to a page multiple and a run
+	 * of that size is allocated under arena->lock.  Returns NULL on
+	 * failure.  zero is forwarded to arena_run_alloc(); this function
+	 * itself only fills memory for opt_junk/opt_zero (JEMALLOC_FILL).
+	 */
+	size = PAGE_CEILING(size);
+	malloc_mutex_lock(&arena->lock);
+	ret = (void *)arena_run_alloc(arena, size, true, zero);
+	if (ret == NULL) {
+		malloc_mutex_unlock(&arena->lock);
+		return (NULL);
+	}
+#ifdef JEMALLOC_STATS
+	arena->stats.nmalloc_large++;
+	arena->stats.nrequests_large++;
+	arena->stats.allocated_large += size;
+	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++;
+	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
+	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
+	if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
+	    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
+		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
+		    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
+	}
+#endif
+#ifdef JEMALLOC_PROF
+	arena_prof_accum(arena, size);
+#endif
+	malloc_mutex_unlock(&arena->lock);
+
+	if (zero == false) {
+#ifdef JEMALLOC_FILL
+		if (opt_junk)
+			memset(ret, 0xa5, size);
+		else if (opt_zero)
+			memset(ret, 0, size);
+#endif
+	}
+
+	return (ret);
+}
+
+void *
+arena_malloc(size_t size, bool zero)
+{
+	/*
+	 * Dispatch on size: requests up to small_maxclass take the small path,
+	 * larger ones the large path.  With JEMALLOC_TCACHE, the thread cache
+	 * is used when tcache_get() returns one (for large requests, only up
+	 * to tcache_maxclass); otherwise the arena from choose_arena() is
+	 * used directly.
+	 */
+	assert(size != 0);
+	assert(QUANTUM_CEILING(size) <= arena_maxclass);
+
+	if (size <= small_maxclass) {
+#ifdef JEMALLOC_TCACHE
+		tcache_t *tcache;
+
+		if ((tcache = tcache_get()) != NULL)
+			return (tcache_alloc_small(tcache, size, zero));
+		else
+
+#endif
+			return (arena_malloc_small(choose_arena(), size, zero));
+	} else {
+#ifdef JEMALLOC_TCACHE
+		if (size <= tcache_maxclass) {
+			tcache_t *tcache;
+
+			if ((tcache = tcache_get()) != NULL)
+				return (tcache_alloc_large(tcache, size, zero));
+			else {
+				return (arena_malloc_large(choose_arena(),
+				    size, zero));
+			}
+		} else
+#endif
+			return (arena_malloc_large(choose_arena(), size, zero));
+	}
+}
+
+/*
+ * Only handles large allocations that require more than page alignment.  A
+ * run of alloc_size bytes is allocated, then trimmed at the head and/or tail
+ * so that size bytes remain at an address aligned to alignment (rounded up to
+ * a page multiple).  Returns NULL if the run cannot be allocated.
+ */
+void *
+arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment,
+    bool zero)
+{
+	void *ret;
+	size_t offset;
+	arena_chunk_t *chunk;
+
+	assert((size & PAGE_MASK) == 0);
+
+	alignment = PAGE_CEILING(alignment);
+
+	malloc_mutex_lock(&arena->lock);
+	ret = (void *)arena_run_alloc(arena, alloc_size, true, zero);
+	if (ret == NULL) {
+		malloc_mutex_unlock(&arena->lock);
+		return (NULL);
+	}
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
+	/*
+	 * offset is how far ret lies past the previous alignment boundary;
+	 * zero means the run is already aligned and only the tail is trimmed.
+	 */
+	offset = (uintptr_t)ret & (alignment - 1);
+	assert((offset & PAGE_MASK) == 0);
+	assert(offset < alloc_size);
+	if (offset == 0)
+		arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false);
+	else {
+		size_t leadsize, trailsize;
+
+		leadsize = alignment - offset;
+		if (leadsize > 0) {
+			arena_run_trim_head(arena, chunk, ret, alloc_size,
+			    alloc_size - leadsize);
+			ret = (void *)((uintptr_t)ret + leadsize);
+		}
+
+		trailsize = alloc_size - leadsize - size;
+		if (trailsize != 0) {
+			/* Trim trailing space. */
+			assert(trailsize < alloc_size);
+			arena_run_trim_tail(arena, chunk, ret, size + trailsize,
+			    size, false);
+		}
+	}
+
+#ifdef JEMALLOC_STATS
+	arena->stats.nmalloc_large++;
+	arena->stats.nrequests_large++;
+	arena->stats.allocated_large += size;
+	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++;
+	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
+	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
+	if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
+	    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
+		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
+		    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
+	}
+#endif
+	malloc_mutex_unlock(&arena->lock);
+
+#ifdef JEMALLOC_FILL
+	if (zero == false) {
+		if (opt_junk)
+			memset(ret, 0xa5, size);
+		else if (opt_zero)
+			memset(ret, 0, size);
+	}
+#endif
+	return (ret);
+}
+
+/* Return the size of the allocation pointed to by ptr. */
+size_t
+arena_salloc(const void *ptr)
+{
+	size_t ret;
+	arena_chunk_t *chunk;
+	size_t pageind, mapbits;
+
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+	/* Look up the page map entry of the page that contains ptr. */
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	mapbits = chunk->map[pageind-map_bias].bits;
+	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
+	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
+		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+		    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
+		    PAGE_SHIFT));
+		dassert(run->magic == ARENA_RUN_MAGIC);
+		size_t binind = arena_bin_index(chunk->arena, run->bin);
+		arena_bin_info_t *bin_info = &arena_bin_info[binind];
+		assert(((uintptr_t)ptr - ((uintptr_t)run +
+		    (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size ==
+		    0);
+		ret = bin_info->reg_size;	/* Not large: bin's region size. */
+	} else {
+		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+		ret = mapbits & ~PAGE_MASK;	/* Size is in the high bits. */
+		assert(ret != 0);
+	}
+
+	return (ret);
+}
+
+#ifdef JEMALLOC_PROF
+void
+arena_prof_promoted(const void *ptr, size_t size)
+{
+	arena_chunk_t *chunk;
+	size_t pageind, binind;
+	/* Record size's bin index, biased by one, in ptr's page map entry. */
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+	assert(isalloc(ptr) == PAGE_SIZE);
+	assert(size <= small_maxclass);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	binind = SMALL_SIZE2BIN(size);
+	assert(binind < nbins);
+	chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits &
+	    ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT);
+}
+
+size_t
+arena_salloc_demote(const void *ptr)
+{
+	size_t ret;
+	arena_chunk_t *chunk;
+	size_t pageind, mapbits;
+	/* As arena_salloc(), except for one-page runs tagged with a bin index. */
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	mapbits = chunk->map[pageind-map_bias].bits;
+	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
+	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
+		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+		    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
+		    PAGE_SHIFT));
+		dassert(run->magic == ARENA_RUN_MAGIC);
+		size_t binind = arena_bin_index(chunk->arena, run->bin);
+		arena_bin_info_t *bin_info = &arena_bin_info[binind];
+		assert(((uintptr_t)ptr - ((uintptr_t)run +
+		    (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size ==
+		    0);
+		ret = bin_info->reg_size;
+	} else {
+		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+		ret = mapbits & ~PAGE_MASK;
+		if (prof_promote && ret == PAGE_SIZE && (mapbits &
+		    CHUNK_MAP_CLASS_MASK) != 0) {
+			size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >>
+			    CHUNK_MAP_CLASS_SHIFT) - 1;	/* Undo the +1 bias. */
+			assert(binind < nbins);
+			ret = arena_bin_info[binind].reg_size;
+		}
+		assert(ret != 0);
+	}
+
+	return (ret);
+}
+#endif
+
+static void
+arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
+    arena_bin_t *bin)
+{
+
+	/* Detach run from bin: clear runcur or remove it from bin->runs. */
+	if (run == bin->runcur)
+		bin->runcur = NULL;
+	else {
+		size_t binind = arena_bin_index(chunk->arena, bin);
+		arena_bin_info_t *bin_info = &arena_bin_info[binind];
+
+		if (bin_info->nregs != 1) {
+			size_t run_pageind = (((uintptr_t)run -
+			    (uintptr_t)chunk)) >> PAGE_SHIFT;
+			arena_chunk_map_t *run_mapelm =
+			    &chunk->map[run_pageind-map_bias];
+			/*
+			 * This block's conditional is necessary because if the
+			 * run only contains one region, then it never gets
+			 * inserted into the non-full runs tree.
+			 */
+			arena_run_tree_remove(&bin->runs, run_mapelm);
+		}
+	}
+}
+
+static void
+arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
+    arena_bin_t *bin)
+{
+	size_t binind;
+	arena_bin_info_t *bin_info;
+	size_t npages, run_ind, past;
+	/* bin->lock is dropped while arena->lock is held, then reacquired. */
+	assert(run != bin->runcur);
+	assert(arena_run_tree_search(&bin->runs, &chunk->map[
+	    (((uintptr_t)run-(uintptr_t)chunk)>>PAGE_SHIFT)-map_bias]) == NULL);
+
+	binind = arena_bin_index(chunk->arena, run->bin);
+	bin_info = &arena_bin_info[binind];
+
+	malloc_mutex_unlock(&bin->lock);
+	/******************************/
+	npages = bin_info->run_size >> PAGE_SHIFT;
+	run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT);
+	past = (size_t)(PAGE_CEILING((uintptr_t)run +
+	    (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind *
+	    bin_info->reg_size) - (uintptr_t)chunk) >> PAGE_SHIFT);
+	malloc_mutex_lock(&arena->lock);
+
+	/*
+	 * If the run was originally clean, and some pages were never touched,
+	 * trim the clean pages before deallocating the dirty portion of the
+	 * run.
+	 */
+	if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == 0 && past
+	    - run_ind < npages) {
+		/*
+		 * Trim clean pages.  Convert to large run beforehand.  Set the
+		 * last map element first, in case this is a one-page run.
+		 */
+		chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE |
+		    (chunk->map[run_ind+npages-1-map_bias].bits &
+		    CHUNK_MAP_FLAGS_MASK);
+		chunk->map[run_ind-map_bias].bits = bin_info->run_size |
+		    CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits &
+		    CHUNK_MAP_FLAGS_MASK);
+		arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT),
+		    ((past - run_ind) << PAGE_SHIFT), false);
+		/* npages would now be past - run_ind, but is not used again. */
+	}
+#ifdef JEMALLOC_DEBUG
+	run->magic = 0;
+#endif
+	arena_run_dalloc(arena, run, true);
+	malloc_mutex_unlock(&arena->lock);
+	/****************************/
+	malloc_mutex_lock(&bin->lock);
+#ifdef JEMALLOC_STATS
+	bin->stats.curruns--;
+#endif
+}
+
+static void
+arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
+    arena_bin_t *bin)
+{
+
+	/*
+	 * Make sure that bin->runcur always refers to the lowest non-full run,
+	 * if one exists.
+	 */
+	if (bin->runcur == NULL)
+		bin->runcur = run;
+	else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
+		/* run has the lower address, so it becomes runcur. */
+		if (bin->runcur->nfree > 0) {
+			arena_chunk_t *runcur_chunk =
+			    CHUNK_ADDR2BASE(bin->runcur);
+			size_t runcur_pageind = (((uintptr_t)bin->runcur -
+			    (uintptr_t)runcur_chunk)) >> PAGE_SHIFT;
+			arena_chunk_map_t *runcur_mapelm =
+			    &runcur_chunk->map[runcur_pageind-map_bias];
+
+			/* Old runcur has free regions; keep it in bin->runs. */
+			arena_run_tree_insert(&bin->runs, runcur_mapelm);
+		}
+		bin->runcur = run;
+	} else {
+		size_t run_pageind = (((uintptr_t)run -
+		    (uintptr_t)chunk)) >> PAGE_SHIFT;
+		arena_chunk_map_t *run_mapelm =
+		    &chunk->map[run_pageind-map_bias];
+		/* run is not below runcur: track it in bin->runs instead. */
+		assert(arena_run_tree_search(&bin->runs, run_mapelm) == NULL);
+		arena_run_tree_insert(&bin->runs, run_mapelm);
+	}
+}
+
+void
+arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+    arena_chunk_map_t *mapelm)
+{
+	size_t pageind;
+	arena_run_t *run;
+	arena_bin_t *bin;
+#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS))
+	size_t size;
+#endif
+	/* Find the run that holds ptr via the page offset kept in mapelm. */
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
+	    (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
+	dassert(run->magic == ARENA_RUN_MAGIC);
+	bin = run->bin;
+	size_t binind = arena_bin_index(arena, bin);
+	arena_bin_info_t *bin_info = &arena_bin_info[binind];
+#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS))
+	size = bin_info->reg_size;
+#endif
+
+#ifdef JEMALLOC_FILL
+	if (opt_junk)
+		memset(ptr, 0x5a, size);
+#endif
+	/* Free the region; drop an empty run, re-link a formerly full one. */
+	arena_run_reg_dalloc(run, ptr);
+	if (run->nfree == bin_info->nregs) {
+		arena_dissociate_bin_run(chunk, run, bin);
+		arena_dalloc_bin_run(arena, chunk, run, bin);
+	} else if (run->nfree == 1 && run != bin->runcur)
+		arena_bin_lower_run(arena, chunk, run, bin);
+
+#ifdef JEMALLOC_STATS
+	bin->stats.allocated -= size;
+	bin->stats.ndalloc++;
+#endif
+}
+
+#ifdef JEMALLOC_STATS
+void
+arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
+    arena_stats_t *astats, malloc_bin_stats_t *bstats,
+    malloc_large_stats_t *lstats)
+{
+	unsigned i;
+	/* Accumulate (+=) this arena's counters into the caller's totals. */
+	malloc_mutex_lock(&arena->lock);
+	*nactive += arena->nactive;
+	*ndirty += arena->ndirty;
+
+	astats->mapped += arena->stats.mapped;
+	astats->npurge += arena->stats.npurge;
+	astats->nmadvise += arena->stats.nmadvise;
+	astats->purged += arena->stats.purged;
+	astats->allocated_large += arena->stats.allocated_large;
+	astats->nmalloc_large += arena->stats.nmalloc_large;
+	astats->ndalloc_large += arena->stats.ndalloc_large;
+	astats->nrequests_large += arena->stats.nrequests_large;
+
+	for (i = 0; i < nlclasses; i++) {
+		lstats[i].nmalloc += arena->stats.lstats[i].nmalloc;
+		lstats[i].ndalloc += arena->stats.lstats[i].ndalloc;
+		lstats[i].nrequests += arena->stats.lstats[i].nrequests;
+		lstats[i].highruns += arena->stats.lstats[i].highruns;
+		lstats[i].curruns += arena->stats.lstats[i].curruns;
+	}
+	malloc_mutex_unlock(&arena->lock);
+	/* Per-bin stats are read under each bin's own lock. */
+	for (i = 0; i < nbins; i++) {
+		arena_bin_t *bin = &arena->bins[i];
+
+		malloc_mutex_lock(&bin->lock);
+		bstats[i].allocated += bin->stats.allocated;
+		bstats[i].nmalloc += bin->stats.nmalloc;
+		bstats[i].ndalloc += bin->stats.ndalloc;
+		bstats[i].nrequests += bin->stats.nrequests;
+#ifdef JEMALLOC_TCACHE
+		bstats[i].nfills += bin->stats.nfills;
+		bstats[i].nflushes += bin->stats.nflushes;
+#endif
+		bstats[i].nruns += bin->stats.nruns;
+		bstats[i].reruns += bin->stats.reruns;
+		bstats[i].highruns += bin->stats.highruns;
+		bstats[i].curruns += bin->stats.curruns;
+		malloc_mutex_unlock(&bin->lock);
+	}
+}
+#endif
+
+void
+arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
+{
+
+	/* Large allocation: optional junk fill and stats, then free the run. */
+#ifdef JEMALLOC_FILL
+#  ifndef JEMALLOC_STATS
+	if (opt_junk)
+#  endif
+#endif
+	{
+#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS))
+		size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
+		    PAGE_SHIFT;
+		size_t size = chunk->map[pageind-map_bias].bits & ~PAGE_MASK;
+#endif
+		/* With stats compiled in, opt_junk guards only the memset. */
+#ifdef JEMALLOC_FILL
+#  ifdef JEMALLOC_STATS
+		if (opt_junk)
+#  endif
+			memset(ptr, 0x5a, size);
+#endif
+#ifdef JEMALLOC_STATS
+		arena->stats.ndalloc_large++;
+		arena->stats.allocated_large -= size;
+		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].ndalloc++;
+		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns--;
+#endif
+	}
+
+	arena_run_dalloc(arena, (arena_run_t *)ptr, true);
+}
+
+static void
+arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+    size_t oldsize, size_t size)
+{
+
+	assert(size < oldsize);
+
+	/*
+	 * Shrink the run, and make trailing pages available for other
+	 * allocations.
+	 */
+	malloc_mutex_lock(&arena->lock);
+	arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
+	    true);
+#ifdef JEMALLOC_STATS
+	arena->stats.ndalloc_large++;
+	arena->stats.allocated_large -= oldsize;
+	arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++;
+	arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
+	/* Counted as dalloc(oldsize) above plus malloc(size) below. */
+	arena->stats.nmalloc_large++;
+	arena->stats.nrequests_large++;
+	arena->stats.allocated_large += size;
+	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++;
+	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
+	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
+	if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
+	    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
+		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
+		    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
+	}
+#endif
+	malloc_mutex_unlock(&arena->lock);
+}
+
+static bool
+arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+    size_t oldsize, size_t size, size_t extra, bool zero)
+{
+	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	size_t npages = oldsize >> PAGE_SHIFT;
+	size_t followsize;
+	/* Returns false if the run was extended in place, true otherwise. */
+	assert(oldsize == (chunk->map[pageind-map_bias].bits & ~PAGE_MASK));
+
+	/* Try to extend the run. */
+	assert(size + extra > oldsize);
+	malloc_mutex_lock(&arena->lock);
+	if (pageind + npages < chunk_npages &&
+	    (chunk->map[pageind+npages-map_bias].bits
+	    & CHUNK_MAP_ALLOCATED) == 0 && (followsize =
+	    chunk->map[pageind+npages-map_bias].bits & ~PAGE_MASK) >= size -
+	    oldsize) {
+		/*
+		 * The next run is available and sufficiently large.  Split the
+		 * following run, then merge the first part with the existing
+		 * allocation.
+		 */
+		size_t flag_dirty;
+		size_t splitsize = (oldsize + followsize <= size + extra)
+		    ? followsize : size + extra - oldsize;
+		arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk +
+		    ((pageind+npages) << PAGE_SHIFT)), splitsize, true, zero);
+		/* From here on, size is the new total size of the run. */
+		size = oldsize + splitsize;
+		npages = size >> PAGE_SHIFT;
+
+		/*
+		 * Mark the extended run as dirty if either portion of the run
+		 * was dirty before allocation.  This is rather pedantic,
+		 * because there's not actually any sequence of events that
+		 * could cause the resulting run to be passed to
+		 * arena_run_dalloc() with the dirty argument set to false
+		 * (which is when dirty flag consistency would really matter).
+		 */
+		flag_dirty = (chunk->map[pageind-map_bias].bits &
+		    CHUNK_MAP_DIRTY) |
+		    (chunk->map[pageind+npages-1-map_bias].bits &
+		    CHUNK_MAP_DIRTY);
+		chunk->map[pageind-map_bias].bits = size | flag_dirty
+		    | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+		chunk->map[pageind+npages-1-map_bias].bits = flag_dirty |
+		    CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+
+#ifdef JEMALLOC_STATS
+		arena->stats.ndalloc_large++;
+		arena->stats.allocated_large -= oldsize;
+		arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++;
+		arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
+		/* Counted as dalloc(oldsize) above plus malloc(size) below. */
+		arena->stats.nmalloc_large++;
+		arena->stats.nrequests_large++;
+		arena->stats.allocated_large += size;
+		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++;
+		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
+		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
+		if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
+		    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
+			arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
+			    arena->stats.lstats[(size >> PAGE_SHIFT) -
+			    1].curruns;
+		}
+#endif
+		malloc_mutex_unlock(&arena->lock);
+		return (false);
+	}
+	malloc_mutex_unlock(&arena->lock);
+
+	return (true);
+}
+
+/*
+ * Try to resize a large allocation in place; returns false on success.  Growing
+ * fails if the following run is in use.  size may be below oldsize on growth.
+ */
+static bool
+arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra,
+    bool zero)
+{
+	size_t psize;
+
+	psize = PAGE_CEILING(size + extra);
+	if (psize == oldsize) {
+		/* Same size class. */
+#ifdef JEMALLOC_FILL
+		if (opt_junk && size < oldsize) {
+			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize -
+			    size);
+		}
+#endif
+		return (false);
+	} else {
+		arena_chunk_t *chunk;
+		arena_t *arena;
+
+		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+		arena = chunk->arena;
+		dassert(arena->magic == ARENA_MAGIC);
+
+		if (psize < oldsize) {
+#ifdef JEMALLOC_FILL
+			/* Fill before shrinking in order to avoid a race. */
+			if (opt_junk) {
+				memset((void *)((uintptr_t)ptr + size), 0x5a,
+				    oldsize - size);
+			}
+#endif
+			arena_ralloc_large_shrink(arena, chunk, ptr, oldsize,
+			    psize);
+			return (false);
+		} else {
+			bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
+			    oldsize, PAGE_CEILING(size),
+			    psize - PAGE_CEILING(size), zero);
+#ifdef JEMALLOC_FILL
+			if (!ret && !zero && opt_zero && size > oldsize) {
+				memset((void *)((uintptr_t)ptr + oldsize), 0,
+				    size - oldsize);
+			}
+#endif
+			return (ret);
+		}
+	}
+}
+
+void *
+arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra,
+    bool zero)
+{
+	/* Returns ptr if resized in place, NULL if a move is required. */
+	/*
+	 * Avoid moving the allocation if the size class can be left the same.
+	 */
+	if (oldsize <= arena_maxclass) {
+		if (oldsize <= small_maxclass) {
+			assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size
+			    == oldsize);
+			if ((size + extra <= small_maxclass &&
+			    SMALL_SIZE2BIN(size + extra) ==
+			    SMALL_SIZE2BIN(oldsize)) || (size <= oldsize &&
+			    size + extra >= oldsize)) {
+#ifdef JEMALLOC_FILL
+				if (opt_junk && size < oldsize) {
+					memset((void *)((uintptr_t)ptr + size),
+					    0x5a, oldsize - size);
+				}
+#endif
+				return (ptr);
+			}
+		} else {
+			assert(size <= arena_maxclass);
+			if (size + extra > small_maxclass) {
+				if (arena_ralloc_large(ptr, oldsize, size,
+				    extra, zero) == false)
+					return (ptr);
+			}
+		}
+	}
+
+	/* Reallocation would require a move. */
+	return (NULL);
+}
+
+void *
+arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
+    size_t alignment, bool zero)
+{
+	void *ret;
+	size_t copysize;
+	/* Returns the new address, or NULL (ptr is not freed in that case). */
+	/* Try to avoid moving the allocation. */
+	ret = arena_ralloc_no_move(ptr, oldsize, size, extra, zero);
+	if (ret != NULL)
+		return (ret);
+
+	/*
+	 * size and oldsize are different enough that we need to move the
+	 * object.  In that case, fall back to allocating new space and
+	 * copying.
+	 */
+	if (alignment != 0) {
+		size_t usize = sa2u(size + extra, alignment, NULL);
+		if (usize == 0)
+			return (NULL);
+		ret = ipalloc(usize, alignment, zero);
+	} else
+		ret = arena_malloc(size + extra, zero);
+
+	if (ret == NULL) {
+		if (extra == 0)
+			return (NULL);
+		/* Try again, this time without extra. */
+		if (alignment != 0) {
+			size_t usize = sa2u(size, alignment, NULL);
+			if (usize == 0)
+				return (NULL);
+			ret = ipalloc(usize, alignment, zero);
+		} else
+			ret = arena_malloc(size, zero);
+
+		if (ret == NULL)
+			return (NULL);
+	}
+
+	/* Junk/zero-filling were already done by ipalloc()/arena_malloc(). */
+
+	/*
+	 * Copy at most size bytes (not size+extra), since the caller has no
+	 * expectation that the extra bytes will be reliably preserved.
+	 */
+	copysize = (size < oldsize) ? size : oldsize;
+	memcpy(ret, ptr, copysize);
+	idalloc(ptr);	/* The old allocation is released only after the copy. */
+	return (ret);
+}
+
+bool
+arena_new(arena_t *arena, unsigned ind)
+{
+	unsigned i;
+	arena_bin_t *bin;
+	/* Initialize arena in place.  Returns true on failure. */
+	arena->ind = ind;
+	arena->nthreads = 0;
+
+	if (malloc_mutex_init(&arena->lock))
+		return (true);
+
+#ifdef JEMALLOC_STATS
+	memset(&arena->stats, 0, sizeof(arena_stats_t));
+	arena->stats.lstats = (malloc_large_stats_t *)base_alloc(nlclasses *
+	    sizeof(malloc_large_stats_t));
+	if (arena->stats.lstats == NULL)
+		return (true);
+	memset(arena->stats.lstats, 0, nlclasses *
+	    sizeof(malloc_large_stats_t));
+#  ifdef JEMALLOC_TCACHE
+	ql_new(&arena->tcache_ql);
+#  endif
+#endif
+
+#ifdef JEMALLOC_PROF
+	arena->prof_accumbytes = 0;
+#endif
+
+	/* Initialize chunks. */
+	ql_new(&arena->chunks_dirty);
+	arena->spare = NULL;
+
+	arena->nactive = 0;
+	arena->ndirty = 0;
+	arena->npurgatory = 0;
+
+	arena_avail_tree_new(&arena->runs_avail_clean);
+	arena_avail_tree_new(&arena->runs_avail_dirty);
+
+	/* Initialize bins; every group below gets the same per-bin setup. */
+	i = 0;
+#ifdef JEMALLOC_TINY
+	/* (2^n)-spaced tiny bins. */
+	for (; i < ntbins; i++) {
+		bin = &arena->bins[i];
+		if (malloc_mutex_init(&bin->lock))
+			return (true);
+		bin->runcur = NULL;
+		arena_run_tree_new(&bin->runs);
+#ifdef JEMALLOC_STATS
+		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
+#endif
+	}
+#endif
+
+	/* Quantum-spaced bins. */
+	for (; i < ntbins + nqbins; i++) {
+		bin = &arena->bins[i];
+		if (malloc_mutex_init(&bin->lock))
+			return (true);
+		bin->runcur = NULL;
+		arena_run_tree_new(&bin->runs);
+#ifdef JEMALLOC_STATS
+		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
+#endif
+	}
+
+	/* Cacheline-spaced bins. */
+	for (; i < ntbins + nqbins + ncbins; i++) {
+		bin = &arena->bins[i];
+		if (malloc_mutex_init(&bin->lock))
+			return (true);
+		bin->runcur = NULL;
+		arena_run_tree_new(&bin->runs);
+#ifdef JEMALLOC_STATS
+		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
+#endif
+	}
+
+	/* Subpage-spaced bins. */
+	for (; i < nbins; i++) {
+		bin = &arena->bins[i];
+		if (malloc_mutex_init(&bin->lock))
+			return (true);
+		bin->runcur = NULL;
+		arena_run_tree_new(&bin->runs);
+#ifdef JEMALLOC_STATS
+		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
+#endif
+	}
+
+#ifdef JEMALLOC_DEBUG
+	arena->magic = ARENA_MAGIC;
+#endif
+
+	return (false);
+}
+
+#ifdef JEMALLOC_DEBUG
+static void
+small_size2bin_validate(void)
+{
+	size_t i, size, binind;
+	/* Recompute every size's bin index; assert SMALL_SIZE2BIN() agrees. */
+	i = 1;
+#  ifdef JEMALLOC_TINY
+	/* Tiny. */
+	for (; i < (1U << LG_TINY_MIN); i++) {
+		size = pow2_ceil(1U << LG_TINY_MIN);
+		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
+		assert(SMALL_SIZE2BIN(i) == binind);
+	}
+	for (; i < qspace_min; i++) {
+		size = pow2_ceil(i);
+		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
+		assert(SMALL_SIZE2BIN(i) == binind);
+	}
+#  endif
+	/* Quantum-spaced. */
+	for (; i <= qspace_max; i++) {
+		size = QUANTUM_CEILING(i);
+		binind = ntbins + (size >> LG_QUANTUM) - 1;
+		assert(SMALL_SIZE2BIN(i) == binind);
+	}
+	/* Cacheline-spaced. */
+	for (; i <= cspace_max; i++) {
+		size = CACHELINE_CEILING(i);
+		binind = ntbins + nqbins + ((size - cspace_min) >>
+		    LG_CACHELINE);
+		assert(SMALL_SIZE2BIN(i) == binind);
+	}
+	/* Sub-page. */
+	for (; i <= sspace_max; i++) {
+		size = SUBPAGE_CEILING(i);
+		binind = ntbins + nqbins + ncbins + ((size - sspace_min)
+		    >> LG_SUBPAGE);
+		assert(SMALL_SIZE2BIN(i) == binind);
+	}
+}
+#endif
+
+static bool
+small_size2bin_init(void)
+{
+	/* Use the constant table unless the options or its size differ. */
+	if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
+	    || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
+	    || (sizeof(const_small_size2bin) != ((small_maxclass-1) >>
+	    LG_TINY_MIN) + 1))
+		return (small_size2bin_init_hard());
+
+	small_size2bin = const_small_size2bin;
+#ifdef JEMALLOC_DEBUG
+	small_size2bin_validate();
+#endif
+	return (false);
+}
+
+static bool
+small_size2bin_init_hard(void)
+{
+	size_t i, size, binind;
+	uint8_t *custom_small_size2bin;
+#define	CUSTOM_SMALL_SIZE2BIN(s)					\
+    custom_small_size2bin[(s-1) >> LG_TINY_MIN]
+	/* Build the size->bin table at run time; returns true on failure. */
+	assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
+	    || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
+	    || (sizeof(const_small_size2bin) != ((small_maxclass-1) >>
+	    LG_TINY_MIN) + 1));
+
+	custom_small_size2bin = (uint8_t *)
+	    base_alloc(small_maxclass >> LG_TINY_MIN);
+	if (custom_small_size2bin == NULL)
+		return (true);
+	/* i advances by TINY_MIN; each step writes one table entry. */
+	i = 1;
+#ifdef JEMALLOC_TINY
+	/* Tiny. */
+	for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) {
+		size = pow2_ceil(1U << LG_TINY_MIN);
+		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
+		CUSTOM_SMALL_SIZE2BIN(i) = binind;
+	}
+	for (; i < qspace_min; i += TINY_MIN) {
+		size = pow2_ceil(i);
+		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
+		CUSTOM_SMALL_SIZE2BIN(i) = binind;
+	}
+#endif
+	/* Quantum-spaced. */
+	for (; i <= qspace_max; i += TINY_MIN) {
+		size = QUANTUM_CEILING(i);
+		binind = ntbins + (size >> LG_QUANTUM) - 1;
+		CUSTOM_SMALL_SIZE2BIN(i) = binind;
+	}
+	/* Cacheline-spaced. */
+	for (; i <= cspace_max; i += TINY_MIN) {
+		size = CACHELINE_CEILING(i);
+		binind = ntbins + nqbins + ((size - cspace_min) >>
+		    LG_CACHELINE);
+		CUSTOM_SMALL_SIZE2BIN(i) = binind;
+	}
+	/* Sub-page. */
+	for (; i <= sspace_max; i += TINY_MIN) {
+		size = SUBPAGE_CEILING(i);
+		binind = ntbins + nqbins + ncbins + ((size - sspace_min) >>
+		    LG_SUBPAGE);
+		CUSTOM_SMALL_SIZE2BIN(i) = binind;
+	}
+
+	small_size2bin = custom_small_size2bin;
+#ifdef JEMALLOC_DEBUG
+	small_size2bin_validate();
+#endif
+	return (false);
+#undef CUSTOM_SMALL_SIZE2BIN
+}
+
+/*
+ * Calculate bin_info->run_size such that it meets the following constraints:
+ *
+ *   *) bin_info->run_size >= min_run_size
+ *   *) bin_info->run_size <= arena_maxclass
+ *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
+ *   *) bin_info->nregs <= RUN_MAXREGS
+ *
+ * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also
+ * set here, since they are all interdependent.  Returns the chosen run_size.
+ */
+static size_t
+bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
+{
+	size_t try_run_size, good_run_size;
+	uint32_t try_nregs, good_nregs;
+	uint32_t try_hdr_size, good_hdr_size;
+	uint32_t try_bitmap_offset, good_bitmap_offset;
+#ifdef JEMALLOC_PROF
+	uint32_t try_ctx0_offset, good_ctx0_offset;
+#endif
+	uint32_t try_reg0_offset, good_reg0_offset;
+
+	assert(min_run_size >= PAGE_SIZE);
+	assert(min_run_size <= arena_maxclass);
+
+	/*
+	 * Calculate known-valid settings before entering the run_size
+	 * expansion loop, so that the first part of the loop always copies
+	 * valid settings.
+	 *
+	 * The do..while loop iteratively reduces the number of regions until
+	 * the run header and the regions no longer overlap.  A closed formula
+	 * would be quite messy, since there is an interdependency between the
+	 * header's bitmap size and the number of regions.
+	 */
+	try_run_size = min_run_size;
+	try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin_info->reg_size)
+	    + 1; /* Counter-act try_nregs-- in loop. */
+	if (try_nregs > RUN_MAXREGS) {
+		try_nregs = RUN_MAXREGS
+		    + 1; /* Counter-act try_nregs-- in loop. */
+	}
+	do {
+		try_nregs--;
+		try_hdr_size = sizeof(arena_run_t);
+		/* Pad to a long boundary. */
+		try_hdr_size = LONG_CEILING(try_hdr_size);
+		try_bitmap_offset = try_hdr_size;
+		/* Add space for bitmap. */
+		try_hdr_size += bitmap_size(try_nregs);
+#ifdef JEMALLOC_PROF
+		if (opt_prof && prof_promote == false) {
+			/* Pad to a quantum boundary. */
+			try_hdr_size = QUANTUM_CEILING(try_hdr_size);
+			try_ctx0_offset = try_hdr_size;
+			/* Add space for one (prof_ctx_t *) per region. */
+			try_hdr_size += try_nregs * sizeof(prof_ctx_t *);
+		} else
+			try_ctx0_offset = 0;
+#endif
+		try_reg0_offset = try_run_size - (try_nregs *
+		    bin_info->reg_size);
+	} while (try_hdr_size > try_reg0_offset);
+
+	/* run_size expansion loop. */
+	do {
+		/*
+		 * Copy valid settings before trying more aggressive settings.
+		 */
+		good_run_size = try_run_size;
+		good_nregs = try_nregs;
+		good_hdr_size = try_hdr_size;
+		good_bitmap_offset = try_bitmap_offset;
+#ifdef JEMALLOC_PROF
+		good_ctx0_offset = try_ctx0_offset;
+#endif
+		good_reg0_offset = try_reg0_offset;
+
+		/* Try more aggressive settings. */
+		try_run_size += PAGE_SIZE;
+		try_nregs = ((try_run_size - sizeof(arena_run_t)) /
+		    bin_info->reg_size)
+		    + 1; /* Counter-act try_nregs-- in loop. */
+		if (try_nregs > RUN_MAXREGS) {
+			try_nregs = RUN_MAXREGS
+			    + 1; /* Counter-act try_nregs-- in loop. */
+		}
+		do {
+			try_nregs--;
+			try_hdr_size = sizeof(arena_run_t);
+			/* Pad to a long boundary. */
+			try_hdr_size = LONG_CEILING(try_hdr_size);
+			try_bitmap_offset = try_hdr_size;
+			/* Add space for bitmap. */
+			try_hdr_size += bitmap_size(try_nregs);
+#ifdef JEMALLOC_PROF
+			if (opt_prof && prof_promote == false) {
+				/* Pad to a quantum boundary. */
+				try_hdr_size = QUANTUM_CEILING(try_hdr_size);
+				try_ctx0_offset = try_hdr_size;
+				/*
+				 * Add space for one (prof_ctx_t *) per region.
+				 */
+				try_hdr_size += try_nregs *
+				    sizeof(prof_ctx_t *);
+			}
+#endif
+			try_reg0_offset = try_run_size - (try_nregs *
+			    bin_info->reg_size);
+		} while (try_hdr_size > try_reg0_offset);
+	} while (try_run_size <= arena_maxclass
+	    /* ...and the header overhead is still above the allowed limit. */
+	    && RUN_MAX_OVRHD * (bin_info->reg_size << 3) > RUN_MAX_OVRHD_RELAX
+	    && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
+	    && try_nregs < RUN_MAXREGS);
+
+	assert(good_hdr_size <= good_reg0_offset);
+
+	/* Copy final settings. */
+	bin_info->run_size = good_run_size;
+	bin_info->nregs = good_nregs;
+	bin_info->bitmap_offset = good_bitmap_offset;
+#ifdef JEMALLOC_PROF
+	bin_info->ctx0_offset = good_ctx0_offset;
+#endif
+	bin_info->reg0_offset = good_reg0_offset;
+
+	return (good_run_size);
+}
+
+static bool
+bin_info_init(void)
+{
+	arena_bin_info_t *bin_info;
+	unsigned i;
+	size_t prev_run_size;
+	/* Fill arena_bin_info[] for all nbins bins; returns true on failure. */
+	arena_bin_info = base_alloc(sizeof(arena_bin_info_t) * nbins);
+	if (arena_bin_info == NULL)
+		return (true);
+	/* Each bin's run size becomes the minimum run size for the next bin. */
+	prev_run_size = PAGE_SIZE;
+	i = 0;
+#ifdef JEMALLOC_TINY
+	/* (2^n)-spaced tiny bins. */
+	for (; i < ntbins; i++) {
+		bin_info = &arena_bin_info[i];
+		bin_info->reg_size = (1U << (LG_TINY_MIN + i));
+		prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
+		bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
+	}
+#endif
+
+	/* Quantum-spaced bins. */
+	for (; i < ntbins + nqbins; i++) {
+		bin_info = &arena_bin_info[i];
+		bin_info->reg_size = (i - ntbins + 1) << LG_QUANTUM;
+		prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
+		bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
+	}
+
+	/* Cacheline-spaced bins. */
+	for (; i < ntbins + nqbins + ncbins; i++) {
+		bin_info = &arena_bin_info[i];
+		bin_info->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
+		    LG_CACHELINE);
+		prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
+		bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
+	}
+
+	/* Subpage-spaced bins. */
+	for (; i < nbins; i++) {
+		bin_info = &arena_bin_info[i];
+		bin_info->reg_size = sspace_min + ((i - (ntbins + nqbins +
+		    ncbins)) << LG_SUBPAGE);
+		prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
+		bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
+	}
+
+	return (false);
+}
+
+bool
+arena_boot(void)
+{
+	size_t header_size;
+	unsigned i;
+	/* Derive size class bounds and bin counts; returns true on failure. */
+	/* Set variables according to the value of opt_lg_[qc]space_max. */
+	qspace_max = (1U << opt_lg_qspace_max);
+	cspace_min = CACHELINE_CEILING(qspace_max);
+	if (cspace_min == qspace_max)
+		cspace_min += CACHELINE;
+	cspace_max = (1U << opt_lg_cspace_max);
+	sspace_min = SUBPAGE_CEILING(cspace_max);
+	if (sspace_min == cspace_max)
+		sspace_min += SUBPAGE;
+	assert(sspace_min < PAGE_SIZE);
+	sspace_max = PAGE_SIZE - SUBPAGE;
+
+#ifdef JEMALLOC_TINY
+	assert(LG_QUANTUM >= LG_TINY_MIN);
+#endif
+	assert(ntbins <= LG_QUANTUM);
+	nqbins = qspace_max >> LG_QUANTUM;
+	ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1;
+	nsbins = ((sspace_max - sspace_min) >> LG_SUBPAGE) + 1;
+	nbins = ntbins + nqbins + ncbins + nsbins;
+
+	/*
+	 * The small_size2bin lookup table uses uint8_t to encode each bin
+	 * index, so we cannot support more than 256 small size classes.  This
+	 * limit is difficult to exceed (not even possible with 16B quantum and
+	 * 4KiB pages), and such configurations are impractical, but
+	 * nonetheless we need to protect against this case in order to avoid
+	 * undefined behavior.
+	 *
+	 * Further constrain nbins to 255 if prof_promote is true, since all
+	 * small size classes, plus a "not small" size class must be stored in
+	 * 8 bits of arena_chunk_map_t's bits field.
+	 */
+#ifdef JEMALLOC_PROF
+	if (opt_prof && prof_promote) {
+		if (nbins > 255) {
+		    char line_buf[UMAX2S_BUFSIZE];
+		    malloc_write("<jemalloc>: Too many small size classes (");
+		    malloc_write(u2s(nbins, 10, line_buf));
+		    malloc_write(" > max 255)\n");
+		    abort();
+		}
+	} else
+#endif
+	if (nbins > 256) {
+	    char line_buf[UMAX2S_BUFSIZE];
+	    malloc_write("<jemalloc>: Too many small size classes (");
+	    malloc_write(u2s(nbins, 10, line_buf));
+	    malloc_write(" > max 256)\n");
+	    abort();
+	}
+
+	/*
+	 * Compute the header size such that it is large enough to contain the
+	 * page map.  The page map is biased to omit entries for the header
+	 * itself, so some iteration is necessary to compute the map bias.
+	 *
+	 * 1) Compute safe header_size and map_bias values that include enough
+	 *    space for an unbiased page map.
+	 * 2) Refine map_bias based on (1) to omit the header pages in the page
+	 *    map.  The resulting map_bias may be one too small.
+	 * 3) Refine map_bias based on (2).  The result will be >= the result
+	 *    from (2), and will always be correct.
+	 */
+	map_bias = 0;
+	for (i = 0; i < 3; i++) {
+		header_size = offsetof(arena_chunk_t, map)
+			+ (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias));
+		map_bias = (header_size >> PAGE_SHIFT) + ((header_size &
+		    PAGE_MASK) != 0);
+	}
+	assert(map_bias > 0);
+	/* The largest arena-managed size is a chunk minus its header pages. */
+	arena_maxclass = chunksize - (map_bias << PAGE_SHIFT);
+
+	if (small_size2bin_init())
+		return (true);
+
+	if (bin_info_init())
+		return (true);
+
+	return (false);
+}
diff --git a/deps/jemalloc.orig/src/atomic.c b/deps/jemalloc.orig/src/atomic.c
new file mode 100644
index 00000000..77ee3131
--- /dev/null
+++ b/deps/jemalloc.orig/src/atomic.c
@@ -0,0 +1,2 @@
+#define	JEMALLOC_ATOMIC_C_
+#include "jemalloc/internal/jemalloc_internal.h"
diff --git a/deps/jemalloc.orig/src/base.c b/deps/jemalloc.orig/src/base.c
new file mode 100644
index 00000000..cc85e849
--- /dev/null
+++ b/deps/jemalloc.orig/src/base.c
@@ -0,0 +1,106 @@
+#define	JEMALLOC_BASE_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+malloc_mutex_t	base_mtx;
+
+/*
+ * Current pages that are being used for internal memory allocations.  These
+ * pages are carved up in cacheline-size quanta, so that there is no chance of
+ * false cache line sharing.
+ */
+static void		*base_pages;
+static void		*base_next_addr;
+static void		*base_past_addr; /* Addr immediately past base_pages. */
+static extent_node_t	*base_nodes;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static bool	base_pages_alloc(size_t minsize);
+
+/******************************************************************************/
+
+static bool
+base_pages_alloc(size_t minsize)
+{
+	bool zeroed = false;	/* Zeroed memory is not demanded. */
+	size_t nbytes;
+
+	/* Start a fresh base region of whole chunks; true means failure. */
+	assert(minsize != 0);
+	nbytes = CHUNK_CEILING(minsize);
+	base_pages = chunk_alloc(nbytes, true, &zeroed);
+	if (base_pages != NULL) {
+		base_next_addr = base_pages;
+		base_past_addr = (void *)((uintptr_t)base_pages + nbytes);
+		return (false);
+	}
+	return (true);
+}
+
+void *
+base_alloc(size_t size)
+{
+	void *ret = NULL;
+	size_t nbytes;
+
+	/*
+	 * Pad the request to whole cachelines so that separately allocated
+	 * objects never share a line.
+	 */
+	nbytes = CACHELINE_CEILING(size);
+
+	malloc_mutex_lock(&base_mtx);
+	/* Switch to a new region when the current one cannot fit nbytes. */
+	if ((uintptr_t)base_next_addr + nbytes > (uintptr_t)base_past_addr &&
+	    base_pages_alloc(nbytes))
+		goto RETURN;
+	/* Bump-allocate from the front of the unused space. */
+	ret = base_next_addr;
+	base_next_addr = (void *)((uintptr_t)base_next_addr + nbytes);
+RETURN:
+	malloc_mutex_unlock(&base_mtx);
+	return (ret);
+}
+
+extent_node_t *
+base_node_alloc(void)
+{
+	extent_node_t *node;
+
+	/* Pop the head of the free list of recycled nodes, if there is one. */
+	malloc_mutex_lock(&base_mtx);
+	node = base_nodes;
+	if (node != NULL)
+		base_nodes = *(extent_node_t **)node;
+	malloc_mutex_unlock(&base_mtx);
+
+	/* base_alloc() takes base_mtx itself, so call it only once unlocked. */
+	if (node == NULL)
+		node = (extent_node_t *)base_alloc(sizeof(extent_node_t));
+	return (node);
+}
+
+void
+base_node_dealloc(extent_node_t *node)
+{
+	extent_node_t **link = (extent_node_t **)node; /* Reuse node as link. */
+	malloc_mutex_lock(&base_mtx);
+	*link = base_nodes;
+	base_nodes = node;
+	malloc_mutex_unlock(&base_mtx);
+}
+
+bool
+base_boot(void)
+{
+	bool mtx_failed;
+
+	/* Start with an empty node free list; true means failure. */
+	base_nodes = NULL;
+	mtx_failed = malloc_mutex_init(&base_mtx) ? true : false;
+	return (mtx_failed);
+}
diff --git a/deps/jemalloc.orig/src/bitmap.c b/deps/jemalloc.orig/src/bitmap.c
new file mode 100644
index 00000000..b47e2629
--- /dev/null
+++ b/deps/jemalloc.orig/src/bitmap.c
@@ -0,0 +1,90 @@
+#define JEMALLOC_BITMAP_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static size_t	bits2groups(size_t nbits);
+
+/******************************************************************************/
+
+static size_t
+bits2groups(size_t nbits)
+{
+	/* Whole groups, plus one more when a partial group is left over. */
+	size_t partial = ((nbits & BITMAP_GROUP_NBITS_MASK) != 0) ? 1 : 0;
+	return ((nbits >> LG_BITMAP_GROUP_NBITS) + partial);
+}
+
+void
+bitmap_info_init(bitmap_info_t *binfo, size_t nbits)
+{
+	size_t ngroups, offset;
+	unsigned lvl;
+
+	assert(nbits > 0);
+	assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS));
+
+	/*
+	 * Level 0 needs one bit per logical bit.  Each higher level needs one
+	 * bit per group of the level below, so keep stacking levels until a
+	 * level fits in a single group.  levels[n].group_offset is where level
+	 * n starts; the entry past the last level holds the total group count.
+	 */
+	binfo->levels[0].group_offset = offset = 0;
+	ngroups = bits2groups(nbits);
+	for (lvl = 1; ngroups > 1; lvl++) {
+		assert(lvl < BITMAP_MAX_LEVELS);
+		offset += ngroups;
+		binfo->levels[lvl].group_offset = offset;
+		ngroups = bits2groups(ngroups);
+	}
+	binfo->levels[lvl].group_offset = offset + ngroups;
+	binfo->nlevels = lvl;
+	binfo->nbits = nbits;
+}
+
+size_t
+bitmap_info_ngroups(const bitmap_info_t *binfo)
+{
+	/* NB: a byte count (groups << LG_SIZEOF_BITMAP), not a group count. */
+	return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP);
+}
+
+size_t
+bitmap_size(size_t nbits)
+{
+	bitmap_info_t binfo;	/* Throwaway layout, used only for sizing. */
+	/* Same value bitmap_info_ngroups() would report for nbits bits. */
+	bitmap_info_init(&binfo, nbits);
+	return (bitmap_info_ngroups(&binfo));
+}
+
+void
+bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo)
+{
+	size_t extra;	/* Unused high bits in a level's last group. */
+	unsigned i;
+
+	/*
+	 * Bits are actually inverted with regard to the external bitmap
+	 * interface, so the bitmap starts out with all 1 bits, except for
+	 * trailing unused bits (if any).  Note that each group uses bit 0 to
+	 * correspond to the first logical bit in the group, so extra bits
+	 * are the most significant bits of the last group.
+	 */
+	memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset <<
+	    LG_SIZEOF_BITMAP);
+	extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK))
+	    & BITMAP_GROUP_NBITS_MASK;
+	if (extra != 0)	/* Level 0: shift out the bits past nbits. */
+		bitmap[binfo->levels[1].group_offset - 1] >>= extra;
+	for (i = 1; i < binfo->nlevels; i++) {	/* Same trim, higher levels. */
+		size_t group_count = binfo->levels[i].group_offset -
+		    binfo->levels[i-1].group_offset;
+		extra = (BITMAP_GROUP_NBITS - (group_count &
+		    BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK;
+		if (extra != 0)
+			bitmap[binfo->levels[i+1].group_offset - 1] >>= extra;
+	}
+}
diff --git a/deps/jemalloc.orig/src/chunk.c b/deps/jemalloc.orig/src/chunk.c
new file mode 100644
index 00000000..d190c6f4
--- /dev/null
+++ b/deps/jemalloc.orig/src/chunk.c
@@ -0,0 +1,173 @@
+#define	JEMALLOC_CHUNK_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+size_t	opt_lg_chunk = LG_CHUNK_DEFAULT;
+#ifdef JEMALLOC_SWAP
+bool	opt_overcommit = true;
+#endif
+
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+malloc_mutex_t	chunks_mtx;
+chunk_stats_t	stats_chunks;
+#endif
+
+#ifdef JEMALLOC_IVSALLOC
+rtree_t		*chunks_rtree;
+#endif
+
+/* Various chunk-related settings. */
+size_t		chunksize;
+size_t		chunksize_mask; /* (chunksize - 1). */
+size_t		chunk_npages;
+size_t		map_bias;
+size_t		arena_maxclass; /* Max size class for arenas. */
+
+/******************************************************************************/
+
+/*
+ * If the caller specifies (*zero == false), it is still possible to receive
+ * zeroed memory, in which case *zero is toggled to true.  arena_chunk_alloc()
+ * takes advantage of this to avoid demanding zeroed chunks, but taking
+ * advantage of them if they are returned.
+ */
+void *
+chunk_alloc(size_t size, bool base, bool *zero)
+{
+	void *ret;
+
+	assert(size != 0);
+	assert((size & chunksize_mask) == 0);	/* Whole chunks only. */
+
+#ifdef JEMALLOC_SWAP
+	if (swap_enabled) {
+		ret = chunk_alloc_swap(size, zero);
+		if (ret != NULL)
+			goto RETURN;
+	}
+
+	if (swap_enabled == false || opt_overcommit) {
+#endif
+#ifdef JEMALLOC_DSS
+		ret = chunk_alloc_dss(size, zero);
+		if (ret != NULL)
+			goto RETURN;
+#endif
+		ret = chunk_alloc_mmap(size);
+		if (ret != NULL) {
+			*zero = true;	/* mmap'd chunks count as zeroed. */
+			goto RETURN;
+		}
+#ifdef JEMALLOC_SWAP
+	}
+#endif
+
+	/* All strategies for allocation failed. */
+	ret = NULL;
+RETURN:
+#ifdef JEMALLOC_IVSALLOC /* NOTE(review): failure path decrements curchunks */
+	if (base == false && ret != NULL) {
+		if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) {
+			chunk_dealloc(ret, size, true);
+			return (NULL);
+		}
+	}
+#endif
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+	if (ret != NULL) {
+#  ifdef JEMALLOC_PROF
+		bool gdump;	/* Set when a new high-water mark is reached. */
+#  endif
+		malloc_mutex_lock(&chunks_mtx);
+#  ifdef JEMALLOC_STATS
+		stats_chunks.nchunks += (size / chunksize);
+#  endif
+		stats_chunks.curchunks += (size / chunksize);
+		if (stats_chunks.curchunks > stats_chunks.highchunks) {
+			stats_chunks.highchunks = stats_chunks.curchunks;
+#  ifdef JEMALLOC_PROF
+			gdump = true;
+#  endif
+		}
+#  ifdef JEMALLOC_PROF
+		else
+			gdump = false;
+#  endif
+		malloc_mutex_unlock(&chunks_mtx);
+#  ifdef JEMALLOC_PROF
+		if (opt_prof && opt_prof_gdump && gdump)
+			prof_gdump();	/* Called with chunks_mtx released. */
+#  endif
+	}
+#endif
+
+	assert(CHUNK_ADDR2BASE(ret) == ret);
+	return (ret);
+}
+
+void
+chunk_dealloc(void *chunk, size_t size, bool unmap)
+{
+	/* Drop bookkeeping for chunk; release its memory only if unmap is set. */
+	assert(chunk != NULL);
+	assert(CHUNK_ADDR2BASE(chunk) == chunk);
+	assert(size != 0);
+	assert((size & chunksize_mask) == 0);
+
+#ifdef JEMALLOC_IVSALLOC
+	rtree_set(chunks_rtree, (uintptr_t)chunk, NULL);
+#endif
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+	malloc_mutex_lock(&chunks_mtx);
+	stats_chunks.curchunks -= (size / chunksize);
+	malloc_mutex_unlock(&chunks_mtx);
+#endif
+
+	if (unmap) {
+#ifdef JEMALLOC_SWAP
+		if (swap_enabled && chunk_dealloc_swap(chunk, size) == false)
+			return;	/* The swap backend took the chunk. */
+#endif
+#ifdef JEMALLOC_DSS
+		if (chunk_dealloc_dss(chunk, size) == false)
+			return;	/* The DSS backend took the chunk. */
+#endif
+		chunk_dealloc_mmap(chunk, size);
+	}
+}
+
+bool
+chunk_boot(void)
+{
+	/* Returns true if any piece of chunk setup fails, false otherwise. */
+	/* Set variables according to the value of opt_lg_chunk. */
+	chunksize = (ZU(1) << opt_lg_chunk);
+	assert(chunksize >= PAGE_SIZE);
+	chunksize_mask = chunksize - 1;
+	chunk_npages = (chunksize >> PAGE_SHIFT);
+
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+	if (malloc_mutex_init(&chunks_mtx))
+		return (true);
+	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
+#endif
+#ifdef JEMALLOC_SWAP
+	if (chunk_swap_boot())
+		return (true);
+#endif
+	if (chunk_mmap_boot())
+		return (true);
+#ifdef JEMALLOC_DSS
+	if (chunk_dss_boot())
+		return (true);
+#endif
+#ifdef JEMALLOC_IVSALLOC
+	chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk);
+	if (chunks_rtree == NULL)
+		return (true);
+#endif
+
+	return (false);
+}
diff --git a/deps/jemalloc.orig/src/chunk_dss.c b/deps/jemalloc.orig/src/chunk_dss.c
new file mode 100644
index 00000000..5c0e290e
--- /dev/null
+++ b/deps/jemalloc.orig/src/chunk_dss.c
@@ -0,0 +1,284 @@
+#define	JEMALLOC_CHUNK_DSS_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+#ifdef JEMALLOC_DSS
+/******************************************************************************/
+/* Data. */
+
+malloc_mutex_t	dss_mtx;
+
+/* Base address of the DSS. */
+static void	*dss_base;
+/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
+static void	*dss_prev;
+/* Current upper limit on DSS addresses. */
+static void	*dss_max;
+
+/*
+ * Trees of chunks that were previously allocated (trees differ only in node
+ * ordering).  These are used when allocating chunks, in an attempt to re-use
+ * address space.  Depending on function, different tree orderings are needed,
+ * which is why there are two trees with the same contents.
+ */
+static extent_tree_t	dss_chunks_szad;
+static extent_tree_t	dss_chunks_ad;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static void	*chunk_recycle_dss(size_t size, bool *zero);
+static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size);
+
+/******************************************************************************/
+
+static void *
+chunk_recycle_dss(size_t size, bool *zero)
+{
+	extent_node_t *found, probe;
+	void *chunk;
+
+	/* Find a recorded extent of at least size bytes. */
+	probe.addr = NULL;
+	probe.size = size;
+	malloc_mutex_lock(&dss_mtx);
+	found = extent_tree_szad_nsearch(&dss_chunks_szad, &probe);
+	if (found == NULL) {
+		malloc_mutex_unlock(&dss_mtx);
+		return (NULL);
+	}
+	/* Hand out the front of the extent. */
+	chunk = found->addr;
+	extent_tree_szad_remove(&dss_chunks_szad, found);
+	if (found->size != size) {
+		/*
+		 * Re-insert the tail as a smaller extent.  Its position
+		 * within dss_chunks_ad does not change.
+		 */
+		assert(found->size > size);
+		found->addr = (void *)((uintptr_t)found->addr + size);
+		found->size -= size;
+		extent_tree_szad_insert(&dss_chunks_szad, found);
+	} else {
+		/* Exact fit: retire the node entirely. */
+		extent_tree_ad_remove(&dss_chunks_ad, found);
+		base_node_dealloc(found);
+	}
+	malloc_mutex_unlock(&dss_mtx);
+
+	if (*zero)
+		memset(chunk, 0, size);
+	return (chunk);
+}
+
+void *
+chunk_alloc_dss(size_t size, bool *zero)
+{
+	void *ret;
+
+	ret = chunk_recycle_dss(size, zero);	/* Try reuse first. */
+	if (ret != NULL)
+		return (ret);
+
+	/*
+	 * sbrk() uses a signed increment argument, so take care not to
+	 * interpret a huge allocation request as a negative increment.
+	 */
+	if ((intptr_t)size < 0)
+		return (NULL);
+
+	malloc_mutex_lock(&dss_mtx);
+	if (dss_prev != (void *)-1) {
+		intptr_t incr;
+
+		/*
+		 * The loop is necessary to recover from races with other
+		 * threads that are using the DSS for something other than
+		 * malloc.
+		 */
+		do {
+			/* Get the current end of the DSS. */
+			dss_max = sbrk(0);
+
+			/*
+			 * Calculate how much padding is necessary to
+			 * chunk-align the end of the DSS.
+			 */
+			incr = (intptr_t)size
+			    - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
+			if (incr == (intptr_t)size)	/* Offset is 0. */
+				ret = dss_max;
+			else {
+				ret = (void *)((intptr_t)dss_max + incr);
+				incr += size;	/* Padding plus the chunk. */
+			}
+
+			dss_prev = sbrk(incr);
+			if (dss_prev == dss_max) {
+				/* Success. */
+				dss_max = (void *)((intptr_t)dss_prev + incr);
+				malloc_mutex_unlock(&dss_mtx);
+				*zero = true;	/* Reported as zeroed. */
+				return (ret);
+			}
+		} while (dss_prev != (void *)-1);	/* -1: sbrk() failed. */
+	}
+	malloc_mutex_unlock(&dss_mtx);
+
+	return (NULL);
+}
+
+static extent_node_t *
+chunk_dealloc_dss_record(void *chunk, size_t size)
+{
+	extent_node_t *xnode, *node, *prev, key;
+
+	xnode = NULL;	/* Spare node, allocated lazily below. */
+	while (true) {
+		key.addr = (void *)((uintptr_t)chunk + size);
+		node = extent_tree_ad_nsearch(&dss_chunks_ad, &key);
+		/* Try to coalesce forward. */
+		if (node != NULL && node->addr == key.addr) {
+			/*
+			 * Coalesce chunk with the following address range.
+			 * This does not change the position within
+			 * dss_chunks_ad, so only remove/insert from/into
+			 * dss_chunks_szad.
+			 */
+			extent_tree_szad_remove(&dss_chunks_szad, node);
+			node->addr = chunk;
+			node->size += size;
+			extent_tree_szad_insert(&dss_chunks_szad, node);
+			break;
+		} else if (xnode == NULL) {
+			/*
+			 * It is possible that base_node_alloc() will cause a
+			 * new base chunk to be allocated, so take care not to
+			 * deadlock on dss_mtx, and recover if another thread
+			 * deallocates an adjacent chunk while this one is busy
+			 * allocating xnode.
+			 */
+			malloc_mutex_unlock(&dss_mtx);
+			xnode = base_node_alloc();
+			malloc_mutex_lock(&dss_mtx);
+			if (xnode == NULL)
+				return (NULL);	/* Chunk stays unrecorded. */
+		} else {
+			/* Coalescing forward failed, so insert a new node. */
+			node = xnode;
+			xnode = NULL;
+			node->addr = chunk;
+			node->size = size;
+			extent_tree_ad_insert(&dss_chunks_ad, node);
+			extent_tree_szad_insert(&dss_chunks_szad, node);
+			break;
+		}
+	}
+	/* Discard xnode if it ended up unused due to a race. */
+	if (xnode != NULL)
+		base_node_dealloc(xnode);
+
+	/* Try to coalesce backward. */
+	prev = extent_tree_ad_prev(&dss_chunks_ad, node);
+	if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
+	    chunk) {
+		/*
+		 * Coalesce chunk with the previous address range.  This does
+		 * not change the position within dss_chunks_ad, so only
+		 * remove/insert node from/into dss_chunks_szad.
+		 */
+		extent_tree_szad_remove(&dss_chunks_szad, prev);
+		extent_tree_ad_remove(&dss_chunks_ad, prev);
+
+		extent_tree_szad_remove(&dss_chunks_szad, node);
+		node->addr = prev->addr;
+		node->size += prev->size;
+		extent_tree_szad_insert(&dss_chunks_szad, node);
+
+		base_node_dealloc(prev);	/* prev was absorbed by node. */
+	}
+
+	return (node);
+}
+
+bool
+chunk_in_dss(void *chunk)
+{
+	uintptr_t addr = (uintptr_t)chunk;
+	bool inside;
+
+	/*
+	 * Test chunk against [dss_base, dss_max); both bounds are read while
+	 * holding dss_mtx.
+	 */
+	malloc_mutex_lock(&dss_mtx);
+	inside = (addr >= (uintptr_t)dss_base && addr < (uintptr_t)dss_max);
+	malloc_mutex_unlock(&dss_mtx);
+	return (inside);
+}
+
+bool
+chunk_dealloc_dss(void *chunk, size_t size)
+{
+	bool ret;	/* false: chunk was handled here; true: not in the DSS. */
+
+	malloc_mutex_lock(&dss_mtx);
+	if ((uintptr_t)chunk >= (uintptr_t)dss_base
+	    && (uintptr_t)chunk < (uintptr_t)dss_max) {
+		extent_node_t *node;
+
+		/* Try to coalesce with other unused chunks. */
+		node = chunk_dealloc_dss_record(chunk, size);
+		if (node != NULL) {
+			chunk = node->addr;	/* Continue with the merged */
+			size = node->size;	/* extent from here on. */
+		}
+
+		/* Get the current end of the DSS. */
+		dss_max = sbrk(0);
+
+		/*
+		 * Try to shrink the DSS if this chunk is at the end of the
+		 * DSS.  The sbrk() call here is subject to a race condition
+		 * with threads that use brk(2) or sbrk(2) directly, but the
+		 * alternative would be to leak memory for the sake of poorly
+		 * designed multi-threaded programs.
+		 */
+		if ((void *)((uintptr_t)chunk + size) == dss_max
+		    && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) {
+			/* Success. */
+			dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size);
+
+			if (node != NULL) {
+				extent_tree_szad_remove(&dss_chunks_szad, node);
+				extent_tree_ad_remove(&dss_chunks_ad, node);
+				base_node_dealloc(node);
+			}
+		} else
+			madvise(chunk, size, MADV_DONTNEED);
+
+		ret = false;
+		goto RETURN;
+	}
+
+	ret = true;
+RETURN:
+	malloc_mutex_unlock(&dss_mtx);
+	return (ret);
+}
+
+bool
+chunk_dss_boot(void)
+{
+	void *brk_now;
+
+	if (malloc_mutex_init(&dss_mtx))
+		return (true);
+	/* base, prev and max all start at the current break. */
+	brk_now = sbrk(0);
+	dss_base = dss_prev = dss_max = brk_now;
+	extent_tree_szad_new(&dss_chunks_szad);
+	extent_tree_ad_new(&dss_chunks_ad);
+	return (false);
+}
+
+/******************************************************************************/
+#endif /* JEMALLOC_DSS */
diff --git a/deps/jemalloc.orig/src/chunk_mmap.c b/deps/jemalloc.orig/src/chunk_mmap.c
new file mode 100644
index 00000000..164e86e7
--- /dev/null
+++ b/deps/jemalloc.orig/src/chunk_mmap.c
@@ -0,0 +1,239 @@
+#define	JEMALLOC_CHUNK_MMAP_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+/*
+ * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and
+ * potentially avoid some system calls.
+ */
+#ifndef NO_TLS
+static __thread bool	mmap_unaligned_tls
+    JEMALLOC_ATTR(tls_model("initial-exec"));
+#define	MMAP_UNALIGNED_GET()	mmap_unaligned_tls
+#define	MMAP_UNALIGNED_SET(v)	do {					\
+	mmap_unaligned_tls = (v);					\
+} while (0)
+#else
+static pthread_key_t	mmap_unaligned_tsd;
+#define	MMAP_UNALIGNED_GET()	((bool)pthread_getspecific(mmap_unaligned_tsd))
+#define	MMAP_UNALIGNED_SET(v)	do {					\
+	pthread_setspecific(mmap_unaligned_tsd, (void *)(v));		\
+} while (0)
+#endif
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static void	*pages_map(void *addr, size_t size, bool noreserve);
+static void	pages_unmap(void *addr, size_t size);
+static void	*chunk_alloc_mmap_slow(size_t size, bool unaligned,
+    bool noreserve);
+static void	*chunk_alloc_mmap_internal(size_t size, bool noreserve);
+
+/******************************************************************************/
+
+static void *
+pages_map(void *addr, size_t size, bool noreserve)
+{
+	void *ret;	/* NULL on failure or when addr could not be honored. */
+
+	/*
+	 * We don't use MAP_FIXED here, because it can cause the *replacement*
+	 * of existing mappings, and we only want to create new mappings.
+	 */
+	int flags = MAP_PRIVATE | MAP_ANON;
+#ifdef MAP_NORESERVE
+	if (noreserve)
+		flags |= MAP_NORESERVE;
+#endif
+	ret = mmap(addr, size, PROT_READ | PROT_WRITE, flags, -1, 0);
+	assert(ret != NULL);
+
+	if (ret == MAP_FAILED)
+		ret = NULL;
+	else if (addr != NULL && ret != addr) {
+		/*
+		 * We succeeded in mapping memory, but not in the right place.
+		 */
+		if (munmap(ret, size) == -1) {
+			char buf[BUFERROR_BUF];
+
+			buferror(errno, buf, sizeof(buf));
+			malloc_write("<jemalloc>: Error in munmap(): ");
+			malloc_write(buf);
+			malloc_write("\n");
+			if (opt_abort)
+				abort();
+		}
+		ret = NULL;	/* A misplaced mapping counts as failure. */
+	}
+
+	assert(ret == NULL || (addr == NULL && ret != addr)
+	    || (addr != NULL && ret == addr));
+	return (ret);
+}
+
+static void
+pages_unmap(void *addr, size_t size)
+{
+	char buf[BUFERROR_BUF];
+
+	if (munmap(addr, size) != -1)
+		return;
+	/* Report the failure; it is fatal only when opt_abort is set. */
+	buferror(errno, buf, sizeof(buf));
+	malloc_write("<jemalloc>: Error in munmap(): ");
+	malloc_write(buf);
+	malloc_write("\n");
+	if (opt_abort)
+		abort();
+}
+
+static void *
+chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve)
+{
+	size_t mapsize = size + chunksize;
+	size_t offset, lead, trail;
+	void *ret;
+
+	/* Beware size_t wrap-around. */
+	if (mapsize <= size)
+		return (NULL);
+
+	/*
+	 * Over-allocate by one chunk so that a chunk-aligned run of size
+	 * bytes fits somewhere inside the mapping.
+	 */
+	ret = pages_map(NULL, mapsize, noreserve);
+	if (ret == NULL)
+		return (NULL);
+
+	/* Work out how much leading/trailing space must be given back. */
+	offset = CHUNK_ADDR2OFFSET(ret);
+	if (offset == 0) {
+		/* Already aligned: all of the excess trails the result. */
+		lead = 0;
+		trail = chunksize;
+	} else {
+		/* Note that mmap() returned an unaligned mapping. */
+		unaligned = true;
+		lead = chunksize - offset;
+		trail = offset;
+	}
+	if (lead != 0)
+		pages_unmap(ret, lead);
+	ret = (void *)((uintptr_t)ret + lead);
+	pages_unmap((void *)((uintptr_t)ret + size), trail);
+
+	/*
+	 * If mmap() returned an aligned mapping, reset mmap_unaligned so that
+	 * the next chunk_alloc_mmap() execution tries the fast allocation
+	 * method.
+	 */
+	if (!unaligned)
+		MMAP_UNALIGNED_SET(false);
+	return (ret);
+}
+
+static void *
+chunk_alloc_mmap_internal(size_t size, bool noreserve)
+{
+	void *ret;
+
+	/*
+	 * Ideally, there would be a way to specify alignment to mmap() (like
+	 * NetBSD has), but in the absence of such a feature, we have to work
+	 * hard to efficiently create aligned mappings.  The reliable, but
+	 * slow method is to create a mapping that is over-sized, then trim the
+	 * excess.  However, that always results in at least one call to
+	 * pages_unmap().
+	 *
+	 * A more optimistic approach is to try mapping precisely the right
+	 * amount, then try to append another mapping if alignment is off.  In
+	 * practice, this works out well as long as the application is not
+	 * interleaving mappings via direct mmap() calls.  If we do run into a
+	 * situation where there is an interleaved mapping and we are unable to
+	 * extend an unaligned mapping, our best option is to switch to the
+	 * slow method until mmap() returns another aligned mapping.  This will
+	 * tend to leave a gap in the memory map that is too small to cause
+	 * later problems for the optimistic method.
+	 *
+	 * Another possible confounding factor is address space layout
+	 * randomization (ASLR), which causes mmap(2) to disregard the
+	 * requested address.  mmap_unaligned tracks whether the previous
+	 * chunk_alloc_mmap() execution received any unaligned or relocated
+	 * mappings, and if so, the current execution will immediately fall
+	 * back to the slow method.  However, we keep track of whether the fast
+	 * method would have succeeded, and if so, we make a note to try the
+	 * fast method next time.
+	 */
+
+	if (MMAP_UNALIGNED_GET() == false) {	/* Optimistic fast path. */
+		size_t offset;
+
+		ret = pages_map(NULL, size, noreserve);
+		if (ret == NULL)
+			return (NULL);
+
+		offset = CHUNK_ADDR2OFFSET(ret);
+		if (offset != 0) {
+			MMAP_UNALIGNED_SET(true);
+			/* Try to extend chunk boundary. */
+			if (pages_map((void *)((uintptr_t)ret + size),
+			    chunksize - offset, noreserve) == NULL) {
+				/*
+				 * Extension failed.  Clean up, then revert to
+				 * the reliable-but-expensive method.
+				 */
+				pages_unmap(ret, size);
+				ret = chunk_alloc_mmap_slow(size, true,
+				    noreserve);
+			} else {
+				/* Clean up unneeded leading space. */
+				pages_unmap(ret, chunksize - offset);
+				ret = (void *)((uintptr_t)ret + (chunksize -
+				    offset));
+			}
+		}
+	} else	/* A previous mapping was unaligned: go straight to slow. */
+		ret = chunk_alloc_mmap_slow(size, false, noreserve);
+
+	return (ret);
+}
+
+void *
+chunk_alloc_mmap(size_t size)
+{
+	/* Plain variant: noreserve is false. */
+	return (chunk_alloc_mmap_internal(size, false));
+}
+
+void *
+chunk_alloc_mmap_noreserve(size_t size)
+{
+	/* Adds MAP_NORESERVE to the mapping where the platform defines it. */
+	return (chunk_alloc_mmap_internal(size, true));
+}
+
+void
+chunk_dealloc_mmap(void *chunk, size_t size)
+{
+	/* munmap() failure is reported, and fatal only under opt_abort. */
+	pages_unmap(chunk, size);
+}
+
+bool
+chunk_mmap_boot(void)
+{
+	/* Only the NO_TLS build needs run-time setup (a pthread key). */
+#ifdef NO_TLS
+	if (pthread_key_create(&mmap_unaligned_tsd, NULL) != 0) {
+		malloc_write("<jemalloc>: Error in pthread_key_create()\n");
+		return (true);
+	}
+#endif
+
+	return (false);
+}
diff --git a/deps/jemalloc.orig/src/chunk_swap.c b/deps/jemalloc.orig/src/chunk_swap.c
new file mode 100644
index 00000000..cb25ae0d
--- /dev/null
+++ b/deps/jemalloc.orig/src/chunk_swap.c
@@ -0,0 +1,402 @@
+#define	JEMALLOC_CHUNK_SWAP_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+#ifdef JEMALLOC_SWAP
+/******************************************************************************/
+/* Data. */
+
+malloc_mutex_t	swap_mtx;
+bool		swap_enabled;
+bool		swap_prezeroed;
+size_t		swap_nfds;
+int		*swap_fds;
+#ifdef JEMALLOC_STATS
+size_t		swap_avail;
+#endif
+
+/* Base address of the mmap()ed file(s). */
+static void	*swap_base;
+/* Current end of the space in use (<= swap_max). */
+static void	*swap_end;
+/* Absolute upper limit on file-backed addresses. */
+static void	*swap_max;
+
+/*
+ * Trees of chunks that were previously allocated (trees differ only in node
+ * ordering).  These are used when allocating chunks, in an attempt to re-use
+ * address space.  Depending on function, different tree orderings are needed,
+ * which is why there are two trees with the same contents.
+ */
+static extent_tree_t	swap_chunks_szad;
+static extent_tree_t	swap_chunks_ad;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static void	*chunk_recycle_swap(size_t size, bool *zero);
+static extent_node_t *chunk_dealloc_swap_record(void *chunk, size_t size);
+
+/******************************************************************************/
+
+static void *
+chunk_recycle_swap(size_t size, bool *zero)
+{
+	extent_node_t *found, probe;
+	void *chunk;
+
+	/* Find a recorded extent of at least size bytes. */
+	probe.addr = NULL;
+	probe.size = size;
+	malloc_mutex_lock(&swap_mtx);
+	found = extent_tree_szad_nsearch(&swap_chunks_szad, &probe);
+	if (found == NULL) {
+		malloc_mutex_unlock(&swap_mtx);
+		return (NULL);
+	}
+	/* Hand out the front of the extent. */
+	chunk = found->addr;
+	extent_tree_szad_remove(&swap_chunks_szad, found);
+	if (found->size != size) {
+		/*
+		 * Re-insert the tail as a smaller extent.  Its position
+		 * within swap_chunks_ad does not change.
+		 */
+		assert(found->size > size);
+		found->addr = (void *)((uintptr_t)found->addr + size);
+		found->size -= size;
+		extent_tree_szad_insert(&swap_chunks_szad, found);
+	} else {
+		/* Exact fit: retire the node entirely. */
+		extent_tree_ad_remove(&swap_chunks_ad, found);
+		base_node_dealloc(found);
+	}
+#ifdef JEMALLOC_STATS
+	swap_avail -= size;
+#endif
+	malloc_mutex_unlock(&swap_mtx);
+
+	if (*zero)
+		memset(chunk, 0, size);
+	return (chunk);
+}
+
+void *
+chunk_alloc_swap(size_t size, bool *zero)
+{
+	void *chunk;
+
+	assert(swap_enabled);
+
+	/* Prefer recycling previously released swap space. */
+	chunk = chunk_recycle_swap(size, zero);
+	if (chunk != NULL)
+		return (chunk);
+
+	malloc_mutex_lock(&swap_mtx);
+	if ((uintptr_t)swap_end + size > (uintptr_t)swap_max) {
+		malloc_mutex_unlock(&swap_mtx);
+		return (NULL);
+	}
+	chunk = swap_end;
+	swap_end = (void *)((uintptr_t)swap_end + size);
+#ifdef JEMALLOC_STATS
+	swap_avail -= size;
+#endif
+	malloc_mutex_unlock(&swap_mtx);
+
+	/* Zero by hand only when the region was not declared prezeroed. */
+	if (swap_prezeroed)
+		*zero = true;
+	else if (*zero)
+		memset(chunk, 0, size);
+	return (chunk);
+}
+
+static extent_node_t *
+chunk_dealloc_swap_record(void *chunk, size_t size)
+{
+	extent_node_t *xnode, *node, *prev, key;
+
+	xnode = NULL;	/* Spare node, allocated lazily below. */
+	while (true) {
+		key.addr = (void *)((uintptr_t)chunk + size);
+		node = extent_tree_ad_nsearch(&swap_chunks_ad, &key);
+		/* Try to coalesce forward. */
+		if (node != NULL && node->addr == key.addr) {
+			/*
+			 * Coalesce chunk with the following address range.
+			 * This does not change the position within
+			 * swap_chunks_ad, so only remove/insert from/into
+			 * swap_chunks_szad.
+			 */
+			extent_tree_szad_remove(&swap_chunks_szad, node);
+			node->addr = chunk;
+			node->size += size;
+			extent_tree_szad_insert(&swap_chunks_szad, node);
+			break;
+		} else if (xnode == NULL) {
+			/*
+			 * It is possible that base_node_alloc() will cause a
+			 * new base chunk to be allocated, so take care not to
+			 * deadlock on swap_mtx, and recover if another thread
+			 * deallocates an adjacent chunk while this one is busy
+			 * allocating xnode.
+			 */
+			malloc_mutex_unlock(&swap_mtx);
+			xnode = base_node_alloc();
+			malloc_mutex_lock(&swap_mtx);
+			if (xnode == NULL)
+				return (NULL);	/* Chunk stays unrecorded. */
+		} else {
+			/* Coalescing forward failed, so insert a new node. */
+			node = xnode;
+			xnode = NULL;
+			node->addr = chunk;
+			node->size = size;
+			extent_tree_ad_insert(&swap_chunks_ad, node);
+			extent_tree_szad_insert(&swap_chunks_szad, node);
+			break;
+		}
+	}
+	/* Discard xnode if it ended up unused due to a race. */
+	if (xnode != NULL)
+		base_node_dealloc(xnode);
+
+	/* Try to coalesce backward. */
+	prev = extent_tree_ad_prev(&swap_chunks_ad, node);
+	if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
+	    chunk) {
+		/*
+		 * Coalesce chunk with the previous address range.  This does
+		 * not change the position within swap_chunks_ad, so only
+		 * remove/insert node from/into swap_chunks_szad.
+		 */
+		extent_tree_szad_remove(&swap_chunks_szad, prev);
+		extent_tree_ad_remove(&swap_chunks_ad, prev);
+
+		extent_tree_szad_remove(&swap_chunks_szad, node);
+		node->addr = prev->addr;
+		node->size += prev->size;
+		extent_tree_szad_insert(&swap_chunks_szad, node);
+
+		base_node_dealloc(prev);	/* prev was absorbed by node. */
+	}
+
+	return (node);
+}
+
+bool
+chunk_in_swap(void *chunk)
+{
+	uintptr_t addr = (uintptr_t)chunk;
+	bool inside;
+
+	assert(swap_enabled);
+
+	/*
+	 * Test chunk against [swap_base, swap_max); both bounds are read
+	 * while holding swap_mtx.
+	 */
+	malloc_mutex_lock(&swap_mtx);
+	inside = (addr >= (uintptr_t)swap_base && addr < (uintptr_t)swap_max);
+	malloc_mutex_unlock(&swap_mtx);
+	return (inside);
+}
+
+bool
+chunk_dealloc_swap(void *chunk, size_t size)
+{
+	bool ret;	/* false: chunk was handled here; true: not in swap. */
+
+	assert(swap_enabled);
+
+	malloc_mutex_lock(&swap_mtx);
+	if ((uintptr_t)chunk >= (uintptr_t)swap_base
+	    && (uintptr_t)chunk < (uintptr_t)swap_max) {
+		extent_node_t *node;
+
+		/* Try to coalesce with other unused chunks. */
+		node = chunk_dealloc_swap_record(chunk, size);
+		if (node != NULL) {
+			chunk = node->addr;	/* Continue with the merged */
+			size = node->size;	/* extent from here on. */
+		}
+
+		/*
+		 * Try to shrink the in-use memory if this chunk is at the end
+		 * of the in-use memory.
+		 */
+		if ((void *)((uintptr_t)chunk + size) == swap_end) {
+			swap_end = (void *)((uintptr_t)swap_end - size);
+
+			if (node != NULL) {
+				extent_tree_szad_remove(&swap_chunks_szad,
+				    node);
+				extent_tree_ad_remove(&swap_chunks_ad, node);
+				base_node_dealloc(node);
+			}
+		} else
+			madvise(chunk, size, MADV_DONTNEED);
+
+#ifdef JEMALLOC_STATS
+		swap_avail += size;
+#endif
+		ret = false;
+		goto RETURN;
+	}
+
+	ret = true;
+RETURN:
+	malloc_mutex_unlock(&swap_mtx);
+	return (ret);
+}
+
+bool
+chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed)
+{
+	bool ret;
+	unsigned i;
+	off_t off;
+	void *vaddr;
+	size_t cumsize, voff;
+	size_t sizes[nfds];	/* NOTE(review): VLA; nfds == 0 is undefined. */
+
+	/* Enable file-backed chunks; returns false on success, true on error. */
+	malloc_mutex_lock(&swap_mtx);
+
+	/* Get file sizes, truncating each file to a multiple of the page size. */
+	for (i = 0, cumsize = 0; i < nfds; i++) {
+		off = lseek(fds[i], 0, SEEK_END);
+		if (off == ((off_t)-1)) {
+			ret = true;
+			goto RETURN;
+		}
+		if (PAGE_CEILING(off) != off) {
+			off &= ~PAGE_MASK;
+			if (ftruncate(fds[i], off) != 0) {
+				ret = true;
+				goto RETURN;
+			}
+		}
+		sizes[i] = off;
+		if (cumsize + off < cumsize) {
+			/* Total exceeds the address space; bail out early. */
+			ret = true;
+			goto RETURN;
+		}
+		cumsize += off;
+	}
+
+	/* Round down to a multiple of the chunk size. */
+	cumsize &= ~chunksize_mask;
+	if (cumsize == 0) {
+		ret = true;
+		goto RETURN;
+	}
+
+	/*
+	 * Allocate a chunk-aligned region of anonymous memory, which will
+	 * be the final location for the memory-mapped files.
+	 */
+	vaddr = chunk_alloc_mmap_noreserve(cumsize);
+	if (vaddr == NULL) {
+		ret = true;
+		goto RETURN;
+	}
+
+	/*
+	 * Overlay the files onto the anonymous mapping.  cumsize was rounded
+	 * down above, so the files may be larger than the reservation; clamp
+	 * each length so that MAP_FIXED never replaces unrelated mappings.
+	 */
+	for (i = 0, voff = 0; i < nfds && voff < cumsize; i++) {
+		void *addr;
+
+		if (sizes[i] > cumsize - voff)
+			sizes[i] = cumsize - voff;
+		addr = mmap((void *)((uintptr_t)vaddr + voff), sizes[i],
+		    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fds[i], 0);
+		if (addr == MAP_FAILED) {
+			char buf[BUFERROR_BUF];
+
+			buferror(errno, buf, sizeof(buf));
+			malloc_write(
+			    "<jemalloc>: Error in mmap(..., MAP_FIXED, ...): ");
+			malloc_write(buf);
+			malloc_write("\n");
+			if (opt_abort)
+				abort();
+			/* Unmap the entire reservation. */
+			if (munmap(vaddr, cumsize) == -1) {
+				buferror(errno, buf, sizeof(buf));
+				malloc_write("<jemalloc>: Error in munmap(): ");
+				malloc_write(buf);
+				malloc_write("\n");
+			}
+			ret = true;
+			goto RETURN;
+		}
+		assert(addr == (void *)((uintptr_t)vaddr + voff));
+
+		/* Hint random access; discourage gratuitous syncing. */
+#ifdef MADV_RANDOM
+		madvise(addr, sizes[i], MADV_RANDOM);
+#endif
+#ifdef MADV_NOSYNC
+		madvise(addr, sizes[i], MADV_NOSYNC);
+#endif
+
+		voff += sizes[i];
+	}
+
+	swap_prezeroed = prezeroed;
+	swap_base = vaddr;
+	swap_end = swap_base;
+	swap_max = (void *)((uintptr_t)vaddr + cumsize);
+
+	/* Copy the fds array for mallctl purposes. */
+	swap_fds = (int *)base_alloc(nfds * sizeof(int));
+	if (swap_fds == NULL) {	/* NOTE(review): mapping is left in place. */
+		ret = true;
+		goto RETURN;
+	}
+	memcpy(swap_fds, fds, nfds * sizeof(int));
+	swap_nfds = nfds;
+
+#ifdef JEMALLOC_STATS
+	swap_avail = cumsize;
+#endif
+
+	swap_enabled = true;
+
+	ret = false;
+RETURN:
+	malloc_mutex_unlock(&swap_mtx);
+	return (ret);
+}
+
+bool
+chunk_swap_boot(void)
+{
+
+	if (malloc_mutex_init(&swap_mtx))
+		return (true);
+
+	/* Swap starts out disabled, with no files and no address range. */
+	swap_base = swap_end = swap_max = NULL;
+	swap_fds = NULL;
+	swap_nfds = 0;
+#ifdef JEMALLOC_STATS
+	swap_avail = 0;
+#endif
+	/* The swap.* mallctl's depend on swap_prezeroed being initialized. */
+	swap_prezeroed = false;
+	swap_enabled = false;
+
+	extent_tree_szad_new(&swap_chunks_szad);
+	extent_tree_ad_new(&swap_chunks_ad);
+
+	return (false);
+}
+
+/******************************************************************************/
+#endif /* JEMALLOC_SWAP */
diff --git a/deps/jemalloc.orig/src/ckh.c b/deps/jemalloc.orig/src/ckh.c
new file mode 100644
index 00000000..43fcc252
--- /dev/null
+++ b/deps/jemalloc.orig/src/ckh.c
@@ -0,0 +1,619 @@
+/*
+ *******************************************************************************
+ * Implementation of (2^1+,2) cuckoo hashing, where 2^1+ indicates that each
+ * hash bucket contains 2^n cells, for n >= 1, and 2 indicates that two hash
+ * functions are employed.  The original cuckoo hashing algorithm was described
+ * in:
+ *
+ *   Pagh, R., F.F. Rodler (2004) Cuckoo Hashing.  Journal of Algorithms
+ *     51(2):122-144.
+ *
+ * Generalization of cuckoo hashing was discussed in:
+ *
+ *   Erlingsson, U., M. Manasse, F. McSherry (2006) A cool and practical
+ *     alternative to traditional hash tables.  In Proceedings of the 7th
+ *     Workshop on Distributed Data and Structures (WDAS'06), Santa Clara, CA,
+ *     January 2006.
+ *
+ * This implementation uses precisely two hash functions because that is the
+ * fewest that can work, and supporting multiple hashes is an implementation
+ * burden.  Here is a reproduction of Figure 1 from Erlingsson et al. (2006)
+ * that shows approximate expected maximum load factors for various
+ * configurations:
+ *
+ *           |         #cells/bucket         |
+ *   #hashes |   1   |   2   |   4   |   8   |
+ *   --------+-------+-------+-------+-------+
+ *         1 | 0.006 | 0.006 | 0.03  | 0.12  |
+ *         2 | 0.49  | 0.86  |>0.93< |>0.96< |
+ *         3 | 0.91  | 0.97  | 0.98  | 0.999 |
+ *         4 | 0.97  | 0.99  | 0.999 |       |
+ *
+ * The number of cells per bucket is chosen such that a bucket fits in one cache
+ * line.  So, on 32- and 64-bit systems, we use (8,2) and (4,2) cuckoo hashing,
+ * respectively.
+ *
+ ******************************************************************************/
+#define	JEMALLOC_CKH_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static bool	ckh_grow(ckh_t *ckh);	/* Returns true if growing failed. */
+static void	ckh_shrink(ckh_t *ckh);	/* Best effort; reports nothing. */
+
+/******************************************************************************/
+
+/*
+ * Scan the cells of one bucket for an occupied cell whose key matches key.
+ * Returns the table-wide cell index on a hit and SIZE_T_MAX on a miss.
+ */
+JEMALLOC_INLINE size_t
+ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key)
+{
+	size_t first = bucket << LG_CKH_BUCKET_CELLS;
+	size_t end = first + (ZU(1) << LG_CKH_BUCKET_CELLS);
+	size_t ind;
+
+	for (ind = first; ind < end; ind++) {
+		const void *ckey = ckh->tab[ind].key;
+		if (ckey != NULL && ckh->keycomp(key, ckey))
+			return (ind);
+	}
+	return (SIZE_T_MAX);
+}
+
+/*
+ * Look key up in both of its candidate buckets.  Returns the index of the
+ * matching cell, or SIZE_T_MAX if neither bucket holds the key.
+ */
+JEMALLOC_INLINE size_t
+ckh_isearch(ckh_t *ckh, const void *key)
+{
+	size_t h1, h2, mask, found;
+
+	assert(ckh != NULL);
+	dassert(ckh->magic == CKH_MAGIC);
+
+	ckh->hash(key, ckh->lg_curbuckets, &h1, &h2);
+
+	/* Only the low lg_curbuckets bits of each hash select a bucket. */
+	mask = (ZU(1) << ckh->lg_curbuckets) - 1;
+
+	/* Primary bucket first; fall back to the secondary one on a miss. */
+	found = ckh_bucket_search(ckh, h1 & mask, key);
+	if (found == SIZE_T_MAX)
+		found = ckh_bucket_search(ckh, h2 & mask, key);
+
+	return (found);
+}
+
+JEMALLOC_INLINE bool
+ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
+    const void *data)
+{
+	ckhc_t *slots = &ckh->tab[bucket << LG_CKH_BUCKET_CELLS];
+	unsigned start, n;
+
+	/*
+	 * Store key/data in the first empty cell of the bucket and return
+	 * false, or return true if every cell is taken.  Probing begins at a
+	 * random cell, which avoids worst-case overhead as buckets fill up.
+	 */
+	prn32(start, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
+	for (n = 0; n < (ZU(1) << LG_CKH_BUCKET_CELLS); n++) {
+		ckhc_t *slot = &slots[(n + start) &
+		    ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1)];
+		if (slot->key != NULL)
+			continue;
+		slot->key = key;
+		slot->data = data;
+		ckh->count++;
+		return (false);
+	}
+	return (true);
+}
+
+/*
+ * argbucket is full.  Swap the pending item with a randomly chosen resident,
+ * then try to place the evicted item in its other bucket, repeating from there
+ * as needed.  Returns false once an item lands in a free cell; returns true on
+ * an eviction cycle, with *argkey and *argdata set to the still-homeless item.
+ */
+JEMALLOC_INLINE bool
+ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
+    void const **argdata)
+{
+	const void *key, *data, *tkey, *tdata;
+	ckhc_t *cell;
+	size_t hash1, hash2, bucket, tbucket;
+	unsigned i;	/* Cell within the bucket that gets evicted. */
+
+	bucket = argbucket;
+	key = *argkey;
+	data = *argdata;
+	while (true) {
+		/*
+		 * Choose a random item within the bucket to evict.  This is
+		 * critical to correct function, because without (eventually)
+		 * evicting all items within a bucket during iteration, it
+		 * would be possible to get stuck in an infinite loop if there
+		 * were an item for which both hashes indicated the same
+		 * bucket.
+		 */
+		prn32(i, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
+		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
+		assert(cell->key != NULL);	/* The bucket is known to be full. */
+
+		/* Swap cell->{key,data} and {key,data} (evict). */
+		tkey = cell->key; tdata = cell->data;
+		cell->key = key; cell->data = data;
+		key = tkey; data = tdata;
+
+#ifdef CKH_COUNT
+		ckh->nrelocs++;
+#endif
+
+		/* Find the alternate bucket for the evicted item. */
+		ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
+		tbucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+		if (tbucket == bucket) {
+			tbucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1);
+			/*
+			 * It may be that (tbucket == bucket) still, if the
+			 * item's hashes both indicate this bucket.  However,
+			 * we are guaranteed to eventually escape this bucket
+			 * during iteration, assuming pseudo-random item
+			 * selection (true randomness would make infinite
+			 * looping a remote possibility).  The reason we can
+			 * never get trapped forever is that there are two
+			 * cases:
+			 *
+			 * 1) This bucket == argbucket, so we will quickly
+			 *    detect an eviction cycle and terminate.
+			 * 2) An item was evicted to this bucket from another,
+			 *    which means that at least one item in this bucket
+			 *    has hashes that indicate distinct buckets.
+			 */
+		}
+		/* Check for a cycle: the chain is back at the starting bucket. */
+		if (tbucket == argbucket) {
+			*argkey = key;
+			*argdata = data;
+			return (true);
+		}
+
+		bucket = tbucket;
+		if (ckh_try_bucket_insert(ckh, bucket, key, data) == false)
+			return (false);
+	}
+}
+
+JEMALLOC_INLINE bool
+ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata)
+{
+	size_t h1, h2, mask, alt;
+
+	/*
+	 * Returns false if the item was stored; true if no cell could be
+	 * found, in which case ckh_evict_reloc_insert() may have swapped a
+	 * different item into *argkey and *argdata.
+	 */
+	ckh->hash(*argkey, ckh->lg_curbuckets, &h1, &h2);
+	mask = (ZU(1) << ckh->lg_curbuckets) - 1;
+	/* A free cell in the primary bucket is the preferred home. */
+	if (!ckh_try_bucket_insert(ckh, h1 & mask, *argkey, *argdata))
+		return (false);
+
+	/* Otherwise settle for a free cell in the secondary bucket. */
+	alt = h2 & mask;
+	if (!ckh_try_bucket_insert(ckh, alt, *argkey, *argdata))
+		return (false);
+
+	/* Both are full: start evicting from the secondary bucket. */
+	return (ckh_evict_reloc_insert(ckh, alt, argkey, argdata));
+}
+
+/*
+ * Re-insert every item found in aTab (the old cell array) into ckh's current
+ * table.  Returns true, with ckh->count restored, if some item does not fit.
+ */
+JEMALLOC_INLINE bool
+ckh_rebuild(ckh_t *ckh, ckhc_t *aTab)
+{
+	const size_t nitems = ckh->count;
+	size_t remaining = nitems;
+	ckhc_t *src;
+
+	/* Each successful insertion increments ckh->count again. */
+	ckh->count = 0;
+	for (src = aTab; remaining > 0; src++) {
+		const void *key = src->key;
+		const void *data = src->data;
+		if (key == NULL)
+			continue;
+		if (ckh_try_insert(ckh, &key, &data)) {
+			ckh->count = nitems;
+			return (true);
+		}
+		remaining--;
+	}
+	return (false);
+}
+
+/*
+ * Enlarge the table until every item it already holds fits again.
+ *
+ * Returns false on success.  Returns true, with the original table still in
+ * place, if a larger table cannot be sized or allocated.
+ */
+static bool
+ckh_grow(ckh_t *ckh)
+{
+	ckhc_t *oldtab = ckh->tab;
+	const unsigned lg_oldbuckets = ckh->lg_curbuckets;
+	size_t lg_ncells = lg_oldbuckets + LG_CKH_BUCKET_CELLS;
+
+#ifdef CKH_COUNT
+	ckh->ngrows++;
+#endif
+
+	/*
+	 * It is possible (though unlikely, given well behaved hashes) that
+	 * the rebuild fails even after doubling, so keep doubling until the
+	 * rebuild succeeds.
+	 */
+	for (;;) {
+		ckhc_t *newtab;
+		size_t usize;
+
+		lg_ncells++;
+		usize = sa2u(sizeof(ckhc_t) << lg_ncells, CACHELINE, NULL);
+		if (usize == 0) {
+			/* sa2u() returned 0: give up. */
+			return (true);
+		}
+
+		newtab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
+		if (newtab == NULL) {
+			/* Out of memory. */
+			return (true);
+		}
+
+		/* Install the candidate table, then migrate the items. */
+		ckh->tab = newtab;
+		ckh->lg_curbuckets = lg_ncells - LG_CKH_BUCKET_CELLS;
+
+		if (ckh_rebuild(ckh, oldtab) == false) {
+			idalloc(oldtab);
+			return (false);
+		}
+
+		/* Migration failed; discard the candidate and go bigger. */
+		idalloc(newtab);
+		ckh->tab = oldtab;
+		ckh->lg_curbuckets = lg_oldbuckets;
+	}
+}
+
+static void
+ckh_shrink(ckh_t *ckh)
+{
+	ckhc_t *oldtab = ckh->tab;
+	ckhc_t *newtab;
+	const unsigned lg_oldbuckets = ckh->lg_curbuckets;
+	size_t lg_ncells, usize;
+
+	/*
+	 * Try to move every item into a table half the current size.  This is
+	 * best effort: if the smaller table cannot be sized or allocated, or
+	 * if the rebuild fails (possible, though unlikely given well behaved
+	 * hashes), the existing table is kept and no error is reported.
+	 */
+	lg_ncells = lg_oldbuckets + LG_CKH_BUCKET_CELLS - 1;
+	usize = sa2u(sizeof(ckhc_t) << lg_ncells, CACHELINE, NULL);
+	if (usize == 0)
+		return;
+	newtab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
+	if (newtab == NULL) {
+		/*
+		 * An OOM error isn't worth propagating: neither this
+		 * operation nor later ones depend on the shrink happening.
+		 */
+		return;
+	}
+
+	/* Install the candidate table, then migrate the items. */
+	ckh->tab = newtab;
+	ckh->lg_curbuckets = lg_ncells - LG_CKH_BUCKET_CELLS;
+	if (ckh_rebuild(ckh, oldtab)) {
+		/* Migration failed; discard the candidate table. */
+		idalloc(newtab);
+		ckh->tab = oldtab;
+		ckh->lg_curbuckets = lg_oldbuckets;
+#ifdef CKH_COUNT
+		ckh->nshrinkfails++;
+#endif
+		return;
+	}
+
+	idalloc(oldtab);
+#ifdef CKH_COUNT
+	ckh->nshrinks++;
+#endif
+}
+
+bool
+ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
+{
+	size_t mincells, usize;
+	unsigned lg_mincells = LG_CKH_BUCKET_CELLS;
+
+	assert(minitems > 0);
+	assert(hash != NULL);
+	assert(keycomp != NULL);
+
+	/*
+	 * Initialize ckh as an empty table sized for at least minitems items.
+	 * Returns false on success, or true if the initial cell array cannot
+	 * be sized or allocated.
+	 */
+	ckh->hash = hash;
+	ckh->keycomp = keycomp;
+	ckh->count = 0;
+	ckh->prn_state = 42; /* Value doesn't really matter. */
+#ifdef CKH_COUNT
+	ckh->ngrows = 0;
+	ckh->nshrinks = 0;
+	ckh->nshrinkfails = 0;
+	ckh->ninserts = 0;
+	ckh->nrelocs = 0;
+#endif
+
+	/*
+	 * Find the smallest power-of-two cell count, no smaller than one
+	 * bucket, that holds minitems at a load factor of at most 0.75.  We
+	 * are using (2+,2) cuckoo hashing, which has an expected maximum load
+	 * factor of at least ~0.86, so 0.75 is a conservative load factor that
+	 * will typically allow minitems items to fit without ever growing the
+	 * table.
+	 */
+	assert(LG_CKH_BUCKET_CELLS > 0);
+	mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2;
+	while ((ZU(1) << lg_mincells) < mincells)
+		lg_mincells++;
+
+	ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
+	ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS;
+
+	/* Allocate the cell array. */
+	usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE, NULL);
+	if (usize == 0)
+		return (true);
+	ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
+	if (ckh->tab == NULL)
+		return (true);
+
+#ifdef JEMALLOC_DEBUG
+	/* Lets the dassert() checks in the other entry points pass. */
+	ckh->magic = CKH_MAGIC;
+#endif
+
+	return (false);
+}
+
+void
+ckh_delete(ckh_t *ckh)
+{
+	/* Release the table's cell array.  ckh itself is not freed here. */
+	assert(ckh != NULL);
+	dassert(ckh->magic == CKH_MAGIC);
+
+#ifdef CKH_VERBOSE
+	/* PRIu64 consumes uint64_t and %p consumes void *, so cast to those. */
+	malloc_printf(
+	    "%s(%p): ngrows: %"PRIu64", nshrinks: %"PRIu64","
+	    " nshrinkfails: %"PRIu64", ninserts: %"PRIu64","
+	    " nrelocs: %"PRIu64"\n", __func__, (void *)ckh,
+	    (uint64_t)ckh->ngrows,
+	    (uint64_t)ckh->nshrinks,
+	    (uint64_t)ckh->nshrinkfails,
+	    (uint64_t)ckh->ninserts,
+	    (uint64_t)ckh->nrelocs);
+#endif
+	idalloc(ckh->tab);
+#ifdef JEMALLOC_DEBUG
+	memset(ckh, 0x5a, sizeof(ckh_t));	/* Debug builds only. */
+#endif
+}
+
+size_t
+ckh_count(ckh_t *ckh)
+{
+	/* Returns the number of items currently stored in the table. */
+	assert(ckh != NULL);
+	dassert(ckh->magic == CKH_MAGIC);
+
+	return (ckh->count);
+}
+
+bool
+ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data)
+{
+	size_t ncells = ZU(1) << (ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS);
+	size_t pos = *tabind;
+
+	/* Resume at *tabind and skip ahead to the next occupied cell. */
+	while (pos < ncells && ckh->tab[pos].key == NULL)
+		pos++;
+	if (pos >= ncells)
+		return (true);	/* Exhausted; *tabind is left unchanged. */
+	/* Report the item (either output may be NULL) and save the cursor. */
+	if (key != NULL)
+		*key = (void *)ckh->tab[pos].key;
+	if (data != NULL)
+		*data = (void *)ckh->tab[pos].data;
+	*tabind = pos + 1;
+	return (false);
+}
+
+bool
+ckh_insert(ckh_t *ckh, const void *key, const void *data)
+{
+
+	assert(ckh != NULL);
+	dassert(ckh->magic == CKH_MAGIC);
+	/* The key must not already be present (ckh_search() returns true). */
+	assert(ckh_search(ckh, key, NULL, NULL));
+
+#ifdef CKH_COUNT
+	ckh->ninserts++;
+#endif
+
+	/*
+	 * A failed attempt may swap a different item into key/data, so retry
+	 * with the pending item after each grow.  Returns true if growing fails.
+	 */
+	for (;;) {
+		if (ckh_try_insert(ckh, &key, &data) == false)
+			return (false);
+		if (ckh_grow(ckh))
+			return (true);
+	}
+}
+
+bool
+ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
+{
+	ckhc_t *victim;
+	size_t ind;
+
+	assert(ckh != NULL);
+	dassert(ckh->magic == CKH_MAGIC);
+
+	/* Returns true if searchkey is absent; nothing is modified then. */
+	ind = ckh_isearch(ckh, searchkey);
+	if (ind == SIZE_T_MAX)
+		return (true);
+
+	/* Hand back the stored key/data if requested, then empty the cell. */
+	victim = &ckh->tab[ind];
+	if (key != NULL)
+		*key = (void *)victim->key;
+	if (data != NULL)
+		*data = (void *)victim->data;
+	victim->key = NULL;
+	victim->data = NULL; /* Not necessary. */
+	ckh->count--;
+
+	/* Try to halve the table if it is less than 1/4 full (best effort). */
+	if (ckh->count < (ZU(1) << (ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS
+	    - 2)) && ckh->lg_curbuckets > ckh->lg_minbuckets)
+		ckh_shrink(ckh);
+
+	return (false);
+}
+
+bool
+ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
+{
+	size_t ind;
+
+	assert(ckh != NULL);
+	dassert(ckh->magic == CKH_MAGIC);
+
+	/* Returns true if searchkey is absent; outputs are untouched then. */
+	ind = ckh_isearch(ckh, searchkey);
+	if (ind == SIZE_T_MAX)
+		return (true);
+	/* Found: copy out whichever of key/data the caller asked for. */
+	if (key != NULL)
+		*key = (void *)ckh->tab[ind].key;
+	if (data != NULL)
+		*data = (void *)ckh->tab[ind].data;
+	return (false);
+}
+
+void
+ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
+{
+	size_t len;
+	size_t first, second;
+	uint64_t h;
+
+	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
+	assert(hash1 != NULL);
+	assert(hash2 != NULL);
+
+	/* key is a NUL-terminated string; results go to *hash1 and *hash2. */
+	len = strlen((const char *)key);
+	h = hash(key, len, 0x94122f335b332aeaLLU);
+	if (minbits > 32) {
+		/* 32 bits apiece is too few: hash again with a second seed. */
+		first = h;
+		second = hash(key, len, 0x8432a476666bbc13LLU);
+	} else {
+		/* One 64-bit hash has enough bits: split it in two. */
+		first = h & ZU(0xffffffffU);
+		second = h >> 32;
+	}
+
+	*hash1 = first;
+	*hash2 = second;
+}
+
+bool
+ckh_string_keycomp(const void *k1, const void *k2)
+{
+	/* Keys are NUL-terminated strings; true means equal contents. */
+	assert(k1 != NULL);
+	assert(k2 != NULL);
+
+	return (strcmp((const char *)k1, (const char *)k2) == 0);
+}
+
+void
+ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1,
+    size_t *hash2)
+{
+	size_t ret1, ret2;
+	uint64_t h;
+	union {
+		const void	*v;
+		size_t		i;
+	} u;
+
+	/* Hash the pointer value of key itself, not what it points to. */
+	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
+	assert(hash1 != NULL);
+	assert(hash2 != NULL);
+
+	/*
+	 * i must be exactly pointer-sized so that storing v initializes every
+	 * hashed byte; a wider i leaves indeterminate bytes on 32-bit targets.
+	 */
+	assert(sizeof(u.v) == sizeof(u.i));
+	u.v = key;
+	h = hash(&u.i, sizeof(u.i), 0xd983396e68886082LLU);
+	if (minbits <= 32) {
+		/* One 64-bit hash has enough bits: split it into two halves. */
+		ret1 = h & ZU(0xffffffffU);
+		ret2 = h >> 32;
+	} else {
+		/* Need more than 32 bits each: hash again with a second seed. */
+		assert(SIZEOF_PTR == 8);
+		ret1 = h;
+		ret2 = hash(&u.i, sizeof(u.i), 0x5e2be9aff8709a5dLLU);
+	}
+
+	*hash1 = ret1;
+	*hash2 = ret2;
+}
+
+bool
+ckh_pointer_keycomp(const void *k1, const void *k2)
+{
+	/* Keys are compared by address only; nothing is dereferenced. */
+	return (k1 == k2);
+}
diff --git a/deps/jemalloc.orig/src/ctl.c b/deps/jemalloc.orig/src/ctl.c
new file mode 100644
index 00000000..e5336d36
--- /dev/null
+++ b/deps/jemalloc.orig/src/ctl.c
@@ -0,0 +1,1670 @@
+#define	JEMALLOC_CTL_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+/*
+ * ctl_mtx protects the following:
+ * - ctl_stats.*
+ * - opt_prof_active
+ * - swap_enabled
+ * - swap_prezeroed
+ */
+static malloc_mutex_t	ctl_mtx;
+static bool		ctl_initialized;
+static uint64_t		ctl_epoch;
+static ctl_stats_t	ctl_stats;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+/* CTL_PROTO(n) declares the static handler n_ctl() that CTL(n) refers to. */
+#define	CTL_PROTO(n)							\
+static int	n##_ctl(const size_t *mib, size_t miblen, void *oldp,	\
+    size_t *oldlenp, void *newp, size_t newlen);
+/* INDEX_PROTO(n) declares n_index(), the function stored by INDEX(n). */
+#define	INDEX_PROTO(n)							\
+const ctl_node_t	*n##_index(const size_t *mib, size_t miblen,	\
+    size_t i);
+
+#ifdef JEMALLOC_STATS
+static bool	ctl_arena_init(ctl_arena_stats_t *astats);
+#endif
+static void	ctl_arena_clear(ctl_arena_stats_t *astats);
+#ifdef JEMALLOC_STATS
+static void	ctl_arena_stats_amerge(ctl_arena_stats_t *cstats,
+    arena_t *arena);
+static void	ctl_arena_stats_smerge(ctl_arena_stats_t *sstats,
+    ctl_arena_stats_t *astats);
+#endif
+static void	ctl_arena_refresh(arena_t *arena, unsigned i);
+static void	ctl_refresh(void);
+static bool	ctl_init(void);
+static int	ctl_lookup(const char *name, ctl_node_t const **nodesp,
+    size_t *mibp, size_t *depthp);
+
+CTL_PROTO(version)
+CTL_PROTO(epoch)
+#ifdef JEMALLOC_TCACHE
+CTL_PROTO(tcache_flush)
+#endif
+CTL_PROTO(thread_arena)
+#ifdef JEMALLOC_STATS
+CTL_PROTO(thread_allocated)
+CTL_PROTO(thread_allocatedp)
+CTL_PROTO(thread_deallocated)
+CTL_PROTO(thread_deallocatedp)
+#endif
+CTL_PROTO(config_debug)
+CTL_PROTO(config_dss)
+CTL_PROTO(config_dynamic_page_shift)
+CTL_PROTO(config_fill)
+CTL_PROTO(config_lazy_lock)
+CTL_PROTO(config_prof)
+CTL_PROTO(config_prof_libgcc)
+CTL_PROTO(config_prof_libunwind)
+CTL_PROTO(config_stats)
+CTL_PROTO(config_swap)
+CTL_PROTO(config_sysv)
+CTL_PROTO(config_tcache)
+CTL_PROTO(config_tiny)
+CTL_PROTO(config_tls)
+CTL_PROTO(config_xmalloc)
+CTL_PROTO(opt_abort)
+CTL_PROTO(opt_lg_qspace_max)
+CTL_PROTO(opt_lg_cspace_max)
+CTL_PROTO(opt_lg_chunk)
+CTL_PROTO(opt_narenas)
+CTL_PROTO(opt_lg_dirty_mult)
+CTL_PROTO(opt_stats_print)
+#ifdef JEMALLOC_FILL
+CTL_PROTO(opt_junk)
+CTL_PROTO(opt_zero)
+#endif
+#ifdef JEMALLOC_SYSV
+CTL_PROTO(opt_sysv)
+#endif
+#ifdef JEMALLOC_XMALLOC
+CTL_PROTO(opt_xmalloc)
+#endif
+#ifdef JEMALLOC_TCACHE
+CTL_PROTO(opt_tcache)
+CTL_PROTO(opt_lg_tcache_gc_sweep)
+#endif
+#ifdef JEMALLOC_PROF
+CTL_PROTO(opt_prof)
+CTL_PROTO(opt_prof_prefix)
+CTL_PROTO(opt_prof_active)
+CTL_PROTO(opt_lg_prof_bt_max)
+CTL_PROTO(opt_lg_prof_sample)
+CTL_PROTO(opt_lg_prof_interval)
+CTL_PROTO(opt_prof_gdump)
+CTL_PROTO(opt_prof_leak)
+CTL_PROTO(opt_prof_accum)
+CTL_PROTO(opt_lg_prof_tcmax)
+#endif
+#ifdef JEMALLOC_SWAP
+CTL_PROTO(opt_overcommit)
+#endif
+CTL_PROTO(arenas_bin_i_size)
+CTL_PROTO(arenas_bin_i_nregs)
+CTL_PROTO(arenas_bin_i_run_size)
+INDEX_PROTO(arenas_bin_i)
+CTL_PROTO(arenas_lrun_i_size)
+INDEX_PROTO(arenas_lrun_i)
+CTL_PROTO(arenas_narenas)
+CTL_PROTO(arenas_initialized)
+CTL_PROTO(arenas_quantum)
+CTL_PROTO(arenas_cacheline)
+CTL_PROTO(arenas_subpage)
+CTL_PROTO(arenas_pagesize)
+CTL_PROTO(arenas_chunksize)
+#ifdef JEMALLOC_TINY
+CTL_PROTO(arenas_tspace_min)
+CTL_PROTO(arenas_tspace_max)
+#endif
+CTL_PROTO(arenas_qspace_min)
+CTL_PROTO(arenas_qspace_max)
+CTL_PROTO(arenas_cspace_min)
+CTL_PROTO(arenas_cspace_max)
+CTL_PROTO(arenas_sspace_min)
+CTL_PROTO(arenas_sspace_max)
+#ifdef JEMALLOC_TCACHE
+CTL_PROTO(arenas_tcache_max)
+#endif
+CTL_PROTO(arenas_ntbins)
+CTL_PROTO(arenas_nqbins)
+CTL_PROTO(arenas_ncbins)
+CTL_PROTO(arenas_nsbins)
+CTL_PROTO(arenas_nbins)
+#ifdef JEMALLOC_TCACHE
+CTL_PROTO(arenas_nhbins)
+#endif
+CTL_PROTO(arenas_nlruns)
+CTL_PROTO(arenas_purge)
+#ifdef JEMALLOC_PROF
+CTL_PROTO(prof_active)
+CTL_PROTO(prof_dump)
+CTL_PROTO(prof_interval)
+#endif
+#ifdef JEMALLOC_STATS
+CTL_PROTO(stats_chunks_current)
+CTL_PROTO(stats_chunks_total)
+CTL_PROTO(stats_chunks_high)
+CTL_PROTO(stats_huge_allocated)
+CTL_PROTO(stats_huge_nmalloc)
+CTL_PROTO(stats_huge_ndalloc)
+CTL_PROTO(stats_arenas_i_small_allocated)
+CTL_PROTO(stats_arenas_i_small_nmalloc)
+CTL_PROTO(stats_arenas_i_small_ndalloc)
+CTL_PROTO(stats_arenas_i_small_nrequests)
+CTL_PROTO(stats_arenas_i_large_allocated)
+CTL_PROTO(stats_arenas_i_large_nmalloc)
+CTL_PROTO(stats_arenas_i_large_ndalloc)
+CTL_PROTO(stats_arenas_i_large_nrequests)
+CTL_PROTO(stats_arenas_i_bins_j_allocated)
+CTL_PROTO(stats_arenas_i_bins_j_nmalloc)
+CTL_PROTO(stats_arenas_i_bins_j_ndalloc)
+CTL_PROTO(stats_arenas_i_bins_j_nrequests)
+#ifdef JEMALLOC_TCACHE
+CTL_PROTO(stats_arenas_i_bins_j_nfills)
+CTL_PROTO(stats_arenas_i_bins_j_nflushes)
+#endif
+CTL_PROTO(stats_arenas_i_bins_j_nruns)
+CTL_PROTO(stats_arenas_i_bins_j_nreruns)
+CTL_PROTO(stats_arenas_i_bins_j_highruns)
+CTL_PROTO(stats_arenas_i_bins_j_curruns)
+INDEX_PROTO(stats_arenas_i_bins_j)
+CTL_PROTO(stats_arenas_i_lruns_j_nmalloc)
+CTL_PROTO(stats_arenas_i_lruns_j_ndalloc)
+CTL_PROTO(stats_arenas_i_lruns_j_nrequests)
+CTL_PROTO(stats_arenas_i_lruns_j_highruns)
+CTL_PROTO(stats_arenas_i_lruns_j_curruns)
+INDEX_PROTO(stats_arenas_i_lruns_j)
+#endif
+CTL_PROTO(stats_arenas_i_nthreads)
+CTL_PROTO(stats_arenas_i_pactive)
+CTL_PROTO(stats_arenas_i_pdirty)
+#ifdef JEMALLOC_STATS
+CTL_PROTO(stats_arenas_i_mapped)
+CTL_PROTO(stats_arenas_i_npurge)
+CTL_PROTO(stats_arenas_i_nmadvise)
+CTL_PROTO(stats_arenas_i_purged)
+#endif
+INDEX_PROTO(stats_arenas_i)
+#ifdef JEMALLOC_STATS
+CTL_PROTO(stats_cactive)
+CTL_PROTO(stats_allocated)
+CTL_PROTO(stats_active)
+CTL_PROTO(stats_mapped)
+#endif
+#ifdef JEMALLOC_SWAP
+#  ifdef JEMALLOC_STATS
+CTL_PROTO(swap_avail)
+#  endif
+CTL_PROTO(swap_prezeroed)
+CTL_PROTO(swap_nfds)
+CTL_PROTO(swap_fds)
+#endif
+
+/******************************************************************************/
+/* mallctl tree. */
+
+/* Maximum tree depth. */
+#define	CTL_MAX_DEPTH	6
+/* A named node is written {NAME("x"), CHILD(sub)} or {NAME("x"), CTL(fn)}. */
+#define	NAME(n)	true,	{.named = {n
+#define	CHILD(c) sizeof(c##_node) / sizeof(ctl_node_t),	c##_node}},	NULL
+#define	CTL(c)	0,				NULL}},		c##_ctl
+/* CHILD() links the array sub_node; CTL() has no children and sets fn_ctl. */
+/*
+ * Only handles internal indexed nodes, since there are currently no external
+ * ones.
+ */
+#define	INDEX(i)	false,	{.indexed = {i##_index}},		NULL
+
+#ifdef JEMALLOC_TCACHE
+static const ctl_node_t	tcache_node[] = {	/* Children of "tcache". */
+	{NAME("flush"),		CTL(tcache_flush)}
+};
+#endif
+
+static const ctl_node_t	thread_node[] = {	/* Children of "thread". */
+	{NAME("arena"),		CTL(thread_arena)}
+#ifdef JEMALLOC_STATS
+	,	/* Kept inside the #ifdef so no trailing comma is left behind. */
+	{NAME("allocated"),	CTL(thread_allocated)},
+	{NAME("allocatedp"),	CTL(thread_allocatedp)},
+	{NAME("deallocated"),	CTL(thread_deallocated)},
+	{NAME("deallocatedp"),	CTL(thread_deallocatedp)}
+#endif
+};
+
+static const ctl_node_t	config_node[] = {	/* Children of "config". */
+	{NAME("debug"),			CTL(config_debug)},
+	{NAME("dss"),			CTL(config_dss)},
+	{NAME("dynamic_page_shift"),	CTL(config_dynamic_page_shift)},
+	{NAME("fill"),			CTL(config_fill)},
+	{NAME("lazy_lock"),		CTL(config_lazy_lock)},
+	{NAME("prof"),			CTL(config_prof)},
+	{NAME("prof_libgcc"),		CTL(config_prof_libgcc)},
+	{NAME("prof_libunwind"),	CTL(config_prof_libunwind)},
+	{NAME("stats"),			CTL(config_stats)},
+	{NAME("swap"),			CTL(config_swap)},
+	{NAME("sysv"),			CTL(config_sysv)},
+	{NAME("tcache"),		CTL(config_tcache)},
+	{NAME("tiny"),			CTL(config_tiny)},
+	{NAME("tls"),			CTL(config_tls)},
+	{NAME("xmalloc"),		CTL(config_xmalloc)}
+};
+
+static const ctl_node_t opt_node[] = {	/* Children of "opt". */
+	{NAME("abort"),			CTL(opt_abort)},
+	{NAME("lg_qspace_max"),		CTL(opt_lg_qspace_max)},
+	{NAME("lg_cspace_max"),		CTL(opt_lg_cspace_max)},
+	{NAME("lg_chunk"),		CTL(opt_lg_chunk)},
+	{NAME("narenas"),		CTL(opt_narenas)},
+	{NAME("lg_dirty_mult"),		CTL(opt_lg_dirty_mult)},
+	{NAME("stats_print"),		CTL(opt_stats_print)}
+#ifdef JEMALLOC_FILL
+	,	/* Each optional group brings its own leading comma. */
+	{NAME("junk"),			CTL(opt_junk)},
+	{NAME("zero"),			CTL(opt_zero)}
+#endif
+#ifdef JEMALLOC_SYSV
+	,
+	{NAME("sysv"),			CTL(opt_sysv)}
+#endif
+#ifdef JEMALLOC_XMALLOC
+	,
+	{NAME("xmalloc"),		CTL(opt_xmalloc)}
+#endif
+#ifdef JEMALLOC_TCACHE
+	,
+	{NAME("tcache"),		CTL(opt_tcache)},
+	{NAME("lg_tcache_gc_sweep"),	CTL(opt_lg_tcache_gc_sweep)}
+#endif
+#ifdef JEMALLOC_PROF
+	,
+	{NAME("prof"),			CTL(opt_prof)},
+	{NAME("prof_prefix"),		CTL(opt_prof_prefix)},
+	{NAME("prof_active"),		CTL(opt_prof_active)},
+	{NAME("lg_prof_bt_max"),	CTL(opt_lg_prof_bt_max)},
+	{NAME("lg_prof_sample"),	CTL(opt_lg_prof_sample)},
+	{NAME("lg_prof_interval"),	CTL(opt_lg_prof_interval)},
+	{NAME("prof_gdump"),		CTL(opt_prof_gdump)},
+	{NAME("prof_leak"),		CTL(opt_prof_leak)},
+	{NAME("prof_accum"),		CTL(opt_prof_accum)},
+	{NAME("lg_prof_tcmax"),		CTL(opt_lg_prof_tcmax)}
+#endif
+#ifdef JEMALLOC_SWAP
+	,
+	{NAME("overcommit"),		CTL(opt_overcommit)}
+#endif
+};
+
+static const ctl_node_t arenas_bin_i_node[] = {	/* Leaves of one bin. */
+	{NAME("size"),			CTL(arenas_bin_i_size)},
+	{NAME("nregs"),			CTL(arenas_bin_i_nregs)},
+	{NAME("run_size"),		CTL(arenas_bin_i_run_size)}
+};
+/* Unnamed wrapper; presumably what arenas_bin_i_index() returns - confirm. */
+static const ctl_node_t super_arenas_bin_i_node[] = {
+	{NAME(""),			CHILD(arenas_bin_i)}
+};
+
+static const ctl_node_t arenas_bin_node[] = {	/* Child of "bin". */
+	{INDEX(arenas_bin_i)}
+};
+
+static const ctl_node_t arenas_lrun_i_node[] = {	/* Leaves of one lrun. */
+	{NAME("size"),			CTL(arenas_lrun_i_size)}
+};
+/* Unnamed wrapper; presumably what arenas_lrun_i_index() returns - confirm. */
+static const ctl_node_t super_arenas_lrun_i_node[] = {
+	{NAME(""),			CHILD(arenas_lrun_i)}
+};
+
+static const ctl_node_t arenas_lrun_node[] = {	/* Child of "lrun". */
+	{INDEX(arenas_lrun_i)}
+};
+
+static const ctl_node_t arenas_node[] = {	/* Children of "arenas". */
+	{NAME("narenas"),		CTL(arenas_narenas)},
+	{NAME("initialized"),		CTL(arenas_initialized)},
+	{NAME("quantum"),		CTL(arenas_quantum)},
+	{NAME("cacheline"),		CTL(arenas_cacheline)},
+	{NAME("subpage"),		CTL(arenas_subpage)},
+	{NAME("pagesize"),		CTL(arenas_pagesize)},
+	{NAME("chunksize"),		CTL(arenas_chunksize)},
+#ifdef JEMALLOC_TINY
+	{NAME("tspace_min"),		CTL(arenas_tspace_min)},
+	{NAME("tspace_max"),		CTL(arenas_tspace_max)},
+#endif
+	{NAME("qspace_min"),		CTL(arenas_qspace_min)},
+	{NAME("qspace_max"),		CTL(arenas_qspace_max)},
+	{NAME("cspace_min"),		CTL(arenas_cspace_min)},
+	{NAME("cspace_max"),		CTL(arenas_cspace_max)},
+	{NAME("sspace_min"),		CTL(arenas_sspace_min)},
+	{NAME("sspace_max"),		CTL(arenas_sspace_max)},
+#ifdef JEMALLOC_TCACHE
+	{NAME("tcache_max"),		CTL(arenas_tcache_max)},
+#endif
+	{NAME("ntbins"),		CTL(arenas_ntbins)},
+	{NAME("nqbins"),		CTL(arenas_nqbins)},
+	{NAME("ncbins"),		CTL(arenas_ncbins)},
+	{NAME("nsbins"),		CTL(arenas_nsbins)},
+	{NAME("nbins"),			CTL(arenas_nbins)},
+#ifdef JEMALLOC_TCACHE
+	{NAME("nhbins"),		CTL(arenas_nhbins)},
+#endif
+	{NAME("bin"),			CHILD(arenas_bin)},
+	{NAME("nlruns"),		CTL(arenas_nlruns)},
+	{NAME("lrun"),			CHILD(arenas_lrun)},
+	{NAME("purge"),			CTL(arenas_purge)}
+};
+
+#ifdef JEMALLOC_PROF
+static const ctl_node_t	prof_node[] = {	/* Children of "prof". */
+	{NAME("active"),	CTL(prof_active)},
+	{NAME("dump"),		CTL(prof_dump)},
+	{NAME("interval"),	CTL(prof_interval)}
+};
+#endif
+
+#ifdef JEMALLOC_STATS
+static const ctl_node_t stats_chunks_node[] = {	/* Children of "chunks". */
+	{NAME("current"),		CTL(stats_chunks_current)},
+	{NAME("total"),			CTL(stats_chunks_total)},
+	{NAME("high"),			CTL(stats_chunks_high)}
+};
+
+static const ctl_node_t stats_huge_node[] = {	/* Children of "huge". */
+	{NAME("allocated"),		CTL(stats_huge_allocated)},
+	{NAME("nmalloc"),		CTL(stats_huge_nmalloc)},
+	{NAME("ndalloc"),		CTL(stats_huge_ndalloc)}
+};
+
+static const ctl_node_t stats_arenas_i_small_node[] = {	/* "small". */
+	{NAME("allocated"),		CTL(stats_arenas_i_small_allocated)},
+	{NAME("nmalloc"),		CTL(stats_arenas_i_small_nmalloc)},
+	{NAME("ndalloc"),		CTL(stats_arenas_i_small_ndalloc)},
+	{NAME("nrequests"),		CTL(stats_arenas_i_small_nrequests)}
+};
+
+static const ctl_node_t stats_arenas_i_large_node[] = {	/* "large". */
+	{NAME("allocated"),		CTL(stats_arenas_i_large_allocated)},
+	{NAME("nmalloc"),		CTL(stats_arenas_i_large_nmalloc)},
+	{NAME("ndalloc"),		CTL(stats_arenas_i_large_ndalloc)},
+	{NAME("nrequests"),		CTL(stats_arenas_i_large_nrequests)}
+};
+
+static const ctl_node_t stats_arenas_i_bins_j_node[] = {	/* One bin. */
+	{NAME("allocated"),		CTL(stats_arenas_i_bins_j_allocated)},
+	{NAME("nmalloc"),		CTL(stats_arenas_i_bins_j_nmalloc)},
+	{NAME("ndalloc"),		CTL(stats_arenas_i_bins_j_ndalloc)},
+	{NAME("nrequests"),		CTL(stats_arenas_i_bins_j_nrequests)},
+#ifdef JEMALLOC_TCACHE
+	{NAME("nfills"),		CTL(stats_arenas_i_bins_j_nfills)},
+	{NAME("nflushes"),		CTL(stats_arenas_i_bins_j_nflushes)},
+#endif
+	{NAME("nruns"),			CTL(stats_arenas_i_bins_j_nruns)},
+	{NAME("nreruns"),		CTL(stats_arenas_i_bins_j_nreruns)},
+	{NAME("highruns"),		CTL(stats_arenas_i_bins_j_highruns)},
+	{NAME("curruns"),		CTL(stats_arenas_i_bins_j_curruns)}
+};
+static const ctl_node_t super_stats_arenas_i_bins_j_node[] = {
+	{NAME(""),			CHILD(stats_arenas_i_bins_j)}
+};
+
+static const ctl_node_t stats_arenas_i_bins_node[] = {	/* Child of "bins". */
+	{INDEX(stats_arenas_i_bins_j)}
+};
+
+static const ctl_node_t stats_arenas_i_lruns_j_node[] = {	/* One lrun. */
+	{NAME("nmalloc"),		CTL(stats_arenas_i_lruns_j_nmalloc)},
+	{NAME("ndalloc"),		CTL(stats_arenas_i_lruns_j_ndalloc)},
+	{NAME("nrequests"),		CTL(stats_arenas_i_lruns_j_nrequests)},
+	{NAME("highruns"),		CTL(stats_arenas_i_lruns_j_highruns)},
+	{NAME("curruns"),		CTL(stats_arenas_i_lruns_j_curruns)}
+};
+static const ctl_node_t super_stats_arenas_i_lruns_j_node[] = {
+	{NAME(""),			CHILD(stats_arenas_i_lruns_j)}
+};
+
+static const ctl_node_t stats_arenas_i_lruns_node[] = {	/* Child of "lruns". */
+	{INDEX(stats_arenas_i_lruns_j)}
+};
+#endif
+
+static const ctl_node_t stats_arenas_i_node[] = {	/* One arena's stats. */
+	{NAME("nthreads"),		CTL(stats_arenas_i_nthreads)},
+	{NAME("pactive"),		CTL(stats_arenas_i_pactive)},
+	{NAME("pdirty"),		CTL(stats_arenas_i_pdirty)}
+#ifdef JEMALLOC_STATS
+	,	/* Kept inside the #ifdef so no trailing comma is left behind. */
+	{NAME("mapped"),		CTL(stats_arenas_i_mapped)},
+	{NAME("npurge"),		CTL(stats_arenas_i_npurge)},
+	{NAME("nmadvise"),		CTL(stats_arenas_i_nmadvise)},
+	{NAME("purged"),		CTL(stats_arenas_i_purged)},
+	{NAME("small"),			CHILD(stats_arenas_i_small)},
+	{NAME("large"),			CHILD(stats_arenas_i_large)},
+	{NAME("bins"),			CHILD(stats_arenas_i_bins)},
+	{NAME("lruns"),		CHILD(stats_arenas_i_lruns)}
+#endif
+};
+static const ctl_node_t super_stats_arenas_i_node[] = {
+	{NAME(""),			CHILD(stats_arenas_i)}
+};
+
+static const ctl_node_t stats_arenas_node[] = {	/* Child of "arenas". */
+	{INDEX(stats_arenas_i)}
+};
+
+static const ctl_node_t stats_node[] = {	/* Children of "stats". */
+#ifdef JEMALLOC_STATS
+	{NAME("cactive"),		CTL(stats_cactive)},
+	{NAME("allocated"),		CTL(stats_allocated)},
+	{NAME("active"),		CTL(stats_active)},
+	{NAME("mapped"),		CTL(stats_mapped)},
+	{NAME("chunks"),		CHILD(stats_chunks)},
+	{NAME("huge"),			CHILD(stats_huge)},
+#endif
+	{NAME("arenas"),		CHILD(stats_arenas)}
+};
+
+#ifdef JEMALLOC_SWAP
+static const ctl_node_t swap_node[] = {	/* Children of "swap". */
+#  ifdef JEMALLOC_STATS
+	{NAME("avail"),			CTL(swap_avail)},
+#  endif
+	{NAME("prezeroed"),		CTL(swap_prezeroed)},
+	{NAME("nfds"),			CTL(swap_nfds)},
+	{NAME("fds"),			CTL(swap_fds)}
+};
+#endif
+
+static const ctl_node_t	root_node[] = {	/* Top-level names of the tree. */
+	{NAME("version"),	CTL(version)},
+	{NAME("epoch"),		CTL(epoch)},
+#ifdef JEMALLOC_TCACHE
+	{NAME("tcache"),	CHILD(tcache)},
+#endif
+	{NAME("thread"),	CHILD(thread)},
+	{NAME("config"),	CHILD(config)},
+	{NAME("opt"),		CHILD(opt)},
+	{NAME("arenas"),	CHILD(arenas)},
+#ifdef JEMALLOC_PROF
+	{NAME("prof"),		CHILD(prof)},
+#endif
+	{NAME("stats"),		CHILD(stats)}
+#ifdef JEMALLOC_SWAP
+	,	/* Kept inside the #ifdef so no trailing comma is left behind. */
+	{NAME("swap"),		CHILD(swap)}
+#endif
+};
+static const ctl_node_t super_root_node[] = {	/* Unnamed parent of root. */
+	{NAME(""),		CHILD(root)}
+};
+
+#undef NAME
+#undef CHILD
+#undef CTL
+#undef INDEX
+
+/******************************************************************************/
+
+#ifdef JEMALLOC_STATS
+static bool
+ctl_arena_init(ctl_arena_stats_t *astats)
+{
+	/* Allocate whichever stats arrays are still missing (others are kept). */
+	/* Returns true if base_alloc() fails, false otherwise. */
+	if (astats->bstats == NULL) {
+		size_t bsize = nbins * sizeof(malloc_bin_stats_t);
+		astats->bstats = (malloc_bin_stats_t *)base_alloc(bsize);
+		if (astats->bstats == NULL)
+			return (true);
+	}
+	if (astats->lstats == NULL) {
+		size_t lsize = nlclasses * sizeof(malloc_large_stats_t);
+		astats->lstats = (malloc_large_stats_t *)base_alloc(lsize);
+		if (astats->lstats == NULL)
+			return (true);
+	}
+	return (false);
+}
+#endif
+
+static void
+ctl_arena_clear(ctl_arena_stats_t *astats)
+{
+
+	astats->pactive = 0;
+	astats->pdirty = 0;
+#ifdef JEMALLOC_STATS
+	memset(&astats->astats, 0, sizeof(arena_stats_t));
+	astats->allocated_small = 0;
+	astats->nmalloc_small = 0;
+	astats->ndalloc_small = 0;
+	astats->nrequests_small = 0;
+	memset(astats->bstats, 0, nbins * sizeof(malloc_bin_stats_t));
+	memset(astats->lstats, 0, nlclasses * sizeof(malloc_large_stats_t));
+#endif
+}
+
+#ifdef JEMALLOC_STATS
+static void
+ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena)
+{
+	unsigned i;
+
+	arena_stats_merge(arena, &cstats->pactive, &cstats->pdirty,
+	    &cstats->astats, cstats->bstats, cstats->lstats);
+
+	for (i = 0; i < nbins; i++) {
+		cstats->allocated_small += cstats->bstats[i].allocated;
+		cstats->nmalloc_small += cstats->bstats[i].nmalloc;
+		cstats->ndalloc_small += cstats->bstats[i].ndalloc;
+		cstats->nrequests_small += cstats->bstats[i].nrequests;
+	}
+}
+
+static void
+ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats)
+{
+	unsigned i;
+
+	sstats->pactive += astats->pactive;
+	sstats->pdirty += astats->pdirty;
+
+	sstats->astats.mapped += astats->astats.mapped;
+	sstats->astats.npurge += astats->astats.npurge;
+	sstats->astats.nmadvise += astats->astats.nmadvise;
+	sstats->astats.purged += astats->astats.purged;
+
+	sstats->allocated_small += astats->allocated_small;
+	sstats->nmalloc_small += astats->nmalloc_small;
+	sstats->ndalloc_small += astats->ndalloc_small;
+	sstats->nrequests_small += astats->nrequests_small;
+
+	sstats->astats.allocated_large += astats->astats.allocated_large;
+	sstats->astats.nmalloc_large += astats->astats.nmalloc_large;
+	sstats->astats.ndalloc_large += astats->astats.ndalloc_large;
+	sstats->astats.nrequests_large += astats->astats.nrequests_large;
+
+	for (i = 0; i < nlclasses; i++) {
+		sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc;
+		sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc;
+		sstats->lstats[i].nrequests += astats->lstats[i].nrequests;
+		sstats->lstats[i].highruns += astats->lstats[i].highruns;
+		sstats->lstats[i].curruns += astats->lstats[i].curruns;
+	}
+
+	for (i = 0; i < nbins; i++) {
+		sstats->bstats[i].allocated += astats->bstats[i].allocated;
+		sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc;
+		sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc;
+		sstats->bstats[i].nrequests += astats->bstats[i].nrequests;
+#ifdef JEMALLOC_TCACHE
+		sstats->bstats[i].nfills += astats->bstats[i].nfills;
+		sstats->bstats[i].nflushes += astats->bstats[i].nflushes;
+#endif
+		sstats->bstats[i].nruns += astats->bstats[i].nruns;
+		sstats->bstats[i].reruns += astats->bstats[i].reruns;
+		sstats->bstats[i].highruns += astats->bstats[i].highruns;
+		sstats->bstats[i].curruns += astats->bstats[i].curruns;
+	}
+}
+#endif
+
+static void
+ctl_arena_refresh(arena_t *arena, unsigned i)
+{
+	ctl_arena_stats_t *astats = &ctl_stats.arenas[i];
+	ctl_arena_stats_t *sstats = &ctl_stats.arenas[narenas];
+	/* Slot narenas holds the totals; ctl_refresh() zeroes it beforehand. */
+	ctl_arena_clear(astats);
+	/* ctl_arena_clear() leaves nthreads alone, so it is still current. */
+	sstats->nthreads += astats->nthreads;
+#ifdef JEMALLOC_STATS
+	ctl_arena_stats_amerge(astats, arena);
+	/* Merge into sum stats as well. */
+	ctl_arena_stats_smerge(sstats, astats);
+#else
+	astats->pactive += arena->nactive;
+	astats->pdirty += arena->ndirty;
+	/* Merge into sum stats as well. */
+	sstats->pactive += arena->nactive;
+	sstats->pdirty += arena->ndirty;
+#endif
+}
+
+static void
+ctl_refresh(void)
+{
+	unsigned i;
+	arena_t *tarenas[narenas];
+
+#ifdef JEMALLOC_STATS
+	malloc_mutex_lock(&chunks_mtx);
+	ctl_stats.chunks.current = stats_chunks.curchunks;
+	ctl_stats.chunks.total = stats_chunks.nchunks;
+	ctl_stats.chunks.high = stats_chunks.highchunks;
+	malloc_mutex_unlock(&chunks_mtx);
+
+	malloc_mutex_lock(&huge_mtx);
+	ctl_stats.huge.allocated = huge_allocated;
+	ctl_stats.huge.nmalloc = huge_nmalloc;
+	ctl_stats.huge.ndalloc = huge_ndalloc;
+	malloc_mutex_unlock(&huge_mtx);
+#endif
+
+	/*
+	 * Clear sum stats, since they will be merged into by
+	 * ctl_arena_refresh().
+	 */
+	ctl_stats.arenas[narenas].nthreads = 0;
+	ctl_arena_clear(&ctl_stats.arenas[narenas]);
+
+	malloc_mutex_lock(&arenas_lock);
+	memcpy(tarenas, arenas, sizeof(arena_t *) * narenas);
+	for (i = 0; i < narenas; i++) {
+		if (arenas[i] != NULL)
+			ctl_stats.arenas[i].nthreads = arenas[i]->nthreads;
+		else
+			ctl_stats.arenas[i].nthreads = 0;
+	}
+	malloc_mutex_unlock(&arenas_lock);
+	for (i = 0; i < narenas; i++) {
+		bool initialized = (tarenas[i] != NULL);
+
+		ctl_stats.arenas[i].initialized = initialized;
+		if (initialized)
+			ctl_arena_refresh(tarenas[i], i);
+	}
+
+#ifdef JEMALLOC_STATS
+	ctl_stats.allocated = ctl_stats.arenas[narenas].allocated_small
+	    + ctl_stats.arenas[narenas].astats.allocated_large
+	    + ctl_stats.huge.allocated;
+	ctl_stats.active = (ctl_stats.arenas[narenas].pactive << PAGE_SHIFT)
+	    + ctl_stats.huge.allocated;
+	ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk);
+
+#  ifdef JEMALLOC_SWAP
+	malloc_mutex_lock(&swap_mtx);
+	ctl_stats.swap_avail = swap_avail;
+	malloc_mutex_unlock(&swap_mtx);
+#  endif
+#endif
+
+	ctl_epoch++;
+}
+
+static bool
+ctl_init(void)
+{
+	bool ret;
+
+	malloc_mutex_lock(&ctl_mtx);
+	if (ctl_initialized == false) {
+#ifdef JEMALLOC_STATS
+		unsigned i;
+#endif
+
+		/*
+		 * Allocate space for one extra arena stats element, which
+		 * contains summed stats across all arenas.
+		 */
+		ctl_stats.arenas = (ctl_arena_stats_t *)base_alloc(
+		    (narenas + 1) * sizeof(ctl_arena_stats_t));
+		if (ctl_stats.arenas == NULL) {
+			ret = true;
+			goto RETURN;
+		}
+		memset(ctl_stats.arenas, 0, (narenas + 1) *
+		    sizeof(ctl_arena_stats_t));
+
+		/*
+		 * Initialize all stats structures, regardless of whether they
+		 * ever get used.  Lazy initialization would allow errors to
+		 * cause inconsistent state to be viewable by the application.
+		 */
+#ifdef JEMALLOC_STATS
+		for (i = 0; i <= narenas; i++) {
+			if (ctl_arena_init(&ctl_stats.arenas[i])) {
+				ret = true;
+				goto RETURN;
+			}
+		}
+#endif
+		ctl_stats.arenas[narenas].initialized = true;
+
+		ctl_epoch = 0;
+		ctl_refresh();
+		ctl_initialized = true;
+	}
+
+	ret = false;
+RETURN:
+	malloc_mutex_unlock(&ctl_mtx);
+	return (ret);
+}
+
+static int
+ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp,
+    size_t *depthp)
+{
+	int ret;
+	const char *elm, *tdot, *dot;
+	size_t elen, i, j;
+	const ctl_node_t *node;
+
+	elm = name;
+	/* Equivalent to strchrnul(). */
+	dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot : strchr(elm, '\0');
+	elen = (size_t)((uintptr_t)dot - (uintptr_t)elm);
+	if (elen == 0) {
+		ret = ENOENT;
+		goto RETURN;
+	}
+	node = super_root_node;
+	for (i = 0; i < *depthp; i++) {
+		assert(node->named);
+		assert(node->u.named.nchildren > 0);
+		if (node->u.named.children[0].named) {
+			const ctl_node_t *pnode = node;
+
+			/* Children are named. */
+			for (j = 0; j < node->u.named.nchildren; j++) {
+				const ctl_node_t *child =
+				    &node->u.named.children[j];
+				if (strlen(child->u.named.name) == elen
+				    && strncmp(elm, child->u.named.name,
+				    elen) == 0) {
+					node = child;
+					if (nodesp != NULL)
+						nodesp[i] = node;
+					mibp[i] = j;
+					break;
+				}
+			}
+			if (node == pnode) {
+				ret = ENOENT;
+				goto RETURN;
+			}
+		} else {
+			unsigned long index;
+			const ctl_node_t *inode;
+
+			/* Children are indexed. */
+			index = strtoul(elm, NULL, 10);
+			if (index == ULONG_MAX) {
+				ret = ENOENT;
+				goto RETURN;
+			}
+
+			inode = &node->u.named.children[0];
+			node = inode->u.indexed.index(mibp, *depthp,
+			    index);
+			if (node == NULL) {
+				ret = ENOENT;
+				goto RETURN;
+			}
+
+			if (nodesp != NULL)
+				nodesp[i] = node;
+			mibp[i] = (size_t)index;
+		}
+
+		if (node->ctl != NULL) {
+			/* Terminal node. */
+			if (*dot != '\0') {
+				/*
+				 * The name contains more elements than are
+				 * in this path through the tree.
+				 */
+				ret = ENOENT;
+				goto RETURN;
+			}
+			/* Complete lookup successful. */
+			*depthp = i + 1;
+			break;
+		}
+
+		/* Update elm. */
+		if (*dot == '\0') {
+			/* No more elements. */
+			ret = ENOENT;
+			goto RETURN;
+		}
+		elm = &dot[1];
+		dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot :
+		    strchr(elm, '\0');
+		elen = (size_t)((uintptr_t)dot - (uintptr_t)elm);
+	}
+
+	ret = 0;
+RETURN:
+	return (ret);
+}
+
+int
+ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp,
+    size_t newlen)
+{
+	int ret;
+	size_t depth;
+	ctl_node_t const *nodes[CTL_MAX_DEPTH];
+	size_t mib[CTL_MAX_DEPTH];
+	/* Resolve a dotted name and invoke the handler of the node found. */
+	if (ctl_initialized == false && ctl_init()) {
+		ret = EAGAIN;
+		goto RETURN;
+	}
+	/* depth is in/out: capacity of nodes[]/mib[] in, matched depth out. */
+	depth = CTL_MAX_DEPTH;
+	ret = ctl_lookup(name, nodes, mib, &depth);
+	if (ret != 0)
+		goto RETURN;
+
+	if (nodes[depth-1]->ctl == NULL) {
+		/* The name refers to a partial path through the ctl tree. */
+		ret = ENOENT;
+		goto RETURN;
+	}
+
+	ret = nodes[depth-1]->ctl(mib, depth, oldp, oldlenp, newp, newlen);
+RETURN:
+	return(ret);
+}
+
+int
+ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp)
+{
+	/*
+	 * Translate a dotted name into a MIB.  *miblenp bounds the number of
+	 * elements written to mibp; ctl_lookup() updates it on a full match.
+	 */
+
+	/* Build the ctl state on first use; failure is reported as EAGAIN. */
+	if (ctl_initialized == false && ctl_init())
+		return (EAGAIN);
+	/* Only the index path is wanted, so no node array is passed. */
+	return (ctl_lookup(name, NULL, mibp, miblenp));
+}
+
+int
+ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	int ret;
+	const ctl_node_t *node;
+	size_t i;
+	/* Invoke the handler reached by following mib from the root. */
+	if (ctl_initialized == false && ctl_init()) {
+		ret = EAGAIN;
+		goto RETURN;
+	}
+	/* NOTE(review): nothing stops mib running past a terminal node. */
+	/* Iterate down the tree. */
+	node = super_root_node;
+	for (i = 0; i < miblen; i++) {
+		if (node->u.named.children[0].named) {
+			/* Children are named. */
+			if (node->u.named.nchildren <= mib[i]) {
+				ret = ENOENT;
+				goto RETURN;
+			}
+			node = &node->u.named.children[mib[i]];
+		} else {
+			const ctl_node_t *inode;
+
+			/* Indexed element. */
+			inode = &node->u.named.children[0];
+			node = inode->u.indexed.index(mib, miblen, mib[i]);
+			if (node == NULL) {
+				ret = ENOENT;
+				goto RETURN;
+			}
+		}
+	}
+
+	/* Call the ctl function. */
+	if (node->ctl == NULL) {
+		/* Partial MIB. */
+		ret = ENOENT;
+		goto RETURN;
+	}
+	ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen);
+
+RETURN:
+	return(ret);
+}
+
+bool
+ctl_boot(void)
+{
+	/* Create the ctl mutex; a true result is passed back as failure. */
+	bool failed = malloc_mutex_init(&ctl_mtx);
+
+	/* Leave the ctl state unbuilt; ctl_init() runs on first use. */
+	if (failed == false)
+		ctl_initialized = false;
+	return (failed);
+}
+
+/******************************************************************************/
+/* *_ctl() functions. */
+
+#define	READONLY()	do {						\
+	if (newp != NULL || newlen != 0) {				\
+		ret = EPERM;						\
+		goto RETURN;						\
+	}								\
+} while (0)
+
+#define	WRITEONLY()	do {						\
+	if (oldp != NULL || oldlenp != NULL) {				\
+		ret = EPERM;						\
+		goto RETURN;						\
+	}								\
+} while (0)
+
+#define	VOID()	do {							\
+	READONLY();							\
+	WRITEONLY();							\
+} while (0)
+
+#define	READ(v, t)	do {						\
+	if (oldp != NULL && oldlenp != NULL) {				\
+		if (*oldlenp != sizeof(t)) {				\
+			size_t	copylen = (sizeof(t) <= *oldlenp)	\
+			    ? sizeof(t) : *oldlenp;			\
+			memcpy(oldp, (void *)&v, copylen);		\
+			ret = EINVAL;					\
+			goto RETURN;					\
+		} else							\
+			*(t *)oldp = v;					\
+	}								\
+} while (0)
+
+#define	WRITE(v, t)	do {						\
+	if (newp != NULL) {						\
+		if (newlen != sizeof(t)) {				\
+			ret = EINVAL;					\
+			goto RETURN;					\
+		}							\
+		v = *(t *)newp;						\
+	}								\
+} while (0)
+
+#define	CTL_RO_GEN(n, v, t)						\
+static int								\
+n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
+    void *newp, size_t newlen)						\
+{									\
+	int ret;							\
+	t oldval;							\
+									\
+	malloc_mutex_lock(&ctl_mtx);					\
+	READONLY();							\
+	oldval = v;							\
+	READ(oldval, t);						\
+									\
+	ret = 0;							\
+RETURN:									\
+	malloc_mutex_unlock(&ctl_mtx);					\
+	return (ret);							\
+}
+
+/*
+ * ctl_mtx is not acquired, under the assumption that no pertinent data will
+ * mutate during the call.
+ */
+#define	CTL_RO_NL_GEN(n, v, t)					\
+static int								\
+n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
+    void *newp, size_t newlen)						\
+{									\
+	int ret;							\
+	t oldval;							\
+									\
+	READONLY();							\
+	oldval = v;							\
+	READ(oldval, t);						\
+									\
+	ret = 0;							\
+RETURN:									\
+	return (ret);							\
+}
+
+#define	CTL_RO_TRUE_GEN(n)						\
+static int								\
+n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
+    void *newp, size_t newlen)						\
+{									\
+	int ret;							\
+	bool oldval;							\
+									\
+	READONLY();							\
+	oldval = true;							\
+	READ(oldval, bool);						\
+									\
+	ret = 0;							\
+RETURN:									\
+	return (ret);							\
+}
+
+#define	CTL_RO_FALSE_GEN(n)						\
+static int								\
+n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
+    void *newp, size_t newlen)						\
+{									\
+	int ret;							\
+	bool oldval;							\
+									\
+	READONLY();							\
+	oldval = false;							\
+	READ(oldval, bool);						\
+									\
+	ret = 0;							\
+RETURN:									\
+	return (ret);							\
+}
+
+CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *)
+
+static int
+epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	int ret;
+	uint64_t newval;
+	/* A nonzero write refreshes the stats; a read returns the epoch. */
+	malloc_mutex_lock(&ctl_mtx);
+	newval = 0;
+	WRITE(newval, uint64_t);
+	if (newval != 0)
+		ctl_refresh();
+	READ(ctl_epoch, uint64_t);
+
+	ret = 0;
+RETURN:
+	malloc_mutex_unlock(&ctl_mtx);
+	return (ret);
+}
+
+#ifdef JEMALLOC_TCACHE
+static int
+tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	int ret;
+	tcache_t *tcache;
+	/* Neither input nor output is accepted; either one yields EPERM. */
+	VOID();
+	/* Destroy the cache returned by TCACHE_GET(), if any, and clear it. */
+	tcache = TCACHE_GET();
+	if (tcache == NULL) {
+		ret = 0;
+		goto RETURN;
+	}
+	tcache_destroy(tcache);
+	TCACHE_SET(NULL);
+
+	ret = 0;
+RETURN:
+	return (ret);
+}
+#endif
+
+static int
+thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	int ret;
+	unsigned newind, oldind;
+	/* Report (oldp) and/or change (newp) the arena choose_arena() yields. */
+	newind = oldind = choose_arena()->ind;
+	WRITE(newind, unsigned);
+	READ(oldind, unsigned);
+	if (newind != oldind) {
+		arena_t *arena;
+
+		if (newind >= narenas) {
+			/* New arena index is out of range. */
+			ret = EFAULT;
+			goto RETURN;
+		}
+
+		/* Initialize arena if necessary; fail before touching counts. */
+		malloc_mutex_lock(&arenas_lock);
+		if ((arena = arenas[newind]) == NULL && (arena =
+		    arenas_extend(newind)) == NULL) {
+			malloc_mutex_unlock(&arenas_lock);
+			ret = EAGAIN;
+			goto RETURN;
+		}
+		arenas[oldind]->nthreads--;
+		arena->nthreads++;	/* arenas[newind] could still be NULL. */
+		malloc_mutex_unlock(&arenas_lock);
+
+		/* Set new arena association. */
+		ARENA_SET(arena);
+#ifdef JEMALLOC_TCACHE
+		{
+			tcache_t *tcache = TCACHE_GET();
+			if (tcache != NULL)
+				tcache->arena = arena;
+		}
+#endif
+	}
+
+	ret = 0;
+RETURN:
+	return (ret);
+}
+
+#ifdef JEMALLOC_STATS
+CTL_RO_NL_GEN(thread_allocated, ALLOCATED_GET(), uint64_t);
+CTL_RO_NL_GEN(thread_allocatedp, ALLOCATEDP_GET(), uint64_t *);
+CTL_RO_NL_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t);
+CTL_RO_NL_GEN(thread_deallocatedp, DEALLOCATEDP_GET(), uint64_t *);
+#endif
+
+/******************************************************************************/
+
+#ifdef JEMALLOC_DEBUG
+CTL_RO_TRUE_GEN(config_debug)
+#else
+CTL_RO_FALSE_GEN(config_debug)
+#endif
+
+#ifdef JEMALLOC_DSS
+CTL_RO_TRUE_GEN(config_dss)
+#else
+CTL_RO_FALSE_GEN(config_dss)
+#endif
+
+#ifdef JEMALLOC_DYNAMIC_PAGE_SHIFT
+CTL_RO_TRUE_GEN(config_dynamic_page_shift)
+#else
+CTL_RO_FALSE_GEN(config_dynamic_page_shift)
+#endif
+
+#ifdef JEMALLOC_FILL
+CTL_RO_TRUE_GEN(config_fill)
+#else
+CTL_RO_FALSE_GEN(config_fill)
+#endif
+
+#ifdef JEMALLOC_LAZY_LOCK
+CTL_RO_TRUE_GEN(config_lazy_lock)
+#else
+CTL_RO_FALSE_GEN(config_lazy_lock)
+#endif
+
+#ifdef JEMALLOC_PROF
+CTL_RO_TRUE_GEN(config_prof)
+#else
+CTL_RO_FALSE_GEN(config_prof)
+#endif
+
+#ifdef JEMALLOC_PROF_LIBGCC
+CTL_RO_TRUE_GEN(config_prof_libgcc)
+#else
+CTL_RO_FALSE_GEN(config_prof_libgcc)
+#endif
+
+#ifdef JEMALLOC_PROF_LIBUNWIND
+CTL_RO_TRUE_GEN(config_prof_libunwind)
+#else
+CTL_RO_FALSE_GEN(config_prof_libunwind)
+#endif
+
+#ifdef JEMALLOC_STATS
+CTL_RO_TRUE_GEN(config_stats)
+#else
+CTL_RO_FALSE_GEN(config_stats)
+#endif
+
+#ifdef JEMALLOC_SWAP
+CTL_RO_TRUE_GEN(config_swap)
+#else
+CTL_RO_FALSE_GEN(config_swap)
+#endif
+
+#ifdef JEMALLOC_SYSV
+CTL_RO_TRUE_GEN(config_sysv)
+#else
+CTL_RO_FALSE_GEN(config_sysv)
+#endif
+
+#ifdef JEMALLOC_TCACHE
+CTL_RO_TRUE_GEN(config_tcache)
+#else
+CTL_RO_FALSE_GEN(config_tcache)
+#endif
+
+#ifdef JEMALLOC_TINY
+CTL_RO_TRUE_GEN(config_tiny)
+#else
+CTL_RO_FALSE_GEN(config_tiny)
+#endif
+
+#ifdef JEMALLOC_TLS
+CTL_RO_TRUE_GEN(config_tls)
+#else
+CTL_RO_FALSE_GEN(config_tls)
+#endif
+
+#ifdef JEMALLOC_XMALLOC
+CTL_RO_TRUE_GEN(config_xmalloc)
+#else
+CTL_RO_FALSE_GEN(config_xmalloc)
+#endif
+
+/******************************************************************************/
+
+CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
+CTL_RO_NL_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t)
+CTL_RO_NL_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t)
+CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t)
+CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t)
+CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t)
+CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool)
+#ifdef JEMALLOC_FILL
+CTL_RO_NL_GEN(opt_junk, opt_junk, bool)
+CTL_RO_NL_GEN(opt_zero, opt_zero, bool)
+#endif
+#ifdef JEMALLOC_SYSV
+CTL_RO_NL_GEN(opt_sysv, opt_sysv, bool)
+#endif
+#ifdef JEMALLOC_XMALLOC
+CTL_RO_NL_GEN(opt_xmalloc, opt_xmalloc, bool)
+#endif
+#ifdef JEMALLOC_TCACHE
+CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool)
+CTL_RO_NL_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t)
+#endif
+#ifdef JEMALLOC_PROF
+CTL_RO_NL_GEN(opt_prof, opt_prof, bool)
+CTL_RO_NL_GEN(opt_prof_prefix, opt_prof_prefix, const char *)
+CTL_RO_GEN(opt_prof_active, opt_prof_active, bool) /* Mutable. */
+CTL_RO_NL_GEN(opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t)
+CTL_RO_NL_GEN(opt_lg_prof_sample, opt_lg_prof_sample, size_t)
+CTL_RO_NL_GEN(opt_lg_prof_interval, opt_lg_prof_interval, ssize_t)
+CTL_RO_NL_GEN(opt_prof_gdump, opt_prof_gdump, bool)
+CTL_RO_NL_GEN(opt_prof_leak, opt_prof_leak, bool)
+CTL_RO_NL_GEN(opt_prof_accum, opt_prof_accum, bool)
+CTL_RO_NL_GEN(opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t)
+#endif
+#ifdef JEMALLOC_SWAP
+CTL_RO_NL_GEN(opt_overcommit, opt_overcommit, bool)
+#endif
+
+/******************************************************************************/
+
+CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t)
+CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t)
+CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t)
+const ctl_node_t *
+arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i)
+{
+	/* Valid bin indices are [0, nbins); i == nbins is out of range. */
+	if (i >= nbins)
+		return (NULL);
+	return (super_arenas_bin_i_node);
+}
+
+CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << PAGE_SHIFT), size_t)
+const ctl_node_t *
+arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i)
+{
+	/* Valid large-class indices are [0, nlclasses); reject the rest. */
+	if (i >= nlclasses)
+		return (NULL);
+	return (super_arenas_lrun_i_node);
+}
+
+CTL_RO_NL_GEN(arenas_narenas, narenas, unsigned)
+
+static int
+arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
+{
+	int ret;
+	unsigned nread, i;
+
+	malloc_mutex_lock(&ctl_mtx);
+	READONLY();
+	/* As with READ(), a missing output buffer means nothing to copy. */
+	if (oldp == NULL || oldlenp == NULL) {
+		ret = 0;
+		goto RETURN;
+	}
+	/* On a size mismatch copy what fits, but still report EINVAL. */
+	ret = (*oldlenp != narenas * sizeof(bool)) ? EINVAL : 0;
+	nread = (*oldlenp < narenas * sizeof(bool))
+	    ? (*oldlenp / sizeof(bool)) : narenas;
+	for (i = 0; i < nread; i++)
+		((bool *)oldp)[i] = ctl_stats.arenas[i].initialized;
+
+RETURN:
+	malloc_mutex_unlock(&ctl_mtx);
+	return (ret);
+}
+
+CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t)
+CTL_RO_NL_GEN(arenas_cacheline, CACHELINE, size_t)
+CTL_RO_NL_GEN(arenas_subpage, SUBPAGE, size_t)
+CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t)
+CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t)
+#ifdef JEMALLOC_TINY
+CTL_RO_NL_GEN(arenas_tspace_min, (1U << LG_TINY_MIN), size_t)
+CTL_RO_NL_GEN(arenas_tspace_max, (qspace_min >> 1), size_t)
+#endif
+CTL_RO_NL_GEN(arenas_qspace_min, qspace_min, size_t)
+CTL_RO_NL_GEN(arenas_qspace_max, qspace_max, size_t)
+CTL_RO_NL_GEN(arenas_cspace_min, cspace_min, size_t)
+CTL_RO_NL_GEN(arenas_cspace_max, cspace_max, size_t)
+CTL_RO_NL_GEN(arenas_sspace_min, sspace_min, size_t)
+CTL_RO_NL_GEN(arenas_sspace_max, sspace_max, size_t)
+#ifdef JEMALLOC_TCACHE
+CTL_RO_NL_GEN(arenas_tcache_max, tcache_maxclass, size_t)
+#endif
+CTL_RO_NL_GEN(arenas_ntbins, ntbins, unsigned)
+CTL_RO_NL_GEN(arenas_nqbins, nqbins, unsigned)
+CTL_RO_NL_GEN(arenas_ncbins, ncbins, unsigned)
+CTL_RO_NL_GEN(arenas_nsbins, nsbins, unsigned)
+CTL_RO_NL_GEN(arenas_nbins, nbins, unsigned)
+#ifdef JEMALLOC_TCACHE
+CTL_RO_NL_GEN(arenas_nhbins, nhbins, unsigned)
+#endif
+CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t)
+
+static int
+arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	int ret;
+	unsigned arena;
+	/* Purge the arena whose index is written, or all if newp is NULL. */
+	WRITEONLY();
+	arena = UINT_MAX;
+	WRITE(arena, unsigned);
+	if (newp != NULL && arena >= narenas) {
+		ret = EFAULT;
+		goto RETURN;
+	} else {
+		arena_t *tarenas[narenas];
+		/* Snapshot the pointers so arenas_lock is not held to purge. */
+		malloc_mutex_lock(&arenas_lock);
+		memcpy(tarenas, arenas, sizeof(arena_t *) * narenas);
+		malloc_mutex_unlock(&arenas_lock);
+
+		if (arena == UINT_MAX) {
+			unsigned i;
+			for (i = 0; i < narenas; i++) {
+				if (tarenas[i] != NULL)
+					arena_purge_all(tarenas[i]);
+			}
+		} else {
+			assert(arena < narenas);
+			if (tarenas[arena] != NULL)
+				arena_purge_all(tarenas[arena]);
+		}
+	}
+
+	ret = 0;
+RETURN:
+	return (ret);
+}
+
+/******************************************************************************/
+
+#ifdef JEMALLOC_PROF
+static int
+prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	int ret;
+	bool oldval;
+
+	malloc_mutex_lock(&ctl_mtx); /* Protect opt_prof_active. */
+	oldval = opt_prof_active;
+	if (newp != NULL) {
+		/*
+		 * The memory barriers will tend to make opt_prof_active
+		 * propagate faster on systems with weak memory ordering.
+		 */
+		mb_write();
+		WRITE(opt_prof_active, bool);
+		mb_write();
+	}
+	READ(oldval, bool);
+
+	ret = 0;
+RETURN:
+	malloc_mutex_unlock(&ctl_mtx);
+	return (ret);
+}
+
+static int
+prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	int ret;
+	const char *filename = NULL;
+	/* Write-only; with no newp, NULL is what prof_mdump() receives. */
+	WRITEONLY();
+	WRITE(filename, const char *);
+
+	if (prof_mdump(filename)) {
+		ret = EFAULT;
+		goto RETURN;
+	}
+
+	ret = 0;
+RETURN:
+	return (ret);
+}
+
+CTL_RO_NL_GEN(prof_interval, prof_interval, uint64_t)
+#endif
+
+/******************************************************************************/
+
+#ifdef JEMALLOC_STATS
+CTL_RO_GEN(stats_chunks_current, ctl_stats.chunks.current, size_t)
+CTL_RO_GEN(stats_chunks_total, ctl_stats.chunks.total, uint64_t)
+CTL_RO_GEN(stats_chunks_high, ctl_stats.chunks.high, size_t)
+CTL_RO_GEN(stats_huge_allocated, huge_allocated, size_t)
+CTL_RO_GEN(stats_huge_nmalloc, huge_nmalloc, uint64_t)
+CTL_RO_GEN(stats_huge_ndalloc, huge_ndalloc, uint64_t)
+CTL_RO_GEN(stats_arenas_i_small_allocated,
+    ctl_stats.arenas[mib[2]].allocated_small, size_t)
+CTL_RO_GEN(stats_arenas_i_small_nmalloc,
+    ctl_stats.arenas[mib[2]].nmalloc_small, uint64_t)
+CTL_RO_GEN(stats_arenas_i_small_ndalloc,
+    ctl_stats.arenas[mib[2]].ndalloc_small, uint64_t)
+CTL_RO_GEN(stats_arenas_i_small_nrequests,
+    ctl_stats.arenas[mib[2]].nrequests_small, uint64_t)
+CTL_RO_GEN(stats_arenas_i_large_allocated,
+    ctl_stats.arenas[mib[2]].astats.allocated_large, size_t)
+CTL_RO_GEN(stats_arenas_i_large_nmalloc,
+    ctl_stats.arenas[mib[2]].astats.nmalloc_large, uint64_t)
+CTL_RO_GEN(stats_arenas_i_large_ndalloc,
+    ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t)
+CTL_RO_GEN(stats_arenas_i_large_nrequests,
+    ctl_stats.arenas[mib[2]].astats.nrequests_large, uint64_t)
+
+CTL_RO_GEN(stats_arenas_i_bins_j_allocated,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].allocated, size_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_nmalloc,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].nmalloc, uint64_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_ndalloc,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].ndalloc, uint64_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_nrequests,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].nrequests, uint64_t)
+#ifdef JEMALLOC_TCACHE
+CTL_RO_GEN(stats_arenas_i_bins_j_nfills,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].nfills, uint64_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_nflushes,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].nflushes, uint64_t)
+#endif
+CTL_RO_GEN(stats_arenas_i_bins_j_nruns,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].nruns, uint64_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_nreruns,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].reruns, uint64_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_highruns,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].highruns, size_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_curruns,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t)
+
+const ctl_node_t *
+stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j)
+{
+	/* bstats holds nbins entries, so the last valid index is nbins-1. */
+	if (j >= nbins)
+		return (NULL);
+	return (super_stats_arenas_i_bins_j_node);
+}
+
+CTL_RO_GEN(stats_arenas_i_lruns_j_nmalloc,
+    ctl_stats.arenas[mib[2]].lstats[mib[4]].nmalloc, uint64_t)
+CTL_RO_GEN(stats_arenas_i_lruns_j_ndalloc,
+    ctl_stats.arenas[mib[2]].lstats[mib[4]].ndalloc, uint64_t)
+CTL_RO_GEN(stats_arenas_i_lruns_j_nrequests,
+    ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t)
+CTL_RO_GEN(stats_arenas_i_lruns_j_curruns,
+    ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t)
+CTL_RO_GEN(stats_arenas_i_lruns_j_highruns,
+    ctl_stats.arenas[mib[2]].lstats[mib[4]].highruns, size_t)
+
+const ctl_node_t *
+stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j)
+{
+	/* lstats holds nlclasses entries; reject j == nlclasses as well. */
+	if (j >= nlclasses)
+		return (NULL);
+	return (super_stats_arenas_i_lruns_j_node);
+}
+
+#endif
+CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned)
+CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t)
+CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t)
+#ifdef JEMALLOC_STATS
+CTL_RO_GEN(stats_arenas_i_mapped, ctl_stats.arenas[mib[2]].astats.mapped,
+    size_t)
+CTL_RO_GEN(stats_arenas_i_npurge, ctl_stats.arenas[mib[2]].astats.npurge,
+    uint64_t)
+CTL_RO_GEN(stats_arenas_i_nmadvise, ctl_stats.arenas[mib[2]].astats.nmadvise,
+    uint64_t)
+CTL_RO_GEN(stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged,
+    uint64_t)
+#endif
+
+const ctl_node_t *
+stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i)
+{
+	const ctl_node_t * ret;
+
+	malloc_mutex_lock(&ctl_mtx);
+	/* Slots [0, narenas] exist; the last one holds the summed stats. */
+	if (i > narenas || ctl_stats.arenas[i].initialized == false) {
+		ret = NULL;
+		goto RETURN;
+	}
+	ret = super_stats_arenas_i_node;
+RETURN:
+	malloc_mutex_unlock(&ctl_mtx);
+	return (ret);
+}
+
+#ifdef JEMALLOC_STATS
+CTL_RO_GEN(stats_cactive, &stats_cactive, size_t *)
+CTL_RO_GEN(stats_allocated, ctl_stats.allocated, size_t)
+CTL_RO_GEN(stats_active, ctl_stats.active, size_t)
+CTL_RO_GEN(stats_mapped, ctl_stats.mapped, size_t)
+#endif
+
+/******************************************************************************/
+
+#ifdef JEMALLOC_SWAP
+#  ifdef JEMALLOC_STATS
+CTL_RO_GEN(swap_avail, ctl_stats.swap_avail, size_t)
+#  endif
+
+static int
+swap_prezeroed_ctl(const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
+{
+	int ret;
+
+	malloc_mutex_lock(&ctl_mtx);
+	if (swap_enabled) {
+		READONLY();
+	} else {
+		/*
+		 * swap_prezeroed isn't actually used by the swap code until it
+		 * is set during a successful chunk_swap_enabled() call.  We
+		 * use it here to store the value that we'll pass to
+		 * chunk_swap_enable() in a swap.fds mallctl().  This is not
+		 * very clean, but the obvious alternatives are even worse.
+		 */
+		WRITE(swap_prezeroed, bool);
+	}
+
+	READ(swap_prezeroed, bool);
+
+	ret = 0;
+RETURN:
+	malloc_mutex_unlock(&ctl_mtx);
+	return (ret);
+}
+
+CTL_RO_GEN(swap_nfds, swap_nfds, size_t)
+
+static int
+swap_fds_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	int ret;
+
+	malloc_mutex_lock(&ctl_mtx);
+	if (swap_enabled) {
+		READONLY();
+	} else if (newp != NULL) {
+		size_t nfds = newlen / sizeof(int);
+
+		{
+			int fds[nfds];
+
+			memcpy(fds, newp, nfds * sizeof(int));
+			if (chunk_swap_enable(fds, nfds, swap_prezeroed)) {
+				ret = EFAULT;
+				goto RETURN;
+			}
+		}
+	}
+
+	if (oldp != NULL && oldlenp != NULL) {
+		if (*oldlenp != swap_nfds * sizeof(int)) {
+			size_t copylen = (swap_nfds * sizeof(int) <= *oldlenp)
+			    ? swap_nfds * sizeof(int) : *oldlenp;
+
+			memcpy(oldp, swap_fds, copylen);
+			ret = EINVAL;
+			goto RETURN;
+		} else
+			memcpy(oldp, swap_fds, *oldlenp);
+	}
+
+	ret = 0;
+RETURN:
+	malloc_mutex_unlock(&ctl_mtx);
+	return (ret);
+}
+#endif
diff --git a/deps/jemalloc.orig/src/extent.c b/deps/jemalloc.orig/src/extent.c
new file mode 100644
index 00000000..3c04d3aa
--- /dev/null
+++ b/deps/jemalloc.orig/src/extent.c
@@ -0,0 +1,41 @@
+#define	JEMALLOC_EXTENT_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+
+#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS))
+static inline int
+extent_szad_comp(extent_node_t *a, extent_node_t *b)
+{
+	/*
+	 * Order primarily by size and break ties by address.  Each
+	 * comparison is written as (x > y) - (x < y), which is -1, 0, or 1.
+	 */
+	const size_t lhs_size = a->size;
+	const size_t rhs_size = b->size;
+	const uintptr_t lhs_addr = (uintptr_t)a->addr;
+	const uintptr_t rhs_addr = (uintptr_t)b->addr;
+
+	if (lhs_size != rhs_size)
+		return ((lhs_size > rhs_size) - (lhs_size < rhs_size));
+
+	return ((lhs_addr > rhs_addr) - (lhs_addr < rhs_addr));
+}
+
+/* Generate red-black tree functions. */
+rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, link_szad,
+    extent_szad_comp)
+#endif
+
+static inline int
+extent_ad_comp(extent_node_t *a, extent_node_t *b)
+{
+	const uintptr_t lhs = (uintptr_t)a->addr;
+	const uintptr_t rhs = (uintptr_t)b->addr;
+	/* Three-way comparison of the two addresses: -1, 0, or 1. */
+	return ((lhs > rhs) - (lhs < rhs));
+}
+
+/* Generate red-black tree functions. */
+rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad,
+    extent_ad_comp)
diff --git a/deps/jemalloc.orig/src/hash.c b/deps/jemalloc.orig/src/hash.c
new file mode 100644
index 00000000..cfa4da02
--- /dev/null
+++ b/deps/jemalloc.orig/src/hash.c
@@ -0,0 +1,2 @@
+#define	JEMALLOC_HASH_C_
+#include "jemalloc/internal/jemalloc_internal.h"
diff --git a/deps/jemalloc.orig/src/huge.c b/deps/jemalloc.orig/src/huge.c
new file mode 100644
index 00000000..a4f9b054
--- /dev/null
+++ b/deps/jemalloc.orig/src/huge.c
@@ -0,0 +1,386 @@
+#define	JEMALLOC_HUGE_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+/*
+ * Huge allocation statistics.  All three are modified only while huge_mtx is
+ * held (see huge_malloc(), huge_palloc() and huge_dalloc()).
+ */
+#ifdef JEMALLOC_STATS
+uint64_t	huge_nmalloc;	/* Count of huge allocations made. */
+uint64_t	huge_ndalloc;	/* Count of huge deallocations made. */
+size_t		huge_allocated;	/* Sum of sizes of live huge allocations. */
+#endif
+
+/* Protects the huge tree below and, when enabled, the statistics above. */
+malloc_mutex_t	huge_mtx;
+
+/******************************************************************************/
+
+/* Tree of chunks that are stand-alone huge allocations. */
+static extent_tree_t	huge;
+
+/*
+ * Allocate a huge region of at least size bytes.
+ *
+ * size is rounded up with CHUNK_CEILING(), the chunk(s) are obtained from
+ * chunk_alloc(), and an extent node recording the resulting address and
+ * rounded size is inserted into the huge tree.  Returns the chunk address, or
+ * NULL if the rounded size wraps around to 0 or if either the tracking node
+ * or the chunk(s) cannot be allocated.
+ *
+ * zero is passed by address to chunk_alloc(); when JEMALLOC_FILL is enabled
+ * and zero is still false afterwards, the region is filled according to
+ * opt_junk/opt_zero.
+ */
+void *
+huge_malloc(size_t size, bool zero)
+{
+	void *ret;
+	size_t csize;
+	extent_node_t *node;
+
+	/* Allocate one or more contiguous chunks for this request. */
+
+	csize = CHUNK_CEILING(size);
+	if (csize == 0) {
+		/* size is large enough to cause size_t wrap-around. */
+		return (NULL);
+	}
+
+	/* Allocate an extent node with which to track the chunk. */
+	node = base_node_alloc();
+	if (node == NULL)
+		return (NULL);
+
+	ret = chunk_alloc(csize, false, &zero);
+	if (ret == NULL) {
+		/* Give the unused tracking node back before failing. */
+		base_node_dealloc(node);
+		return (NULL);
+	}
+
+	/* Insert node into huge. */
+	node->addr = ret;
+	node->size = csize;
+
+	malloc_mutex_lock(&huge_mtx);
+	extent_tree_ad_insert(&huge, node);
+#ifdef JEMALLOC_STATS
+	stats_cactive_add(csize);
+	huge_nmalloc++;
+	huge_allocated += csize;
+#endif
+	malloc_mutex_unlock(&huge_mtx);
+
+#ifdef JEMALLOC_FILL
+	/* Fill outside the lock; junk takes precedence over zero. */
+	if (zero == false) {
+		if (opt_junk)
+			memset(ret, 0xa5, csize);
+		else if (opt_zero)
+			memset(ret, 0, csize);
+	}
+#endif
+
+	return (ret);
+}
+
+/*
+ * Only handles large allocations that require more than chunk alignment.
+ *
+ * Over-allocates, then gives back the leading and/or trailing space via
+ * chunk_dealloc() so that the remaining CHUNK_CEILING(size) bytes start on an
+ * alignment boundary.  The result is tracked in the huge tree exactly like a
+ * huge_malloc() allocation.  Returns NULL if the tracking node or the chunks
+ * cannot be allocated.
+ *
+ * NOTE(review): unlike huge_malloc(), no wrap-around check is made on
+ * CHUNK_CEILING(size) or on alloc_size here; presumably callers validate
+ * size/alignment first -- confirm against the call sites.
+ */
+void *
+huge_palloc(size_t size, size_t alignment, bool zero)
+{
+	void *ret;
+	size_t alloc_size, chunk_size, offset;
+	extent_node_t *node;
+
+	/*
+	 * This allocation requires alignment that is even larger than chunk
+	 * alignment.  This means that huge_malloc() isn't good enough.
+	 *
+	 * Allocate almost twice as many chunks as are demanded by the size or
+	 * alignment, in order to assure the alignment can be achieved, then
+	 * unmap leading and trailing chunks.
+	 */
+	assert(alignment > chunksize);
+
+	chunk_size = CHUNK_CEILING(size);
+
+	if (size >= alignment)
+		alloc_size = chunk_size + alignment - chunksize;
+	else
+		alloc_size = (alignment << 1) - chunksize;
+
+	/* Allocate an extent node with which to track the chunk. */
+	node = base_node_alloc();
+	if (node == NULL)
+		return (NULL);
+
+	ret = chunk_alloc(alloc_size, false, &zero);
+	if (ret == NULL) {
+		base_node_dealloc(node);
+		return (NULL);
+	}
+
+	/* Distance of ret past the preceding alignment boundary. */
+	offset = (uintptr_t)ret & (alignment - 1);
+	assert((offset & chunksize_mask) == 0);
+	assert(offset < alloc_size);
+	if (offset == 0) {
+		/* Trim trailing space. */
+		chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
+		    - chunk_size, true);
+	} else {
+		size_t trailsize;
+
+		/* Trim leading space. */
+		chunk_dealloc(ret, alignment - offset, true);
+
+		/* Advance ret to the next alignment boundary. */
+		ret = (void *)((uintptr_t)ret + (alignment - offset));
+
+		trailsize = alloc_size - (alignment - offset) - chunk_size;
+		if (trailsize != 0) {
+		    /* Trim trailing space. */
+		    assert(trailsize < alloc_size);
+		    chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
+			trailsize, true);
+		}
+	}
+
+	/* Insert node into huge. */
+	node->addr = ret;
+	node->size = chunk_size;
+
+	malloc_mutex_lock(&huge_mtx);
+	extent_tree_ad_insert(&huge, node);
+#ifdef JEMALLOC_STATS
+	stats_cactive_add(chunk_size);
+	huge_nmalloc++;
+	huge_allocated += chunk_size;
+#endif
+	malloc_mutex_unlock(&huge_mtx);
+
+#ifdef JEMALLOC_FILL
+	/* Fill outside the lock; junk takes precedence over zero. */
+	if (zero == false) {
+		if (opt_junk)
+			memset(ret, 0xa5, chunk_size);
+		else if (opt_zero)
+			memset(ret, 0, chunk_size);
+	}
+#endif
+
+	return (ret);
+}
+
+/*
+ * Try to satisfy a reallocation request in place.
+ *
+ * Returns ptr if the existing huge allocation of oldsize bytes already
+ * covers the request, i.e. its chunk-rounded size is at least that of size
+ * and at most that of size+extra.  Returns NULL if the allocation would have
+ * to move.
+ */
+void *
+huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra)
+{
+	size_t old_ceil;
+
+	/* Anything that is not larger than arena_maxclass must move. */
+	if (oldsize <= arena_maxclass)
+		return (NULL);
+
+	/* The chunk-rounded size must stay the same to avoid a move. */
+	old_ceil = CHUNK_CEILING(oldsize);
+	if (old_ceil < CHUNK_CEILING(size)
+	    || old_ceil > CHUNK_CEILING(size+extra))
+		return (NULL);
+
+	assert(old_ceil == oldsize);
+#ifdef JEMALLOC_FILL
+	/* When shrinking, junk-fill the bytes past the new size. */
+	if (opt_junk && size < oldsize) {
+		void *tail = (void *)((uintptr_t)ptr + size);
+
+		memset(tail, 0x5a, oldsize - size);
+	}
+#endif
+	return (ptr);
+}
+
+/*
+ * Reallocate ptr (currently oldsize bytes) to at least size bytes, and
+ * preferably size+extra bytes.
+ *
+ * First tries huge_ralloc_no_move().  Otherwise a new huge region is
+ * allocated (via huge_palloc() when alignment exceeds chunksize, else
+ * huge_malloc()), retrying without extra if the larger request fails.  The
+ * contents are then transferred either with mremap(2), where available and
+ * applicable, or with memcpy(), and the old allocation is released.
+ *
+ * Returns the (possibly moved) allocation, or NULL if no new region could be
+ * allocated, in which case ptr is left untouched.
+ */
+void *
+huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
+    size_t alignment, bool zero)
+{
+	void *ret;
+	size_t copysize;
+
+	/* Try to avoid moving the allocation. */
+	ret = huge_ralloc_no_move(ptr, oldsize, size, extra);
+	if (ret != NULL)
+		return (ret);
+
+	/*
+	 * size and oldsize are different enough that we need to use a
+	 * different size class.  In that case, fall back to allocating new
+	 * space and copying.
+	 */
+	if (alignment > chunksize)
+		ret = huge_palloc(size + extra, alignment, zero);
+	else
+		ret = huge_malloc(size + extra, zero);
+
+	if (ret == NULL) {
+		if (extra == 0)
+			return (NULL);
+		/* Try again, this time without extra. */
+		if (alignment > chunksize)
+			ret = huge_palloc(size, alignment, zero);
+		else
+			ret = huge_malloc(size, zero);
+
+		if (ret == NULL)
+			return (NULL);
+	}
+
+	/*
+	 * Copy at most size bytes (not size+extra), since the caller has no
+	 * expectation that the extra bytes will be reliably preserved.
+	 */
+	copysize = (size < oldsize) ? size : oldsize;
+
+	/*
+	 * Use mremap(2) if this is a huge-->huge reallocation, and neither the
+	 * source nor the destination are in swap or dss.
+	 */
+#ifdef JEMALLOC_MREMAP_FIXED
+	if (oldsize >= chunksize
+#  ifdef JEMALLOC_SWAP
+	    && (swap_enabled == false || (chunk_in_swap(ptr) == false &&
+	    chunk_in_swap(ret) == false))
+#  endif
+#  ifdef JEMALLOC_DSS
+	    && chunk_in_dss(ptr) == false && chunk_in_dss(ret) == false
+#  endif
+	    ) {
+		size_t newsize = huge_salloc(ret);
+
+		/*
+		 * Remove ptr from the tree of huge allocations before
+		 * performing the remap operation, in order to avoid the
+		 * possibility of another thread acquiring that mapping before
+		 * this one removes it from the tree.
+		 */
+		huge_dalloc(ptr, false);
+		if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED,
+		    ret) == MAP_FAILED) {
+			/*
+			 * Assuming no chunk management bugs in the allocator,
+			 * the only documented way an error can occur here is
+			 * if the application changed the map type for a
+			 * portion of the old allocation.  This is firmly in
+			 * undefined behavior territory, so write a diagnostic
+			 * message, and optionally abort.
+			 */
+			char buf[BUFERROR_BUF];
+
+			buferror(errno, buf, sizeof(buf));
+			malloc_write("<jemalloc>: Error in mremap(): ");
+			malloc_write(buf);
+			malloc_write("\n");
+			if (opt_abort)
+				abort();
+			/* Fall back to a plain copy, then unmap the source. */
+			memcpy(ret, ptr, copysize);
+			chunk_dealloc_mmap(ptr, oldsize);
+		}
+	} else
+#endif
+	{
+		/* Generic path: copy the data and free the old allocation. */
+		memcpy(ret, ptr, copysize);
+		idalloc(ptr);
+	}
+	return (ret);
+}
+
+/*
+ * Release the huge allocation that starts at ptr.
+ *
+ * The tracking node is removed from the huge tree (and the statistics are
+ * adjusted) under huge_mtx; the chunk(s) and the node itself are released
+ * after the mutex has been dropped.  unmap is forwarded to chunk_dealloc().
+ */
+void
+huge_dalloc(void *ptr, bool unmap)
+{
+	extent_node_t key, *node;
+
+	key.addr = ptr;
+
+	malloc_mutex_lock(&huge_mtx);
+
+	/* Look the allocation up by address and unlink it from the tree. */
+	node = extent_tree_ad_search(&huge, &key);
+	assert(node != NULL);
+	assert(node->addr == ptr);
+	extent_tree_ad_remove(&huge, node);
+
+#ifdef JEMALLOC_STATS
+	stats_cactive_sub(node->size);
+	huge_ndalloc++;
+	huge_allocated -= node->size;
+#endif
+
+	malloc_mutex_unlock(&huge_mtx);
+
+#if (defined(JEMALLOC_FILL) && \
+    (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)))
+	/* Junk-fill the region before it is handed back for unmapping. */
+	if (unmap && opt_junk)
+		memset(node->addr, 0x5a, node->size);
+#endif
+
+	chunk_dealloc(node->addr, node->size, unmap);
+
+	base_node_dealloc(node);
+}
+
+/*
+ * Return the recorded size of the huge allocation that starts at ptr.  The
+ * lookup is performed while holding huge_mtx.
+ */
+size_t
+huge_salloc(const void *ptr)
+{
+	extent_node_t key, *found;
+	size_t size;
+
+	key.addr = __DECONST(void *, ptr);
+
+	malloc_mutex_lock(&huge_mtx);
+	/* Look the allocation up by address; it must be present. */
+	found = extent_tree_ad_search(&huge, &key);
+	assert(found != NULL);
+	size = found->size;
+	malloc_mutex_unlock(&huge_mtx);
+
+	return (size);
+}
+
+#ifdef JEMALLOC_PROF
+/*
+ * Return the profiling context stored in the tracking node of the huge
+ * allocation that starts at ptr.  The lookup is performed under huge_mtx.
+ */
+prof_ctx_t *
+huge_prof_ctx_get(const void *ptr)
+{
+	extent_node_t key, *found;
+	prof_ctx_t *ctx;
+
+	key.addr = __DECONST(void *, ptr);
+
+	malloc_mutex_lock(&huge_mtx);
+	/* Look the allocation up by address; it must be present. */
+	found = extent_tree_ad_search(&huge, &key);
+	assert(found != NULL);
+	ctx = found->prof_ctx;
+	malloc_mutex_unlock(&huge_mtx);
+
+	return (ctx);
+}
+
+/*
+ * Store ctx as the profiling context in the tracking node of the huge
+ * allocation that starts at ptr.  The update is performed under huge_mtx.
+ */
+void
+huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
+{
+	extent_node_t key, *found;
+
+	key.addr = __DECONST(void *, ptr);
+
+	malloc_mutex_lock(&huge_mtx);
+	/* Look the allocation up by address; it must be present. */
+	found = extent_tree_ad_search(&huge, &key);
+	assert(found != NULL);
+	found->prof_ctx = ctx;
+	malloc_mutex_unlock(&huge_mtx);
+}
+#endif
+
+/*
+ * One-time initialization of the huge allocation bookkeeping: the mutex, the
+ * (empty) tree and, when enabled, the statistics counters.  Returns true if
+ * the mutex cannot be initialized, false on success.
+ */
+bool
+huge_boot(void)
+{
+
+	if (malloc_mutex_init(&huge_mtx)) {
+		/* Nothing else can be set up without the mutex. */
+		return (true);
+	}
+
+	/* Start out with no huge allocations tracked. */
+	extent_tree_ad_new(&huge);
+
+#ifdef JEMALLOC_STATS
+	huge_allocated = 0;
+	huge_ndalloc = 0;
+	huge_nmalloc = 0;
+#endif
+
+	return (false);
+}
diff --git a/deps/jemalloc.orig/src/jemalloc.c b/deps/jemalloc.orig/src/jemalloc.c
new file mode 100644
index 00000000..a161c2e2
--- /dev/null
+++ b/deps/jemalloc.orig/src/jemalloc.c
@@ -0,0 +1,1881 @@
+#define	JEMALLOC_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+/*
+ * arenas_lock is held while the arenas array is scanned/extended and while
+ * per-arena thread counts are adjusted (see choose_arena_hard() and
+ * arenas_cleanup()).
+ */
+malloc_mutex_t		arenas_lock;
+/* Array of narenas arena pointers; NULL entries are not yet initialized. */
+arena_t			**arenas;
+unsigned		narenas;
+
+/* Thread-specific data key whose destructor is arenas_cleanup(). */
+pthread_key_t		arenas_tsd;
+#ifndef NO_TLS
+__thread arena_t	*arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
+#endif
+
+/* Per-thread allocation counters: TLS when available, otherwise TSD. */
+#ifdef JEMALLOC_STATS
+#  ifndef NO_TLS
+__thread thread_allocated_t	thread_allocated_tls;
+#  else
+pthread_key_t		thread_allocated_tsd;
+#  endif
+#endif
+
+/* Set to true once the allocator has been initialized. */
+static bool		malloc_initialized = false;
+
+/* Used to let the initializing thread recursively allocate. */
+static pthread_t	malloc_initializer = (unsigned long)0;
+
+/* Used to avoid initialization races. */
+static malloc_mutex_t	init_lock =
+#ifdef JEMALLOC_OSSPIN
+    0
+#else
+    MALLOC_MUTEX_INITIALIZER
+#endif
+    ;
+
+/* Page geometry, computed from sysconf() in malloc_init_hard(). */
+#ifdef DYNAMIC_PAGE_SHIFT
+size_t		pagesize;
+size_t		pagesize_mask;
+size_t		lg_pagesize;
+#endif
+
+/* Number of CPUs, as reported by malloc_ncpus() during initialization. */
+unsigned	ncpus;
+
+/* Runtime configuration options. */
+const char	*JEMALLOC_P(malloc_conf) JEMALLOC_ATTR(visibility("default"));
+/* Debug builds default to aborting on errors and junk-filling memory. */
+#ifdef JEMALLOC_DEBUG
+bool	opt_abort = true;
+#  ifdef JEMALLOC_FILL
+bool	opt_junk = true;
+#  endif
+#else
+bool	opt_abort = false;
+#  ifdef JEMALLOC_FILL
+bool	opt_junk = false;
+#  endif
+#endif
+#ifdef JEMALLOC_SYSV
+bool	opt_sysv = false;
+#endif
+#ifdef JEMALLOC_XMALLOC
+bool	opt_xmalloc = false;
+#endif
+#ifdef JEMALLOC_FILL
+bool	opt_zero = false;
+#endif
+/* 0 means "derive the arena count from ncpus" (see malloc_init_hard()). */
+size_t	opt_narenas = 0;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static void	wrtmessage(void *cbopaque, const char *s);
+static void	stats_print_atexit(void);
+static unsigned	malloc_ncpus(void);
+static void	arenas_cleanup(void *arg);
+#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
+static void	thread_allocated_cleanup(void *arg);
+#endif
+static bool	malloc_conf_next(char const **opts_p, char const **k_p,
+    size_t *klen_p, char const **v_p, size_t *vlen_p);
+static void	malloc_conf_error(const char *msg, const char *k, size_t klen,
+    const char *v, size_t vlen);
+static void	malloc_conf_init(void);
+static bool	malloc_init_hard(void);
+static int	imemalign(void **memptr, size_t alignment, size_t size);
+
+/******************************************************************************/
+/* malloc_message() setup. */
+
+/*
+ * Default message sink: writes the NUL-terminated string s to standard error
+ * with write(2).  cbopaque is not used.  This is the initial value of the
+ * malloc_message function pointer.
+ */
+#ifdef JEMALLOC_HAVE_ATTR
+JEMALLOC_ATTR(visibility("hidden"))
+#else
+static
+#endif
+void
+wrtmessage(void *cbopaque, const char *s)
+{
+	/*
+	 * When JEMALLOC_CC_SILENCE is defined, the result of write() is
+	 * captured and consumed so that the compiler does not warn about an
+	 * ignored return value; the error itself is not acted upon.
+	 */
+#ifdef JEMALLOC_CC_SILENCE
+	int result =
+#endif
+	    write(STDERR_FILENO, s, strlen(s));
+#ifdef JEMALLOC_CC_SILENCE
+	if (result < 0)
+		result = errno;
+#endif
+}
+
+void	(*JEMALLOC_P(malloc_message))(void *, const char *s)
+    JEMALLOC_ATTR(visibility("default")) = wrtmessage;
+
+/******************************************************************************/
+/*
+ * Begin miscellaneous support functions.
+ */
+
+/*
+ * Create a new arena and insert it into the arenas array at index ind.
+ *
+ * Returns the new arena.  If the arena cannot be allocated or initialized, a
+ * diagnostic is written (aborting when opt_abort is set) and arenas[0] is
+ * returned instead, leaving arenas[ind] unchanged.
+ */
+arena_t *
+arenas_extend(unsigned ind)
+{
+	arena_t *arena;
+	size_t arena_size;
+
+	/* Allocate enough space for trailing bins. */
+	arena_size = offsetof(arena_t, bins) + (sizeof(arena_bin_t) * nbins);
+	arena = (arena_t *)base_alloc(arena_size);
+
+	if (arena == NULL || arena_new(arena, ind)) {
+		/*
+		 * Only reached if there is an OOM error.
+		 *
+		 * OOM here is quite inconvenient to propagate, since dealing
+		 * with it would require a check for failure in the fast path.
+		 * Instead, punt by using arenas[0].  In practice, this is an
+		 * extremely unlikely failure.
+		 */
+		malloc_write("<jemalloc>: Error initializing arena\n");
+		if (opt_abort)
+			abort();
+
+		return (arenas[0]);
+	}
+
+	arenas[ind] = arena;
+	return (arena);
+}
+
+/*
+ * Choose an arena based on a per-thread value (slow-path code only, called
+ * only by choose_arena()).
+ *
+ * With more than one arena configured, the arenas array is scanned under
+ * arenas_lock for the initialized arena with the fewest assigned threads and
+ * for the first uninitialized slot.  An arena that currently has no threads
+ * is reused; otherwise a new arena is created in the first free slot, and
+ * only when every slot is already initialized is the least loaded arena
+ * shared.  The chosen arena's thread count is incremented and the arena is
+ * recorded for the calling thread with ARENA_SET().
+ */
+arena_t *
+choose_arena_hard(void)
+{
+	arena_t *ret;
+
+	if (narenas > 1) {
+		unsigned i, choose, first_null;
+
+		choose = 0;
+		first_null = narenas;
+		malloc_mutex_lock(&arenas_lock);
+		assert(arenas[0] != NULL);
+		for (i = 1; i < narenas; i++) {
+			if (arenas[i] != NULL) {
+				/*
+				 * Choose the first arena that has the lowest
+				 * number of threads assigned to it.
+				 */
+				if (arenas[i]->nthreads <
+				    arenas[choose]->nthreads)
+					choose = i;
+			} else if (first_null == narenas) {
+				/*
+				 * Record the index of the first uninitialized
+				 * arena, in case all extant arenas are in use.
+				 *
+				 * NB: It is possible for there to be
+				 * discontinuities in terms of initialized
+				 * versus uninitialized arenas, due to the
+				 * "thread.arena" mallctl.
+				 */
+				first_null = i;
+			}
+		}
+
+		/*
+		 * choose always indexes an initialized arena, so the test
+		 * for an unloaded arena has to look at its thread count
+		 * rather than at the pointer itself.
+		 */
+		if (arenas[choose]->nthreads == 0 || first_null == narenas) {
+			/*
+			 * Use an unloaded arena, or the least loaded arena if
+			 * all arenas are already initialized.
+			 */
+			ret = arenas[choose];
+		} else {
+			/* Initialize a new arena. */
+			ret = arenas_extend(first_null);
+		}
+		ret->nthreads++;
+		malloc_mutex_unlock(&arenas_lock);
+	} else {
+		/* Single-arena configuration: everything shares arenas[0]. */
+		ret = arenas[0];
+		malloc_mutex_lock(&arenas_lock);
+		ret->nthreads++;
+		malloc_mutex_unlock(&arenas_lock);
+	}
+
+	ARENA_SET(ret);
+
+	return (ret);
+}
+
+/*
+ * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so
+ * provide a wrapper.
+ *
+ * Writes the message for error number errnum into buf (at most buflen bytes,
+ * NUL-terminated when buflen is non-zero).  With the GNU strerror_r() the
+ * return value is always 0; otherwise the return value of strerror_r() is
+ * passed through.
+ */
+int
+buferror(int errnum, char *buf, size_t buflen)
+{
+#ifdef _GNU_SOURCE
+	/* Describe the caller-supplied error, not whatever errno holds now. */
+	char *b = strerror_r(errnum, buf, buflen);
+	/*
+	 * The GNU variant may return a pointer to a static string instead of
+	 * filling buf; copy it over in that case.
+	 */
+	if (b != buf && buflen > 0) {
+		strncpy(buf, b, buflen);
+		buf[buflen-1] = '\0';
+	}
+	return (0);
+#else
+	return (strerror_r(errnum, buf, buflen));
+#endif
+}
+
+/*
+ * atexit(3) handler, registered by malloc_init_hard() when opt_stats_print is
+ * set.  Merges thread cache statistics into their arenas (when both
+ * JEMALLOC_TCACHE and JEMALLOC_STATS are enabled) and then prints allocator
+ * statistics via malloc_stats_print() with all-NULL arguments.
+ */
+static void
+stats_print_atexit(void)
+{
+
+#if (defined(JEMALLOC_TCACHE) && defined(JEMALLOC_STATS))
+	unsigned i;
+
+	/*
+	 * Merge stats from extant threads.  This is racy, since individual
+	 * threads do not lock when recording tcache stats events.  As a
+	 * consequence, the final stats may be slightly out of date by the time
+	 * they are reported, if other threads continue to allocate.
+	 */
+	for (i = 0; i < narenas; i++) {
+		arena_t *arena = arenas[i];
+		/* Skip slots for arenas that were never initialized. */
+		if (arena != NULL) {
+			tcache_t *tcache;
+
+			/*
+			 * tcache_stats_merge() locks bins, so if any code is
+			 * introduced that acquires both arena and bin locks in
+			 * the opposite order, deadlocks may result.
+			 */
+			malloc_mutex_lock(&arena->lock);
+			ql_foreach(tcache, &arena->tcache_ql, link) {
+				tcache_stats_merge(tcache, arena);
+			}
+			malloc_mutex_unlock(&arena->lock);
+		}
+	}
+#endif
+	JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL);
+}
+
+#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
+/*
+ * Slow path for obtaining the calling thread's allocation counters when TLS
+ * is unavailable: allocate a fresh counter structure, register it as the
+ * thread-specific value for thread_allocated_tsd and zero it.
+ *
+ * If the allocation fails, a diagnostic is written (aborting when opt_abort
+ * is set) and a pointer to a shared static structure is returned instead.
+ */
+thread_allocated_t *
+thread_allocated_get_hard(void)
+{
+	static thread_allocated_t static_thread_allocated = {0, 0};
+	thread_allocated_t *counters;
+
+	counters = (thread_allocated_t *)imalloc(sizeof(thread_allocated_t));
+	if (counters != NULL) {
+		pthread_setspecific(thread_allocated_tsd, counters);
+		counters->allocated = 0;
+		counters->deallocated = 0;
+		return (counters);
+	}
+
+	/* Out of memory: fall back to the shared static counters. */
+	malloc_write("<jemalloc>: Error allocating TSD;"
+	    " mallctl(\"thread.{de,}allocated[p]\", ...)"
+	    " will be inaccurate\n");
+	if (opt_abort)
+		abort();
+	return (&static_thread_allocated);
+}
+#endif
+
+/*
+ * End miscellaneous support functions.
+ */
+/******************************************************************************/
+/*
+ * Begin initialization functions.
+ */
+
+/*
+ * Return the number of processors currently online, as reported by
+ * sysconf(_SC_NPROCESSORS_ONLN).  If sysconf() reports an error (-1), fall
+ * back to 1 rather than converting -1 to a huge unsigned count.
+ */
+static unsigned
+malloc_ncpus(void)
+{
+	unsigned ret;
+	long result;
+
+	result = sysconf(_SC_NPROCESSORS_ONLN);
+	if (result == -1) {
+		/* Error. */
+		ret = 1;
+	} else
+		ret = (unsigned)result;
+
+	return (ret);
+}
+
+/*
+ * Destructor for the arenas_tsd key: arg is the arena that was assigned to
+ * the exiting thread, so drop that thread from the arena's thread count.
+ */
+static void
+arenas_cleanup(void *arg)
+{
+	arena_t *assigned = (arena_t *)arg;
+
+	/* nthreads is only ever modified with arenas_lock held. */
+	malloc_mutex_lock(&arenas_lock);
+	assigned->nthreads -= 1;
+	malloc_mutex_unlock(&arenas_lock);
+}
+
+#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
+/*
+ * Destructor for the thread_allocated_tsd key: release the per-thread
+ * counter structure, if one was registered.
+ */
+static void
+thread_allocated_cleanup(void *arg)
+{
+	uint64_t *counters = (uint64_t *)arg;
+
+	if (counters == NULL)
+		return;
+	idalloc(counters);
+}
+#endif
+
+/*
+ * FreeBSD's pthreads implementation calls malloc(3), so the malloc
+ * implementation has to take pains to avoid infinite recursion during
+ * initialization.
+ *
+ * Fast-path check: returns false immediately once the allocator has been
+ * initialized, and otherwise returns the result of malloc_init_hard().
+ */
+static inline bool
+malloc_init(void)
+{
+
+	if (malloc_initialized)
+		return (false);
+
+	return (malloc_init_hard());
+}
+
+/*
+ * Scan the next "key:value" pair from the options string *opts_p.
+ *
+ * On success, returns false with *k_p/*klen_p describing the key and
+ * *v_p/*vlen_p describing the value (both point into the options string and
+ * are not NUL-terminated), and *opts_p advanced past the pair and past the
+ * comma that follows it, if any.
+ *
+ * Returns true, without updating *opts_p, when the input is exhausted or
+ * malformed; a diagnostic is written for a key that is not followed by ':'
+ * and for any character that is not valid in a key.
+ */
+static bool
+malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p,
+    char const **v_p, size_t *vlen_p)
+{
+	bool accept;
+	const char *opts = *opts_p;
+
+	*k_p = opts;
+
+	/* Scan the key: [A-Za-z0-9_]* terminated by ':'. */
+	for (accept = false; accept == false;) {
+		switch (*opts) {
+			case 'A': case 'B': case 'C': case 'D': case 'E':
+			case 'F': case 'G': case 'H': case 'I': case 'J':
+			case 'K': case 'L': case 'M': case 'N': case 'O':
+			case 'P': case 'Q': case 'R': case 'S': case 'T':
+			case 'U': case 'V': case 'W': case 'X': case 'Y':
+			case 'Z':
+			case 'a': case 'b': case 'c': case 'd': case 'e':
+			case 'f': case 'g': case 'h': case 'i': case 'j':
+			case 'k': case 'l': case 'm': case 'n': case 'o':
+			case 'p': case 'q': case 'r': case 's': case 't':
+			case 'u': case 'v': case 'w': case 'x': case 'y':
+			case 'z':
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7': case '8': case '9':
+			case '_':
+				opts++;
+				break;
+			case ':':
+				opts++;
+				/* Key length excludes the ':' just consumed. */
+				*klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p;
+				*v_p = opts;
+				accept = true;
+				break;
+			case '\0':
+				/* Silent only if nothing at all was consumed. */
+				if (opts != *opts_p) {
+					malloc_write("<jemalloc>: Conf string "
+					    "ends with key\n");
+				}
+				return (true);
+			default:
+				malloc_write("<jemalloc>: Malformed conf "
+				    "string\n");
+				return (true);
+		}
+	}
+
+	/* Scan the value: everything up to the next ',' or end of string. */
+	for (accept = false; accept == false;) {
+		switch (*opts) {
+			case ',':
+				opts++;
+				/*
+				 * Look ahead one character here, because the
+				 * next time this function is called, it will
+				 * assume that end of input has been cleanly
+				 * reached if no input remains, but we have
+				 * optimistically already consumed the comma if
+				 * one exists.
+				 */
+				if (*opts == '\0') {
+					malloc_write("<jemalloc>: Conf string "
+					    "ends with comma\n");
+				}
+				/* Value length excludes the ',' just consumed. */
+				*vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p;
+				accept = true;
+				break;
+			case '\0':
+				*vlen_p = (uintptr_t)opts - (uintptr_t)*v_p;
+				accept = true;
+				break;
+			default:
+				opts++;
+				break;
+		}
+	}
+
+	*opts_p = opts;
+	return (false);
+}
+
+/*
+ * Write "<jemalloc>: <msg>: <key>:<value>\n" via malloc_write().
+ *
+ * k and v are not NUL-terminated; klen and vlen give their lengths.  The
+ * pair is assembled in a fixed-size stack buffer, so an over-long key or
+ * value (the conf string may come from the environment and be arbitrarily
+ * long) is truncated to fit instead of overflowing the buffer.
+ */
+static void
+malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v,
+    size_t vlen)
+{
+	char buf[PATH_MAX + 1];
+	/* Room left for key plus value, not counting ':' and '\0'. */
+	size_t avail = sizeof(buf) - 2;
+
+	if (klen > avail)
+		klen = avail;
+	avail -= klen;
+	if (vlen > avail)
+		vlen = avail;
+
+	malloc_write("<jemalloc>: ");
+	malloc_write(msg);
+	malloc_write(": ");
+	memcpy(buf, k, klen);
+	memcpy(&buf[klen], ":", 1);
+	memcpy(&buf[klen+1], v, vlen);
+	buf[klen+1+vlen] = '\0';
+	malloc_write(buf);
+	malloc_write("\n");
+}
+
+/*
+ * Process runtime configuration from three sources, in this order:
+ *   0) the compiled-in malloc_conf string, if non-NULL;
+ *   1) the target name of the "/etc/[<prefix>]malloc.conf" symbolic link;
+ *   2) the [<PREFIX>]MALLOC_CONF environment variable.
+ * Each source is a comma-separated list of key:value pairs.  Every recognized
+ * pair assigns the matching opt_* variable, so a later source overrides an
+ * earlier one.  Invalid pairs are reported through malloc_conf_error() and
+ * otherwise ignored.
+ */
+static void
+malloc_conf_init(void)
+{
+	unsigned i;
+	char buf[PATH_MAX + 1];
+	const char *opts, *k, *v;
+	size_t klen, vlen;
+
+	for (i = 0; i < 3; i++) {
+		/* Get runtime configuration. */
+		switch (i) {
+		case 0:
+			if (JEMALLOC_P(malloc_conf) != NULL) {
+				/*
+				 * Use options that were compiled into the
+				 * program.
+				 */
+				opts = JEMALLOC_P(malloc_conf);
+			} else {
+				/* No configuration specified. */
+				buf[0] = '\0';
+				opts = buf;
+			}
+			break;
+		case 1: {
+			int linklen;
+			const char *linkname =
+#ifdef JEMALLOC_PREFIX
+			    "/etc/"JEMALLOC_PREFIX"malloc.conf"
+#else
+			    "/etc/malloc.conf"
+#endif
+			    ;
+
+			/* One byte of buf is reserved for the terminator. */
+			if ((linklen = readlink(linkname, buf,
+			    sizeof(buf) - 1)) != -1) {
+				/*
+				 * Use the contents of the "/etc/malloc.conf"
+				 * symbolic link's name.
+				 */
+				buf[linklen] = '\0';
+				opts = buf;
+			} else {
+				/* No configuration specified. */
+				buf[0] = '\0';
+				opts = buf;
+			}
+			break;
+		}
+		case 2: {
+			const char *envname =
+#ifdef JEMALLOC_PREFIX
+			    JEMALLOC_CPREFIX"MALLOC_CONF"
+#else
+			    "MALLOC_CONF"
+#endif
+			    ;
+
+			if ((opts = getenv(envname)) != NULL) {
+				/*
+				 * Do nothing; opts is already initialized to
+				 * the value of the MALLOC_CONF environment
+				 * variable.
+				 */
+			} else {
+				/* No configuration specified. */
+				buf[0] = '\0';
+				opts = buf;
+			}
+			break;
+		}
+		default:
+			/* NOTREACHED */
+			assert(false);
+			buf[0] = '\0';
+			opts = buf;
+		}
+
+		/*
+		 * Each CONF_HANDLE_*() macro below compares the current key
+		 * against one option name; on a match it validates the value,
+		 * assigns opt_<name> if valid, and continues with the next
+		 * pair.  A pair that no macro claims falls through to the
+		 * "Invalid conf pair" report at the end of the loop body.
+		 */
+		while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v,
+		    &vlen) == false) {
+			/* Boolean option: value must be "true" or "false". */
+#define	CONF_HANDLE_BOOL(n)						\
+			if (sizeof(#n)-1 == klen && strncmp(#n, k,	\
+			    klen) == 0) {				\
+				if (strncmp("true", v, vlen) == 0 &&	\
+				    vlen == sizeof("true")-1)		\
+					opt_##n = true;			\
+				else if (strncmp("false", v, vlen) ==	\
+				    0 && vlen == sizeof("false")-1)	\
+					opt_##n = false;		\
+				else {					\
+					malloc_conf_error(		\
+					    "Invalid conf value",	\
+					    k, klen, v, vlen);		\
+				}					\
+				continue;				\
+			}
+			/* Unsigned option: whole value must parse, in [min, max]. */
+#define	CONF_HANDLE_SIZE_T(n, min, max)					\
+			if (sizeof(#n)-1 == klen && strncmp(#n, k,	\
+			    klen) == 0) {				\
+				unsigned long ul;			\
+				char *end;				\
+									\
+				errno = 0;				\
+				ul = strtoul(v, &end, 0);		\
+				if (errno != 0 || (uintptr_t)end -	\
+				    (uintptr_t)v != vlen) {		\
+					malloc_conf_error(		\
+					    "Invalid conf value",	\
+					    k, klen, v, vlen);		\
+				} else if (ul < min || ul > max) {	\
+					malloc_conf_error(		\
+					    "Out-of-range conf value",	\
+					    k, klen, v, vlen);		\
+				} else					\
+					opt_##n = ul;			\
+				continue;				\
+			}
+			/* Signed option: whole value must parse, in [min, max]. */
+#define	CONF_HANDLE_SSIZE_T(n, min, max)				\
+			if (sizeof(#n)-1 == klen && strncmp(#n, k,	\
+			    klen) == 0) {				\
+				long l;					\
+				char *end;				\
+									\
+				errno = 0;				\
+				l = strtol(v, &end, 0);			\
+				if (errno != 0 || (uintptr_t)end -	\
+				    (uintptr_t)v != vlen) {		\
+					malloc_conf_error(		\
+					    "Invalid conf value",	\
+					    k, klen, v, vlen);		\
+				} else if (l < (ssize_t)min || l >	\
+				    (ssize_t)max) {			\
+					malloc_conf_error(		\
+					    "Out-of-range conf value",	\
+					    k, klen, v, vlen);		\
+				} else					\
+					opt_##n = l;			\
+				continue;				\
+			}
+			/* String option: copied, truncated to fit opt_<name>. */
+#define	CONF_HANDLE_CHAR_P(n, d)					\
+			if (sizeof(#n)-1 == klen && strncmp(#n, k,	\
+			    klen) == 0) {				\
+				size_t cpylen = (vlen <=		\
+				    sizeof(opt_##n)-1) ? vlen :		\
+				    sizeof(opt_##n)-1;			\
+				strncpy(opt_##n, v, cpylen);		\
+				opt_##n[cpylen] = '\0';			\
+				continue;				\
+			}
+
+			CONF_HANDLE_BOOL(abort)
+			CONF_HANDLE_SIZE_T(lg_qspace_max, LG_QUANTUM,
+			    PAGE_SHIFT-1)
+			CONF_HANDLE_SIZE_T(lg_cspace_max, LG_QUANTUM,
+			    PAGE_SHIFT-1)
+			/*
+			 * Chunks always require at least one header page,
+			 * plus one data page.
+			 */
+			CONF_HANDLE_SIZE_T(lg_chunk, PAGE_SHIFT+1,
+			    (sizeof(size_t) << 3) - 1)
+			CONF_HANDLE_SIZE_T(narenas, 1, SIZE_T_MAX)
+			CONF_HANDLE_SSIZE_T(lg_dirty_mult, -1,
+			    (sizeof(size_t) << 3) - 1)
+			CONF_HANDLE_BOOL(stats_print)
+#ifdef JEMALLOC_FILL
+			CONF_HANDLE_BOOL(junk)
+			CONF_HANDLE_BOOL(zero)
+#endif
+#ifdef JEMALLOC_SYSV
+			CONF_HANDLE_BOOL(sysv)
+#endif
+#ifdef JEMALLOC_XMALLOC
+			CONF_HANDLE_BOOL(xmalloc)
+#endif
+#ifdef JEMALLOC_TCACHE
+			CONF_HANDLE_BOOL(tcache)
+			CONF_HANDLE_SSIZE_T(lg_tcache_gc_sweep, -1,
+			    (sizeof(size_t) << 3) - 1)
+			CONF_HANDLE_SSIZE_T(lg_tcache_max, -1,
+			    (sizeof(size_t) << 3) - 1)
+#endif
+#ifdef JEMALLOC_PROF
+			CONF_HANDLE_BOOL(prof)
+			CONF_HANDLE_CHAR_P(prof_prefix, "jeprof")
+			CONF_HANDLE_SIZE_T(lg_prof_bt_max, 0, LG_PROF_BT_MAX)
+			CONF_HANDLE_BOOL(prof_active)
+			CONF_HANDLE_SSIZE_T(lg_prof_sample, 0,
+			    (sizeof(uint64_t) << 3) - 1)
+			CONF_HANDLE_BOOL(prof_accum)
+			CONF_HANDLE_SSIZE_T(lg_prof_tcmax, -1,
+			    (sizeof(size_t) << 3) - 1)
+			CONF_HANDLE_SSIZE_T(lg_prof_interval, -1,
+			    (sizeof(uint64_t) << 3) - 1)
+			CONF_HANDLE_BOOL(prof_gdump)
+			CONF_HANDLE_BOOL(prof_leak)
+#endif
+#ifdef JEMALLOC_SWAP
+			CONF_HANDLE_BOOL(overcommit)
+#endif
+			/* No handler matched the key. */
+			malloc_conf_error("Invalid conf pair", k, klen, v,
+			    vlen);
+#undef CONF_HANDLE_BOOL
+#undef CONF_HANDLE_SIZE_T
+#undef CONF_HANDLE_SSIZE_T
+#undef CONF_HANDLE_CHAR_P
+		}
+
+		/* Validate configuration of options that are inter-related. */
+		if (opt_lg_qspace_max+1 >= opt_lg_cspace_max) {
+			malloc_write("<jemalloc>: Invalid lg_[qc]space_max "
+			    "relationship; restoring defaults\n");
+			opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT;
+			opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT;
+		}
+	}
+}
+
+/*
+ * Slow-path allocator initialization, serialized by init_lock.
+ *
+ * Returns false if the allocator is usable on return: it was already
+ * initialized, the calling thread is the initializer allocating recursively,
+ * another thread finished initializing while this one waited, or this call
+ * completed initialization.  Returns true if any initialization step fails.
+ *
+ * init_lock is held throughout, except around the call to malloc_ncpus(),
+ * and is released on every return path.
+ */
+static bool
+malloc_init_hard(void)
+{
+	arena_t *init_arenas[1];
+
+	malloc_mutex_lock(&init_lock);
+	if (malloc_initialized || malloc_initializer == pthread_self()) {
+		/*
+		 * Another thread initialized the allocator before this one
+		 * acquired init_lock, or this thread is the initializing
+		 * thread, and it is recursively allocating.
+		 */
+		malloc_mutex_unlock(&init_lock);
+		return (false);
+	}
+	if (malloc_initializer != (unsigned long)0) {
+		/* Busy-wait until the initializing thread completes. */
+		do {
+			malloc_mutex_unlock(&init_lock);
+			CPU_SPINWAIT;
+			malloc_mutex_lock(&init_lock);
+		} while (malloc_initialized == false);
+		malloc_mutex_unlock(&init_lock);
+		return (false);
+	}
+
+#ifdef DYNAMIC_PAGE_SHIFT
+	/* Get page size. */
+	{
+		long result;
+
+		result = sysconf(_SC_PAGESIZE);
+		assert(result != -1);
+		pagesize = (size_t)result;
+
+		/*
+		 * We assume that pagesize is a power of 2 when calculating
+		 * pagesize_mask and lg_pagesize.
+		 */
+		assert(((result - 1) & result) == 0);
+		pagesize_mask = result - 1;
+		lg_pagesize = ffs((int)result) - 1;
+	}
+#endif
+
+#ifdef JEMALLOC_PROF
+	prof_boot0();
+#endif
+
+	malloc_conf_init();
+
+	/* Register fork handlers. */
+	if (pthread_atfork(jemalloc_prefork, jemalloc_postfork,
+	    jemalloc_postfork) != 0) {
+		malloc_write("<jemalloc>: Error in pthread_atfork()\n");
+		if (opt_abort)
+			abort();
+	}
+
+	if (ctl_boot()) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
+
+	if (opt_stats_print) {
+		/* Print statistics at exit. */
+		if (atexit(stats_print_atexit) != 0) {
+			malloc_write("<jemalloc>: Error in atexit()\n");
+			if (opt_abort)
+				abort();
+		}
+	}
+
+	if (chunk_boot()) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
+
+	if (base_boot()) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
+
+#ifdef JEMALLOC_PROF
+	prof_boot1();
+#endif
+
+	if (arena_boot()) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
+
+#ifdef JEMALLOC_TCACHE
+	if (tcache_boot()) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
+#endif
+
+	if (huge_boot()) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
+
+#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
+	/* Initialize allocation counters before any allocations can occur. */
+	if (pthread_key_create(&thread_allocated_tsd, thread_allocated_cleanup)
+	    != 0) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
+#endif
+
+	if (malloc_mutex_init(&arenas_lock)) {
+		/* Do not leave init_lock held on this failure path either. */
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
+
+	if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
+
+	/*
+	 * Create enough scaffolding to allow recursive allocation in
+	 * malloc_ncpus().
+	 */
+	narenas = 1;
+	arenas = init_arenas;
+	memset(arenas, 0, sizeof(arena_t *) * narenas);
+
+	/*
+	 * Initialize one arena here.  The rest are lazily created in
+	 * choose_arena_hard().
+	 */
+	arenas_extend(0);
+	if (arenas[0] == NULL) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
+
+	/*
+	 * Assign the initial arena to the initial thread, in order to avoid
+	 * spurious creation of an extra arena if the application switches to
+	 * threaded mode.
+	 */
+	ARENA_SET(arenas[0]);
+	arenas[0]->nthreads++;
+
+#ifdef JEMALLOC_PROF
+	if (prof_boot2()) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
+#endif
+
+	/*
+	 * Get number of CPUs.  malloc_initializer is set first so that a
+	 * recursive allocation made by this thread while init_lock is
+	 * dropped returns early from this function instead of waiting.
+	 */
+	malloc_initializer = pthread_self();
+	malloc_mutex_unlock(&init_lock);
+	ncpus = malloc_ncpus();
+	malloc_mutex_lock(&init_lock);
+
+	if (opt_narenas == 0) {
+		/*
+		 * For SMP systems, create more than one arena per CPU by
+		 * default.
+		 */
+		if (ncpus > 1)
+			opt_narenas = ncpus << 2;
+		else
+			opt_narenas = 1;
+	}
+	narenas = opt_narenas;
+	/*
+	 * Make sure that the arenas array can be allocated.  In practice, this
+	 * limit is enough to allow the allocator to function, but the ctl
+	 * machinery will fail to allocate memory at far lower limits.
+	 */
+	if (narenas > chunksize / sizeof(arena_t *)) {
+		char buf[UMAX2S_BUFSIZE];
+
+		narenas = chunksize / sizeof(arena_t *);
+		malloc_write("<jemalloc>: Reducing narenas to limit (");
+		malloc_write(u2s(narenas, 10, buf));
+		malloc_write(")\n");
+	}
+
+	/* Allocate and initialize arenas. */
+	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
+	if (arenas == NULL) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
+	/*
+	 * Zero the array.  In practice, this should always be pre-zeroed,
+	 * since it was just mmap()ed, but let's be sure.
+	 */
+	memset(arenas, 0, sizeof(arena_t *) * narenas);
+	/* Copy the pointer to the one arena that was already initialized. */
+	arenas[0] = init_arenas[0];
+
+#ifdef JEMALLOC_ZONE
+	/* Register the custom zone. */
+	malloc_zone_register(create_zone());
+
+	/*
+	 * Convert the default szone to an "overlay zone" that is capable of
+	 * deallocating szone-allocated objects, but allocating new objects
+	 * from jemalloc.
+	 */
+	szone2ozone(malloc_default_zone());
+#endif
+
+	malloc_initialized = true;
+	malloc_mutex_unlock(&init_lock);
+	return (false);
+}
+
+#ifdef JEMALLOC_ZONE
+/*
+ * Constructor used when JEMALLOC_ZONE is defined: run the allocator's
+ * initialization up front and abort the process if it fails.
+ */
+JEMALLOC_ATTR(constructor)
+void
+jemalloc_darwin_init(void)
+{
+	bool init_failed = malloc_init_hard();
+
+	if (init_failed)
+		abort();
+}
+#endif
+
+/*
+ * End initialization functions.
+ */
+/******************************************************************************/
+/*
+ * Begin malloc(3)-compatible functions.
+ */
+
+/*
+ * malloc(3)-compatible entry point.
+ *
+ * Initializes the allocator on first use, then allocates size bytes with
+ * imalloc().  A size of 0 is treated as a request for 1 byte, unless the
+ * sysv option is enabled, in which case NULL is returned (or the process
+ * aborts if the xmalloc option is also enabled).
+ *
+ * On initialization or allocation failure, returns NULL with errno set to
+ * ENOMEM, or aborts with a diagnostic if the xmalloc option is enabled.
+ * When profiling and/or statistics are compiled in, a successful allocation
+ * is also recorded with prof_malloc() and ALLOCATED_ADD().
+ */
+JEMALLOC_ATTR(malloc)
+JEMALLOC_ATTR(visibility("default"))
+void *
+JEMALLOC_P(malloc)(size_t size)
+{
+	void *ret;
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+	size_t usize
+#  ifdef JEMALLOC_CC_SILENCE
+	    = 0
+#  endif
+	    ;
+#endif
+#ifdef JEMALLOC_PROF
+	prof_thr_cnt_t *cnt
+#  ifdef JEMALLOC_CC_SILENCE
+	    = NULL
+#  endif
+	    ;
+#endif
+
+	if (malloc_init()) {
+		ret = NULL;
+		goto OOM;
+	}
+
+	if (size == 0) {
+#ifdef JEMALLOC_SYSV
+		if (opt_sysv == false)
+#endif
+			size = 1;
+#ifdef JEMALLOC_SYSV
+		else {
+#  ifdef JEMALLOC_XMALLOC
+			if (opt_xmalloc) {
+				malloc_write("<jemalloc>: Error in malloc(): "
+				    "invalid size 0\n");
+				abort();
+			}
+#  endif
+			/* Skips the OOM handling below; errno is not set. */
+			ret = NULL;
+			goto RETURN;
+		}
+#endif
+	}
+
+#ifdef JEMALLOC_PROF
+	if (opt_prof) {
+		usize = s2u(size);
+		PROF_ALLOC_PREP(1, usize, cnt);
+		if (cnt == NULL) {
+			ret = NULL;
+			goto OOM;
+		}
+		/*
+		 * When prof_promote is set and cnt is not the special value
+		 * 1, a small request is allocated as small_maxclass+1 bytes
+		 * and its real usable size is recorded separately with
+		 * arena_prof_promoted().
+		 */
+		if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <=
+		    small_maxclass) {
+			ret = imalloc(small_maxclass+1);
+			if (ret != NULL)
+				arena_prof_promoted(ret, usize);
+		} else
+			ret = imalloc(size);
+	} else
+#endif
+	{
+#ifdef JEMALLOC_STATS
+		usize = s2u(size);
+#endif
+		ret = imalloc(size);
+	}
+
+OOM:
+	if (ret == NULL) {
+#ifdef JEMALLOC_XMALLOC
+		if (opt_xmalloc) {
+			malloc_write("<jemalloc>: Error in malloc(): "
+			    "out of memory\n");
+			abort();
+		}
+#endif
+		errno = ENOMEM;
+	}
+
+#ifdef JEMALLOC_SYSV
+RETURN:
+#endif
+#ifdef JEMALLOC_PROF
+	if (opt_prof && ret != NULL)
+		prof_malloc(ret, usize, cnt);
+#endif
+#ifdef JEMALLOC_STATS
+	if (ret != NULL) {
+		assert(usize == isalloc(ret));
+		ALLOCATED_ADD(usize, 0);
+	}
+#endif
+	return (ret);
+}
+
+/*
+ * Shared implementation behind posix_memalign(), memalign() and valloc().
+ *
+ * On success stores the new object in *memptr and returns 0.  Returns EINVAL
+ * when alignment is not a power of two or is smaller than sizeof(void *), and
+ * ENOMEM when initialization or allocation fails.  *memptr is NOT written on
+ * the EINVAL/ENOMEM paths.  With JEMALLOC_SYSV and opt_sysv, a zero size
+ * stores NULL in *memptr and returns 0; otherwise a zero size is bumped to 1.
+ */
+JEMALLOC_ATTR(nonnull(1))
+#ifdef JEMALLOC_PROF
+/*
+ * Avoid any uncertainty as to how many backtrace frames to ignore in
+ * PROF_ALLOC_PREP().
+ */
+JEMALLOC_ATTR(noinline)
+#endif
+static int
+imemalign(void **memptr, size_t alignment, size_t size)
+{
+	int ret;
+	size_t usize
+#ifdef JEMALLOC_CC_SILENCE
+	    = 0
+#endif
+	    ;
+	void *result;
+#ifdef JEMALLOC_PROF
+	prof_thr_cnt_t *cnt
+#  ifdef JEMALLOC_CC_SILENCE
+	    = NULL
+#  endif
+	    ;
+#endif
+
+	if (malloc_init())
+		result = NULL;
+	else {
+		if (size == 0) {
+#ifdef JEMALLOC_SYSV
+			if (opt_sysv == false)
+#endif
+				size = 1;
+#ifdef JEMALLOC_SYSV
+			else {
+#  ifdef JEMALLOC_XMALLOC
+				if (opt_xmalloc) {
+					malloc_write("<jemalloc>: Error in "
+					    "posix_memalign(): invalid size "
+					    "0\n");
+					abort();
+				}
+#  endif
+				result = NULL;
+				*memptr = NULL;
+				ret = 0;
+				goto RETURN;
+			}
+#endif
+		}
+
+		/* Make sure that alignment is a large enough power of 2. */
+		if (((alignment - 1) & alignment) != 0
+		    || alignment < sizeof(void *)) {
+#ifdef JEMALLOC_XMALLOC
+			if (opt_xmalloc) {
+				malloc_write("<jemalloc>: Error in "
+				    "posix_memalign(): invalid alignment\n");
+				abort();
+			}
+#endif
+			result = NULL;
+			ret = EINVAL;
+			goto RETURN;
+		}
+
+		/* A zero result from sa2u() is treated as out of memory. */
+		usize = sa2u(size, alignment, NULL);
+		if (usize == 0) {
+			result = NULL;
+			ret = ENOMEM;
+			goto RETURN;
+		}
+
+#ifdef JEMALLOC_PROF
+		if (opt_prof) {
+			PROF_ALLOC_PREP(2, usize, cnt);
+			if (cnt == NULL) {
+				/*
+				 * The EINVAL stored here is overwritten with
+				 * ENOMEM by the result == NULL check below.
+				 */
+				result = NULL;
+				ret = EINVAL;
+			} else {
+				if (prof_promote && (uintptr_t)cnt !=
+				    (uintptr_t)1U && usize <= small_maxclass) {
+					assert(sa2u(small_maxclass+1,
+					    alignment, NULL) != 0);
+					result = ipalloc(sa2u(small_maxclass+1,
+					    alignment, NULL), alignment, false);
+					if (result != NULL) {
+						arena_prof_promoted(result,
+						    usize);
+					}
+				} else {
+					result = ipalloc(usize, alignment,
+					    false);
+				}
+			}
+		} else
+#endif
+			result = ipalloc(usize, alignment, false);
+	}
+
+	if (result == NULL) {
+#ifdef JEMALLOC_XMALLOC
+		if (opt_xmalloc) {
+			malloc_write("<jemalloc>: Error in posix_memalign(): "
+			    "out of memory\n");
+			abort();
+		}
+#endif
+		ret = ENOMEM;
+		goto RETURN;
+	}
+
+	*memptr = result;
+	ret = 0;
+
+RETURN:
+	/* Accounting hooks run only when an object was actually produced. */
+#ifdef JEMALLOC_STATS
+	if (result != NULL) {
+		assert(usize == isalloc(result));
+		ALLOCATED_ADD(usize, 0);
+	}
+#endif
+#ifdef JEMALLOC_PROF
+	if (opt_prof && result != NULL)
+		prof_malloc(result, usize, cnt);
+#endif
+	return (ret);
+}
+
+/*
+ * posix_memalign(3) entry point.  Pure forwarding wrapper: argument checking,
+ * allocation and the 0/EINVAL/ENOMEM status all come from imemalign().
+ */
+JEMALLOC_ATTR(nonnull(1))
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
+{
+	int status = imemalign(memptr, alignment, size);
+
+	return (status);
+}
+
+/*
+ * calloc(3) entry point.
+ *
+ * Allocates zeroed storage for num objects of size bytes each.  Returns NULL
+ * with errno set to ENOMEM when initialization or allocation fails, or when
+ * num * size does not fit in a size_t.  A request in which num or size is 0 is
+ * served as a 1-byte allocation, unless JEMALLOC_SYSV is compiled in and
+ * opt_sysv is set, in which case it takes the NULL-returning failure path.
+ * When JEMALLOC_XMALLOC is compiled in and opt_xmalloc is set, failures
+ * abort() instead of returning.
+ */
+JEMALLOC_ATTR(malloc)
+JEMALLOC_ATTR(visibility("default"))
+void *
+JEMALLOC_P(calloc)(size_t num, size_t size)
+{
+	void *ret;
+	size_t num_size;
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+	size_t usize
+#  ifdef JEMALLOC_CC_SILENCE
+	    = 0
+#  endif
+	    ;
+#endif
+#ifdef JEMALLOC_PROF
+	prof_thr_cnt_t *cnt
+#  ifdef JEMALLOC_CC_SILENCE
+	    = NULL
+#  endif
+	    ;
+#endif
+
+	if (malloc_init()) {
+		num_size = 0;
+		ret = NULL;
+		goto RETURN;
+	}
+
+	num_size = num * size;
+	if (num_size == 0) {
+		/*
+		 * A zero product is a genuine zero-byte request only if one of
+		 * the operands is zero.  Otherwise the multiplication wrapped
+		 * around to exactly 0 (e.g. 2^32 * 2^32 with a 64-bit size_t)
+		 * and must be rejected in every build configuration rather
+		 * than being served as a 1-byte allocation.
+		 */
+		if (((num == 0) || (size == 0))
+#ifdef JEMALLOC_SYSV
+		    && (opt_sysv == false)
+#endif
+		    )
+			num_size = 1;
+		else {
+			ret = NULL;
+			goto RETURN;
+		}
+	/*
+	 * Try to avoid division here.  We know that it isn't possible to
+	 * overflow during multiplication if neither operand uses any of the
+	 * most significant half of the bits in a size_t.
+	 */
+	} else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
+	    && (num_size / size != num)) {
+		/* size_t overflow. */
+		ret = NULL;
+		goto RETURN;
+	}
+
+#ifdef JEMALLOC_PROF
+	if (opt_prof) {
+		usize = s2u(num_size);
+		PROF_ALLOC_PREP(1, usize, cnt);
+		if (cnt == NULL) {
+			ret = NULL;
+			goto RETURN;
+		}
+		/*
+		 * With prof_promote set, a small request whose cnt is not 1 is
+		 * allocated as small_maxclass+1 bytes, and
+		 * arena_prof_promoted() is told the requested usize.
+		 */
+		if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize
+		    <= small_maxclass) {
+			ret = icalloc(small_maxclass+1);
+			if (ret != NULL)
+				arena_prof_promoted(ret, usize);
+		} else
+			ret = icalloc(num_size);
+	} else
+#endif
+	{
+#ifdef JEMALLOC_STATS
+		usize = s2u(num_size);
+#endif
+		ret = icalloc(num_size);
+	}
+
+RETURN:
+	if (ret == NULL) {
+#ifdef JEMALLOC_XMALLOC
+		if (opt_xmalloc) {
+			malloc_write("<jemalloc>: Error in calloc(): out of "
+			    "memory\n");
+			abort();
+		}
+#endif
+		errno = ENOMEM;
+	}
+
+	/* Profiling and statistics hooks only see successful allocations. */
+#ifdef JEMALLOC_PROF
+	if (opt_prof && ret != NULL)
+		prof_malloc(ret, usize, cnt);
+#endif
+#ifdef JEMALLOC_STATS
+	if (ret != NULL) {
+		assert(usize == isalloc(ret));
+		ALLOCATED_ADD(usize, 0);
+	}
+#endif
+	return (ret);
+}
+
+/*
+ * realloc(3) entry point.
+ *
+ * With ptr == NULL this initializes the allocator on demand and allocates
+ * size bytes via imalloc(); otherwise it resizes ptr through iralloc().
+ * Returns the resulting pointer, or NULL with errno set to ENOMEM on failure.
+ * A zero size is bumped to 1 unless JEMALLOC_SYSV is compiled in and opt_sysv
+ * is set, in which case ptr (if non-NULL) is deallocated and NULL is returned
+ * without touching errno.  When JEMALLOC_XMALLOC is compiled in and
+ * opt_xmalloc is set, out-of-memory aborts instead of returning.
+ */
+JEMALLOC_ATTR(visibility("default"))
+void *
+JEMALLOC_P(realloc)(void *ptr, size_t size)
+{
+	void *ret;
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+	size_t usize
+#  ifdef JEMALLOC_CC_SILENCE
+	    = 0
+#  endif
+	    ;
+	size_t old_size = 0;
+#endif
+#ifdef JEMALLOC_PROF
+	prof_thr_cnt_t *cnt
+#  ifdef JEMALLOC_CC_SILENCE
+	    = NULL
+#  endif
+	    ;
+	prof_ctx_t *old_ctx
+#  ifdef JEMALLOC_CC_SILENCE
+	    = NULL
+#  endif
+	    ;
+#endif
+
+	if (size == 0) {
+#ifdef JEMALLOC_SYSV
+		if (opt_sysv == false)
+#endif
+			size = 1;
+#ifdef JEMALLOC_SYSV
+		else {
+			/* SysV semantics: realloc(ptr, 0) deallocates ptr. */
+			if (ptr != NULL) {
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+				old_size = isalloc(ptr);
+#endif
+#ifdef JEMALLOC_PROF
+				if (opt_prof) {
+					old_ctx = prof_ctx_get(ptr);
+					cnt = NULL;
+				}
+#endif
+				idalloc(ptr);
+			}
+#ifdef JEMALLOC_PROF
+			else if (opt_prof) {
+				old_ctx = NULL;
+				cnt = NULL;
+			}
+#endif
+			ret = NULL;
+			goto RETURN;
+		}
+#endif
+	}
+
+	if (ptr != NULL) {
+		/* Resize an existing object. */
+		assert(malloc_initialized || malloc_initializer ==
+		    pthread_self());
+
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+		old_size = isalloc(ptr);
+#endif
+#ifdef JEMALLOC_PROF
+		if (opt_prof) {
+			usize = s2u(size);
+			old_ctx = prof_ctx_get(ptr);
+			PROF_ALLOC_PREP(1, usize, cnt);
+			if (cnt == NULL) {
+				old_ctx = NULL;
+				ret = NULL;
+				goto OOM;
+			}
+			/*
+			 * old_ctx is cleared whenever iralloc() fails, so the
+			 * prof_realloc() call at the end of the function
+			 * receives a NULL old_ctx for a failed resize.
+			 */
+			if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U &&
+			    usize <= small_maxclass) {
+				ret = iralloc(ptr, small_maxclass+1, 0, 0,
+				    false, false);
+				if (ret != NULL)
+					arena_prof_promoted(ret, usize);
+				else
+					old_ctx = NULL;
+			} else {
+				ret = iralloc(ptr, size, 0, 0, false, false);
+				if (ret == NULL)
+					old_ctx = NULL;
+			}
+		} else
+#endif
+		{
+#ifdef JEMALLOC_STATS
+			usize = s2u(size);
+#endif
+			ret = iralloc(ptr, size, 0, 0, false, false);
+		}
+
+#ifdef JEMALLOC_PROF
+OOM:
+#endif
+		if (ret == NULL) {
+#ifdef JEMALLOC_XMALLOC
+			if (opt_xmalloc) {
+				malloc_write("<jemalloc>: Error in realloc(): "
+				    "out of memory\n");
+				abort();
+			}
+#endif
+			errno = ENOMEM;
+		}
+	} else {
+		/* ptr == NULL: behave like a fresh allocation of size bytes. */
+#ifdef JEMALLOC_PROF
+		if (opt_prof)
+			old_ctx = NULL;
+#endif
+		if (malloc_init()) {
+#ifdef JEMALLOC_PROF
+			if (opt_prof)
+				cnt = NULL;
+#endif
+			ret = NULL;
+		} else {
+#ifdef JEMALLOC_PROF
+			if (opt_prof) {
+				usize = s2u(size);
+				PROF_ALLOC_PREP(1, usize, cnt);
+				if (cnt == NULL)
+					ret = NULL;
+				else {
+					if (prof_promote && (uintptr_t)cnt !=
+					    (uintptr_t)1U && usize <=
+					    small_maxclass) {
+						ret = imalloc(small_maxclass+1);
+						if (ret != NULL) {
+							arena_prof_promoted(ret,
+							    usize);
+						}
+					} else
+						ret = imalloc(size);
+				}
+			} else
+#endif
+			{
+#ifdef JEMALLOC_STATS
+				usize = s2u(size);
+#endif
+				ret = imalloc(size);
+			}
+		}
+
+		if (ret == NULL) {
+#ifdef JEMALLOC_XMALLOC
+			if (opt_xmalloc) {
+				malloc_write("<jemalloc>: Error in realloc(): "
+				    "out of memory\n");
+				abort();
+			}
+#endif
+			errno = ENOMEM;
+		}
+	}
+
+#ifdef JEMALLOC_SYSV
+RETURN:
+#endif
+	/*
+	 * prof_realloc() is called on every path (including failures) when
+	 * profiling is on; the statistics update only runs for a non-NULL
+	 * result.
+	 */
+#ifdef JEMALLOC_PROF
+	if (opt_prof)
+		prof_realloc(ret, usize, cnt, old_size, old_ctx);
+#endif
+#ifdef JEMALLOC_STATS
+	if (ret != NULL) {
+		assert(usize == isalloc(ret));
+		ALLOCATED_ADD(usize, old_size);
+	}
+#endif
+	return (ret);
+}
+
+/*
+ * free(3) entry point.  A NULL ptr is ignored.  Otherwise the profiling and
+ * statistics hooks (when compiled in) are given the object's usable size and
+ * the object is then released with idalloc().
+ */
+JEMALLOC_ATTR(visibility("default"))
+void
+JEMALLOC_P(free)(void *ptr)
+{
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+	size_t usize;
+#endif
+
+	/* Nothing to release. */
+	if (ptr == NULL)
+		return;
+
+	assert(malloc_initialized || malloc_initializer == pthread_self());
+
+#ifdef JEMALLOC_STATS
+	usize = isalloc(ptr);
+#endif
+#ifdef JEMALLOC_PROF
+	if (opt_prof) {
+		/* Without stats, the size is only looked up when profiling. */
+#  ifndef JEMALLOC_STATS
+		usize = isalloc(ptr);
+#  endif
+		prof_free(ptr, usize);
+	}
+#endif
+#ifdef JEMALLOC_STATS
+	ALLOCATED_ADD(0, usize);
+#endif
+	idalloc(ptr);
+}
+
+/*
+ * End malloc(3)-compatible functions.
+ */
+/******************************************************************************/
+/*
+ * Begin non-standard override functions.
+ *
+ * These overrides are omitted if the JEMALLOC_PREFIX is defined, since the
+ * entire point is to avoid accidental mixed allocator usage.
+ */
+#ifndef JEMALLOC_PREFIX
+
+#ifdef JEMALLOC_OVERRIDE_MEMALIGN
+/*
+ * memalign() override.  Returns storage of at least size bytes aligned to
+ * alignment, or NULL when imemalign() reports an error (invalid alignment or
+ * out of memory).
+ */
+JEMALLOC_ATTR(malloc)
+JEMALLOC_ATTR(visibility("default"))
+void *
+JEMALLOC_P(memalign)(size_t alignment, size_t size)
+{
+	void *ret;
+
+	/*
+	 * imemalign() leaves ret unwritten on its EINVAL/ENOMEM paths, so its
+	 * status must be checked in every build configuration, not only when
+	 * JEMALLOC_CC_SILENCE is defined; otherwise an indeterminate pointer
+	 * would be handed back to the caller on failure.
+	 */
+	if (imemalign(&ret, alignment, size) != 0)
+		return (NULL);
+	return (ret);
+}
+#endif
+
+#ifdef JEMALLOC_OVERRIDE_VALLOC
+/*
+ * valloc() override.  Returns storage of at least size bytes aligned to
+ * PAGE_SIZE, or NULL when imemalign() reports an error.
+ */
+JEMALLOC_ATTR(malloc)
+JEMALLOC_ATTR(visibility("default"))
+void *
+JEMALLOC_P(valloc)(size_t size)
+{
+	void *ret;
+
+	/*
+	 * imemalign() leaves ret unwritten on its EINVAL/ENOMEM paths, so its
+	 * status must be checked in every build configuration, not only when
+	 * JEMALLOC_CC_SILENCE is defined; otherwise an indeterminate pointer
+	 * would be handed back to the caller on failure.
+	 */
+	if (imemalign(&ret, PAGE_SIZE, size) != 0)
+		return (NULL);
+	return (ret);
+}
+#endif
+
+#endif /* JEMALLOC_PREFIX */
+/*
+ * End non-standard override functions.
+ */
+/******************************************************************************/
+/*
+ * Begin non-standard functions.
+ */
+
+/*
+ * Report the usable size of the object at ptr.  JEMALLOC_IVSALLOC builds go
+ * through ivsalloc(); all other builds assert that ptr is non-NULL and use
+ * isalloc().
+ */
+JEMALLOC_ATTR(visibility("default"))
+size_t
+JEMALLOC_P(malloc_usable_size)(const void *ptr)
+{
+
+	assert(malloc_initialized || malloc_initializer == pthread_self());
+
+#ifdef JEMALLOC_IVSALLOC
+	return (ivsalloc(ptr));
+#else
+	assert(ptr != NULL);
+	return (isalloc(ptr));
+#endif
+}
+
+/*
+ * Exported wrapper around stats_print(): write_cb, cbopaque and opts are
+ * forwarded unchanged.  Unlike the mallctl*() entry points in this file, it
+ * does not call malloc_init() itself.
+ */
+JEMALLOC_ATTR(visibility("default"))
+void
+JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *),
+    void *cbopaque, const char *opts)
+{
+
+	stats_print(write_cb, cbopaque, opts);
+}
+
+/*
+ * Control interface, lookup by name.  Returns EAGAIN if the allocator cannot
+ * be initialized; otherwise returns whatever ctl_byname() returns.
+ */
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, void *newp,
+    size_t newlen)
+{
+
+	/* The ctl layer is only consulted once initialization succeeded. */
+	if (malloc_init() == false)
+		return (ctl_byname(name, oldp, oldlenp, newp, newlen));
+
+	return (EAGAIN);
+}
+
+/*
+ * Control interface, name-to-MIB translation.  Returns EAGAIN if the
+ * allocator cannot be initialized; otherwise returns whatever
+ * ctl_nametomib() returns.
+ */
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, size_t *miblenp)
+{
+
+	/* The ctl layer is only consulted once initialization succeeded. */
+	if (malloc_init() == false)
+		return (ctl_nametomib(name, mibp, miblenp));
+
+	return (EAGAIN);
+}
+
+/*
+ * Control interface, lookup by MIB.  Returns EAGAIN if the allocator cannot
+ * be initialized; otherwise returns whatever ctl_bymib() returns.
+ */
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
+{
+
+	/* The ctl layer is only consulted once initialization succeeded. */
+	if (malloc_init() == false)
+		return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen));
+
+	return (EAGAIN);
+}
+
+/*
+ * Allocation helper for allocm(): pick the internal allocator that matches
+ * the alignment/zero request.  usize must already be the rounded size, i.e.
+ * s2u(usize) when alignment is 0 and sa2u(usize, alignment, NULL) otherwise.
+ */
+JEMALLOC_INLINE void *
+iallocm(size_t usize, size_t alignment, bool zero)
+{
+
+	if (alignment == 0) {
+		/* No explicit alignment: plain or zeroed allocation. */
+		assert(usize == s2u(usize));
+		return (zero ? icalloc(usize) : imalloc(usize));
+	}
+
+	assert(usize == sa2u(usize, alignment, NULL));
+	return (ipalloc(usize, alignment, zero));
+}
+
+/*
+ * allocm(): allocate at least size bytes, honoring the alignment and
+ * zero-fill requests encoded in flags.
+ *
+ * On success stores the object in *ptr, stores its usable size in *rsize when
+ * rsize is non-NULL, and returns ALLOCM_SUCCESS.  On failure stores NULL in
+ * *ptr and returns ALLOCM_ERR_OOM (or aborts when JEMALLOC_XMALLOC is
+ * compiled in and opt_xmalloc is set).  ptr must be non-NULL and size
+ * non-zero; both are only checked by assert().
+ */
+JEMALLOC_ATTR(nonnull(1))
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
+{
+	void *p;
+	size_t usize;
+	/*
+	 * The low flag bits hold lg(alignment).  Masking with SIZE_T_MAX-1
+	 * clears bit 0, so a lg value of 0 yields alignment == 0, which the
+	 * code below treats as "no explicit alignment".
+	 */
+	size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK)
+	    & (SIZE_T_MAX-1));
+	bool zero = flags & ALLOCM_ZERO;
+#ifdef JEMALLOC_PROF
+	prof_thr_cnt_t *cnt;
+#endif
+
+	assert(ptr != NULL);
+	assert(size != 0);
+
+	if (malloc_init())
+		goto OOM;
+
+	/* A zero rounded size is treated as out of memory. */
+	usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL);
+	if (usize == 0)
+		goto OOM;
+
+#ifdef JEMALLOC_PROF
+	if (opt_prof) {
+		PROF_ALLOC_PREP(1, usize, cnt);
+		if (cnt == NULL)
+			goto OOM;
+		if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <=
+		    small_maxclass) {
+			/*
+			 * Allocate the rounded size for small_maxclass+1
+			 * instead, then record the requested usize with
+			 * arena_prof_promoted().
+			 */
+			size_t usize_promoted = (alignment == 0) ?
+			    s2u(small_maxclass+1) : sa2u(small_maxclass+1,
+			    alignment, NULL);
+			assert(usize_promoted != 0);
+			p = iallocm(usize_promoted, alignment, zero);
+			if (p == NULL)
+				goto OOM;
+			arena_prof_promoted(p, usize);
+		} else {
+			p = iallocm(usize, alignment, zero);
+			if (p == NULL)
+				goto OOM;
+		}
+		prof_malloc(p, usize, cnt);
+		if (rsize != NULL)
+			*rsize = usize;
+	} else
+#endif
+	{
+		p = iallocm(usize, alignment, zero);
+		if (p == NULL)
+			goto OOM;
+		/*
+		 * Both preprocessor variants below reduce to
+		 * "if (rsize != NULL) *rsize = usize;".
+		 */
+#ifndef JEMALLOC_STATS
+		if (rsize != NULL)
+#endif
+		{
+#ifdef JEMALLOC_STATS
+			if (rsize != NULL)
+#endif
+				*rsize = usize;
+		}
+	}
+
+	*ptr = p;
+#ifdef JEMALLOC_STATS
+	assert(usize == isalloc(p));
+	ALLOCATED_ADD(usize, 0);
+#endif
+	return (ALLOCM_SUCCESS);
+OOM:
+#ifdef JEMALLOC_XMALLOC
+	if (opt_xmalloc) {
+		malloc_write("<jemalloc>: Error in allocm(): "
+		    "out of memory\n");
+		abort();
+	}
+#endif
+	*ptr = NULL;
+	return (ALLOCM_ERR_OOM);
+}
+
+/*
+ * rallocm(): resize the object at *ptr to at least size bytes, passing extra
+ * on to iralloc() as the additional amount requested beyond size.
+ *
+ * flags carries lg(alignment), ALLOCM_ZERO and ALLOCM_NO_MOVE.  On success
+ * stores the (possibly moved) object in *ptr, stores its usable size in
+ * *rsize when rsize is non-NULL, and returns ALLOCM_SUCCESS.  When iralloc()
+ * fails, returns ALLOCM_ERR_NOT_MOVED if ALLOCM_NO_MOVE was given and
+ * ALLOCM_ERR_OOM otherwise (or aborts when JEMALLOC_XMALLOC is compiled in
+ * and opt_xmalloc is set).  *ptr is not written on the failure paths.
+ */
+JEMALLOC_ATTR(nonnull(1))
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
+    int flags)
+{
+	void *p, *q;
+	size_t usize;
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+	size_t old_size;
+#endif
+	/*
+	 * Masking with SIZE_T_MAX-1 clears bit 0, so lg(alignment) == 0 yields
+	 * alignment == 0.
+	 */
+	size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK)
+	    & (SIZE_T_MAX-1));
+	bool zero = flags & ALLOCM_ZERO;
+	bool no_move = flags & ALLOCM_NO_MOVE;
+#ifdef JEMALLOC_PROF
+	prof_thr_cnt_t *cnt;
+#endif
+
+	assert(ptr != NULL);
+	assert(*ptr != NULL);
+	assert(size != 0);
+	assert(SIZE_T_MAX - size >= extra);
+	assert(malloc_initialized || malloc_initializer == pthread_self());
+
+	p = *ptr;
+#ifdef JEMALLOC_PROF
+	if (opt_prof) {
+		/*
+		 * usize isn't knowable before iralloc() returns when extra is
+		 * non-zero.  Therefore, compute its maximum possible value and
+		 * use that in PROF_ALLOC_PREP() to decide whether to capture a
+		 * backtrace.  prof_realloc() will use the actual usize to
+		 * decide whether to sample.
+		 */
+		size_t max_usize = (alignment == 0) ? s2u(size+extra) :
+		    sa2u(size+extra, alignment, NULL);
+		prof_ctx_t *old_ctx = prof_ctx_get(p);
+		old_size = isalloc(p);
+		PROF_ALLOC_PREP(1, max_usize, cnt);
+		/* A NULL cnt skips the ALLOCM_NO_MOVE check and reports OOM. */
+		if (cnt == NULL)
+			goto OOM;
+		/*
+		 * Use minimum usize to determine whether promotion may happen.
+		 */
+		if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U
+		    && ((alignment == 0) ? s2u(size) : sa2u(size,
+		    alignment, NULL)) <= small_maxclass) {
+			/*
+			 * Request small_maxclass+1 bytes and shrink extra by
+			 * the amount the base size grew, clamping at 0.
+			 */
+			q = iralloc(p, small_maxclass+1, (small_maxclass+1 >=
+			    size+extra) ? 0 : size+extra - (small_maxclass+1),
+			    alignment, zero, no_move);
+			if (q == NULL)
+				goto ERR;
+			if (max_usize < PAGE_SIZE) {
+				usize = max_usize;
+				arena_prof_promoted(q, usize);
+			} else
+				usize = isalloc(q);
+		} else {
+			q = iralloc(p, size, extra, alignment, zero, no_move);
+			if (q == NULL)
+				goto ERR;
+			usize = isalloc(q);
+		}
+		prof_realloc(q, usize, cnt, old_size, old_ctx);
+		if (rsize != NULL)
+			*rsize = usize;
+	} else
+#endif
+	{
+#ifdef JEMALLOC_STATS
+		old_size = isalloc(p);
+#endif
+		q = iralloc(p, size, extra, alignment, zero, no_move);
+		if (q == NULL)
+			goto ERR;
+		/*
+		 * With stats compiled in, usize is always computed because
+		 * ALLOCATED_ADD() below needs it; without stats it is only
+		 * computed when the caller asked for it through rsize.
+		 */
+#ifndef JEMALLOC_STATS
+		if (rsize != NULL)
+#endif
+		{
+			usize = isalloc(q);
+#ifdef JEMALLOC_STATS
+			if (rsize != NULL)
+#endif
+				*rsize = usize;
+		}
+	}
+
+	*ptr = q;
+#ifdef JEMALLOC_STATS
+	ALLOCATED_ADD(usize, old_size);
+#endif
+	return (ALLOCM_SUCCESS);
+ERR:
+	/* iralloc() failed: distinguish "could not move" from OOM. */
+	if (no_move)
+		return (ALLOCM_ERR_NOT_MOVED);
+#ifdef JEMALLOC_PROF
+OOM:
+#endif
+#ifdef JEMALLOC_XMALLOC
+	if (opt_xmalloc) {
+		malloc_write("<jemalloc>: Error in rallocm(): "
+		    "out of memory\n");
+		abort();
+	}
+#endif
+	return (ALLOCM_ERR_OOM);
+}
+
+/*
+ * sallocm(): store the usable size of the object at ptr in *rsize and return
+ * ALLOCM_SUCCESS.  flags is not consulted.  rsize must be non-NULL (checked
+ * by assert() only).
+ */
+JEMALLOC_ATTR(nonnull(1))
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags)
+{
+	size_t usable;
+
+	assert(malloc_initialized || malloc_initializer == pthread_self());
+
+	/* JEMALLOC_IVSALLOC builds use ivsalloc() and skip the NULL assert. */
+#ifdef JEMALLOC_IVSALLOC
+	usable = ivsalloc(ptr);
+#else
+	assert(ptr != NULL);
+	usable = isalloc(ptr);
+#endif
+	assert(rsize != NULL);
+	*rsize = usable;
+
+	return (ALLOCM_SUCCESS);
+}
+
+/*
+ * dallocm(): deallocate the object at ptr and return ALLOCM_SUCCESS.  ptr
+ * must be non-NULL (checked by assert() only); flags is not consulted.  The
+ * profiling and statistics hooks, when compiled in, are given the object's
+ * usable size before idalloc() releases it.
+ */
+JEMALLOC_ATTR(nonnull(1))
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(dallocm)(void *ptr, int flags)
+{
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+	size_t usize;
+#endif
+
+	assert(ptr != NULL);
+	assert(malloc_initialized || malloc_initializer == pthread_self());
+
+#ifdef JEMALLOC_STATS
+	usize = isalloc(ptr);
+#endif
+#ifdef JEMALLOC_PROF
+	if (opt_prof) {
+		/* Without stats, the size is only looked up when profiling. */
+#  ifndef JEMALLOC_STATS
+		usize = isalloc(ptr);
+#  endif
+		prof_free(ptr, usize);
+	}
+#endif
+#ifdef JEMALLOC_STATS
+	ALLOCATED_ADD(0, usize);
+#endif
+	idalloc(ptr);
+
+	return (ALLOCM_SUCCESS);
+}
+
+/*
+ * End non-standard functions.
+ */
+/******************************************************************************/
+
+/*
+ * The following functions are used by threading libraries for protection of
+ * malloc during fork().
+ */
+
+/*
+ * Pre-fork hook: take every allocator mutex so that none is held by another
+ * thread at the moment of fork().  The acquisition order is arenas_lock, each
+ * initialized arena in index order, base_mtx, huge_mtx, then dss_mtx and
+ * swap_mtx when those features are compiled in.
+ */
+void
+jemalloc_prefork(void)
+{
+	unsigned ind;
+
+	malloc_mutex_lock(&arenas_lock);
+	for (ind = 0; ind < narenas; ind++) {
+		arena_t *arena = arenas[ind];
+
+		/* Slots for arenas that were never created stay NULL. */
+		if (arena == NULL)
+			continue;
+		malloc_mutex_lock(&arena->lock);
+	}
+
+	malloc_mutex_lock(&base_mtx);
+	malloc_mutex_lock(&huge_mtx);
+#ifdef JEMALLOC_DSS
+	malloc_mutex_lock(&dss_mtx);
+#endif
+#ifdef JEMALLOC_SWAP
+	malloc_mutex_lock(&swap_mtx);
+#endif
+}
+
+/*
+ * Post-fork hook: release every mutex taken by jemalloc_prefork().  The
+ * global mutexes are dropped in the reverse of their acquisition order; the
+ * per-arena locks are then dropped in ascending index order, followed by
+ * arenas_lock.
+ */
+void
+jemalloc_postfork(void)
+{
+	unsigned ind;
+
+#ifdef JEMALLOC_SWAP
+	malloc_mutex_unlock(&swap_mtx);
+#endif
+#ifdef JEMALLOC_DSS
+	malloc_mutex_unlock(&dss_mtx);
+#endif
+	malloc_mutex_unlock(&huge_mtx);
+	malloc_mutex_unlock(&base_mtx);
+
+	for (ind = 0; ind < narenas; ind++) {
+		arena_t *arena = arenas[ind];
+
+		/* Slots for arenas that were never created stay NULL. */
+		if (arena == NULL)
+			continue;
+		malloc_mutex_unlock(&arena->lock);
+	}
+	malloc_mutex_unlock(&arenas_lock);
+}
+
+/******************************************************************************/
diff --git a/deps/jemalloc.orig/src/mb.c b/deps/jemalloc.orig/src/mb.c
new file mode 100644
index 00000000..dc2c0a25
--- /dev/null
+++ b/deps/jemalloc.orig/src/mb.c
@@ -0,0 +1,2 @@
+#define	JEMALLOC_MB_C_
+#include "jemalloc/internal/jemalloc_internal.h"
diff --git a/deps/jemalloc.orig/src/mutex.c b/deps/jemalloc.orig/src/mutex.c
new file mode 100644
index 00000000..ca89ef1c
--- /dev/null
+++ b/deps/jemalloc.orig/src/mutex.c
@@ -0,0 +1,90 @@
+#define	JEMALLOC_MUTEX_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+#ifdef JEMALLOC_LAZY_LOCK
+bool isthreaded = false;
+#endif
+
+#ifdef JEMALLOC_LAZY_LOCK
+static void	pthread_create_once(void);
+#endif
+
+/******************************************************************************/
+/*
+ * We intercept pthread_create() calls in order to toggle isthreaded if the
+ * process goes multi-threaded.
+ */
+
+#ifdef JEMALLOC_LAZY_LOCK
+static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
+    void *(*)(void *), void *__restrict);
+
+/*
+ * One-time initializer run through pthread_once() from the pthread_create()
+ * wrapper: resolve the next pthread_create in link order with dlsym() and
+ * mark the process as threaded.  Aborts if the symbol cannot be found.
+ */
+static void
+pthread_create_once(void)
+{
+
+	pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
+	if (pthread_create_fptr != NULL) {
+		isthreaded = true;
+		return;
+	}
+
+	/* Without the real pthread_create() there is nothing to forward to. */
+	malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
+	    "\"pthread_create\")\n");
+	abort();
+}
+
+/*
+ * Interposed pthread_create(): makes sure pthread_create_once() has run
+ * (which sets isthreaded), then forwards all arguments to the function it
+ * resolved and returns that function's result.
+ */
+JEMALLOC_ATTR(visibility("default"))
+int
+pthread_create(pthread_t *__restrict thread,
+    const pthread_attr_t *__restrict attr, void *(*start_routine)(void *),
+    void *__restrict arg)
+{
+	static pthread_once_t resolve_once = PTHREAD_ONCE_INIT;
+
+	pthread_once(&resolve_once, pthread_create_once);
+
+	return ((*pthread_create_fptr)(thread, attr, start_routine, arg));
+}
+#endif
+
+/******************************************************************************/
+
+/*
+ * Initialize *mutex.  Returns false on success and true on failure.
+ *
+ * JEMALLOC_OSSPIN builds simply zero the lock word.  Other builds create a
+ * pthread mutex, using PTHREAD_MUTEX_ADAPTIVE_NP when the platform defines it
+ * and PTHREAD_MUTEX_DEFAULT otherwise.
+ */
+bool
+malloc_mutex_init(malloc_mutex_t *mutex)
+{
+#ifdef JEMALLOC_OSSPIN
+	*mutex = 0;
+#else
+	pthread_mutexattr_t attr;
+	int init_err;
+
+	if (pthread_mutexattr_init(&attr) != 0)
+		return (true);
+#ifdef PTHREAD_MUTEX_ADAPTIVE_NP
+	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+#else
+	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT);
+#endif
+	/* The attribute object is destroyed whether or not init succeeded. */
+	init_err = pthread_mutex_init(mutex, &attr);
+	pthread_mutexattr_destroy(&attr);
+	if (init_err != 0)
+		return (true);
+
+#endif
+	return (false);
+}
+
+/*
+ * Destroy *mutex.  A no-op in JEMALLOC_OSSPIN builds; otherwise a failing
+ * pthread_mutex_destroy() is reported and the process aborts.
+ */
+void
+malloc_mutex_destroy(malloc_mutex_t *mutex)
+{
+
+#ifndef JEMALLOC_OSSPIN
+	if (pthread_mutex_destroy(mutex) == 0)
+		return;
+
+	malloc_write("<jemalloc>: Error in pthread_mutex_destroy()\n");
+	abort();
+#endif
+}
diff --git a/deps/jemalloc.orig/src/prof.c b/deps/jemalloc.orig/src/prof.c
new file mode 100644
index 00000000..8a144b4e
--- /dev/null
+++ b/deps/jemalloc.orig/src/prof.c
@@ -0,0 +1,1244 @@
+#define	JEMALLOC_PROF_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+#ifdef JEMALLOC_PROF
+/******************************************************************************/
+
+#ifdef JEMALLOC_PROF_LIBUNWIND
+#define	UNW_LOCAL_ONLY
+#include <libunwind.h>
+#endif
+
+#ifdef JEMALLOC_PROF_LIBGCC
+#include <unwind.h>
+#endif
+
+/******************************************************************************/
+/* Data. */
+
+bool		opt_prof = false;
+bool		opt_prof_active = true;
+size_t		opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT;
+size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
+ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
+bool		opt_prof_gdump = false;
+bool		opt_prof_leak = false;
+bool		opt_prof_accum = true;
+ssize_t		opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT;
+char		opt_prof_prefix[PATH_MAX + 1];
+
+uint64_t	prof_interval;
+bool		prof_promote;
+
+unsigned	prof_bt_max;
+
+#ifndef NO_TLS
+__thread prof_tdata_t	*prof_tdata_tls
+    JEMALLOC_ATTR(tls_model("initial-exec"));
+#endif
+pthread_key_t	prof_tdata_tsd;
+
+/*
+ * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
+ * structure that knows about all backtraces currently captured.
+ */
+static ckh_t		bt2ctx;
+static malloc_mutex_t	bt2ctx_mtx;
+
+static malloc_mutex_t	prof_dump_seq_mtx;
+static uint64_t		prof_dump_seq;
+static uint64_t		prof_dump_iseq;
+static uint64_t		prof_dump_mseq;
+static uint64_t		prof_dump_useq;
+
+/*
+ * This buffer is rather large for stack allocation, so use a single buffer for
+ * all profile dumps.  The buffer is implicitly protected by bt2ctx_mtx, since
+ * it must be locked anyway during dumping.
+ */
+static char		prof_dump_buf[PROF_DUMP_BUF_SIZE];
+static unsigned		prof_dump_buf_end;
+static int		prof_dump_fd;
+
+/* Do not dump any profiles until bootstrapping is complete. */
+static bool		prof_booted = false;
+
+static malloc_mutex_t	enq_mtx;
+static bool		enq;
+static bool		enq_idump;
+static bool		enq_gdump;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static prof_bt_t	*bt_dup(prof_bt_t *bt);
+static void	bt_destroy(prof_bt_t *bt);
+#ifdef JEMALLOC_PROF_LIBGCC
+static _Unwind_Reason_Code	prof_unwind_init_callback(
+    struct _Unwind_Context *context, void *arg);
+static _Unwind_Reason_Code	prof_unwind_callback(
+    struct _Unwind_Context *context, void *arg);
+#endif
+static bool	prof_flush(bool propagate_err);
+static bool	prof_write(const char *s, bool propagate_err);
+static void	prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
+    size_t *leak_nctx);
+static void	prof_ctx_destroy(prof_ctx_t *ctx);
+static void	prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt);
+static bool	prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt,
+    bool propagate_err);
+static bool	prof_dump_maps(bool propagate_err);
+static bool	prof_dump(const char *filename, bool leakcheck,
+    bool propagate_err);
+static void	prof_dump_filename(char *filename, char v, int64_t vseq);
+static void	prof_fdump(void);
+static void	prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
+    size_t *hash2);
+static bool	prof_bt_keycomp(const void *k1, const void *k2);
+static void	prof_tdata_cleanup(void *arg);
+
+/******************************************************************************/
+
+/*
+ * Prepare bt as an empty backtrace that will record frames into the
+ * caller-provided array vec.
+ */
+void
+bt_init(prof_bt_t *bt, void **vec)
+{
+
+	/* No frames recorded yet; storage belongs to the caller. */
+	bt->len = 0;
+	bt->vec = vec;
+}
+
+/*
+ * Release a backtrace object with idalloc().  Objects produced by bt_dup()
+ * keep the prof_bt_t header and its vec array in a single allocation, so one
+ * deallocation frees both.
+ */
+static void
+bt_destroy(prof_bt_t *bt)
+{
+
+	idalloc(bt);
+}
+
+/*
+ * Make a heap copy of bt.  Returns the copy, or NULL if imalloc() fails.
+ *
+ * The copy is one contiguous allocation: the prof_bt_t header padded to a
+ * quantum boundary, immediately followed by the frame array.  The source
+ * backtraces live in stack temporaries, so the length is fixed at copy time,
+ * and keeping header and frames together improves cache locality.
+ */
+static prof_bt_t *
+bt_dup(prof_bt_t *bt)
+{
+	prof_bt_t *copy;
+	size_t hdr_size = QUANTUM_CEILING(sizeof(prof_bt_t));
+	size_t vec_size = bt->len * sizeof(void *);
+
+	copy = (prof_bt_t *)imalloc(hdr_size + vec_size);
+	if (copy == NULL)
+		return (NULL);
+
+	/* The frame array starts right after the padded header. */
+	copy->vec = (void **)((uintptr_t)copy + hdr_size);
+	memcpy(copy->vec, bt->vec, vec_size);
+	copy->len = bt->len;
+
+	return (copy);
+}
+
+/*
+ * Enter the section protected by bt2ctx_mtx.  The enq flag is raised first,
+ * under enq_mtx, and only then is bt2ctx_mtx acquired; prof_leave() undoes
+ * both.
+ * NOTE(review): presumably the dump entry points check enq and record a
+ * deferred request instead of dumping while it is set -- confirm against
+ * prof_idump()/prof_gdump().
+ */
+static inline void
+prof_enter(void)
+{
+
+	malloc_mutex_lock(&enq_mtx);
+	enq = true;
+	malloc_mutex_unlock(&enq_mtx);
+
+	malloc_mutex_lock(&bt2ctx_mtx);
+}
+
+/*
+ * Leave the section entered with prof_enter(): release bt2ctx_mtx, clear
+ * enq, and then perform any interval/growth dumps whose request flags
+ * (enq_idump/enq_gdump) were found set.
+ */
+static inline void
+prof_leave(void)
+{
+	bool idump, gdump;
+
+	malloc_mutex_unlock(&bt2ctx_mtx);
+
+	/* Snapshot and reset the pending-dump flags under enq_mtx. */
+	malloc_mutex_lock(&enq_mtx);
+	enq = false;
+	idump = enq_idump;
+	enq_idump = false;
+	gdump = enq_gdump;
+	enq_gdump = false;
+	malloc_mutex_unlock(&enq_mtx);
+
+	/* The dumps themselves run after both mutexes have been released. */
+	if (idump)
+		prof_idump();
+	if (gdump)
+		prof_gdump();
+}
+
+#ifdef JEMALLOC_PROF_LIBUNWIND
+/*
+ * libunwind-based backtrace capture.  Skips nignore+1 frames, then stores up
+ * to max instruction pointers in bt->vec and counts them in bt->len.  bt must
+ * be empty on entry.  If the stack ends while frames are still being skipped,
+ * bt is left empty.
+ */
+void
+prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
+{
+	unw_context_t uc;
+	unw_cursor_t cursor;
+	unsigned frame;
+
+	assert(bt->len == 0);
+	assert(bt->vec != NULL);
+	assert(max <= (1U << opt_lg_prof_bt_max));
+
+	unw_getcontext(&uc);
+	unw_init_local(&cursor, &uc);
+
+	/* Discard (nignore+1) stack frames, if that many exist. */
+	for (frame = 0; frame < nignore + 1; frame++) {
+		if (unw_step(&cursor) <= 0)
+			return;
+	}
+
+	/* Record frames until the stack or the space in bt runs out. */
+	for (frame = 0; frame < max; frame++) {
+		unw_get_reg(&cursor, UNW_REG_IP,
+		    (unw_word_t *)&bt->vec[frame]);
+		bt->len++;
+		if (unw_step(&cursor) <= 0)
+			break;
+	}
+}
+#endif
+#ifdef JEMALLOC_PROF_LIBGCC
+/*
+ * Unwind callback that records nothing: both arguments are ignored and
+ * _URC_NO_REASON is returned unconditionally.
+ */
+static _Unwind_Reason_Code
+prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
+{
+
+	return (_URC_NO_REASON);
+}
+
+/*
+ * Per-frame callback for _Unwind_Backtrace().  arg points to a
+ * prof_unwind_data_t.  The first nignore frames are only counted down; each
+ * later frame's instruction pointer is appended to the backtrace.  Returns
+ * _URC_END_OF_STACK once max frames have been stored and _URC_NO_REASON
+ * otherwise.
+ */
+static _Unwind_Reason_Code
+prof_unwind_callback(struct _Unwind_Context *context, void *arg)
+{
+	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
+
+	/* Still inside the frames the caller asked to skip. */
+	if (data->nignore > 0) {
+		data->nignore--;
+		return (_URC_NO_REASON);
+	}
+
+	data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
+	data->bt->len++;
+	if (data->bt->len == data->max)
+		return (_URC_END_OF_STACK);
+
+	return (_URC_NO_REASON);
+}
+
+/*
+ * libgcc-based backtrace capture.  Packs bt, nignore and max into a
+ * prof_unwind_data_t and lets _Unwind_Backtrace() call
+ * prof_unwind_callback() for each frame; the return value of
+ * _Unwind_Backtrace() is not examined.
+ */
+void
+prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
+{
+	prof_unwind_data_t data = {bt, nignore, max};
+
+	_Unwind_Backtrace(prof_unwind_callback, &data);
+}
+#endif
+#ifdef JEMALLOC_PROF_GCC
+/*
+ * Backtrace capture built on the GCC builtins __builtin_frame_address() and
+ * __builtin_return_address().  Skips the first nignore frames and stores up
+ * to max return addresses in bt->vec, updating bt->len after each one.
+ *
+ * The frame walk is written as one BT_FRAME() expansion per frame index
+ * rather than as a loop.  NOTE(review): this is presumably because the
+ * builtins require a constant argument -- confirm against the GCC manual.
+ * 128 expansions cover the frames themselves and 3 more cover the largest
+ * permitted nignore.
+ */
+void
+prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
+{
+	/*
+	 * BT_FRAME(i): return from prof_backtrace() if frame i lies beyond
+	 * nignore+max, has a zero frame address, or has a NULL return
+	 * address; otherwise store the return address once i >= nignore.
+	 */
+#define	BT_FRAME(i)							\
+	if ((i) < nignore + max) {					\
+		void *p;						\
+		if (__builtin_frame_address(i) == 0)			\
+			return;						\
+		p = __builtin_return_address(i);			\
+		if (p == NULL)						\
+			return;						\
+		if (i >= nignore) {					\
+			bt->vec[(i) - nignore] = p;			\
+			bt->len = (i) - nignore + 1;			\
+		}							\
+	} else								\
+		return;
+
+	assert(nignore <= 3);
+	assert(max <= (1U << opt_lg_prof_bt_max));
+
+	BT_FRAME(0)
+	BT_FRAME(1)
+	BT_FRAME(2)
+	BT_FRAME(3)
+	BT_FRAME(4)
+	BT_FRAME(5)
+	BT_FRAME(6)
+	BT_FRAME(7)
+	BT_FRAME(8)
+	BT_FRAME(9)
+
+	BT_FRAME(10)
+	BT_FRAME(11)
+	BT_FRAME(12)
+	BT_FRAME(13)
+	BT_FRAME(14)
+	BT_FRAME(15)
+	BT_FRAME(16)
+	BT_FRAME(17)
+	BT_FRAME(18)
+	BT_FRAME(19)
+
+	BT_FRAME(20)
+	BT_FRAME(21)
+	BT_FRAME(22)
+	BT_FRAME(23)
+	BT_FRAME(24)
+	BT_FRAME(25)
+	BT_FRAME(26)
+	BT_FRAME(27)
+	BT_FRAME(28)
+	BT_FRAME(29)
+
+	BT_FRAME(30)
+	BT_FRAME(31)
+	BT_FRAME(32)
+	BT_FRAME(33)
+	BT_FRAME(34)
+	BT_FRAME(35)
+	BT_FRAME(36)
+	BT_FRAME(37)
+	BT_FRAME(38)
+	BT_FRAME(39)
+
+	BT_FRAME(40)
+	BT_FRAME(41)
+	BT_FRAME(42)
+	BT_FRAME(43)
+	BT_FRAME(44)
+	BT_FRAME(45)
+	BT_FRAME(46)
+	BT_FRAME(47)
+	BT_FRAME(48)
+	BT_FRAME(49)
+
+	BT_FRAME(50)
+	BT_FRAME(51)
+	BT_FRAME(52)
+	BT_FRAME(53)
+	BT_FRAME(54)
+	BT_FRAME(55)
+	BT_FRAME(56)
+	BT_FRAME(57)
+	BT_FRAME(58)
+	BT_FRAME(59)
+
+	BT_FRAME(60)
+	BT_FRAME(61)
+	BT_FRAME(62)
+	BT_FRAME(63)
+	BT_FRAME(64)
+	BT_FRAME(65)
+	BT_FRAME(66)
+	BT_FRAME(67)
+	BT_FRAME(68)
+	BT_FRAME(69)
+
+	BT_FRAME(70)
+	BT_FRAME(71)
+	BT_FRAME(72)
+	BT_FRAME(73)
+	BT_FRAME(74)
+	BT_FRAME(75)
+	BT_FRAME(76)
+	BT_FRAME(77)
+	BT_FRAME(78)
+	BT_FRAME(79)
+
+	BT_FRAME(80)
+	BT_FRAME(81)
+	BT_FRAME(82)
+	BT_FRAME(83)
+	BT_FRAME(84)
+	BT_FRAME(85)
+	BT_FRAME(86)
+	BT_FRAME(87)
+	BT_FRAME(88)
+	BT_FRAME(89)
+
+	BT_FRAME(90)
+	BT_FRAME(91)
+	BT_FRAME(92)
+	BT_FRAME(93)
+	BT_FRAME(94)
+	BT_FRAME(95)
+	BT_FRAME(96)
+	BT_FRAME(97)
+	BT_FRAME(98)
+	BT_FRAME(99)
+
+	BT_FRAME(100)
+	BT_FRAME(101)
+	BT_FRAME(102)
+	BT_FRAME(103)
+	BT_FRAME(104)
+	BT_FRAME(105)
+	BT_FRAME(106)
+	BT_FRAME(107)
+	BT_FRAME(108)
+	BT_FRAME(109)
+
+	BT_FRAME(110)
+	BT_FRAME(111)
+	BT_FRAME(112)
+	BT_FRAME(113)
+	BT_FRAME(114)
+	BT_FRAME(115)
+	BT_FRAME(116)
+	BT_FRAME(117)
+	BT_FRAME(118)
+	BT_FRAME(119)
+
+	BT_FRAME(120)
+	BT_FRAME(121)
+	BT_FRAME(122)
+	BT_FRAME(123)
+	BT_FRAME(124)
+	BT_FRAME(125)
+	BT_FRAME(126)
+	BT_FRAME(127)
+
+	/* Extras to compensate for nignore. */
+	BT_FRAME(128)
+	BT_FRAME(129)
+	BT_FRAME(130)
+#undef BT_FRAME
+}
+#endif
+
+prof_thr_cnt_t *
+prof_lookup(prof_bt_t *bt)
+{
+	union {
+		prof_thr_cnt_t	*p;
+		void		*v;
+	} ret;
+	prof_tdata_t *prof_tdata;
+	/* Find this thread's counter for bt, creating it on a miss. */
+	prof_tdata = PROF_TCACHE_GET();
+	if (prof_tdata == NULL) {
+		prof_tdata = prof_tdata_init();
+		if (prof_tdata == NULL)
+			return (NULL);
+	}
+
+	if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
+		union {
+			prof_bt_t	*p;
+			void		*v;
+		} btkey;
+		union {
+			prof_ctx_t	*p;
+			void		*v;
+		} ctx;
+		bool new_ctx;
+		/* NOTE(review): failures undo curobjs++ only when new_ctx. */
+		/*
+		 * This thread's cache lacks bt.  Look for it in the global
+		 * cache.
+		 */
+		prof_enter();
+		if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
+			/* bt has never been seen before.  Insert it. */
+			ctx.v = imalloc(sizeof(prof_ctx_t));
+			if (ctx.v == NULL) {
+				prof_leave();
+				return (NULL);
+			}
+			btkey.p = bt_dup(bt);
+			if (btkey.v == NULL) {
+				prof_leave();
+				idalloc(ctx.v);
+				return (NULL);
+			}
+			ctx.p->bt = btkey.p;
+			if (malloc_mutex_init(&ctx.p->lock)) {
+				prof_leave();
+				idalloc(btkey.v);
+				idalloc(ctx.v);
+				return (NULL);
+			}
+			memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t));
+			ql_new(&ctx.p->cnts_ql);
+			if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
+				/* OOM. */
+				prof_leave();
+				malloc_mutex_destroy(&ctx.p->lock);
+				idalloc(btkey.v);
+				idalloc(ctx.v);
+				return (NULL);
+			}
+			/*
+			 * Artificially raise curobjs, in order to avoid a race
+			 * condition with prof_ctx_merge()/prof_ctx_destroy().
+			 *
+			 * No locking is necessary for ctx here because no other
+			 * threads have had the opportunity to fetch it from
+			 * bt2ctx yet.
+			 */
+			ctx.p->cnt_merged.curobjs++;
+			new_ctx = true;
+		} else {
+			/*
+			 * Artificially raise curobjs, in order to avoid a race
+			 * condition with prof_ctx_merge()/prof_ctx_destroy().
+			 */
+			malloc_mutex_lock(&ctx.p->lock);
+			ctx.p->cnt_merged.curobjs++;
+			malloc_mutex_unlock(&ctx.p->lock);
+			new_ctx = false;
+		}
+		prof_leave();
+
+		/* Link a prof_thr_cnt_t into ctx for this thread. */
+		if (opt_lg_prof_tcmax >= 0 && ckh_count(&prof_tdata->bt2cnt)
+		    == (ZU(1) << opt_lg_prof_tcmax)) {
+			assert(ckh_count(&prof_tdata->bt2cnt) > 0);
+			/*
+			 * Flush the least recently used cnt in order to keep
+			 * bt2cnt from becoming too large.
+			 */
+			ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
+			assert(ret.v != NULL);
+			if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
+			    NULL, NULL))
+				assert(false);
+			ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
+			prof_ctx_merge(ret.p->ctx, ret.p);
+			/* ret can now be re-used. */
+		} else {
+			assert(opt_lg_prof_tcmax < 0 ||
+			    ckh_count(&prof_tdata->bt2cnt) < (ZU(1) <<
+			    opt_lg_prof_tcmax));
+			/* Allocate and partially initialize a new cnt. */
+			ret.v = imalloc(sizeof(prof_thr_cnt_t));
+			if (ret.p == NULL) {
+				if (new_ctx)
+					prof_ctx_destroy(ctx.p);
+				return (NULL);
+			}
+			ql_elm_new(ret.p, cnts_link);
+			ql_elm_new(ret.p, lru_link);
+		}
+		/* Finish initializing ret. */
+		ret.p->ctx = ctx.p;
+		ret.p->epoch = 0;
+		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
+		if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
+			if (new_ctx)
+				prof_ctx_destroy(ctx.p);
+			idalloc(ret.v);
+			return (NULL);
+		}
+		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
+		malloc_mutex_lock(&ctx.p->lock);
+		ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
+		ctx.p->cnt_merged.curobjs--;
+		malloc_mutex_unlock(&ctx.p->lock);
+	} else {
+		/* Move ret to the front of the LRU. */
+		ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
+		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
+	}
+
+	return (ret.p);
+}
+
+static bool
+prof_flush(bool propagate_err)
+{
+	bool failed;
+
+	/* Hand the buffered bytes to the dump file in one write() call. */
+	failed = (write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end) == -1);
+	if (failed && propagate_err == false) {
+		/* Nobody upstream will report the error, so do it here. */
+		malloc_write("<jemalloc>: write() failed during heap "
+		    "profile flush\n");
+		if (opt_abort)
+			abort();
+	}
+
+	/* The buffer is considered drained whether or not write() worked. */
+	prof_dump_buf_end = 0;
+
+	return (failed);
+}
+
+static bool
+prof_write(const char *s, bool propagate_err)
+{
+	unsigned i, slen, n;
+
+	/* Append s to prof_dump_buf, flushing whenever the buffer fills. */
+	i = 0;
+	slen = strlen(s);
+	while (i < slen) {
+		/* Flush the buffer if it is full. */
+		if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE)
+			if (prof_flush(propagate_err) && propagate_err)
+				return (true);
+
+		/* Size the copy by what is left of s, not by all of s. */
+		n = slen - i;
+		if (n > PROF_DUMP_BUF_SIZE - prof_dump_buf_end) {
+			/* Write as much of s as will fit. */
+			n = PROF_DUMP_BUF_SIZE - prof_dump_buf_end;
+		}
+		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
+		prof_dump_buf_end += n;
+		i += n;
+	}
+
+	return (false);
+}
+
+static void
+prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
+{
+	prof_thr_cnt_t *thr_cnt;
+	prof_cnt_t tcnt;
+
+	malloc_mutex_lock(&ctx->lock);
+	/* Start from the merged totals, then add each thread's counters. */
+	memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
+	ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
+		volatile unsigned *epoch = &thr_cnt->epoch;
+		/* Retry until cnts is copied while epoch is even and stable. */
+		while (true) {
+			unsigned epoch0 = *epoch;
+
+			/* Make sure epoch is even. */
+			if (epoch0 & 1U)
+				continue;
+
+			memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));
+
+			/* Terminate if epoch didn't change while reading. */
+			if (*epoch == epoch0)
+				break;
+		}
+
+		ctx->cnt_summed.curobjs += tcnt.curobjs;
+		ctx->cnt_summed.curbytes += tcnt.curbytes;
+		if (opt_prof_accum) {
+			ctx->cnt_summed.accumobjs += tcnt.accumobjs;
+			ctx->cnt_summed.accumbytes += tcnt.accumbytes;
+		}
+	}
+
+	if (ctx->cnt_summed.curobjs != 0)
+		(*leak_nctx)++;
+
+	/* Add to cnt_all. */
+	cnt_all->curobjs += ctx->cnt_summed.curobjs;
+	cnt_all->curbytes += ctx->cnt_summed.curbytes;
+	if (opt_prof_accum) {
+		cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
+		cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
+	}
+
+	malloc_mutex_unlock(&ctx->lock);
+}
+
+static void
+prof_ctx_destroy(prof_ctx_t *ctx)
+{
+
+	/*
+	 * Check that ctx is still unused by any thread cache before destroying
+	 * it.  prof_lookup() artificially raises ctx->cnt_merged.curobjs in
+	 * order to avoid a race condition with this function, as does
+	 * prof_ctx_merge() in order to avoid a race between the main body of
+	 * prof_ctx_merge() and entry into this function.
+	 */
+	prof_enter();
+	malloc_mutex_lock(&ctx->lock);
+	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) {
+		assert(ctx->cnt_merged.curbytes == 0);
+		assert(ctx->cnt_merged.accumobjs == 0);
+		assert(ctx->cnt_merged.accumbytes == 0);
+		/* Remove ctx from bt2ctx. */
+		if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
+			assert(false);
+		prof_leave();
+		/* Destroy ctx. */
+		malloc_mutex_unlock(&ctx->lock);
+		bt_destroy(ctx->bt);
+		malloc_mutex_destroy(&ctx->lock);
+		idalloc(ctx);
+	} else {
+		/*
+		 * Compensate for increment in prof_ctx_merge() or
+		 * prof_lookup().
+		 */
+		ctx->cnt_merged.curobjs--;
+		malloc_mutex_unlock(&ctx->lock);
+		prof_leave();
+	}
+}
+
+static void
+prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
+{
+	bool destroy;
+
+	/* Merge cnt stats and detach from ctx; ctx may then be destroyed. */
+	malloc_mutex_lock(&ctx->lock);
+	ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
+	ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
+	ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
+	ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
+	ql_remove(&ctx->cnts_ql, cnt, cnts_link);
+	if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
+	    ctx->cnt_merged.curobjs == 0) {
+		/*
+		 * Artificially raise ctx->cnt_merged.curobjs in order to keep
+		 * another thread from winning the race to destroy ctx while
+		 * this one has ctx->lock dropped.  Without this, it would be
+		 * possible for another thread to:
+		 *
+		 * 1) Sample an allocation associated with ctx.
+		 * 2) Deallocate the sampled object.
+		 * 3) Successfully prof_ctx_destroy(ctx).
+		 *
+		 * The result would be that ctx no longer exists by the time
+		 * this thread accesses it in prof_ctx_destroy().
+		 */
+		ctx->cnt_merged.curobjs++;
+		destroy = true;
+	} else
+		destroy = false;
+	malloc_mutex_unlock(&ctx->lock);
+	if (destroy)
+		prof_ctx_destroy(ctx);
+}
+
+static bool
+prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err)
+{
+	const prof_cnt_t *sum = &ctx->cnt_summed;
+	char buf[UMAX2S_BUFSIZE];
+	unsigned frame;
+
+	/* Without accumulated stats, a ctx with no live objects is noise. */
+	if (opt_prof_accum == false && sum->curobjs == 0) {
+		assert(ctx->cnt_summed.curbytes == 0);
+		assert(ctx->cnt_summed.accumobjs == 0);
+		assert(ctx->cnt_summed.accumbytes == 0);
+		return (false);
+	}
+
+	/* "<curobjs>: <curbytes> [<accumobjs>: <accumbytes>] @" */
+	if (prof_write(u2s(sum->curobjs, 10, buf), propagate_err)
+	    || prof_write(": ", propagate_err)
+	    || prof_write(u2s(sum->curbytes, 10, buf), propagate_err)
+	    || prof_write(" [", propagate_err)
+	    || prof_write(u2s(sum->accumobjs, 10, buf), propagate_err)
+	    || prof_write(": ", propagate_err)
+	    || prof_write(u2s(sum->accumbytes, 10, buf), propagate_err)
+	    || prof_write("] @", propagate_err))
+		return (true);
+
+	/* One " 0x<addr>" token per recorded frame. */
+	frame = 0;
+	while (frame < bt->len) {
+		if (prof_write(" 0x", propagate_err)
+		    || prof_write(u2s((uintptr_t)bt->vec[frame], 16, buf),
+		    propagate_err))
+			return (true);
+		frame++;
+	}
+
+	return (prof_write("\n", propagate_err));
+}
+
+static bool
+prof_dump_maps(bool propagate_err)
+{
+	int mfd;
+	char buf[UMAX2S_BUFSIZE];
+	char *s;
+	unsigned i, slen;
+	/*         /proc/<pid>/maps\0 */
+	char mpath[6     + UMAX2S_BUFSIZE
+			      + 5  + 1];
+	ssize_t nread;
+
+	/* Append /proc/<pid>/maps to the dump; true == propagated failure. */
+	i = 0;
+	s = "/proc/";
+	slen = strlen(s);
+	memcpy(&mpath[i], s, slen);
+	i += slen;
+	s = u2s(getpid(), 10, buf);
+	slen = strlen(s);
+	memcpy(&mpath[i], s, slen);
+	i += slen;
+	s = "/maps";
+	slen = strlen(s);
+	memcpy(&mpath[i], s, slen);
+	i += slen;
+	mpath[i] = '\0';
+
+	/* The maps section is optional: without the file, emit nothing. */
+	mfd = open(mpath, O_RDONLY);
+	if (mfd == -1)
+		return (false);
+
+	if (prof_write("\nMAPPED_LIBRARIES:\n", propagate_err) &&
+	    propagate_err)
+		goto ERROR;
+	nread = 0;
+	do {
+		prof_dump_buf_end += nread;
+		if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) {
+			/* Make space in prof_dump_buf before read(). */
+			if (prof_flush(propagate_err) && propagate_err)
+				goto ERROR;
+		}
+		nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
+		    PROF_DUMP_BUF_SIZE - prof_dump_buf_end);
+	} while (nread > 0);
+	close(mfd);
+	return (false);
+ERROR:
+	/* Do not leak the descriptor when a write failure is propagated. */
+	close(mfd);
+	return (true);
+}
+
+static bool
+prof_dump(const char *filename, bool leakcheck, bool propagate_err)
+{
+	prof_cnt_t cnt_all;
+	size_t tabind;
+	union {
+		prof_bt_t	*p;
+		void		*v;
+	} bt;
+	union {
+		prof_ctx_t	*p;
+		void		*v;
+	} ctx;
+	char buf[UMAX2S_BUFSIZE];
+	size_t leak_nctx;
+
+	/* Write a heap profile to filename; returns true if the dump failed. */
+	prof_enter();
+	prof_dump_fd = creat(filename, 0644);
+	if (prof_dump_fd == -1) {
+		if (propagate_err == false) {
+			malloc_write("<jemalloc>: creat(\"");
+			malloc_write(filename);
+			malloc_write("\", 0644) failed\n");
+			if (opt_abort)
+				abort();
+		}
+		goto ERROR;
+	}
+
+	/* Merge per thread profile stats, and sum them in cnt_all. */
+	memset(&cnt_all, 0, sizeof(prof_cnt_t));
+	leak_nctx = 0;
+	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
+		prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx);
+
+	/* Dump profile header. */
+	if (prof_write("heap profile: ", propagate_err)
+	    || prof_write(u2s(cnt_all.curobjs, 10, buf), propagate_err)
+	    || prof_write(": ", propagate_err)
+	    || prof_write(u2s(cnt_all.curbytes, 10, buf), propagate_err)
+	    || prof_write(" [", propagate_err)
+	    || prof_write(u2s(cnt_all.accumobjs, 10, buf), propagate_err)
+	    || prof_write(": ", propagate_err)
+	    || prof_write(u2s(cnt_all.accumbytes, 10, buf), propagate_err))
+		goto ERROR;
+
+	if (opt_lg_prof_sample == 0) {
+		if (prof_write("] @ heapprofile\n", propagate_err))
+			goto ERROR;
+	} else if (prof_write("] @ heap_v2/", propagate_err)
+	    || prof_write(u2s((uint64_t)1U << opt_lg_prof_sample, 10, buf),
+	    propagate_err)
+	    || prof_write("\n", propagate_err))
+		goto ERROR;
+
+	/* Dump per ctx profile stats. */
+	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v)
+	    == false;) {
+		if (prof_dump_ctx(ctx.p, bt.p, propagate_err))
+			goto ERROR;
+	}
+
+	/* Dump /proc/<pid>/maps if possible, then flush what is buffered. */
+	if (prof_dump_maps(propagate_err) || prof_flush(propagate_err))
+		goto ERROR;
+	close(prof_dump_fd);
+	prof_leave();
+
+	if (leakcheck && cnt_all.curbytes != 0) {
+		malloc_write("<jemalloc>: Leak summary: ");
+		malloc_write(u2s(cnt_all.curbytes, 10, buf));
+		malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, ");
+		malloc_write(u2s(cnt_all.curobjs, 10, buf));
+		malloc_write((cnt_all.curobjs != 1) ? " objects, " :
+		    " object, ");
+		malloc_write(u2s(leak_nctx, 10, buf));
+		malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n");
+		malloc_write("<jemalloc>: Run pprof on \"");
+		malloc_write(filename);
+		malloc_write("\" for leak detail\n");
+	}
+
+	return (false);
+ERROR:
+	/* Past a successful creat(), release the fd and drop stale bytes. */
+	if (prof_dump_fd != -1)
+		close(prof_dump_fd);
+	prof_dump_buf_end = 0;
+	prof_leave();
+	return (true);
+}
+
+#define	DUMP_FILENAME_BUFSIZE	(PATH_MAX+ UMAX2S_BUFSIZE		\
+					       + 1			\
+						+ UMAX2S_BUFSIZE	\
+						     + 2		\
+						       + UMAX2S_BUFSIZE	\
+						             + 5  + 1)
+static void
+prof_dump_filename(char *filename, char v, int64_t vseq)
+{
+	char buf[UMAX2S_BUFSIZE];
+	char *s;
+	unsigned i, slen;
+
+	/*
+	 * Construct a filename of the form:
+	 *
+	 *   <prefix>.<pid>.<seq>.v<vseq>.heap\0
+	 */
+	/* v is the dump-type letter; an all-ones vseq omits <vseq>. */
+	i = 0;
+
+	s = opt_prof_prefix;
+	slen = strlen(s);
+	memcpy(&filename[i], s, slen);
+	i += slen;
+
+	s = ".";
+	slen = strlen(s);
+	memcpy(&filename[i], s, slen);
+	i += slen;
+
+	s = u2s(getpid(), 10, buf);
+	slen = strlen(s);
+	memcpy(&filename[i], s, slen);
+	i += slen;
+
+	s = ".";
+	slen = strlen(s);
+	memcpy(&filename[i], s, slen);
+	i += slen;
+	/* The global prof_dump_seq counter advances on every call. */
+	s = u2s(prof_dump_seq, 10, buf);
+	prof_dump_seq++;
+	slen = strlen(s);
+	memcpy(&filename[i], s, slen);
+	i += slen;
+
+	s = ".";
+	slen = strlen(s);
+	memcpy(&filename[i], s, slen);
+	i += slen;
+
+	filename[i] = v;
+	i++;
+
+	if (vseq != 0xffffffffffffffffLLU) {
+		s = u2s(vseq, 10, buf);
+		slen = strlen(s);
+		memcpy(&filename[i], s, slen);
+		i += slen;
+	}
+
+	s = ".heap";
+	slen = strlen(s);
+	memcpy(&filename[i], s, slen);
+	i += slen;
+
+	filename[i] = '\0';
+}
+
+static void
+prof_fdump(void)
+{
+	char filename[DUMP_FILENAME_BUFSIZE];
+	/* Exit-time dump ('f'); prof_boot2() registers it with atexit(). */
+	if (prof_booted == false)
+		return;
+
+	if (opt_prof_prefix[0] != '\0') {
+		malloc_mutex_lock(&prof_dump_seq_mtx);
+		prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU);
+		malloc_mutex_unlock(&prof_dump_seq_mtx);
+		prof_dump(filename, opt_prof_leak, false);
+	}
+}
+
+void
+prof_idump(void)
+{
+	char filename[DUMP_FILENAME_BUFSIZE];
+	/* If enq is set, only note the request in enq_idump and return. */
+	if (prof_booted == false)
+		return;
+	malloc_mutex_lock(&enq_mtx);
+	if (enq) {
+		enq_idump = true;
+		malloc_mutex_unlock(&enq_mtx);
+		return;
+	}
+	malloc_mutex_unlock(&enq_mtx);
+
+	if (opt_prof_prefix[0] != '\0') {
+		malloc_mutex_lock(&prof_dump_seq_mtx);
+		prof_dump_filename(filename, 'i', prof_dump_iseq);
+		prof_dump_iseq++;
+		malloc_mutex_unlock(&prof_dump_seq_mtx);
+		prof_dump(filename, false, false);
+	}
+}
+
+bool
+prof_mdump(const char *filename)
+{
+	char filename_buf[DUMP_FILENAME_BUFSIZE];
+	/* Dump to filename, or to a generated 'm' name when it is NULL. */
+	if (opt_prof == false || prof_booted == false)
+		return (true);
+
+	if (filename == NULL) {
+		/* No filename specified, so automatically generate one. */
+		if (opt_prof_prefix[0] == '\0')
+			return (true);
+		malloc_mutex_lock(&prof_dump_seq_mtx);
+		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
+		prof_dump_mseq++;
+		malloc_mutex_unlock(&prof_dump_seq_mtx);
+		filename = filename_buf;
+	}
+	return (prof_dump(filename, false, true));
+}
+
+void
+prof_gdump(void)
+{
+	char filename[DUMP_FILENAME_BUFSIZE];
+	/* If enq is set, only note the request in enq_gdump and return. */
+	if (prof_booted == false)
+		return;
+	malloc_mutex_lock(&enq_mtx);
+	if (enq) {
+		enq_gdump = true;
+		malloc_mutex_unlock(&enq_mtx);
+		return;
+	}
+	malloc_mutex_unlock(&enq_mtx);
+
+	if (opt_prof_prefix[0] != '\0') {
+		malloc_mutex_lock(&prof_dump_seq_mtx);
+		prof_dump_filename(filename, 'u', prof_dump_useq);
+		prof_dump_useq++;
+		malloc_mutex_unlock(&prof_dump_seq_mtx);
+		prof_dump(filename, false, false);
+	}
+}
+
+static void
+prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
+{
+	const prof_bt_t *trace = (prof_bt_t *)key;
+	size_t nbytes;
+	uint64_t first;
+
+	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
+	assert(hash1 != NULL);
+	assert(hash2 != NULL);
+
+	/* Only the recorded frame addresses contribute to the hash. */
+	nbytes = trace->len * sizeof(void *);
+	first = hash(trace->vec, nbytes, 0x94122f335b332aeaLLU);
+	if (minbits > 32) {
+		/* Two full-width values are needed; hash again, reseeded. */
+		*hash1 = first;
+		*hash2 = hash(trace->vec, nbytes, 0x8432a476666bbc13LLU);
+		return;
+	}
+
+	/*
+	 * A single 64-bit hash carries enough bits for both results, so
+	 * split it rather than hashing a second time.
+	 */
+	*hash1 = first & ZU(0xffffffffU);
+	*hash2 = first >> 32;
+}
+
+static bool
+prof_bt_keycomp(const void *k1, const void *k2)
+{
+	const prof_bt_t *lhs = (prof_bt_t *)k1;
+	const prof_bt_t *rhs = (prof_bt_t *)k2;
+
+	/* Equal keys have the same depth and the same frame addresses. */
+	return (lhs->len == rhs->len &&
+	    memcmp(lhs->vec, rhs->vec, lhs->len * sizeof(void *)) == 0);
+}
+
+prof_tdata_t *
+prof_tdata_init(void)
+{
+	prof_tdata_t *prof_tdata;
+
+	/* Initialize an empty cache for this thread; NULL means failure. */
+	prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
+	if (prof_tdata == NULL)
+		return (NULL);
+
+	if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
+	    prof_bt_hash, prof_bt_keycomp)) {
+		idalloc(prof_tdata);
+		return (NULL);
+	}
+	ql_new(&prof_tdata->lru_ql);
+
+	prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max);
+	if (prof_tdata->vec == NULL) {
+		ckh_delete(&prof_tdata->bt2cnt);
+		idalloc(prof_tdata);
+		return (NULL);
+	}
+
+	prof_tdata->prn_state = 0;
+	prof_tdata->threshold = 0;
+	prof_tdata->accum = 0;
+	/* Presumably what PROF_TCACHE_GET() in prof_lookup() later returns. */
+	PROF_TCACHE_SET(prof_tdata);
+
+	return (prof_tdata);
+}
+
+static void
+prof_tdata_cleanup(void *arg)
+{
+	prof_thr_cnt_t *cnt;
+	prof_tdata_t *prof_tdata = (prof_tdata_t *)arg;
+	/* Destructor given to pthread_key_create() in prof_boot2(). */
+	/*
+	 * Delete the hash table.  All of its contents can still be iterated
+	 * over via the LRU.
+	 */
+	ckh_delete(&prof_tdata->bt2cnt);
+
+	/* Iteratively merge cnt's into the global stats and delete them. */
+	while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
+		ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
+		prof_ctx_merge(cnt->ctx, cnt);
+		idalloc(cnt);
+	}
+
+	idalloc(prof_tdata->vec);
+
+	idalloc(prof_tdata);
+	PROF_TCACHE_SET(NULL);
+}
+
+void
+prof_boot0(void)
+{
+	/* Copy the compiled-in default prefix, sizeof() bytes of it. */
+	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
+	    sizeof(PROF_PREFIX_DEFAULT));
+}
+
+void
+prof_boot1(void)
+{
+
+	/*
+	 * opt_prof and prof_promote must be in their final state before any
+	 * arenas are initialized, so this function must be executed early.
+	 */
+
+	if (opt_prof_leak && opt_prof == false) {
+		/*
+		 * Enable opt_prof, but in such a way that profiles are never
+		 * automatically dumped.
+		 */
+		opt_prof = true;
+		opt_prof_gdump = false;
+		prof_interval = 0;
+	} else if (opt_prof) {
+		if (opt_lg_prof_interval >= 0) {
+			prof_interval = (((uint64_t)1U) <<
+			    opt_lg_prof_interval);
+		} else
+			prof_interval = 0;
+	}
+	/* Set only when opt_prof is on and lg sample exceeds PAGE_SHIFT. */
+	prof_promote = (opt_prof && opt_lg_prof_sample > PAGE_SHIFT);
+}
+
+bool
+prof_boot2(void)
+{
+	/* Create profiling tables, locks and hooks; true means failure. */
+	if (opt_prof) {
+		if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
+		    prof_bt_keycomp))
+			return (true);
+		if (malloc_mutex_init(&bt2ctx_mtx))
+			return (true);
+		if (pthread_key_create(&prof_tdata_tsd, prof_tdata_cleanup)
+		    != 0) {
+			malloc_write(
+			    "<jemalloc>: Error in pthread_key_create()\n");
+			abort();
+		}
+
+		prof_bt_max = (1U << opt_lg_prof_bt_max);
+		if (malloc_mutex_init(&prof_dump_seq_mtx))
+			return (true);
+
+		if (malloc_mutex_init(&enq_mtx))
+			return (true);
+		enq = false;
+		enq_idump = false;
+		enq_gdump = false;
+
+		if (atexit(prof_fdump) != 0) {
+			malloc_write("<jemalloc>: Error in atexit()\n");
+			if (opt_abort)
+				abort();
+		}
+	}
+
+#ifdef JEMALLOC_PROF_LIBGCC
+	/*
+	 * Cause the backtracing machinery to allocate its internal state
+	 * before enabling profiling.
+	 */
+	_Unwind_Backtrace(prof_unwind_init_callback, NULL);
+#endif
+
+	prof_booted = true;
+
+	return (false);
+}
+
+/******************************************************************************/
+#endif /* JEMALLOC_PROF */
diff --git a/deps/jemalloc.orig/src/rtree.c b/deps/jemalloc.orig/src/rtree.c
new file mode 100644
index 00000000..eb0ff1e2
--- /dev/null
+++ b/deps/jemalloc.orig/src/rtree.c
@@ -0,0 +1,46 @@
+#define	JEMALLOC_RTREE_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+rtree_t *
+rtree_new(unsigned bits)
+{
+	rtree_t *ret;
+	unsigned bits_per_level, height, i;
+	/* Build a tree whose levels together index bits key bits. */
+	bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1;
+	height = bits / bits_per_level;
+	if (height * bits_per_level != bits)
+		height++;
+	assert(height * bits_per_level >= bits);
+	/* The header is followed by one level2bits entry per level. */
+	ret = (rtree_t*)base_alloc(offsetof(rtree_t, level2bits) +
+	    (sizeof(unsigned) * height));
+	if (ret == NULL)
+		return (NULL);
+	memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) *
+	    height));
+
+	if (malloc_mutex_init(&ret->mutex)) {
+		/* Leak the rtree. */
+		return (NULL);
+	}
+	ret->height = height;
+	if (bits_per_level * height > bits)
+		ret->level2bits[0] = bits % bits_per_level;
+	else
+		ret->level2bits[0] = bits_per_level;
+	for (i = 1; i < height; i++)
+		ret->level2bits[i] = bits_per_level;
+
+	ret->root = (void**)base_alloc(sizeof(void *) << ret->level2bits[0]);
+	if (ret->root == NULL) {
+		/*
+		 * We leak the rtree here, since there's no generic base
+		 * deallocation.
+		 */
+		return (NULL);
+	}
+	memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]);
+
+	return (ret);
+}
diff --git a/deps/jemalloc.orig/src/stats.c b/deps/jemalloc.orig/src/stats.c
new file mode 100644
index 00000000..dc172e42
--- /dev/null
+++ b/deps/jemalloc.orig/src/stats.c
@@ -0,0 +1,790 @@
+#define	JEMALLOC_STATS_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+/* Fetch n into *v through xmallctl(); t is used only for sizeof(). */
+#define	CTL_GET(n, v, t) do {						\
+	size_t sz = sizeof(t);						\
+	xmallctl(n, v, &sz, NULL, 0);					\
+} while (0)
+/* As CTL_GET, but by MIB with the caller's i stored in mib[2]. */
+#define	CTL_I_GET(n, v, t) do {						\
+	size_t mib[6];							\
+	size_t miblen = sizeof(mib) / sizeof(size_t);			\
+	size_t sz = sizeof(t);						\
+	xmallctlnametomib(n, mib, &miblen);				\
+	mib[2] = i;							\
+	xmallctlbymib(mib, miblen, v, &sz, NULL, 0);			\
+} while (0)
+/* As CTL_I_GET, but storing the caller's j in mib[2]. */
+#define	CTL_J_GET(n, v, t) do {						\
+	size_t mib[6];							\
+	size_t miblen = sizeof(mib) / sizeof(size_t);			\
+	size_t sz = sizeof(t);						\
+	xmallctlnametomib(n, mib, &miblen);				\
+	mib[2] = j;							\
+	xmallctlbymib(mib, miblen, v, &sz, NULL, 0);			\
+} while (0)
+/* As CTL_I_GET, but with both i (mib[2]) and j (mib[4]) filled in. */
+#define	CTL_IJ_GET(n, v, t) do {					\
+	size_t mib[6];							\
+	size_t miblen = sizeof(mib) / sizeof(size_t);			\
+	size_t sz = sizeof(t);						\
+	xmallctlnametomib(n, mib, &miblen);				\
+	mib[2] = i;							\
+	mib[4] = j;							\
+	xmallctlbymib(mib, miblen, v, &sz, NULL, 0);			\
+} while (0)
+
+/******************************************************************************/
+/* Data. */
+
+bool	opt_stats_print = false;
+
+#ifdef JEMALLOC_STATS
+size_t	stats_cactive = 0;
+#endif
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+#ifdef JEMALLOC_STATS
+static void	malloc_vcprintf(void (*write_cb)(void *, const char *),
+    void *cbopaque, const char *format, va_list ap);
+static void	stats_arena_bins_print(void (*write_cb)(void *, const char *),
+    void *cbopaque, unsigned i);
+static void	stats_arena_lruns_print(void (*write_cb)(void *, const char *),
+    void *cbopaque, unsigned i);
+static void	stats_arena_print(void (*write_cb)(void *, const char *),
+    void *cbopaque, unsigned i);
+#endif
+
+/******************************************************************************/
+
+/*
+ * We don't want to depend on vsnprintf() for production builds, since that can
+ * cause unnecessary bloat for static binaries.  u2s() provides minimal integer
+ * printing functionality, so that malloc_printf() use can be limited to
+ * JEMALLOC_STATS code.
+ */
+char *
+u2s(uint64_t x, unsigned base, char *s)
+{
+	static const char digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
+	char *p = &s[UMAX2S_BUFSIZE - 1];
+
+	/* Digits are produced last-to-first, right-justified within s. */
+	*p = '\0';
+	switch (base) {
+	case 10:
+		/* Constant divisor for the common decimal case. */
+		do {
+			*--p = digits[x % (uint64_t)10];
+			x /= (uint64_t)10;
+		} while (x > 0);
+		break;
+	case 16:
+		/* Hexadecimal needs only masks and shifts. */
+		do {
+			*--p = digits[x & 0xf];
+			x >>= 4;
+		} while (x > 0);
+		break;
+	default:
+		/* Any other base the 36 character alphabet can express. */
+		do {
+			*--p = digits[x % (uint64_t)base];
+			x /= (uint64_t)base;
+		} while (x > 0);
+	}
+
+	return (p);
+}
+
+#ifdef JEMALLOC_STATS
+static void
+malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *format, va_list ap)
+{
+	char buf[4096];
+	/* Format into buf (truncating) and hand the text to write_cb. */
+	if (write_cb == NULL) {
+		/*
+		 * The caller did not provide an alternate write_cb callback
+		 * function, so use the default one.  malloc_write() is an
+		 * inline function, so use malloc_message() directly here.
+		 */
+		write_cb = JEMALLOC_P(malloc_message);
+		cbopaque = NULL;
+	}
+
+	vsnprintf(buf, sizeof(buf), format, ap);
+	write_cb(cbopaque, buf);
+}
+
+/*
+ * Print to a callback function in such a way as to (hopefully) avoid memory
+ * allocation.
+ */
+JEMALLOC_ATTR(format(printf, 3, 4))
+void
+malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *format, ...)
+{
+	va_list ap;
+	/* A NULL write_cb selects the default writer in malloc_vcprintf(). */
+	va_start(ap, format);
+	malloc_vcprintf(write_cb, cbopaque, format, ap);
+	va_end(ap);
+}
+
+/*
+ * Print to stderr in such a way as to (hopefully) avoid memory allocation.
+ */
+JEMALLOC_ATTR(format(printf, 1, 2))
+void
+malloc_printf(const char *format, ...)
+{
+	va_list ap;
+	/* NULL callback: malloc_vcprintf() falls back to malloc_message. */
+	va_start(ap, format);
+	malloc_vcprintf(NULL, NULL, format, ap);
+	va_end(ap);
+}
+#endif
+
+#ifdef JEMALLOC_STATS
+static void
+stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
+    unsigned i)
+{
+	size_t pagesize;
+	bool config_tcache;
+	unsigned nbins, j, gap_start;
+	/* Print one row per bin of arena i, collapsing runs of unused bins. */
+	CTL_GET("arenas.pagesize", &pagesize, size_t);
+
+	CTL_GET("config.tcache", &config_tcache, bool);
+	if (config_tcache) {
+		malloc_cprintf(write_cb, cbopaque,
+		    "bins:     bin    size regs pgs    allocated      nmalloc"
+		    "      ndalloc    nrequests       nfills     nflushes"
+		    "      newruns       reruns      maxruns      curruns\n");
+	} else {
+		malloc_cprintf(write_cb, cbopaque,
+		    "bins:     bin    size regs pgs    allocated      nmalloc"
+		    "      ndalloc      newruns       reruns      maxruns"
+		    "      curruns\n");
+	}
+	CTL_GET("arenas.nbins", &nbins, unsigned);
+	for (j = 0, gap_start = UINT_MAX; j < nbins; j++) {
+		uint64_t nruns;
+
+		CTL_IJ_GET("stats.arenas.0.bins.0.nruns", &nruns, uint64_t);
+		if (nruns == 0) {
+			if (gap_start == UINT_MAX)
+				gap_start = j;
+		} else {
+			unsigned ntbins_, nqbins, ncbins, nsbins;
+			size_t reg_size, run_size, allocated;
+			uint32_t nregs;
+			uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes;
+			uint64_t reruns;
+			size_t highruns, curruns;
+
+			if (gap_start != UINT_MAX) {
+				if (j > gap_start + 1) {
+					/* Gap of more than one size class. */
+					malloc_cprintf(write_cb, cbopaque,
+					    "[%u..%u]\n", gap_start,
+					    j - 1);
+				} else {
+					/* Gap of one size class. */
+					malloc_cprintf(write_cb, cbopaque,
+					    "[%u]\n", gap_start);
+				}
+				gap_start = UINT_MAX;
+			}
+			CTL_GET("arenas.ntbins", &ntbins_, unsigned);
+			CTL_GET("arenas.nqbins", &nqbins, unsigned);
+			CTL_GET("arenas.ncbins", &ncbins, unsigned);
+			CTL_GET("arenas.nsbins", &nsbins, unsigned);
+			CTL_J_GET("arenas.bin.0.size", &reg_size, size_t);
+			CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t);
+			CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t);
+			CTL_IJ_GET("stats.arenas.0.bins.0.allocated",
+			    &allocated, size_t);
+			CTL_IJ_GET("stats.arenas.0.bins.0.nmalloc",
+			    &nmalloc, uint64_t);
+			CTL_IJ_GET("stats.arenas.0.bins.0.ndalloc",
+			    &ndalloc, uint64_t);
+			if (config_tcache) {
+				CTL_IJ_GET("stats.arenas.0.bins.0.nrequests",
+				    &nrequests, uint64_t);
+				CTL_IJ_GET("stats.arenas.0.bins.0.nfills",
+				    &nfills, uint64_t);
+				CTL_IJ_GET("stats.arenas.0.bins.0.nflushes",
+				    &nflushes, uint64_t);
+			}
+			CTL_IJ_GET("stats.arenas.0.bins.0.nreruns", &reruns,
+			    uint64_t);
+			CTL_IJ_GET("stats.arenas.0.bins.0.highruns", &highruns,
+			    size_t);
+			CTL_IJ_GET("stats.arenas.0.bins.0.curruns", &curruns,
+			    size_t);
+			if (config_tcache) {
+				malloc_cprintf(write_cb, cbopaque,
+				    "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64
+				    " %12"PRIu64" %12"PRIu64" %12"PRIu64
+				    " %12"PRIu64" %12"PRIu64" %12"PRIu64
+				    " %12zu %12zu\n",
+				    j,
+				    j < ntbins_ ? "T" : j < ntbins_ + nqbins ?
+				    "Q" : j < ntbins_ + nqbins + ncbins ? "C" :
+				    "S",
+				    reg_size, nregs, run_size / pagesize,
+				    allocated, nmalloc, ndalloc, nrequests,
+				    nfills, nflushes, nruns, reruns, highruns,
+				    curruns);
+			} else {
+				malloc_cprintf(write_cb, cbopaque,
+				    "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64
+				    " %12"PRIu64" %12"PRIu64" %12"PRIu64
+				    " %12zu %12zu\n",
+				    j,
+				    j < ntbins_ ? "T" : j < ntbins_ + nqbins ?
+				    "Q" : j < ntbins_ + nqbins + ncbins ? "C" :
+				    "S",
+				    reg_size, nregs, run_size / pagesize,
+				    allocated, nmalloc, ndalloc, nruns, reruns,
+				    highruns, curruns);
+			}
+		}
+	}
+	if (gap_start != UINT_MAX) {
+		if (j > gap_start + 1) {
+			/* Gap of more than one size class. */
+			malloc_cprintf(write_cb, cbopaque, "[%u..%u]\n",
+			    gap_start, j - 1);
+		} else {
+			/* Gap of one size class. */
+			malloc_cprintf(write_cb, cbopaque, "[%u]\n", gap_start);
+		}
+	}
+}
+
+static void
+stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque,
+    unsigned i)
+{
+	size_t pagesize, nlruns, j;
+	ssize_t gap_start;
+	/* Print arena i's large run rows; idle spans become "[<count>]". */
+	CTL_GET("arenas.pagesize", &pagesize, size_t);
+
+	malloc_cprintf(write_cb, cbopaque,
+	    "large:   size pages      nmalloc      ndalloc    nrequests"
+	    "      maxruns      curruns\n");
+	CTL_GET("arenas.nlruns", &nlruns, size_t);
+	for (j = 0, gap_start = -1; j < nlruns; j++) {
+		uint64_t nmalloc, ndalloc, nrequests;
+		size_t run_size, highruns, curruns;
+
+		CTL_IJ_GET("stats.arenas.0.lruns.0.nmalloc", &nmalloc,
+		    uint64_t);
+		CTL_IJ_GET("stats.arenas.0.lruns.0.ndalloc", &ndalloc,
+		    uint64_t);
+		CTL_IJ_GET("stats.arenas.0.lruns.0.nrequests", &nrequests,
+		    uint64_t);
+		if (nrequests == 0) {
+			if (gap_start == -1)
+				gap_start = j;
+		} else {
+			CTL_J_GET("arenas.lrun.0.size", &run_size, size_t);
+			CTL_IJ_GET("stats.arenas.0.lruns.0.highruns", &highruns,
+			    size_t);
+			CTL_IJ_GET("stats.arenas.0.lruns.0.curruns", &curruns,
+			    size_t);
+			if (gap_start != -1) {
+				malloc_cprintf(write_cb, cbopaque, "[%zu]\n",
+				    j - gap_start);
+				gap_start = -1;
+			}
+			malloc_cprintf(write_cb, cbopaque,
+			    "%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64
+			    " %12zu %12zu\n",
+			    run_size, run_size / pagesize, nmalloc, ndalloc,
+			    nrequests, highruns, curruns);
+		}
+	}
+	if (gap_start != -1)
+		malloc_cprintf(write_cb, cbopaque, "[%zu]\n", j - gap_start);
+}
+
+static void
+stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
+    unsigned i)
+{
+	unsigned nthreads;
+	size_t pagesize, pactive, pdirty, mapped;
+	uint64_t npurge, nmadvise, purged;
+	size_t small_allocated;
+	uint64_t small_nmalloc, small_ndalloc, small_nrequests;
+	size_t large_allocated;
+	uint64_t large_nmalloc, large_ndalloc, large_nrequests;
+	/* Print arena i's summary, then its bin and large run tables. */
+	CTL_GET("arenas.pagesize", &pagesize, size_t);
+
+	CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned);
+	malloc_cprintf(write_cb, cbopaque,
+	    "assigned threads: %u\n", nthreads);
+	CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t);
+	CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t);
+	CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t);
+	CTL_I_GET("stats.arenas.0.nmadvise", &nmadvise, uint64_t);
+	CTL_I_GET("stats.arenas.0.purged", &purged, uint64_t);
+	malloc_cprintf(write_cb, cbopaque,
+	    "dirty pages: %zu:%zu active:dirty, %"PRIu64" sweep%s,"
+	    " %"PRIu64" madvise%s, %"PRIu64" purged\n",
+	    pactive, pdirty, npurge, npurge == 1 ? "" : "s",
+	    nmadvise, nmadvise == 1 ? "" : "s", purged);
+
+	malloc_cprintf(write_cb, cbopaque,
+	    "            allocated      nmalloc      ndalloc    nrequests\n");
+	CTL_I_GET("stats.arenas.0.small.allocated", &small_allocated, size_t);
+	CTL_I_GET("stats.arenas.0.small.nmalloc", &small_nmalloc, uint64_t);
+	CTL_I_GET("stats.arenas.0.small.ndalloc", &small_ndalloc, uint64_t);
+	CTL_I_GET("stats.arenas.0.small.nrequests", &small_nrequests, uint64_t);
+	malloc_cprintf(write_cb, cbopaque,
+	    "small:   %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
+	    small_allocated, small_nmalloc, small_ndalloc, small_nrequests);
+	CTL_I_GET("stats.arenas.0.large.allocated", &large_allocated, size_t);
+	CTL_I_GET("stats.arenas.0.large.nmalloc", &large_nmalloc, uint64_t);
+	CTL_I_GET("stats.arenas.0.large.ndalloc", &large_ndalloc, uint64_t);
+	CTL_I_GET("stats.arenas.0.large.nrequests", &large_nrequests, uint64_t);
+	malloc_cprintf(write_cb, cbopaque,
+	    "large:   %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
+	    large_allocated, large_nmalloc, large_ndalloc, large_nrequests);
+	malloc_cprintf(write_cb, cbopaque,
+	    "total:   %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
+	    small_allocated + large_allocated,
+	    small_nmalloc + large_nmalloc,
+	    small_ndalloc + large_ndalloc,
+	    small_nrequests + large_nrequests);
+	malloc_cprintf(write_cb, cbopaque, "active:  %12zu\n",
+	    pactive * pagesize );
+	CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t);
+	malloc_cprintf(write_cb, cbopaque, "mapped:  %12zu\n", mapped);
+
+	stats_arena_bins_print(write_cb, cbopaque, i);
+	stats_arena_lruns_print(write_cb, cbopaque, i);
+}
+#endif
+
+void
+stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *opts)
+{
+	int err;
+	uint64_t epoch;
+	size_t u64sz;
+	char s[UMAX2S_BUFSIZE];
+	bool general = true;	/* Cleared by 'g' in opts. */
+	bool merged = true;	/* Cleared by 'm' in opts. */
+	bool unmerged = true;	/* Cleared by 'a' in opts. */
+	bool bins = true;	/* Cleared by 'b'; never read in this function. */
+	bool large = true;	/* Cleared by 'l'; never read in this function. */
+	/* Write a stats report via write_cb (malloc_message when NULL). */
+	/*
+	 * Refresh stats, in case mallctl() was called by the application.
+	 *
+	 * Check for OOM here, since refreshing the ctl cache can trigger
+	 * allocation.  In practice, none of the subsequent mallctl()-related
+	 * calls in this function will cause OOM if this one succeeds.
+	 */
+	epoch = 1;
+	u64sz = sizeof(uint64_t);
+	err = JEMALLOC_P(mallctl)("epoch", &epoch, &u64sz, &epoch,
+	    sizeof(uint64_t));
+	if (err != 0) {
+		if (err == EAGAIN) {
+			malloc_write("<jemalloc>: Memory allocation failure in "
+			    "mallctl(\"epoch\", ...)\n");
+			return;
+		}
+		malloc_write("<jemalloc>: Failure in mallctl(\"epoch\", "
+		    "...)\n");
+		abort();
+	}
+
+	if (write_cb == NULL) {
+		/*
+		 * The caller did not provide an alternate write_cb callback
+		 * function, so use the default one.  malloc_write() is an
+		 * inline function, so use malloc_message() directly here.
+		 */
+		write_cb = JEMALLOC_P(malloc_message);
+		cbopaque = NULL;
+	}
+	/* Unrecognized option letters are ignored by the default case. */
+	if (opts != NULL) {
+		unsigned i;
+
+		for (i = 0; opts[i] != '\0'; i++) {
+			switch (opts[i]) {
+				case 'g':
+					general = false;
+					break;
+				case 'm':
+					merged = false;
+					break;
+				case 'a':
+					unmerged = false;
+					break;
+				case 'b':
+					bins = false;
+					break;
+				case 'l':
+					large = false;
+					break;
+				default:;
+			}
+		}
+	}
+
+	write_cb(cbopaque, "___ Begin jemalloc statistics ___\n");
+	if (general) {
+		int err;
+		const char *cpv;
+		bool bv;
+		unsigned uv;
+		ssize_t ssv;
+		size_t sv, bsz, ssz, sssz, cpsz;
+
+		bsz = sizeof(bool);
+		ssz = sizeof(size_t);
+		sssz = sizeof(ssize_t);
+		cpsz = sizeof(const char *);
+
+		CTL_GET("version", &cpv, const char *);
+		write_cb(cbopaque, "Version: ");
+		write_cb(cbopaque, cpv);
+		write_cb(cbopaque, "\n");
+		CTL_GET("config.debug", &bv, bool);
+		write_cb(cbopaque, "Assertions ");
+		write_cb(cbopaque, bv ? "enabled" : "disabled");
+		write_cb(cbopaque, "\n");
+		/* Each helper prints "  opt.<n>: <value>" only if the read succeeds. */
+#define OPT_WRITE_BOOL(n)						\
+		if ((err = JEMALLOC_P(mallctl)("opt."#n, &bv, &bsz,	\
+		    NULL, 0)) == 0) {					\
+			write_cb(cbopaque, "  opt."#n": ");		\
+			write_cb(cbopaque, bv ? "true" : "false");	\
+			write_cb(cbopaque, "\n");			\
+		}
+#define OPT_WRITE_SIZE_T(n)						\
+		if ((err = JEMALLOC_P(mallctl)("opt."#n, &sv, &ssz,	\
+		    NULL, 0)) == 0) {					\
+			write_cb(cbopaque, "  opt."#n": ");		\
+			write_cb(cbopaque, u2s(sv, 10, s));		\
+			write_cb(cbopaque, "\n");			\
+		}
+#define OPT_WRITE_SSIZE_T(n)						\
+		if ((err = JEMALLOC_P(mallctl)("opt."#n, &ssv, &sssz,	\
+		    NULL, 0)) == 0) {					\
+			if (ssv >= 0) {					\
+				write_cb(cbopaque, "  opt."#n": ");	\
+				write_cb(cbopaque, u2s(ssv, 10, s));	\
+			} else {					\
+				write_cb(cbopaque, "  opt."#n": -");	\
+				write_cb(cbopaque, u2s(-ssv, 10, s));	\
+			}						\
+			write_cb(cbopaque, "\n");			\
+		}
+#define OPT_WRITE_CHAR_P(n)						\
+		if ((err = JEMALLOC_P(mallctl)("opt."#n, &cpv, &cpsz,	\
+		    NULL, 0)) == 0) {					\
+			write_cb(cbopaque, "  opt."#n": \"");		\
+			write_cb(cbopaque, cpv);			\
+			write_cb(cbopaque, "\"\n");			\
+		}
+
+		write_cb(cbopaque, "Run-time option settings:\n");
+		OPT_WRITE_BOOL(abort)
+		OPT_WRITE_SIZE_T(lg_qspace_max)
+		OPT_WRITE_SIZE_T(lg_cspace_max)
+		OPT_WRITE_SIZE_T(lg_chunk)
+		OPT_WRITE_SIZE_T(narenas)
+		OPT_WRITE_SSIZE_T(lg_dirty_mult)
+		OPT_WRITE_BOOL(stats_print)
+		OPT_WRITE_BOOL(junk)
+		OPT_WRITE_BOOL(zero)
+		OPT_WRITE_BOOL(sysv)
+		OPT_WRITE_BOOL(xmalloc)
+		OPT_WRITE_BOOL(tcache)
+		OPT_WRITE_SSIZE_T(lg_tcache_gc_sweep)
+		OPT_WRITE_SSIZE_T(lg_tcache_max)
+		OPT_WRITE_BOOL(prof)
+		OPT_WRITE_CHAR_P(prof_prefix)
+		OPT_WRITE_SIZE_T(lg_prof_bt_max)
+		OPT_WRITE_BOOL(prof_active)
+		OPT_WRITE_SSIZE_T(lg_prof_sample)
+		OPT_WRITE_BOOL(prof_accum)
+		OPT_WRITE_SSIZE_T(lg_prof_tcmax)
+		OPT_WRITE_SSIZE_T(lg_prof_interval)
+		OPT_WRITE_BOOL(prof_gdump)
+		OPT_WRITE_BOOL(prof_leak)
+		OPT_WRITE_BOOL(overcommit)
+
+#undef OPT_WRITE_BOOL
+#undef OPT_WRITE_SIZE_T
+#undef OPT_WRITE_SSIZE_T
+#undef OPT_WRITE_CHAR_P
+
+		write_cb(cbopaque, "CPUs: ");
+		write_cb(cbopaque, u2s(ncpus, 10, s));
+		write_cb(cbopaque, "\n");
+
+		CTL_GET("arenas.narenas", &uv, unsigned);
+		write_cb(cbopaque, "Max arenas: ");
+		write_cb(cbopaque, u2s(uv, 10, s));
+		write_cb(cbopaque, "\n");
+
+		write_cb(cbopaque, "Pointer size: ");
+		write_cb(cbopaque, u2s(sizeof(void *), 10, s));
+		write_cb(cbopaque, "\n");
+
+		CTL_GET("arenas.quantum", &sv, size_t);
+		write_cb(cbopaque, "Quantum size: ");
+		write_cb(cbopaque, u2s(sv, 10, s));
+		write_cb(cbopaque, "\n");
+
+		CTL_GET("arenas.cacheline", &sv, size_t);
+		write_cb(cbopaque, "Cacheline size (assumed): ");
+		write_cb(cbopaque, u2s(sv, 10, s));
+		write_cb(cbopaque, "\n");
+
+		CTL_GET("arenas.subpage", &sv, size_t);
+		write_cb(cbopaque, "Subpage spacing: ");
+		write_cb(cbopaque, u2s(sv, 10, s));
+		write_cb(cbopaque, "\n");
+
+		if ((err = JEMALLOC_P(mallctl)("arenas.tspace_min", &sv, &ssz,
+		    NULL, 0)) == 0) {
+			write_cb(cbopaque, "Tiny 2^n-spaced sizes: [");
+			write_cb(cbopaque, u2s(sv, 10, s));
+			write_cb(cbopaque, "..");
+
+			CTL_GET("arenas.tspace_max", &sv, size_t);
+			write_cb(cbopaque, u2s(sv, 10, s));
+			write_cb(cbopaque, "]\n");
+		}
+
+		CTL_GET("arenas.qspace_min", &sv, size_t);
+		write_cb(cbopaque, "Quantum-spaced sizes: [");
+		write_cb(cbopaque, u2s(sv, 10, s));
+		write_cb(cbopaque, "..");
+		CTL_GET("arenas.qspace_max", &sv, size_t);
+		write_cb(cbopaque, u2s(sv, 10, s));
+		write_cb(cbopaque, "]\n");
+
+		CTL_GET("arenas.cspace_min", &sv, size_t);
+		write_cb(cbopaque, "Cacheline-spaced sizes: [");
+		write_cb(cbopaque, u2s(sv, 10, s));
+		write_cb(cbopaque, "..");
+		CTL_GET("arenas.cspace_max", &sv, size_t);
+		write_cb(cbopaque, u2s(sv, 10, s));
+		write_cb(cbopaque, "]\n");
+
+		CTL_GET("arenas.sspace_min", &sv, size_t);
+		write_cb(cbopaque, "Subpage-spaced sizes: [");
+		write_cb(cbopaque, u2s(sv, 10, s));
+		write_cb(cbopaque, "..");
+		CTL_GET("arenas.sspace_max", &sv, size_t);
+		write_cb(cbopaque, u2s(sv, 10, s));
+		write_cb(cbopaque, "]\n");
+
+		CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t);
+		if (ssv >= 0) {
+			write_cb(cbopaque,
+			    "Min active:dirty page ratio per arena: ");
+			write_cb(cbopaque, u2s((1U << ssv), 10, s));
+			write_cb(cbopaque, ":1\n");
+		} else {
+			write_cb(cbopaque,
+			    "Min active:dirty page ratio per arena: N/A\n");
+		}
+		if ((err = JEMALLOC_P(mallctl)("arenas.tcache_max", &sv,
+		    &ssz, NULL, 0)) == 0) {
+			write_cb(cbopaque,
+			    "Maximum thread-cached size class: ");
+			write_cb(cbopaque, u2s(sv, 10, s));
+			write_cb(cbopaque, "\n");
+		}
+		if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_gc_sweep", &ssv,
+		    &sssz, NULL, 0)) == 0) {
+			/* Shift only after ssv is known to be non-negative. */
+			bool tcache_enabled;
+			CTL_GET("opt.tcache", &tcache_enabled, bool);
+			write_cb(cbopaque, "Thread cache GC sweep interval: ");
+			write_cb(cbopaque, tcache_enabled && ssv >= 0 ?
+			    u2s(((uint64_t)1U) << ssv, 10, s) : "N/A");
+			write_cb(cbopaque, "\n");
+		}
+		if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0))
+		   == 0 && bv) {
+			CTL_GET("opt.lg_prof_bt_max", &sv, size_t);
+			write_cb(cbopaque, "Maximum profile backtrace depth: ");
+			write_cb(cbopaque, u2s((1U << sv), 10, s));
+			write_cb(cbopaque, "\n");
+
+			CTL_GET("opt.lg_prof_tcmax", &ssv, ssize_t);
+			write_cb(cbopaque,
+			    "Maximum per thread backtrace cache: ");
+			if (ssv >= 0) {
+				write_cb(cbopaque, u2s((1U << ssv), 10, s));
+				write_cb(cbopaque, " (2^");
+				write_cb(cbopaque, u2s(ssv, 10, s));
+				write_cb(cbopaque, ")\n");
+			} else
+				write_cb(cbopaque, "N/A\n");
+
+			CTL_GET("opt.lg_prof_sample", &sv, size_t);
+			write_cb(cbopaque, "Average profile sample interval: ");
+			write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s));
+			write_cb(cbopaque, " (2^");
+			write_cb(cbopaque, u2s(sv, 10, s));
+			write_cb(cbopaque, ")\n");
+
+			CTL_GET("opt.lg_prof_interval", &ssv, ssize_t);
+			write_cb(cbopaque, "Average profile dump interval: ");
+			if (ssv >= 0) {
+				write_cb(cbopaque, u2s((((uint64_t)1U) << ssv),
+				    10, s));
+				write_cb(cbopaque, " (2^");
+				write_cb(cbopaque, u2s(ssv, 10, s));
+				write_cb(cbopaque, ")\n");
+			} else
+				write_cb(cbopaque, "N/A\n");
+		}
+		CTL_GET("arenas.chunksize", &sv, size_t);
+		write_cb(cbopaque, "Chunk size: ");
+		write_cb(cbopaque, u2s(sv, 10, s));
+		CTL_GET("opt.lg_chunk", &sv, size_t);
+		write_cb(cbopaque, " (2^");
+		write_cb(cbopaque, u2s(sv, 10, s));
+		write_cb(cbopaque, ")\n");
+	}
+
+#ifdef JEMALLOC_STATS
+	{
+		int err;
+		size_t sszp, ssz;
+		size_t *cactive;
+		size_t allocated, active, mapped;
+		size_t chunks_current, chunks_high, swap_avail;
+		uint64_t chunks_total;
+		size_t huge_allocated;
+		uint64_t huge_nmalloc, huge_ndalloc;
+
+		sszp = sizeof(size_t *);
+		ssz = sizeof(size_t);
+
+		CTL_GET("stats.cactive", &cactive, size_t *);
+		CTL_GET("stats.allocated", &allocated, size_t);
+		CTL_GET("stats.active", &active, size_t);
+		CTL_GET("stats.mapped", &mapped, size_t);
+		malloc_cprintf(write_cb, cbopaque,
+		    "Allocated: %zu, active: %zu, mapped: %zu\n",
+		    allocated, active, mapped);
+		malloc_cprintf(write_cb, cbopaque,
+		    "Current active ceiling: %zu\n", atomic_read_z(cactive));
+
+		/* Print chunk stats. */
+		CTL_GET("stats.chunks.total", &chunks_total, uint64_t);
+		CTL_GET("stats.chunks.high", &chunks_high, size_t);
+		CTL_GET("stats.chunks.current", &chunks_current, size_t);
+		if ((err = JEMALLOC_P(mallctl)("swap.avail", &swap_avail, &ssz,
+		    NULL, 0)) == 0) {
+			size_t lg_chunk;
+
+			malloc_cprintf(write_cb, cbopaque, "chunks: nchunks   "
+			    "highchunks    curchunks   swap_avail\n");
+			CTL_GET("opt.lg_chunk", &lg_chunk, size_t);
+			malloc_cprintf(write_cb, cbopaque,
+			    "  %13"PRIu64"%13zu%13zu%13zu\n",
+			    chunks_total, chunks_high, chunks_current,
+			    swap_avail << lg_chunk);
+		} else {
+			malloc_cprintf(write_cb, cbopaque, "chunks: nchunks   "
+			    "highchunks    curchunks\n");
+			malloc_cprintf(write_cb, cbopaque,
+			    "  %13"PRIu64"%13zu%13zu\n",
+			    chunks_total, chunks_high, chunks_current);
+		}
+
+		/* Print huge stats. */
+		CTL_GET("stats.huge.nmalloc", &huge_nmalloc, uint64_t);
+		CTL_GET("stats.huge.ndalloc", &huge_ndalloc, uint64_t);
+		CTL_GET("stats.huge.allocated", &huge_allocated, size_t);
+		malloc_cprintf(write_cb, cbopaque,
+		    "huge: nmalloc      ndalloc    allocated\n");
+		malloc_cprintf(write_cb, cbopaque,
+		    " %12"PRIu64" %12"PRIu64" %12zu\n",
+		    huge_nmalloc, huge_ndalloc, huge_allocated);
+
+		if (merged) {
+			unsigned narenas;
+
+			CTL_GET("arenas.narenas", &narenas, unsigned);
+			{
+				bool initialized[narenas];
+				size_t isz;
+				unsigned i, ninitialized;
+
+				isz = sizeof(initialized);
+				xmallctl("arenas.initialized", initialized,
+				    &isz, NULL, 0);
+				for (i = ninitialized = 0; i < narenas; i++) {
+					if (initialized[i])
+						ninitialized++;
+				}
+
+				if (ninitialized > 1 || unmerged == false) {
+					/* Print merged arena stats. */
+					malloc_cprintf(write_cb, cbopaque,
+					    "\nMerged arenas stats:\n");
+					stats_arena_print(write_cb, cbopaque,
+					    narenas);
+				}
+			}
+		}
+
+		if (unmerged) {
+			unsigned narenas;
+
+			/* Print stats for each arena. */
+
+			CTL_GET("arenas.narenas", &narenas, unsigned);
+			{
+				bool initialized[narenas];
+				size_t isz;
+				unsigned i;
+
+				isz = sizeof(initialized);
+				xmallctl("arenas.initialized", initialized,
+				    &isz, NULL, 0);
+
+				for (i = 0; i < narenas; i++) {
+					if (initialized[i]) {
+						malloc_cprintf(write_cb,
+						    cbopaque,
+						    "\narenas[%u]:\n", i);
+						stats_arena_print(write_cb,
+						    cbopaque, i);
+					}
+				}
+			}
+		}
+	}
+#endif /* #ifdef JEMALLOC_STATS */
+	write_cb(cbopaque, "--- End jemalloc statistics ---\n");
+}
diff --git a/deps/jemalloc.orig/src/tcache.c b/deps/jemalloc.orig/src/tcache.c
new file mode 100644
index 00000000..31c329e1
--- /dev/null
+++ b/deps/jemalloc.orig/src/tcache.c
@@ -0,0 +1,480 @@
+#define	JEMALLOC_TCACHE_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+#ifdef JEMALLOC_TCACHE
+/******************************************************************************/
+/* Data. */
+
+bool	opt_tcache = true;
+ssize_t	opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
+ssize_t	opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
+/* tcache_bin_info: nhbins entries from base_alloc(), set in tcache_boot(). */
+tcache_bin_info_t	*tcache_bin_info;
+static unsigned		stack_nelms; /* Total stack elms per tcache. */
+
+/* Map of thread-specific caches. */
+#ifndef NO_TLS
+__thread tcache_t	*tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
+#endif
+
+/*
+ * Same contents as tcache, but initialized such that the TSD destructor is
+ * called when a thread exits, so that the cache can be cleaned up.
+ */
+pthread_key_t		tcache_tsd;
+/* nhbins, tcache_maxclass and tcache_gc_incr are assigned in tcache_boot(). */
+size_t				nhbins;
+size_t				tcache_maxclass;
+unsigned			tcache_gc_incr;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static void	tcache_thread_cleanup(void *arg);
+
+/******************************************************************************/
+
+void *
+tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
+{
+	/*
+	 * Have the tcache's arena refill tbin, then take an object from it
+	 * with tcache_alloc_easy().  When profiling is compiled in, the
+	 * accumulated byte count is handed to the arena and reset to zero.
+	 */
+#ifdef JEMALLOC_PROF
+	arena_tcache_fill_small(tcache->arena, tbin, binind,
+	    tcache->prof_accumbytes);
+	tcache->prof_accumbytes = 0;
+#else
+	arena_tcache_fill_small(tcache->arena, tbin, binind);
+#endif
+	return (tcache_alloc_easy(tbin));
+}
+
+void
+tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+    , tcache_t *tcache
+#endif
+    )
+{
+	void *ptr;
+	unsigned i, nflush, ndeferred;
+#ifdef JEMALLOC_STATS
+	bool merged_stats = false;
+#endif
+	/* Return all but rem cached objects of small bin binind to arenas. */
+	assert(binind < nbins);
+	assert(rem <= tbin->ncached);
+	/* Each pass frees one arena's objects; the rest wait for a later pass. */
+	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
+		/* Lock the arena bin associated with the first object. */
+		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
+		    tbin->avail[0]);
+		arena_t *arena = chunk->arena;
+		arena_bin_t *bin = &arena->bins[binind];
+
+#ifdef JEMALLOC_PROF
+		if (arena == tcache->arena) {
+			malloc_mutex_lock(&arena->lock);
+			arena_prof_accum(arena, tcache->prof_accumbytes);
+			malloc_mutex_unlock(&arena->lock);
+			tcache->prof_accumbytes = 0;
+		}
+#endif
+
+		malloc_mutex_lock(&bin->lock);
+#ifdef JEMALLOC_STATS
+		if (arena == tcache->arena) {
+			assert(merged_stats == false);
+			merged_stats = true;
+			bin->stats.nflushes++;
+			bin->stats.nrequests += tbin->tstats.nrequests;
+			tbin->tstats.nrequests = 0;
+		}
+#endif
+		ndeferred = 0;
+		for (i = 0; i < nflush; i++) {
+			ptr = tbin->avail[i];
+			assert(ptr != NULL);
+			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+			if (chunk->arena == arena) {
+				size_t pageind = ((uintptr_t)ptr -
+				    (uintptr_t)chunk) >> PAGE_SHIFT;
+				arena_chunk_map_t *mapelm =
+				    &chunk->map[pageind-map_bias];
+				arena_dalloc_bin(arena, chunk, ptr, mapelm);
+			} else {
+				/*
+				 * This object was allocated via a different
+				 * arena bin than the one that is currently
+				 * locked.  Stash the object, so that it can be
+				 * handled in a future pass.
+				 */
+				tbin->avail[ndeferred] = ptr;
+				ndeferred++;
+			}
+		}
+		malloc_mutex_unlock(&bin->lock);
+	}
+#ifdef JEMALLOC_STATS
+	if (merged_stats == false) {
+		/*
+		 * The flush loop didn't happen to flush to this thread's
+		 * arena, so the stats didn't get merged.  Manually do so now.
+		 */
+		arena_bin_t *bin = &tcache->arena->bins[binind];
+		malloc_mutex_lock(&bin->lock);
+		bin->stats.nflushes++;
+		bin->stats.nrequests += tbin->tstats.nrequests;
+		tbin->tstats.nrequests = 0;
+		malloc_mutex_unlock(&bin->lock);
+	}
+#endif
+	/* Slide the rem surviving pointers down to the start of avail[]. */
+	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
+	    rem * sizeof(void *));
+	tbin->ncached = rem;
+	if ((int)tbin->ncached < tbin->low_water)
+		tbin->low_water = tbin->ncached;
+}
+
+void
+tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+    , tcache_t *tcache
+#endif
+    )
+{
+	void *ptr;
+	unsigned i, nflush, ndeferred;
+#ifdef JEMALLOC_STATS
+	bool merged_stats = false;
+#endif
+	/* Return all but rem cached objects of large bin binind to arenas. */
+	assert(binind < nhbins);
+	assert(rem <= tbin->ncached);
+	/* Each pass frees one arena's objects; the rest wait for a later pass. */
+	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
+		/* Lock the arena associated with the first object. */
+		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
+		    tbin->avail[0]);
+		arena_t *arena = chunk->arena;
+
+		malloc_mutex_lock(&arena->lock);
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+		if (arena == tcache->arena) {
+#endif
+#ifdef JEMALLOC_PROF
+			arena_prof_accum(arena, tcache->prof_accumbytes);
+			tcache->prof_accumbytes = 0;
+#endif
+#ifdef JEMALLOC_STATS
+			merged_stats = true;
+			arena->stats.nrequests_large += tbin->tstats.nrequests;
+			arena->stats.lstats[binind - nbins].nrequests +=
+			    tbin->tstats.nrequests;
+			tbin->tstats.nrequests = 0;
+#endif
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+		}
+#endif
+		ndeferred = 0;
+		for (i = 0; i < nflush; i++) {
+			ptr = tbin->avail[i];
+			assert(ptr != NULL);
+			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+			if (chunk->arena == arena)
+				arena_dalloc_large(arena, chunk, ptr);
+			else {
+				/*
+				 * This object was allocated via a different
+				 * arena than the one that is currently locked.
+				 * Stash the object, so that it can be handled
+				 * in a future pass.
+				 */
+				tbin->avail[ndeferred] = ptr;
+				ndeferred++;
+			}
+		}
+		malloc_mutex_unlock(&arena->lock);
+	}
+#ifdef JEMALLOC_STATS
+	if (merged_stats == false) {
+		/*
+		 * The flush loop didn't happen to flush to this thread's
+		 * arena, so the stats didn't get merged.  Manually do so now.
+		 */
+		arena_t *arena = tcache->arena;
+		malloc_mutex_lock(&arena->lock);
+		arena->stats.nrequests_large += tbin->tstats.nrequests;
+		arena->stats.lstats[binind - nbins].nrequests +=
+		    tbin->tstats.nrequests;
+		tbin->tstats.nrequests = 0;
+		malloc_mutex_unlock(&arena->lock);
+	}
+#endif
+	/* Slide the rem surviving pointers down to the start of avail[]. */
+	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
+	    rem * sizeof(void *));
+	tbin->ncached = rem;
+	if ((int)tbin->ncached < tbin->low_water)
+		tbin->low_water = tbin->ncached;
+}
+
+tcache_t *
+tcache_create(arena_t *arena)
+{
+	tcache_t *tcache;
+	size_t size, stack_offset;
+	unsigned i;
+	/* Layout: header plus nhbins bin structs, then the pointer stacks. */
+	size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
+	/* Naturally align the pointer stacks. */
+	size = PTR_CEILING(size);
+	stack_offset = size;
+	size += stack_nelms * sizeof(void *);
+	/*
+	 * Round up to the nearest multiple of the cacheline size, in order to
+	 * avoid the possibility of false cacheline sharing.
+	 *
+	 * That this works relies on the same logic as in ipalloc(), but we
+	 * cannot directly call ipalloc() here due to tcache bootstrapping
+	 * issues.
+	 */
+	size = (size + CACHELINE_MASK) & (-CACHELINE);
+	/* NOTE(review): the `true` argument presumably asks for zeroing. */
+	if (size <= small_maxclass)
+		tcache = (tcache_t *)arena_malloc_small(arena, size, true);
+	else if (size <= tcache_maxclass)
+		tcache = (tcache_t *)arena_malloc_large(arena, size, true);
+	else
+		tcache = (tcache_t *)icalloc(size);
+
+	if (tcache == NULL)
+		return (NULL);	/* Allocation failed; nothing was linked or set. */
+
+#ifdef JEMALLOC_STATS
+	/* Link into list of extant tcaches. */
+	malloc_mutex_lock(&arena->lock);
+	ql_elm_new(tcache, link);
+	ql_tail_insert(&arena->tcache_ql, tcache, link);
+	malloc_mutex_unlock(&arena->lock);
+#endif
+	/* Give each bin its slice of the trailing pointer-stack area. */
+	tcache->arena = arena;
+	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
+	for (i = 0; i < nhbins; i++) {
+		tcache->tbins[i].lg_fill_div = 1;
+		tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
+		    (uintptr_t)stack_offset);
+		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
+	}
+	/* Record the new cache via TCACHE_SET (macro defined elsewhere). */
+	TCACHE_SET(tcache);
+
+	return (tcache);
+}
+
+void
+tcache_destroy(tcache_t *tcache)
+{
+	unsigned i;
+	size_t tcache_size;
+	/* Merge stats, flush every bin completely, then free the tcache. */
+#ifdef JEMALLOC_STATS
+	/* Unlink from list of extant tcaches; the stats merge runs unlocked. */
+	malloc_mutex_lock(&tcache->arena->lock);
+	ql_remove(&tcache->arena->tcache_ql, tcache, link);
+	malloc_mutex_unlock(&tcache->arena->lock);
+	tcache_stats_merge(tcache, tcache->arena);
+#endif
+	/* Small bins: flush with rem == 0 so nothing stays cached. */
+	for (i = 0; i < nbins; i++) {
+		tcache_bin_t *tbin = &tcache->tbins[i];
+		tcache_bin_flush_small(tbin, i, 0
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+		    , tcache
+#endif
+		    );
+
+#ifdef JEMALLOC_STATS
+		if (tbin->tstats.nrequests != 0) {
+			arena_t *arena = tcache->arena;
+			arena_bin_t *bin = &arena->bins[i];
+			malloc_mutex_lock(&bin->lock);
+			bin->stats.nrequests += tbin->tstats.nrequests;
+			malloc_mutex_unlock(&bin->lock);
+		}
+#endif
+	}
+	/* Large bins: i continues from nbins up to nhbins. */
+	for (; i < nhbins; i++) {
+		tcache_bin_t *tbin = &tcache->tbins[i];
+		tcache_bin_flush_large(tbin, i, 0
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+		    , tcache
+#endif
+		    );
+
+#ifdef JEMALLOC_STATS
+		if (tbin->tstats.nrequests != 0) {
+			arena_t *arena = tcache->arena;
+			malloc_mutex_lock(&arena->lock);
+			arena->stats.nrequests_large += tbin->tstats.nrequests;
+			arena->stats.lstats[i - nbins].nrequests +=
+			    tbin->tstats.nrequests;
+			malloc_mutex_unlock(&arena->lock);
+		}
+#endif
+	}
+
+#ifdef JEMALLOC_PROF
+	if (tcache->prof_accumbytes > 0) {
+		malloc_mutex_lock(&tcache->arena->lock);
+		arena_prof_accum(tcache->arena, tcache->prof_accumbytes);
+		malloc_mutex_unlock(&tcache->arena->lock);
+	}
+#endif
+	/* Free the tcache itself by the path that matches its size. */
+	tcache_size = arena_salloc(tcache);
+	if (tcache_size <= small_maxclass) {
+		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
+		arena_t *arena = chunk->arena;
+		size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
+		    PAGE_SHIFT;
+		arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias];
+		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+		    (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) <<
+		    PAGE_SHIFT));
+		arena_bin_t *bin = run->bin;
+
+		malloc_mutex_lock(&bin->lock);
+		arena_dalloc_bin(arena, chunk, tcache, mapelm);
+		malloc_mutex_unlock(&bin->lock);
+	} else if (tcache_size <= tcache_maxclass) {
+		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
+		arena_t *arena = chunk->arena;
+
+		malloc_mutex_lock(&arena->lock);
+		arena_dalloc_large(arena, chunk, tcache);
+		malloc_mutex_unlock(&arena->lock);
+	} else
+		idalloc(tcache);
+}
+
+static void
+tcache_thread_cleanup(void *arg)
+{
+	tcache_t *tcache = (tcache_t *)arg;
+	/* Destructor for tcache_tsd; arg is a tcache, NULL, or marker 1 or 2. */
+	if (tcache == (void *)(uintptr_t)1) {
+		/*
+		 * The previous time this destructor was called, we set the key
+		 * to 1 so that other destructors wouldn't cause re-creation of
+		 * the tcache.  This time, do nothing, so that the destructor
+		 * will not be called again.
+		 */
+	} else if (tcache == (void *)(uintptr_t)2) {
+		/*
+		 * Another destructor called an allocator function after this
+		 * destructor was called.  Reset tcache to 1 in order to
+		 * receive another callback.
+		 */
+		TCACHE_SET((uintptr_t)1);
+	} else if (tcache != NULL) {
+		assert(tcache != (void *)(uintptr_t)1);
+		tcache_destroy(tcache);	/* A real tcache: flush and free it. */
+		TCACHE_SET((uintptr_t)1);
+	}
+}
+
+#ifdef JEMALLOC_STATS
+void
+tcache_stats_merge(tcache_t *tcache, arena_t *arena)
+{
+	unsigned i;
+	/* Add each tbin's nrequests to arena's counters, then zero the tbin's. */
+	/* Merge and reset tcache stats. */
+	for (i = 0; i < nbins; i++) {
+		arena_bin_t *bin = &arena->bins[i];
+		tcache_bin_t *tbin = &tcache->tbins[i];
+		malloc_mutex_lock(&bin->lock);
+		bin->stats.nrequests += tbin->tstats.nrequests;
+		malloc_mutex_unlock(&bin->lock);
+		tbin->tstats.nrequests = 0;
+	}
+	/* NOTE(review): no lock taken below; confirm callers hold arena->lock. */
+	for (; i < nhbins; i++) {
+		malloc_large_stats_t *lstats = &arena->stats.lstats[i - nbins];
+		tcache_bin_t *tbin = &tcache->tbins[i];
+		arena->stats.nrequests_large += tbin->tstats.nrequests;
+		lstats->nrequests += tbin->tstats.nrequests;
+		tbin->tstats.nrequests = 0;
+	}
+}
+#endif
+
+bool
+tcache_boot(void)
+{
+	/* Returns true if base_alloc() fails, false otherwise. */
+	if (opt_tcache) {
+		unsigned i;
+
+		/*
+		 * Clamp opt_lg_tcache_max to [small_maxclass, arena_maxclass].
+		 * Shift a size_t so exponents of 32 or more do not overflow.
+		 */
+		if (opt_lg_tcache_max < 0 || ((size_t)1 <<
+		    opt_lg_tcache_max) < small_maxclass)
+			tcache_maxclass = small_maxclass;
+		else if (((size_t)1 << opt_lg_tcache_max) > arena_maxclass)
+			tcache_maxclass = arena_maxclass;
+		else
+			tcache_maxclass = ((size_t)1 << opt_lg_tcache_max);
+
+		nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT);
+
+		/* Initialize tcache_bin_info. */
+		tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
+		    sizeof(tcache_bin_info_t));
+		if (tcache_bin_info == NULL)
+			return (true);
+		stack_nelms = 0;
+		for (i = 0; i < nbins; i++) {
+			if ((arena_bin_info[i].nregs << 1) <=
+			    TCACHE_NSLOTS_SMALL_MAX) {
+				tcache_bin_info[i].ncached_max =
+				    (arena_bin_info[i].nregs << 1);
+			} else {
+				tcache_bin_info[i].ncached_max =
+				    TCACHE_NSLOTS_SMALL_MAX;
+			}
+			stack_nelms += tcache_bin_info[i].ncached_max;
+		}
+		for (; i < nhbins; i++) {
+			tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
+			stack_nelms += tcache_bin_info[i].ncached_max;
+		}
+
+		/* GC increment: ceil(2^lg_sweep / nbins), computed in size_t. */
+		if (opt_lg_tcache_gc_sweep >= 0) {
+			size_t sweep = (size_t)1 << opt_lg_tcache_gc_sweep;
+			tcache_gc_incr = (unsigned)((sweep / nbins) +
+			    ((sweep % nbins == 0) ? 0 : 1));
+		} else
+			tcache_gc_incr = 0;
+
+		if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
+		    0) {
+			malloc_write(
+			    "<jemalloc>: Error in pthread_key_create()\n");
+			abort();
+		}
+	}
+
+	return (false);
+}
+/******************************************************************************/
+#endif /* JEMALLOC_TCACHE */
diff --git a/deps/jemalloc.orig/src/zone.c b/deps/jemalloc.orig/src/zone.c
new file mode 100644
index 00000000..2c1b2318
--- /dev/null
+++ b/deps/jemalloc.orig/src/zone.c
@@ -0,0 +1,354 @@
+#include "jemalloc/internal/jemalloc_internal.h"
+#ifndef JEMALLOC_ZONE
+#  error "This source file is for zones on Darwin (OS X)."
+#endif
+
+/******************************************************************************/
+/* Data. */
+
+static malloc_zone_t zone, szone;
+static struct malloc_introspection_t zone_introspect, ozone_introspect;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static size_t	zone_size(malloc_zone_t *zone, void *ptr);
+static void	*zone_malloc(malloc_zone_t *zone, size_t size);
+static void	*zone_calloc(malloc_zone_t *zone, size_t num, size_t size);
+static void	*zone_valloc(malloc_zone_t *zone, size_t size);
+static void	zone_free(malloc_zone_t *zone, void *ptr);
+static void	*zone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void	*zone_memalign(malloc_zone_t *zone, size_t alignment,
+    size_t size);
+static void	zone_free_definite_size(malloc_zone_t *zone, void *ptr,
+    size_t size);
+#endif
+static void	*zone_destroy(malloc_zone_t *zone);
+static size_t	zone_good_size(malloc_zone_t *zone, size_t size);
+static void	zone_force_lock(malloc_zone_t *zone);
+static void	zone_force_unlock(malloc_zone_t *zone);
+static size_t	ozone_size(malloc_zone_t *zone, void *ptr);
+static void	ozone_free(malloc_zone_t *zone, void *ptr);
+static void	*ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
+static unsigned	ozone_batch_malloc(malloc_zone_t *zone, size_t size,
+    void **results, unsigned num_requested);
+static void	ozone_batch_free(malloc_zone_t *zone, void **to_be_freed,
+    unsigned num);
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void	ozone_free_definite_size(malloc_zone_t *zone, void *ptr,
+    size_t size);
+#endif
+static void	ozone_force_lock(malloc_zone_t *zone);
+static void	ozone_force_unlock(malloc_zone_t *zone);
+
+/******************************************************************************/
+/*
+ * Functions.
+ */
+
+static size_t
+zone_size(malloc_zone_t *zone, void *ptr)
+{
+
+	/*
+	 * There appear to be places within Darwin (such as setenv(3)) that
+	 * cause calls to this function with pointers that *no* zone owns.  If
+	 * we knew that all pointers were owned by *some* zone, we could split
+	 * our zone into two parts, and use one as the default allocator and
+	 * the other as the default deallocator/reallocator.  Since that will
+	 * not work in practice, we must check all pointers to assure that they
+	 * reside within a mapped chunk before determining size.
+	 */
+	return (ivsalloc(ptr));
+}
+
+static void *
+zone_malloc(malloc_zone_t *zone, size_t size)
+{
+
+	return (JEMALLOC_P(malloc)(size));
+}
+
+static void *
+zone_calloc(malloc_zone_t *zone, size_t num, size_t size)
+{
+
+	return (JEMALLOC_P(calloc)(num, size));
+}
+
+static void *
+zone_valloc(malloc_zone_t *zone, size_t size)
+{
+	void *ret = NULL; /* Assignment avoids useless compiler warning. */
+
+	JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size);
+
+	return (ret);
+}
+
+static void
+zone_free(malloc_zone_t *zone, void *ptr)
+{
+
+	JEMALLOC_P(free)(ptr);
+}
+
+static void *
+zone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+	return (JEMALLOC_P(realloc)(ptr, size));
+}
+
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void *
+zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size)
+{
+	void *ret = NULL; /* Assignment avoids useless compiler warning. */
+
+	JEMALLOC_P(posix_memalign)(&ret, alignment, size);
+
+	return (ret);
+}
+
+static void
+zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+	assert(ivsalloc(ptr) == size);
+	JEMALLOC_P(free)(ptr);
+}
+#endif
+
+static void *
+zone_destroy(malloc_zone_t *zone)
+{
+
+	/* This function should never be called. */
+	assert(false);
+	return (NULL);
+}
+
+static size_t
+zone_good_size(malloc_zone_t *zone, size_t size)
+{
+	size_t ret;
+	void *p;
+
+	/*
+	 * Actually create an object of the appropriate size, then find out
+	 * how large it could have been without moving up to the next size
+	 * class.
+	 */
+	p = JEMALLOC_P(malloc)(size);
+	if (p != NULL) {
+		ret = isalloc(p);
+		JEMALLOC_P(free)(p);
+	} else
+		ret = size;
+
+	return (ret);
+}
+
+static void
+zone_force_lock(malloc_zone_t *zone)
+{
+
+	if (isthreaded)
+		jemalloc_prefork();
+}
+
+static void
+zone_force_unlock(malloc_zone_t *zone)
+{
+
+	if (isthreaded)
+		jemalloc_postfork();
+}
+
+malloc_zone_t *
+create_zone(void)
+{
+
+	zone.size = (void *)zone_size;
+	zone.malloc = (void *)zone_malloc;
+	zone.calloc = (void *)zone_calloc;
+	zone.valloc = (void *)zone_valloc;
+	zone.free = (void *)zone_free;
+	zone.realloc = (void *)zone_realloc;
+	zone.destroy = (void *)zone_destroy;
+	zone.zone_name = "jemalloc_zone";
+	zone.batch_malloc = NULL;
+	zone.batch_free = NULL;
+	zone.introspect = &zone_introspect;
+	zone.version = JEMALLOC_ZONE_VERSION;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+	zone.memalign = zone_memalign;
+	zone.free_definite_size = zone_free_definite_size;
+#endif
+
+	zone_introspect.enumerator = NULL;
+	zone_introspect.good_size = (void *)zone_good_size;
+	zone_introspect.check = NULL;
+	zone_introspect.print = NULL;
+	zone_introspect.log = NULL;
+	zone_introspect.force_lock = (void *)zone_force_lock;
+	zone_introspect.force_unlock = (void *)zone_force_unlock;
+	zone_introspect.statistics = NULL;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+	zone_introspect.zone_locked = NULL;
+#endif
+
+	return (&zone);
+}
+
+static size_t
+ozone_size(malloc_zone_t *zone, void *ptr)
+{
+	size_t ret;
+
+	ret = ivsalloc(ptr);	/* jemalloc's answer is tried first */
+	if (ret == 0)		/* 0: fall back to the saved original szone */
+		ret = szone.size(zone, ptr);
+
+	return (ret);
+}
+
+static void
+ozone_free(malloc_zone_t *zone, void *ptr)
+{
+
+	if (ivsalloc(ptr) != 0)	/* nonzero size: handled as jemalloc's */
+		JEMALLOC_P(free)(ptr);
+	else {
+		size_t size = szone.size(zone, ptr);
+		if (size != 0)	/* size 0 from both: ptr is left alone */
+			(szone.free)(zone, ptr);
+	}
+}
+
+static void *
+ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
+{
+	size_t oldsize;
+
+	if (ptr == NULL)
+		return (JEMALLOC_P(malloc)(size));
+
+	oldsize = ivsalloc(ptr);
+	if (oldsize != 0)	/* nonzero size: handled as jemalloc's */
+		return (JEMALLOC_P(realloc)(ptr, size));
+	else {
+		oldsize = szone.size(zone, ptr);
+		if (oldsize == 0)	/* size 0 from both: allocate fresh */
+			return (JEMALLOC_P(malloc)(size));
+		else {	/* szone block: copy it into a jemalloc block */
+			void *ret = JEMALLOC_P(malloc)(size);
+			if (ret != NULL) {	/* on failure ptr is kept */
+				memcpy(ret, ptr, (oldsize < size) ? oldsize :
+				    size);
+				(szone.free)(zone, ptr);
+			}
+			return (ret);
+		}
+	}
+}
+
+static unsigned
+ozone_batch_malloc(malloc_zone_t *zone, size_t size, void **results,
+    unsigned num_requested)
+{
+
+	/* Don't bother implementing this interface, since it isn't required. */
+	return (0);
+}
+
+static void
+ozone_batch_free(malloc_zone_t *zone, void **to_be_freed, unsigned num)
+{
+	unsigned i;
+
+	for (i = 0; i < num; i++)
+		ozone_free(zone, to_be_freed[i]);
+}
+
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void
+ozone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+	if (ivsalloc(ptr) != 0) {
+		assert(ivsalloc(ptr) == size);
+		JEMALLOC_P(free)(ptr);
+	} else {
+		assert(size == szone.size(zone, ptr));
+		szone.free_definite_size(zone, ptr, size);
+	}
+}
+#endif
+
+static void
+ozone_force_lock(malloc_zone_t *zone)
+{
+
+	/* jemalloc locking is taken care of by the normal jemalloc zone. */
+	szone.introspect->force_lock(zone);
+}
+
+static void
+ozone_force_unlock(malloc_zone_t *zone)
+{
+
+	/* jemalloc locking is taken care of by the normal jemalloc zone. */
+	szone.introspect->force_unlock(zone);
+}
+
+/*
+ * Overlay the default scalable zone (szone) such that existing allocations are
+ * drained, and further allocations come from jemalloc.  This is necessary
+ * because Core Foundation directly accesses and uses the szone before the
+ * jemalloc library is even loaded.
+ */
+void
+szone2ozone(malloc_zone_t *zone)
+{
+
+	/*
+	 * Stash a copy of the original szone so that we can call its
+	 * functions as needed.  Note that internally, the szone stores its
+	 * bookkeeping data structures immediately following the malloc_zone_t
+	 * header, so when calling szone functions, we need to pass a pointer
+	 * to the original zone structure.
+	 */
+	memcpy(&szone, zone, sizeof(malloc_zone_t));
+	/* Redirect the live zone's hooks; szone retains the originals. */
+	zone->size = (void *)ozone_size;
+	zone->malloc = (void *)zone_malloc;
+	zone->calloc = (void *)zone_calloc;
+	zone->valloc = (void *)zone_valloc;
+	zone->free = (void *)ozone_free;
+	zone->realloc = (void *)ozone_realloc;
+	zone->destroy = (void *)zone_destroy;
+	zone->zone_name = "jemalloc_ozone";
+	zone->batch_malloc = ozone_batch_malloc;
+	zone->batch_free = ozone_batch_free;
+	zone->introspect = &ozone_introspect;
+	zone->version = JEMALLOC_ZONE_VERSION;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+	zone->memalign = zone_memalign;
+	zone->free_definite_size = ozone_free_definite_size;
+#endif
+	/* Fill in the introspection table installed as zone->introspect. */
+	ozone_introspect.enumerator = NULL;
+	ozone_introspect.good_size = (void *)zone_good_size;
+	ozone_introspect.check = NULL;
+	ozone_introspect.print = NULL;
+	ozone_introspect.log = NULL;
+	ozone_introspect.force_lock = (void *)ozone_force_lock;
+	ozone_introspect.force_unlock = (void *)ozone_force_unlock;
+	ozone_introspect.statistics = NULL;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+	ozone_introspect.zone_locked = NULL;
+#endif
+}
diff --git a/deps/jemalloc.orig/test/allocated.c b/deps/jemalloc.orig/test/allocated.c
new file mode 100644
index 00000000..b1e40e47
--- /dev/null
+++ b/deps/jemalloc.orig/test/allocated.c
@@ -0,0 +1,142 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+
+#define	JEMALLOC_MANGLE
+#include "jemalloc_test.h"
+
+void *
+thread_start(void *arg)
+{
+	int err;
+	void *p;
+	uint64_t a0, a1, d0, d1;
+	uint64_t *ap0, *ap1, *dp0, *dp1;
+	size_t sz, usize;
+
+	sz = sizeof(a0);
+	if ((err = JEMALLOC_P(mallctl)("thread.allocated", &a0, &sz, NULL,
+	    0))) {
+		if (err == ENOENT) {
+#ifdef JEMALLOC_STATS
+			assert(false);
+#endif
+			goto RETURN;
+		}
+		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		    strerror(err));
+		exit(1);
+	}
+	sz = sizeof(ap0);
+	if ((err = JEMALLOC_P(mallctl)("thread.allocatedp", &ap0, &sz, NULL,
+	    0))) {
+		if (err == ENOENT) {
+#ifdef JEMALLOC_STATS
+			assert(false);
+#endif
+			goto RETURN;
+		}
+		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		    strerror(err));
+		exit(1);
+	}
+	assert(*ap0 == a0);
+
+	sz = sizeof(d0);
+	if ((err = JEMALLOC_P(mallctl)("thread.deallocated", &d0, &sz, NULL,
+	    0))) {
+		if (err == ENOENT) {
+#ifdef JEMALLOC_STATS
+			assert(false);
+#endif
+			goto RETURN;
+		}
+		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		    strerror(err));
+		exit(1);
+	}
+	sz = sizeof(dp0);
+	if ((err = JEMALLOC_P(mallctl)("thread.deallocatedp", &dp0, &sz, NULL,
+	    0))) {
+		if (err == ENOENT) {
+#ifdef JEMALLOC_STATS
+			assert(false);
+#endif
+			goto RETURN;
+		}
+		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		    strerror(err));
+		exit(1);
+	}
+	assert(*dp0 == d0);
+
+	p = JEMALLOC_P(malloc)(1);
+	if (p == NULL) {
+		fprintf(stderr, "%s(): Error in malloc()\n", __func__);
+		exit(1);
+	}
+
+	sz = sizeof(a1);
+	JEMALLOC_P(mallctl)("thread.allocated", &a1, &sz, NULL, 0);
+	sz = sizeof(ap1);
+	JEMALLOC_P(mallctl)("thread.allocatedp", &ap1, &sz, NULL, 0);
+	assert(*ap1 == a1);
+	assert(ap0 == ap1);
+
+	usize = JEMALLOC_P(malloc_usable_size)(p);
+	assert(a0 + usize <= a1);
+
+	JEMALLOC_P(free)(p);
+
+	sz = sizeof(d1);
+	JEMALLOC_P(mallctl)("thread.deallocated", &d1, &sz, NULL, 0);
+	sz = sizeof(dp1);
+	JEMALLOC_P(mallctl)("thread.deallocatedp", &dp1, &sz, NULL, 0);
+	assert(*dp1 == d1);
+	assert(dp0 == dp1);
+
+	assert(d0 + usize <= d1);
+
+RETURN:
+	return (NULL);
+}
+
+int
+main(void)
+{
+	int ret = 0;
+	pthread_t thread;
+
+	fprintf(stderr, "Test begin\n");
+	/* Alternate runs in the main thread with runs in new threads. */
+	thread_start(NULL);
+
+	if (pthread_create(&thread, NULL, thread_start, NULL) != 0) {
+		fprintf(stderr, "%s(): Error in pthread_create()\n", __func__);
+		ret = 1;
+		goto RETURN;
+	}
+	/* thread_start() only returns NULL; int ret cannot hold a void *. */
+	pthread_join(thread, NULL);
+
+	thread_start(NULL);
+
+	if (pthread_create(&thread, NULL, thread_start, NULL) != 0) {
+		fprintf(stderr, "%s(): Error in pthread_create()\n", __func__);
+		ret = 1;
+		goto RETURN;
+	}
+	/* Again, there is no thread result worth collecting. */
+	pthread_join(thread, NULL);
+
+	thread_start(NULL);
+
+RETURN:
+	fprintf(stderr, "Test end\n");
+	return (ret);
+}
diff --git a/deps/jemalloc.orig/test/allocated.exp b/deps/jemalloc.orig/test/allocated.exp
new file mode 100644
index 00000000..369a88dd
--- /dev/null
+++ b/deps/jemalloc.orig/test/allocated.exp
@@ -0,0 +1,2 @@
+Test begin
+Test end
diff --git a/deps/jemalloc.orig/test/allocm.c b/deps/jemalloc.orig/test/allocm.c
new file mode 100644
index 00000000..59d0002e
--- /dev/null
+++ b/deps/jemalloc.orig/test/allocm.c
@@ -0,0 +1,133 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#define	JEMALLOC_MANGLE
+#include "jemalloc_test.h"
+
+#define CHUNK 0x400000
+/* #define MAXALIGN ((size_t)0x80000000000LLU) */
+#define MAXALIGN ((size_t)0x2000000LLU)
+#define NITER 4
+
+int
+main(void)
+{
+	int r;
+	void *p;
+	size_t sz, alignment, total, tsz;
+	unsigned i;
+	void *ps[NITER];
+
+	fprintf(stderr, "Test begin\n");
+	/* Basic request; sz receives the real size of the allocation. */
+	sz = 0;
+	r = JEMALLOC_P(allocm)(&p, &sz, 42, 0);
+	if (r != ALLOCM_SUCCESS) {
+		fprintf(stderr, "Unexpected allocm() error\n");
+		abort();
+	}
+	if (sz < 42)
+		fprintf(stderr, "Real size smaller than expected\n");
+	if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS)
+		fprintf(stderr, "Unexpected dallocm() error\n");
+	/* Same request without asking for the real size. */
+	r = JEMALLOC_P(allocm)(&p, NULL, 42, 0);
+	if (r != ALLOCM_SUCCESS) {
+		fprintf(stderr, "Unexpected allocm() error\n");
+		abort();
+	}
+	if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS)
+		fprintf(stderr, "Unexpected dallocm() error\n");
+	/* Same request again, this time with ALLOCM_ZERO. */
+	r = JEMALLOC_P(allocm)(&p, NULL, 42, ALLOCM_ZERO);
+	if (r != ALLOCM_SUCCESS) {
+		fprintf(stderr, "Unexpected allocm() error\n");
+		abort();
+	}
+	if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS)
+		fprintf(stderr, "Unexpected dallocm() error\n");
+	/* The next three requests are too large and must not succeed. */
+#if LG_SIZEOF_PTR == 3
+	alignment = 0x8000000000000000LLU;
+	sz        = 0x8000000000000000LLU;
+#else
+	alignment = 0x80000000LU;
+	sz        = 0x80000000LU;
+#endif
+	r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment));
+	if (r == ALLOCM_SUCCESS) {
+		fprintf(stderr,
+		    "Expected error for allocm(&p, %zu, 0x%x)\n",
+		    sz, ALLOCM_ALIGN(alignment));
+	}
+
+#if LG_SIZEOF_PTR == 3
+	alignment = 0x4000000000000000LLU;
+	sz        = 0x8400000000000001LLU;
+#else
+	alignment = 0x40000000LU;
+	sz        = 0x84000001LU;
+#endif
+	r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment));
+	if (r == ALLOCM_SUCCESS) {
+		fprintf(stderr,
+		    "Expected error for allocm(&p, %zu, 0x%x)\n",
+		    sz, ALLOCM_ALIGN(alignment));
+	}
+
+	alignment = 0x10LLU;
+#if LG_SIZEOF_PTR == 3
+	sz   = 0xfffffffffffffff0LLU;
+#else
+	sz   = 0xfffffff0LU;
+#endif
+	r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment));
+	if (r == ALLOCM_SUCCESS) {
+		fprintf(stderr,
+		    "Expected error for allocm(&p, %zu, 0x%x)\n",
+		    sz, ALLOCM_ALIGN(alignment));
+	}
+
+	for (i = 0; i < NITER; i++)
+		ps[i] = NULL;
+	/* Sweep power-of-two alignments, NITER objects per size. */
+	for (alignment = 8;
+	    alignment <= MAXALIGN;
+	    alignment <<= 1) {
+		total = 0;
+		fprintf(stderr, "Alignment: %zu\n", alignment);
+		for (sz = 1;
+		    sz < 3 * alignment && sz < (1U << 31);
+		    sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
+			for (i = 0; i < NITER; i++) {
+				r = JEMALLOC_P(allocm)(&ps[i], NULL, sz,
+				    ALLOCM_ALIGN(alignment) | ALLOCM_ZERO);
+				if (r != ALLOCM_SUCCESS) {
+					fprintf(stderr,
+					    "Error for size %zu (0x%zx): %d\n",
+					    sz, sz, r);
+					exit(1);
+				}
+				if ((uintptr_t)ps[i] & (alignment-1)) {
+					fprintf(stderr,
+					    "%p inadequately aligned for"
+					    " alignment: %zu\n", ps[i], alignment);
+				}
+				JEMALLOC_P(sallocm)(ps[i], &tsz, 0);
+				total += tsz;
+				if (total >= (MAXALIGN << 1))
+					break;
+			}
+			for (i = 0; i < NITER; i++) {
+				if (ps[i] != NULL) {
+					JEMALLOC_P(dallocm)(ps[i], 0);
+					ps[i] = NULL;
+				}
+			}
+		}
+	}
+
+	fprintf(stderr, "Test end\n");
+	return (0);
+}
diff --git a/deps/jemalloc.orig/test/allocm.exp b/deps/jemalloc.orig/test/allocm.exp
new file mode 100644
index 00000000..b5061c72
--- /dev/null
+++ b/deps/jemalloc.orig/test/allocm.exp
@@ -0,0 +1,25 @@
+Test begin
+Alignment: 8
+Alignment: 16
+Alignment: 32
+Alignment: 64
+Alignment: 128
+Alignment: 256
+Alignment: 512
+Alignment: 1024
+Alignment: 2048
+Alignment: 4096
+Alignment: 8192
+Alignment: 16384
+Alignment: 32768
+Alignment: 65536
+Alignment: 131072
+Alignment: 262144
+Alignment: 524288
+Alignment: 1048576
+Alignment: 2097152
+Alignment: 4194304
+Alignment: 8388608
+Alignment: 16777216
+Alignment: 33554432
+Test end
diff --git a/deps/jemalloc.orig/test/bitmap.c b/deps/jemalloc.orig/test/bitmap.c
new file mode 100644
index 00000000..adfaacfe
--- /dev/null
+++ b/deps/jemalloc.orig/test/bitmap.c
@@ -0,0 +1,157 @@
+#define	JEMALLOC_MANGLE
+#include "jemalloc_test.h"
+
+/*
+ * Avoid using the assert() from jemalloc_internal.h, since it requires
+ * internal libjemalloc functionality.
+ */
+#include <assert.h>
+
+/*
+ * Directly include the bitmap code, since it isn't exposed outside
+ * libjemalloc.
+ */
+#include "../src/bitmap.c"
+
+#if (LG_BITMAP_MAXBITS > 12)
+#  define MAXBITS	4500
+#else
+#  define MAXBITS	(1U << LG_BITMAP_MAXBITS)
+#endif
+
+static void
+test_bitmap_size(void)
+{
+	size_t i, prev_size;
+
+	prev_size = 0;
+	for (i = 1; i <= MAXBITS; i++) {
+		size_t size = bitmap_size(i);
+		assert(size >= prev_size);
+		prev_size = size;
+	}
+}
+
+static void
+test_bitmap_init(void)
+{
+	size_t i;
+
+	for (i = 1; i <= MAXBITS; i++) {
+		bitmap_info_t binfo;
+		bitmap_info_init(&binfo, i);
+		{
+			size_t j;
+			bitmap_t bitmap[bitmap_info_ngroups(&binfo)];
+			bitmap_init(bitmap, &binfo);
+
+			for (j = 0; j < i; j++)
+				assert(bitmap_get(bitmap, &binfo, j) == false);
+
+		}
+	}
+}
+
+static void
+test_bitmap_set(void)
+{
+	size_t i;
+
+	for (i = 1; i <= MAXBITS; i++) {
+		bitmap_info_t binfo;
+		bitmap_info_init(&binfo, i);
+		{
+			size_t j;
+			bitmap_t bitmap[bitmap_info_ngroups(&binfo)];
+			bitmap_init(bitmap, &binfo);
+
+			for (j = 0; j < i; j++)
+				bitmap_set(bitmap, &binfo, j);
+			assert(bitmap_full(bitmap, &binfo));
+		}
+	}
+}
+
+static void
+test_bitmap_unset(void)
+{
+	size_t i;
+
+	for (i = 1; i <= MAXBITS; i++) {
+		bitmap_info_t binfo;
+		bitmap_info_init(&binfo, i);
+		{
+			size_t j;
+			bitmap_t bitmap[bitmap_info_ngroups(&binfo)];
+			bitmap_init(bitmap, &binfo);
+
+			for (j = 0; j < i; j++)
+				bitmap_set(bitmap, &binfo, j);
+			assert(bitmap_full(bitmap, &binfo));
+			for (j = 0; j < i; j++)
+				bitmap_unset(bitmap, &binfo, j);
+			for (j = 0; j < i; j++)
+				bitmap_set(bitmap, &binfo, j);
+			assert(bitmap_full(bitmap, &binfo));
+		}
+	}
+}
+
+static void
+test_bitmap_sfu(void)
+{
+	size_t i;
+
+	for (i = 1; i <= MAXBITS; i++) {
+		bitmap_info_t binfo;
+		bitmap_info_init(&binfo, i);
+		{
+			ssize_t j;
+			bitmap_t bitmap[bitmap_info_ngroups(&binfo)];
+			bitmap_init(bitmap, &binfo);
+
+			/* Iteratively set bits starting at the beginning. */
+			for (j = 0; j < i; j++)
+				assert(bitmap_sfu(bitmap, &binfo) == j);
+			assert(bitmap_full(bitmap, &binfo));
+
+			/*
+			 * Iteratively unset bits starting at the end, and
+			 * verify that bitmap_sfu() reaches the unset bits.
+			 */
+			for (j = i - 1; j >= 0; j--) {
+				bitmap_unset(bitmap, &binfo, j);
+				assert(bitmap_sfu(bitmap, &binfo) == j);
+				bitmap_unset(bitmap, &binfo, j);
+			}
+			assert(bitmap_get(bitmap, &binfo, 0) == false);
+
+			/*
+			 * Iteratively set bits starting at the beginning, and
+			 * verify that bitmap_sfu() looks past them.
+			 */
+			for (j = 1; j < i; j++) {
+				bitmap_set(bitmap, &binfo, j - 1);
+				assert(bitmap_sfu(bitmap, &binfo) == j);
+				bitmap_unset(bitmap, &binfo, j);
+			}
+			assert(bitmap_sfu(bitmap, &binfo) == i - 1);
+			assert(bitmap_full(bitmap, &binfo));
+		}
+	}
+}
+
+int
+main(void)
+{
+	fprintf(stderr, "Test begin\n");
+
+	test_bitmap_size();
+	test_bitmap_init();
+	test_bitmap_set();
+	test_bitmap_unset();
+	test_bitmap_sfu();
+
+	fprintf(stderr, "Test end\n");
+	return (0);
+}
diff --git a/deps/jemalloc.orig/test/bitmap.exp b/deps/jemalloc.orig/test/bitmap.exp
new file mode 100644
index 00000000..369a88dd
--- /dev/null
+++ b/deps/jemalloc.orig/test/bitmap.exp
@@ -0,0 +1,2 @@
+Test begin
+Test end
diff --git a/deps/jemalloc.orig/test/mremap.c b/deps/jemalloc.orig/test/mremap.c
new file mode 100644
index 00000000..146c66f4
--- /dev/null
+++ b/deps/jemalloc.orig/test/mremap.c
@@ -0,0 +1,67 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+
+#define	JEMALLOC_MANGLE
+#include "jemalloc_test.h"
+
+int
+main(void)
+{
+	int ret, err;
+	size_t sz, lg_chunk, chunksize, i;
+	char *p, *q;
+
+	fprintf(stderr, "Test begin\n");
+
+	sz = sizeof(lg_chunk);
+	if ((err = JEMALLOC_P(mallctl)("opt.lg_chunk", &lg_chunk, &sz, NULL,
+	    0))) {
+		assert(err != ENOENT);
+		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		    strerror(err));
+		ret = 1;
+		goto RETURN;
+	}
+	chunksize = ((size_t)1U) << lg_chunk;
+
+	p = (char *)malloc(chunksize);
+	if (p == NULL) {
+		fprintf(stderr, "malloc(%zu) --> %p\n", chunksize, p);
+		ret = 1;
+		goto RETURN;
+	}
+	memset(p, 'a', chunksize);
+
+	q = (char *)realloc(p, chunksize * 2);
+	if (q == NULL) {
+		fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize * 2,
+		    q);
+		ret = 1;
+		goto RETURN;
+	}
+	for (i = 0; i < chunksize; i++) {
+		assert(q[i] == 'a');
+	}
+
+	p = q;
+
+	q = (char *)realloc(p, chunksize);
+	if (q == NULL) {
+		fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize, q);
+		ret = 1;
+		goto RETURN;
+	}
+	for (i = 0; i < chunksize; i++) {
+		assert(q[i] == 'a');
+	}
+
+	free(q);
+
+	ret = 0;
+RETURN:
+	fprintf(stderr, "Test end\n");
+	return (ret);
+}
diff --git a/deps/jemalloc.orig/test/mremap.exp b/deps/jemalloc.orig/test/mremap.exp
new file mode 100644
index 00000000..369a88dd
--- /dev/null
+++ b/deps/jemalloc.orig/test/mremap.exp
@@ -0,0 +1,2 @@
+Test begin
+Test end
diff --git a/deps/jemalloc.orig/test/posix_memalign.c b/deps/jemalloc.orig/test/posix_memalign.c
new file mode 100644
index 00000000..3e306c01
--- /dev/null
+++ b/deps/jemalloc.orig/test/posix_memalign.c
@@ -0,0 +1,121 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+
+#define	JEMALLOC_MANGLE
+#include "jemalloc_test.h"
+
+#define CHUNK 0x400000
+/* #define MAXALIGN ((size_t)0x80000000000LLU) */
+#define MAXALIGN ((size_t)0x2000000LLU)
+#define NITER 4
+
+int
+main(void)
+{
+	size_t alignment, size, total;
+	unsigned i;
+	int err;
+	void *p, *ps[NITER];
+
+	fprintf(stderr, "Test begin\n");
+
+	/* Test error conditions. */
+	for (alignment = 0; alignment < sizeof(void *); alignment++) {
+		err = JEMALLOC_P(posix_memalign)(&p, alignment, 1);
+		if (err != EINVAL) {
+			fprintf(stderr,
+			    "Expected error for invalid alignment %zu\n",
+			    alignment);
+		}
+	}
+
+	for (alignment = sizeof(size_t); alignment < MAXALIGN;
+	    alignment <<= 1) {
+		err = JEMALLOC_P(posix_memalign)(&p, alignment + 1, 1);
+		if (err == 0) {
+			fprintf(stderr,
+			    "Expected error for invalid alignment %zu\n",
+			    alignment + 1);
+		}
+	}
+
+#if LG_SIZEOF_PTR == 3
+	alignment = 0x8000000000000000LLU;
+	size      = 0x8000000000000000LLU;
+#else
+	alignment = 0x80000000LU;
+	size      = 0x80000000LU;
+#endif
+	err = JEMALLOC_P(posix_memalign)(&p, alignment, size);
+	if (err == 0) {
+		fprintf(stderr,
+		    "Expected error for posix_memalign(&p, %zu, %zu)\n",
+		    alignment, size);
+	}
+
+#if LG_SIZEOF_PTR == 3
+	alignment = 0x4000000000000000LLU;
+	size      = 0x8400000000000001LLU;
+#else
+	alignment = 0x40000000LU;
+	size      = 0x84000001LU;
+#endif
+	err = JEMALLOC_P(posix_memalign)(&p, alignment, size);
+	if (err == 0) {
+		fprintf(stderr,
+		    "Expected error for posix_memalign(&p, %zu, %zu)\n",
+		    alignment, size);
+	}
+
+	alignment = 0x10LLU;
+#if LG_SIZEOF_PTR == 3
+	size = 0xfffffffffffffff0LLU;
+#else
+	size = 0xfffffff0LU;
+#endif
+	err = JEMALLOC_P(posix_memalign)(&p, alignment, size);
+	if (err == 0) {
+		fprintf(stderr,
+		    "Expected error for posix_memalign(&p, %zu, %zu)\n",
+		    alignment, size);
+	}
+
+	for (i = 0; i < NITER; i++)
+		ps[i] = NULL;
+
+	for (alignment = 8;
+	    alignment <= MAXALIGN;
+	    alignment <<= 1) {
+		total = 0;
+		fprintf(stderr, "Alignment: %zu\n", alignment);
+		for (size = 1;
+		    size < 3 * alignment && size < (1U << 31);
+		    size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
+			for (i = 0; i < NITER; i++) {
+				err = JEMALLOC_P(posix_memalign)(&ps[i],
+				    alignment, size);
+				if (err) {
+					fprintf(stderr,
+					    "Error for size %zu (0x%zx): %s\n",
+					    size, size, strerror(err));
+					exit(1);
+				}
+				total += JEMALLOC_P(malloc_usable_size)(ps[i]);
+				if (total >= (MAXALIGN << 1))
+					break;
+			}
+			for (i = 0; i < NITER; i++) {
+				if (ps[i] != NULL) {
+					JEMALLOC_P(free)(ps[i]);
+					ps[i] = NULL;
+				}
+			}
+		}
+	}
+
+	fprintf(stderr, "Test end\n");
+	return (0);
+}
diff --git a/deps/jemalloc.orig/test/posix_memalign.exp b/deps/jemalloc.orig/test/posix_memalign.exp
new file mode 100644
index 00000000..b5061c72
--- /dev/null
+++ b/deps/jemalloc.orig/test/posix_memalign.exp
@@ -0,0 +1,25 @@
+Test begin
+Alignment: 8
+Alignment: 16
+Alignment: 32
+Alignment: 64
+Alignment: 128
+Alignment: 256
+Alignment: 512
+Alignment: 1024
+Alignment: 2048
+Alignment: 4096
+Alignment: 8192
+Alignment: 16384
+Alignment: 32768
+Alignment: 65536
+Alignment: 131072
+Alignment: 262144
+Alignment: 524288
+Alignment: 1048576
+Alignment: 2097152
+Alignment: 4194304
+Alignment: 8388608
+Alignment: 16777216
+Alignment: 33554432
+Test end
diff --git a/deps/jemalloc.orig/test/rallocm.c b/deps/jemalloc.orig/test/rallocm.c
new file mode 100644
index 00000000..ccf326bb
--- /dev/null
+++ b/deps/jemalloc.orig/test/rallocm.c
@@ -0,0 +1,127 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <assert.h>
+
+#define	JEMALLOC_MANGLE
+#include "jemalloc_test.h"
+
+int
+main(void)
+{
+	size_t pagesize;
+	void *p, *q;
+	size_t sz, tsz;
+	int r;
+
+	fprintf(stderr, "Test begin\n");
+
+	/* Get page size. */
+	{
+		long result = sysconf(_SC_PAGESIZE);
+		assert(result != -1);
+		pagesize = (size_t)result;
+	}
+
+	r = JEMALLOC_P(allocm)(&p, &sz, 42, 0);
+	if (r != ALLOCM_SUCCESS) {
+		fprintf(stderr, "Unexpected allocm() error\n");
+		abort();
+	}
+
+	q = p;
+	r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 0, ALLOCM_NO_MOVE);
+	if (r != ALLOCM_SUCCESS)
+		fprintf(stderr, "Unexpected rallocm() error\n");
+	if (q != p)
+		fprintf(stderr, "Unexpected object move\n");
+	if (tsz != sz) {
+		fprintf(stderr, "Unexpected size change: %zu --> %zu\n",
+		    sz, tsz);
+	}
+
+	q = p;
+	r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 5, ALLOCM_NO_MOVE);
+	if (r != ALLOCM_SUCCESS)
+		fprintf(stderr, "Unexpected rallocm() error\n");
+	if (q != p)
+		fprintf(stderr, "Unexpected object move\n");
+	if (tsz != sz) {
+		fprintf(stderr, "Unexpected size change: %zu --> %zu\n",
+		    sz, tsz);
+	}
+
+	q = p;
+	r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE);
+	if (r != ALLOCM_ERR_NOT_MOVED)
+		fprintf(stderr, "Unexpected rallocm() result\n");
+	if (q != p)
+		fprintf(stderr, "Unexpected object move\n");
+	if (tsz != sz) {
+		fprintf(stderr, "Unexpected size change: %zu --> %zu\n",
+		    sz, tsz);
+	}
+
+	q = p;
+	r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, 0);
+	if (r != ALLOCM_SUCCESS)
+		fprintf(stderr, "Unexpected rallocm() error\n");
+	if (q == p)
+		fprintf(stderr, "Expected object move\n");
+	if (tsz == sz) {
+		fprintf(stderr, "Expected size change: %zu --> %zu\n",
+		    sz, tsz);
+	}
+	p = q;
+	sz = tsz;
+
+	r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*2, 0, 0);
+	if (r != ALLOCM_SUCCESS)
+		fprintf(stderr, "Unexpected rallocm() error\n");
+	if (q == p)
+		fprintf(stderr, "Expected object move\n");
+	if (tsz == sz) {
+		fprintf(stderr, "Expected size change: %zu --> %zu\n",
+		    sz, tsz);
+	}
+	p = q;
+	sz = tsz;
+
+	r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*4, 0, 0);
+	if (r != ALLOCM_SUCCESS)
+		fprintf(stderr, "Unexpected rallocm() error\n");
+	if (tsz == sz) {
+		fprintf(stderr, "Expected size change: %zu --> %zu\n",
+		    sz, tsz);
+	}
+	p = q;
+	sz = tsz;
+
+	r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*2, 0, ALLOCM_NO_MOVE);
+	if (r != ALLOCM_SUCCESS)
+		fprintf(stderr, "Unexpected rallocm() error\n");
+	if (q != p)
+		fprintf(stderr, "Unexpected object move\n");
+	if (tsz == sz) {
+		fprintf(stderr, "Expected size change: %zu --> %zu\n",
+		    sz, tsz);
+	}
+	sz = tsz;
+
+	r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*4, 0, ALLOCM_NO_MOVE);
+	if (r != ALLOCM_SUCCESS)
+		fprintf(stderr, "Unexpected rallocm() error\n");
+	if (q != p)
+		fprintf(stderr, "Unexpected object move\n");
+	if (tsz == sz) {
+		fprintf(stderr, "Expected size change: %zu --> %zu\n",
+		    sz, tsz);
+	}
+	sz = tsz;
+
+	JEMALLOC_P(dallocm)(p, 0);
+
+	fprintf(stderr, "Test end\n");
+	return (0);
+}
diff --git a/deps/jemalloc.orig/test/rallocm.exp b/deps/jemalloc.orig/test/rallocm.exp
new file mode 100644
index 00000000..369a88dd
--- /dev/null
+++ b/deps/jemalloc.orig/test/rallocm.exp
@@ -0,0 +1,2 @@
+Test begin
+Test end
diff --git a/deps/jemalloc.orig/test/thread_arena.c b/deps/jemalloc.orig/test/thread_arena.c
new file mode 100644
index 00000000..ef8d6817
--- /dev/null
+++ b/deps/jemalloc.orig/test/thread_arena.c
@@ -0,0 +1,92 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <string.h>
+#include <assert.h>
+
+#define	JEMALLOC_MANGLE
+#include "jemalloc_test.h"
+
+#define NTHREADS 10
+
+void *
+thread_start(void *arg)
+{
+	unsigned main_arena_ind = *(unsigned *)arg;
+	void *p;
+	unsigned arena_ind;
+	size_t size;
+	int err;
+
+	p = JEMALLOC_P(malloc)(1);
+	if (p == NULL) {
+		fprintf(stderr, "%s(): Error in malloc()\n", __func__);
+		return (void *)1;
+	}
+
+	size = sizeof(arena_ind);
+	if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size,
+	    &main_arena_ind, sizeof(main_arena_ind)))) {
+		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		    strerror(err));
+		return (void *)1;
+	}
+
+	size = sizeof(arena_ind);
+	if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, NULL,
+	    0))) {
+		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		    strerror(err));
+		return (void *)1;
+	}
+	assert(arena_ind == main_arena_ind);
+
+	return (NULL);
+}
+
+int
+main(void)
+{
+	int ret = 0, err;
+	void *p, *thread_ret = NULL;
+	unsigned arena_ind, i;
+	size_t size;
+	pthread_t threads[NTHREADS];
+
+	fprintf(stderr, "Test begin\n");
+
+	p = JEMALLOC_P(malloc)(1);
+	if (p == NULL) {
+		fprintf(stderr, "%s(): Error in malloc()\n", __func__);
+		ret = 1;
+		goto RETURN;
+	}
+	/* Read the arena index currently bound to the main thread. */
+	size = sizeof(arena_ind);
+	if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, NULL,
+	    0))) {
+		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		    strerror(err));
+		ret = 1;
+		goto RETURN;
+	}
+	/* Hand that index to every worker; see thread_start(). */
+	for (i = 0; i < NTHREADS; i++) {
+		if (pthread_create(&threads[i], NULL, thread_start,
+		    (void *)&arena_ind) != 0) {
+			fprintf(stderr, "%s(): Error in pthread_create()\n",
+			    __func__);
+			ret = 1;
+			goto RETURN;
+		}
+	}
+	/* Join into a real void *; any non-NULL result fails the test. */
+	for (i = 0; i < NTHREADS; i++) {
+		pthread_join(threads[i], &thread_ret);
+		ret |= (thread_ret != NULL);
+	}
+
+RETURN:
+	fprintf(stderr, "Test end\n");
+	return (ret);
+}
diff --git a/deps/jemalloc.orig/test/thread_arena.exp b/deps/jemalloc.orig/test/thread_arena.exp
new file mode 100644
index 00000000..369a88dd
--- /dev/null
+++ b/deps/jemalloc.orig/test/thread_arena.exp
@@ -0,0 +1,2 @@
+Test begin
+Test end
diff --git a/deps/jemalloc/.gitignore b/deps/jemalloc/.gitignore
index 32b4c424..e6e8bb00 100644
--- a/deps/jemalloc/.gitignore
+++ b/deps/jemalloc/.gitignore
@@ -11,6 +11,7 @@
 /lib/
 /Makefile
 /include/jemalloc/internal/jemalloc_internal\.h
+/include/jemalloc/internal/size_classes\.h
 /include/jemalloc/jemalloc\.h
 /include/jemalloc/jemalloc_defs\.h
 /test/jemalloc_test\.h
@@ -21,3 +22,4 @@
 !test/*.c
 !test/*.exp
 /VERSION
+/bin/jemalloc.sh
diff --git a/deps/jemalloc/COPYING b/deps/jemalloc/COPYING
index 10ade120..e27fc4d6 100644
--- a/deps/jemalloc/COPYING
+++ b/deps/jemalloc/COPYING
@@ -1,9 +1,10 @@
 Unless otherwise specified, files in the jemalloc source distribution are
-subject to the following licenses:
+subject to the following license:
 --------------------------------------------------------------------------------
-Copyright (C) 2002-2010 Jason Evans <jasone@canonware.com>.
+Copyright (C) 2002-2012 Jason Evans <jasone@canonware.com>.
 All rights reserved.
-Copyright (C) 2007-2010 Mozilla Foundation.  All rights reserved.
+Copyright (C) 2007-2012 Mozilla Foundation.  All rights reserved.
+Copyright (C) 2009-2012 Facebook, Inc.  All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:
@@ -24,28 +25,3 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 --------------------------------------------------------------------------------
-Copyright (C) 2009-2010 Facebook, Inc.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
-* Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-* Redistributions in binary form must reproduce the above copyright notice, this
-  list of conditions and the following disclaimer in the documentation and/or
-  other materials provided with the distribution.
-* Neither the name of Facebook, Inc. nor the names of its contributors may be
-  used to endorse or promote products derived from this software without
-  specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
---------------------------------------------------------------------------------
diff --git a/deps/jemalloc/ChangeLog b/deps/jemalloc/ChangeLog
index 326ee7a9..231dd6da 100644
--- a/deps/jemalloc/ChangeLog
+++ b/deps/jemalloc/ChangeLog
@@ -6,6 +6,95 @@ found in the git revision history:
     http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git
     git://canonware.com/jemalloc.git
 
+* 3.0.0 (May 11, 2012)
+
+  Although this version adds some major new features, the primary focus is on
+  internal code cleanup that facilitates maintainability and portability, most
+  of which is not reflected in the ChangeLog.  This is the first release to
+  incorporate substantial contributions from numerous other developers, and the
+  result is a more broadly useful allocator (see the git revision history for
+  contribution details).  Note that the license has been unified, thanks to
+  Facebook granting a license under the same terms as the other copyright
+  holders (see COPYING).
+
+  New features:
+  - Implement Valgrind support, redzones, and quarantine.
+  - Add support for additional platforms:
+    + FreeBSD
+    + Mac OS X Lion
+    + MinGW
+    + Windows (no support yet for replacing the system malloc)
+  - Add support for additional architectures:
+    + MIPS
+    + SH4
+    + Tilera
+  - Add support for cross compiling.
+  - Add nallocm(), which rounds a request size up to the nearest size class
+    without actually allocating.
+  - Implement aligned_alloc() (blame C11).
+  - Add the "thread.tcache.enabled" mallctl.
+  - Add the "opt.prof_final" mallctl.
+  - Update pprof (from gperftools 2.0).
+  - Add the --with-mangling option.
+  - Add the --disable-experimental option.
+  - Add the --disable-munmap option, and make it the default on Linux.
+  - Add the --enable-mremap option; mremap(2) use is now disabled by default.
+
+  Incompatible changes:
+  - Enable stats by default.
+  - Enable fill by default.
+  - Disable lazy locking by default.
+  - Rename the "tcache.flush" mallctl to "thread.tcache.flush".
+  - Rename the "arenas.pagesize" mallctl to "arenas.page".
+  - Change the "opt.lg_prof_sample" default from 0 to 19 (1 B to 512 KiB).
+  - Change the "opt.prof_accum" default from true to false.
+
+  Removed features:
+  - Remove the swap feature, including the "config.swap", "swap.avail",
+    "swap.prezeroed", "swap.nfds", and "swap.fds" mallctls.
+  - Remove highruns statistics, including the
+    "stats.arenas.<i>.bins.<j>.highruns" and
+    "stats.arenas.<i>.lruns.<j>.highruns" mallctls.
+  - As part of small size class refactoring, remove the "opt.lg_[qc]space_max",
+    "arenas.cacheline", "arenas.subpage", "arenas.[tqcs]space_{min,max}", and
+    "arenas.[tqcs]bins" mallctls.
+  - Remove the "arenas.chunksize" mallctl.
+  - Remove the "opt.lg_prof_tcmax" option.
+  - Remove the "opt.lg_prof_bt_max" option.
+  - Remove the "opt.lg_tcache_gc_sweep" option.
+  - Remove the --disable-tiny option, including the "config.tiny" mallctl.
+  - Remove the --enable-dynamic-page-shift configure option.
+  - Remove the --enable-sysv configure option.
+
+  Bug fixes:
+  - Fix a statistics-related bug in the "thread.arena" mallctl that could cause
+    invalid statistics and crashes.
+  - Work around TLS deallocation via free() on Linux.  This bug could cause
+    write-after-free memory corruption.
+  - Fix a potential deadlock that could occur during interval- and
+    growth-triggered heap profile dumps.
+  - Fix large calloc() zeroing bugs due to dropping chunk map unzeroed flags.
+  - Fix chunk_alloc_dss() to stop claiming memory is zeroed.  This bug could
+    cause memory corruption and crashes with --enable-dss specified.
+  - Fix fork-related bugs that could cause deadlock in children between fork
+    and exec.
+  - Fix malloc_stats_print() to honor 'b' and 'l' in the opts parameter.
+  - Fix realloc(p, 0) to act like free(p).
+  - Do not enforce minimum alignment in memalign().
+  - Check for NULL pointer in malloc_usable_size().
+  - Fix an off-by-one heap profile statistics bug that could be observed in
+    interval- and growth-triggered heap profiles.
+  - Fix the "epoch" mallctl to update cached stats even if the passed in epoch
+    is 0.
+  - Fix bin->runcur management to fix a layout policy bug.  This bug did not
+    affect correctness.
+  - Fix a bug in choose_arena_hard() that potentially caused more arenas to be
+    initialized than necessary.
+  - Add missing "opt.lg_tcache_max" mallctl implementation.
+  - Use glibc allocator hooks to make mixed allocator usage less likely.
+  - Fix build issues for --disable-tcache.
+  - Don't mangle pthread_create() when --with-private-namespace is specified.
+
 * 2.2.5 (November 14, 2011)
 
   Bug fixes:
diff --git a/deps/jemalloc/INSTALL b/deps/jemalloc/INSTALL
index 2a1e469c..e40a7edd 100644
--- a/deps/jemalloc/INSTALL
+++ b/deps/jemalloc/INSTALL
@@ -26,6 +26,19 @@ any of the following arguments (not a definitive list) to 'configure':
     Embed one or more library paths, so that libjemalloc can find the libraries
     it is linked to.  This works only on ELF-based systems.
 
+--with-mangling=<map>
+    Mangle public symbols specified in <map>, which is a comma-separated list of
+    name:mangled pairs.
+
+    For example, to use ld's --wrap option as an alternative method for
+    overriding libc's malloc implementation, specify something like:
+
+      --with-mangling=malloc:__wrap_malloc,free:__wrap_free[...]
+
+    Note that mangling happens prior to application of the prefix specified by
+    --with-jemalloc-prefix, and mangled symbols are then ignored when applying
+    the prefix.
+
 --with-jemalloc-prefix=<prefix>
     Prefix all public APIs with <prefix>.  For example, if <prefix> is
     "prefix_", API changes like the following occur:
@@ -62,8 +75,8 @@ any of the following arguments (not a definitive list) to 'configure':
     Enable assertions and validation code.  This incurs a substantial
     performance hit, but is very useful during application development.
 
---enable-stats
-    Enable statistics gathering functionality.  See the "opt.stats_print"
+--disable-stats
+    Disable statistics gathering functionality.  See the "opt.stats_print"
     option documentation for usage details.
 
 --enable-prof
@@ -90,51 +103,50 @@ any of the following arguments (not a definitive list) to 'configure':
     Statically link against the specified libunwind.a rather than dynamically
     linking with -lunwind.
 
---disable-tiny
-    Disable tiny (sub-quantum-sized) object support.  Technically it is not
-    legal for a malloc implementation to allocate objects with less than
-    quantum alignment (8 or 16 bytes, depending on architecture), but in
-    practice it never causes any problems if, for example, 4-byte allocations
-    are 4-byte-aligned.
-
 --disable-tcache
     Disable thread-specific caches for small objects.  Objects are cached and
     released in bulk, thus reducing the total number of mutex operations.  See
     the "opt.tcache" option for usage details.
 
---enable-swap
-    Enable mmap()ed swap file support.  When this feature is built in, it is
-    possible to specify one or more files that act as backing store.  This
-    effectively allows for per application swap files.
+--enable-mremap
+    Enable huge realloc() via mremap(2).  mremap() is disabled by default
+    because the flavor used is specific to Linux, which has a quirk in its
+    virtual memory allocation algorithm that causes semi-permanent VM map holes
+    under normal jemalloc operation.
+
+--disable-munmap
+    Disable virtual memory deallocation via munmap(2); instead keep track of
+    the virtual memory for later use.  munmap() is disabled by default (i.e.
+    --disable-munmap is implied) on Linux, which has a quirk in its virtual
+    memory allocation algorithm that causes semi-permanent VM map holes under
+    normal jemalloc operation.
 
 --enable-dss
     Enable support for page allocation/deallocation via sbrk(2), in addition to
     mmap(2).
 
---enable-fill
-    Enable support for junk/zero filling of memory.  See the "opt.junk"/
-    "opt.zero" option documentation for usage details.
+--disable-fill
+    Disable support for junk/zero filling of memory, quarantine, and redzones.
+    See the "opt.junk", "opt.zero", "opt.quarantine", and "opt.redzone" option
+    documentation for usage details.
+
+--disable-valgrind
+    Disable support for Valgrind.
+
+--disable-experimental
+    Disable support for the experimental API (*allocm()).
+
+--enable-utrace
+    Enable utrace(2)-based allocation tracing.  This feature is not broadly
+    portable (FreeBSD has it, but Linux and OS X do not).
 
 --enable-xmalloc
     Enable support for optional immediate termination due to out-of-memory
     errors, as is commonly implemented by "xmalloc" wrapper function for malloc.
     See the "opt.xmalloc" option documentation for usage details.
 
---enable-sysv
-    Enable support for System V semantics, wherein malloc(0) returns NULL
-    rather than a minimal allocation.  See the "opt.sysv" option documentation
-    for usage details.
-
---enable-dynamic-page-shift
-    Under most conditions, the system page size never changes (usually 4KiB or
-    8KiB, depending on architecture and configuration), and unless this option
-    is enabled, jemalloc assumes that page size can safely be determined during
-    configuration and hard-coded.  Enabling dynamic page size determination has
-    a measurable impact on performance, since the compiler is forced to load
-    the page size from memory rather than embedding immediate values.
-
---disable-lazy-lock
-    Disable code that wraps pthread_create() to detect when an application
+--enable-lazy-lock
+    Enable code that wraps pthread_create() to detect when an application
     switches from single-threaded to multi-threaded mode, so that it can avoid
     mutex locking/unlocking operations while in single-threaded mode.  In
     practice, this feature usually has little impact on performance unless
@@ -181,11 +193,24 @@ PATH="?"
 
 === Advanced compilation =======================================================
 
+To build only parts of jemalloc, use the following targets:
+
+    build_lib_shared
+    build_lib_static
+    build_lib
+    build_doc_html
+    build_doc_man
+    build_doc
+
 To install only parts of jemalloc, use the following targets:
 
     install_bin
     install_include
+    install_lib_shared
+    install_lib_static
     install_lib
+    install_doc_html
+    install_doc_man
     install_doc
 
 To clean up build results to varying degrees, use the following make targets:
@@ -248,10 +273,6 @@ directory, issue configuration and build commands:
 
 The manual page is generated in both html and roff formats.  Any web browser
 can be used to view the html manual.  The roff manual page can be formatted
-prior to installation via any of the following commands:
+prior to installation via the following command:
 
     nroff -man -t doc/jemalloc.3
-
-    groff -man -t -Tps doc/jemalloc.3 | ps2pdf - doc/jemalloc.3.pdf
-
-    (cd doc; groff -man -man-ext -t -Thtml jemalloc.3 > jemalloc.3.html)
diff --git a/deps/jemalloc/Makefile.in b/deps/jemalloc/Makefile.in
index de7492f9..6675b596 100644
--- a/deps/jemalloc/Makefile.in
+++ b/deps/jemalloc/Makefile.in
@@ -17,129 +17,184 @@ INCLUDEDIR := $(DESTDIR)@INCLUDEDIR@
 LIBDIR := $(DESTDIR)@LIBDIR@
 DATADIR := $(DESTDIR)@DATADIR@
 MANDIR := $(DESTDIR)@MANDIR@
+srcroot := @srcroot@
+objroot := @objroot@
+abs_srcroot := @abs_srcroot@
+abs_objroot := @abs_objroot@
 
 # Build parameters.
-CPPFLAGS := @CPPFLAGS@ -I@srcroot@include -I@objroot@include
+CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include
 CFLAGS := @CFLAGS@
-ifeq (macho, @abi@)
-CFLAGS += -dynamic
-endif
 LDFLAGS := @LDFLAGS@
+EXTRA_LDFLAGS := @EXTRA_LDFLAGS@
 LIBS := @LIBS@
 RPATH_EXTRA := @RPATH_EXTRA@
-ifeq (macho, @abi@)
-SO := dylib
-WL_SONAME := dylib_install_name
+SO := @so@
+IMPORTLIB := @importlib@
+O := @o@
+A := @a@
+EXE := @exe@
+LIBPREFIX := @libprefix@
+REV := @rev@
+install_suffix := @install_suffix@
+ABI := @abi@
+XSLTPROC := @XSLTPROC@
+AUTOCONF := @AUTOCONF@
+_RPATH = @RPATH@
+RPATH = $(if $(1),$(call _RPATH,$(1)))
+cfghdrs_in := @cfghdrs_in@
+cfghdrs_out := @cfghdrs_out@
+cfgoutputs_in := @cfgoutputs_in@
+cfgoutputs_out := @cfgoutputs_out@
+enable_autogen := @enable_autogen@
+enable_experimental := @enable_experimental@
+DSO_LDFLAGS = @DSO_LDFLAGS@
+SOREV = @SOREV@
+PIC_CFLAGS = @PIC_CFLAGS@
+CTARGET = @CTARGET@
+LDTARGET = @LDTARGET@
+MKLIB = @MKLIB@
+CC_MM = @CC_MM@
+
+ifeq (macho, $(ABI))
+TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib"
 else
-SO := so
-WL_SONAME := soname
-endif
-REV := 1
-ifeq (macho, @abi@)
-TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=@objroot@lib
+ifeq (pecoff, $(ABI))
+TEST_LIBRARY_PATH := PATH="$(PATH):$(objroot)lib"
 else
 TEST_LIBRARY_PATH :=
 endif
+endif
+
+LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix)
 
 # Lists of files.
-BINS := @srcroot@bin/pprof
-CHDRS := @objroot@include/jemalloc/jemalloc@install_suffix@.h \
-	@objroot@include/jemalloc/jemalloc_defs@install_suffix@.h
-CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \
-	@srcroot@src/base.c @srcroot@src/bitmap.c @srcroot@src/chunk.c \
-	@srcroot@src/chunk_dss.c @srcroot@src/chunk_mmap.c \
-	@srcroot@src/chunk_swap.c @srcroot@src/ckh.c @srcroot@src/ctl.c \
-	@srcroot@src/extent.c @srcroot@src/hash.c @srcroot@src/huge.c \
-	@srcroot@src/mb.c @srcroot@src/mutex.c @srcroot@src/prof.c \
-	@srcroot@src/rtree.c @srcroot@src/stats.c @srcroot@src/tcache.c
-ifeq (macho, @abi@)
-CSRCS += @srcroot@src/zone.c
+BINS := $(srcroot)bin/pprof $(objroot)bin/jemalloc.sh
+CHDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h \
+	$(objroot)include/jemalloc/jemalloc_defs$(install_suffix).h
+CSRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c $(srcroot)src/atomic.c \
+	$(srcroot)src/base.c $(srcroot)src/bitmap.c $(srcroot)src/chunk.c \
+	$(srcroot)src/chunk_dss.c $(srcroot)src/chunk_mmap.c \
+	$(srcroot)src/ckh.c $(srcroot)src/ctl.c $(srcroot)src/extent.c \
+	$(srcroot)src/hash.c $(srcroot)src/huge.c $(srcroot)src/mb.c \
+	$(srcroot)src/mutex.c $(srcroot)src/prof.c $(srcroot)src/quarantine.c \
+	$(srcroot)src/rtree.c $(srcroot)src/stats.c $(srcroot)src/tcache.c \
+	$(srcroot)src/util.c $(srcroot)src/tsd.c
+ifeq (macho, $(ABI))
+CSRCS += $(srcroot)src/zone.c
+endif
+ifeq ($(IMPORTLIB),$(SO))
+STATIC_LIBS := $(objroot)lib/$(LIBJEMALLOC).$(A)
+endif
+ifdef PIC_CFLAGS
+STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_pic.$(A)
+else
+STATIC_LIBS += $(objroot)lib/$(LIBJEMALLOC)_s.$(A)
 endif
-STATIC_LIBS :=	@objroot@lib/libjemalloc@install_suffix@.a
-DSOS := @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) \
-	@objroot@lib/libjemalloc@install_suffix@.$(SO) \
-	@objroot@lib/libjemalloc@install_suffix@_pic.a
-MAN3 := @objroot@doc/jemalloc@install_suffix@.3
-DOCS_XML := @objroot@doc/jemalloc@install_suffix@.xml
-DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html)
-DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3)
+DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV)
+ifneq ($(SOREV),$(SO))
+DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO)
+endif
+MAN3 := $(objroot)doc/jemalloc$(install_suffix).3
+DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml
+DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.html)
+DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(srcroot)%.3)
 DOCS := $(DOCS_HTML) $(DOCS_MAN3)
-CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \
-	@srcroot@test/bitmap.c @srcroot@test/mremap.c \
-	@srcroot@test/posix_memalign.c @srcroot@test/rallocm.c \
-	@srcroot@test/thread_arena.c
+CTESTS := $(srcroot)test/aligned_alloc.c $(srcroot)test/allocated.c \
+	$(srcroot)test/bitmap.c $(srcroot)test/mremap.c \
+	$(srcroot)test/posix_memalign.c $(srcroot)test/thread_arena.c \
+	$(srcroot)test/thread_tcache_enabled.c
+ifeq ($(enable_experimental), 1)
+CTESTS += $(srcroot)test/allocm.c $(srcroot)test/rallocm.c
+endif
+
+COBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.$(O))
+CPICOBJS := $(CSRCS:$(srcroot)%.c=$(objroot)%.pic.$(O))
+CTESTOBJS := $(CTESTS:$(srcroot)%.c=$(objroot)%.$(O))
 
 .PHONY: all dist doc_html doc_man doc
 .PHONY: install_bin install_include install_lib
 .PHONY: install_html install_man install_doc install
 .PHONY: tests check clean distclean relclean
 
-.SECONDARY : $(CTESTS:@srcroot@%.c=@objroot@%.o)
+.SECONDARY : $(CTESTOBJS)
 
 # Default target.
-all: $(DSOS) $(STATIC_LIBS)
+all: build
 
-dist: doc
+dist: build_doc
 
-@srcroot@doc/%.html : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/html.xsl
-	@XSLTPROC@ -o $@ @objroot@doc/html.xsl $<
+$(srcroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl
+	$(XSLTPROC) -o $@ $(objroot)doc/html.xsl $<
 
-@srcroot@doc/%.3 : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/manpages.xsl
-	@XSLTPROC@ -o $@ @objroot@doc/manpages.xsl $<
+$(srcroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl
+	$(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $<
 
-doc_html: $(DOCS_HTML)
-doc_man: $(DOCS_MAN3)
-doc: $(DOCS)
+build_doc_html: $(DOCS_HTML)
+build_doc_man: $(DOCS_MAN3)
+build_doc: $(DOCS)
 
 #
 # Include generated dependency files.
 #
--include $(CSRCS:@srcroot@%.c=@objroot@%.d)
--include $(CSRCS:@srcroot@%.c=@objroot@%.pic.d)
--include $(CTESTS:@srcroot@%.c=@objroot@%.d)
+ifdef CC_MM
+-include $(COBJS:%.$(O)=%.d)
+-include $(CPICOBJS:%.$(O)=%.d)
+-include $(CTESTOBJS:%.$(O)=%.d)
+endif
 
-@objroot@src/%.o: @srcroot@src/%.c
-	@mkdir -p $(@D)
-	$(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $<
-	@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)"
+$(COBJS): $(objroot)src/%.$(O): $(srcroot)src/%.c
+$(CPICOBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c
+$(CPICOBJS): CFLAGS += $(PIC_CFLAGS)
+$(CTESTOBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c
+$(CTESTOBJS): CPPFLAGS += -I$(objroot)test
+ifneq ($(IMPORTLIB),$(SO))
+$(COBJS): CPPFLAGS += -DDLLEXPORT
+endif
 
-@objroot@src/%.pic.o: @srcroot@src/%.c
-	@mkdir -p $(@D)
-	$(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $<
-	@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $(basename $@))))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.pic.o \2/g\" > $(@:%.o=%.d)"
+ifndef CC_MM
+# Dependencies
+HEADER_DIRS = $(srcroot)include/jemalloc/internal \
+	$(objroot)include/jemalloc $(objroot)include/jemalloc/internal
+HEADERS = $(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h))
+$(COBJS) $(CPICOBJS) $(CTESTOBJS): $(HEADERS)
+$(CTESTOBJS): $(objroot)test/jemalloc_test.h
+endif
 
-%.$(SO) : %.$(SO).$(REV)
+$(COBJS) $(CPICOBJS) $(CTESTOBJS): %.$(O):
 	@mkdir -p $(@D)
-	ln -sf $(<F) $@
+	$(CC) $(CFLAGS) -c $(CPPFLAGS) $(CTARGET) $<
+ifdef CC_MM
+	@$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $<
+endif
 
-@objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) : $(CSRCS:@srcroot@%.c=@objroot@%.pic.o)
+ifneq ($(SOREV),$(SO))
+%.$(SO) : %.$(SOREV)
 	@mkdir -p $(@D)
-	$(CC) -shared -Wl,-$(WL_SONAME),$(@F) $(RPATH_EXTRA:%=@RPATH@%) -o $@ $+ $(LDFLAGS) $(LIBS)
+	ln -sf $(<F) $@
+endif
 
-@objroot@lib/libjemalloc@install_suffix@_pic.a : $(CSRCS:@srcroot@%.c=@objroot@%.pic.o)
+$(objroot)lib/$(LIBJEMALLOC).$(SOREV) : $(if $(PIC_CFLAGS),$(CPICOBJS),$(COBJS))
 	@mkdir -p $(@D)
-	ar crus $@ $+
+	$(CC) $(DSO_LDFLAGS) $(call RPATH,$(RPATH_EXTRA)) $(LDTARGET) $+ $(LDFLAGS) $(LIBS) $(EXTRA_LDFLAGS)
 
-@objroot@lib/libjemalloc@install_suffix@.a : $(CSRCS:@srcroot@%.c=@objroot@%.o)
-	@mkdir -p $(@D)
-	ar crus $@ $+
+$(objroot)lib/$(LIBJEMALLOC)_pic.$(A) : $(CPICOBJS)
+$(objroot)lib/$(LIBJEMALLOC).$(A) : $(COBJS)
+$(objroot)lib/$(LIBJEMALLOC)_s.$(A) : $(COBJS)
 
-@objroot@test/%.o: @srcroot@test/%.c
+$(STATIC_LIBS):
 	@mkdir -p $(@D)
-	$(CC) $(CFLAGS) -c $(CPPFLAGS) -I@objroot@test -o $@ $<
-	@$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) -I@objroot@test $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)"
+	$(MKLIB) $+
 
-# Automatic dependency generation misses #include "*.c".
-@objroot@test/bitmap.o : @objroot@src/bitmap.o
+$(objroot)test/bitmap$(EXE): $(objroot)src/bitmap.$(O)
 
-@objroot@test/%: @objroot@test/%.o \
-		 @objroot@lib/libjemalloc@install_suffix@.$(SO)
+$(objroot)test/%$(EXE): $(objroot)test/%.$(O) $(objroot)src/util.$(O) $(DSOS)
 	@mkdir -p $(@D)
-ifneq (@RPATH@, )
-	$(CC) -o $@ $< @RPATH@@objroot@lib -L@objroot@lib -ljemalloc@install_suffix@ -lpthread
-else
-	$(CC) -o $@ $< -L@objroot@lib -ljemalloc@install_suffix@ -lpthread
-endif
+	$(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(filter -lpthread,$(LIBS)) $(EXTRA_LDFLAGS)
+
+build_lib_shared: $(DSOS)
+build_lib_static: $(STATIC_LIBS)
+build: build_lib_shared build_lib_static
 
 install_bin:
 	install -d $(BINDIR)
@@ -155,46 +210,55 @@ install_include:
 	install -m 644 $$h $(INCLUDEDIR)/jemalloc; \
 done
 
-install_lib: $(DSOS) $(STATIC_LIBS)
+install_lib_shared: $(DSOS)
 	install -d $(LIBDIR)
-	install -m 755 @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)
-	ln -sf libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)/libjemalloc@install_suffix@.$(SO)
-	install -m 755 @objroot@lib/libjemalloc@install_suffix@_pic.a $(LIBDIR)
-	install -m 755 @objroot@lib/libjemalloc@install_suffix@.a $(LIBDIR)
+	install -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR)
+ifneq ($(SOREV),$(SO))
+	ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO)
+endif
+
+install_lib_static: $(STATIC_LIBS)
+	install -d $(LIBDIR)
+	@for l in $(STATIC_LIBS); do \
+	echo "install -m 755 $$l $(LIBDIR)"; \
+	install -m 755 $$l $(LIBDIR); \
+done
+
+install_lib: install_lib_shared install_lib_static
 
-install_html:
-	install -d $(DATADIR)/doc/jemalloc@install_suffix@
+install_doc_html:
+	install -d $(DATADIR)/doc/jemalloc$(install_suffix)
 	@for d in $(DOCS_HTML); do \
-	echo "install -m 644 $$d $(DATADIR)/doc/jemalloc@install_suffix@"; \
-	install -m 644 $$d $(DATADIR)/doc/jemalloc@install_suffix@; \
+	echo "install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \
+	install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \
 done
 
-install_man:
+install_doc_man:
 	install -d $(MANDIR)/man3
 	@for d in $(DOCS_MAN3); do \
 	echo "install -m 644 $$d $(MANDIR)/man3"; \
 	install -m 644 $$d $(MANDIR)/man3; \
 done
 
-install_doc: install_html install_man
+install_doc: install_doc_html install_doc_man
 
 install: install_bin install_include install_lib install_doc
 
-tests: $(CTESTS:@srcroot@%.c=@objroot@%)
+tests: $(CTESTS:$(srcroot)%.c=$(objroot)%$(EXE))
 
 check: tests
-	@mkdir -p @objroot@test
+	@mkdir -p $(objroot)test
 	@$(SHELL) -c 'total=0; \
 		failures=0; \
 		echo "========================================="; \
-		for t in $(CTESTS:@srcroot@%.c=@objroot@%); do \
+		for t in $(CTESTS:$(srcroot)%.c=$(objroot)%); do \
 			total=`expr $$total + 1`; \
 			/bin/echo -n "$${t} ... "; \
-			$(TEST_LIBRARY_PATH) $${t} @abs_srcroot@ @abs_objroot@ \
-			  > @objroot@$${t}.out 2>&1; \
-			if test -e "@srcroot@$${t}.exp"; then \
-				diff -u @srcroot@$${t}.exp \
-				  @objroot@$${t}.out >/dev/null 2>&1; \
+			$(TEST_LIBRARY_PATH) $${t}$(EXE) $(abs_srcroot) \
+			  $(abs_objroot) > $(objroot)$${t}.out 2>&1; \
+			if test -e "$(srcroot)$${t}.exp"; then \
+				diff -w -u $(srcroot)$${t}.exp \
+				  $(objroot)$${t}.out >/dev/null 2>&1; \
 				fail=$$?; \
 				if test "$${fail}" -eq "1" ; then \
 					failures=`expr $${failures} + 1`; \
@@ -211,49 +275,49 @@ check: tests
 		echo "Failures: $${failures}/$${total}"'
 
 clean:
-	rm -f $(CSRCS:@srcroot@%.c=@objroot@%.o)
-	rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.o)
-	rm -f $(CSRCS:@srcroot@%.c=@objroot@%.d)
-	rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.d)
-	rm -f $(CTESTS:@srcroot@%.c=@objroot@%)
-	rm -f $(CTESTS:@srcroot@%.c=@objroot@%.o)
-	rm -f $(CTESTS:@srcroot@%.c=@objroot@%.d)
-	rm -f $(CTESTS:@srcroot@%.c=@objroot@%.out)
+	rm -f $(COBJS)
+	rm -f $(CPICOBJS)
+	rm -f $(COBJS:%.$(O)=%.d)
+	rm -f $(CPICOBJS:%.$(O)=%.d)
+	rm -f $(CTESTOBJS:%.$(O)=%$(EXE))
+	rm -f $(CTESTOBJS)
+	rm -f $(CTESTOBJS:%.$(O)=%.d)
+	rm -f $(CTESTOBJS:%.$(O)=%.out)
 	rm -f $(DSOS) $(STATIC_LIBS)
 
 distclean: clean
-	rm -rf @objroot@autom4te.cache
-	rm -f @objroot@config.log
-	rm -f @objroot@config.status
-	rm -f @objroot@config.stamp
-	rm -f @cfghdrs_out@
-	rm -f @cfgoutputs_out@
+	rm -rf $(objroot)autom4te.cache
+	rm -f $(objroot)config.log
+	rm -f $(objroot)config.status
+	rm -f $(objroot)config.stamp
+	rm -f $(cfghdrs_out)
+	rm -f $(cfgoutputs_out)
 
 relclean: distclean
-	rm -f @objroot@configure
-	rm -f @srcroot@VERSION
+	rm -f $(objroot)configure
+	rm -f $(srcroot)VERSION
 	rm -f $(DOCS_HTML)
 	rm -f $(DOCS_MAN3)
 
 #===============================================================================
 # Re-configuration rules.
 
-ifeq (@enable_autogen@, 1)
-@srcroot@configure : @srcroot@configure.ac
-	cd ./@srcroot@ && @AUTOCONF@
+ifeq ($(enable_autogen), 1)
+$(srcroot)configure : $(srcroot)configure.ac
+	cd ./$(srcroot) && $(AUTOCONF)
 
-@objroot@config.status : @srcroot@configure
-	./@objroot@config.status --recheck
+$(objroot)config.status : $(srcroot)configure
+	./$(objroot)config.status --recheck
 
-@srcroot@config.stamp.in : @srcroot@configure.ac
-	echo stamp > @srcroot@config.stamp.in
+$(srcroot)config.stamp.in : $(srcroot)configure.ac
+	echo stamp > $(srcroot)config.stamp.in
 
-@objroot@config.stamp : @cfgoutputs_in@ @cfghdrs_in@ @srcroot@configure
-	./@objroot@config.status
+$(objroot)config.stamp : $(cfgoutputs_in) $(cfghdrs_in) $(srcroot)configure
+	./$(objroot)config.status
 	@touch $@
 
 # There must be some action in order for make to re-read Makefile when it is
 # out of date.
-@cfgoutputs_out@ @cfghdrs_out@ : @objroot@config.stamp
+$(cfgoutputs_out) $(cfghdrs_out) : $(objroot)config.stamp
 	@true
 endif
diff --git a/deps/jemalloc/README b/deps/jemalloc/README
index 4d7b552b..7661683b 100644
--- a/deps/jemalloc/README
+++ b/deps/jemalloc/README
@@ -1,10 +1,10 @@
 jemalloc is a general-purpose scalable concurrent malloc(3) implementation.
-This distribution is a stand-alone "portable" implementation that currently
-targets Linux and Apple OS X.  jemalloc is included as the default allocator in
-the FreeBSD and NetBSD operating systems, and it is used by the Mozilla Firefox
-web browser on Microsoft Windows-related platforms.  Depending on your needs,
-one of the other divergent versions may suit your needs better than this
-distribution.
+This distribution is a "portable" implementation that currently targets
+FreeBSD, Linux, Apple OS X, and MinGW.  jemalloc is included as the default
+allocator in the FreeBSD and NetBSD operating systems, and it is used by the
+Mozilla Firefox web browser on Microsoft Windows-related platforms.  Depending
+on your needs, one of the other divergent versions may suit you better than
+this distribution.
 
 The COPYING file contains copyright and licensing information.
 
diff --git a/deps/jemalloc/VERSION b/deps/jemalloc/VERSION
index aa85f5a2..c0f4e740 100644
--- a/deps/jemalloc/VERSION
+++ b/deps/jemalloc/VERSION
@@ -1 +1 @@
-2.2.5-0-gfc1bb70e5f0d9a58b39efa39cc549b5af5104760
+3.0.0-0-gfc9b1dbf69f59d7ecfc4ac68da9847e017e1d046
diff --git a/deps/jemalloc/bin/jemalloc.sh.in b/deps/jemalloc/bin/jemalloc.sh.in
new file mode 100644
index 00000000..cdf36737
--- /dev/null
+++ b/deps/jemalloc/bin/jemalloc.sh.in
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+
+@LD_PRELOAD_VAR@=${libdir}/libjemalloc.@SOREV@
+export @LD_PRELOAD_VAR@
+exec "$@"
diff --git a/deps/jemalloc/bin/pprof b/deps/jemalloc/bin/pprof
index 280ddcc8..727eb437 100755
--- a/deps/jemalloc/bin/pprof
+++ b/deps/jemalloc/bin/pprof
@@ -72,7 +72,7 @@ use strict;
 use warnings;
 use Getopt::Long;
 
-my $PPROF_VERSION = "1.7";
+my $PPROF_VERSION = "2.0";
 
 # These are the object tools we use which can come from a
 # user-specified location using --tools, from the PPROF_TOOLS
@@ -87,13 +87,14 @@ my %obj_tool_map = (
   #"addr2line_pdb" => "addr2line-pdb",                                # ditto
   #"otool" => "otool",         # equivalent of objdump on OS X
 );
-my $DOT = "dot";          # leave non-absolute, since it may be in /usr/local
-my $GV = "gv";
-my $EVINCE = "evince";    # could also be xpdf or perhaps acroread
-my $KCACHEGRIND = "kcachegrind";
-my $PS2PDF = "ps2pdf";
+# NOTE: these are lists, so you can put in commandline flags if you want.
+my @DOT = ("dot");          # leave non-absolute, since it may be in /usr/local
+my @GV = ("gv");
+my @EVINCE = ("evince");    # could also be xpdf or perhaps acroread
+my @KCACHEGRIND = ("kcachegrind");
+my @PS2PDF = ("ps2pdf");
 # These are used for dynamic profiles
-my $URL_FETCHER = "curl -s";
+my @URL_FETCHER = ("curl", "-s");
 
 # These are the web pages that servers need to support for dynamic profiles
 my $HEAP_PAGE = "/pprof/heap";
@@ -104,7 +105,10 @@ my $GROWTH_PAGE = "/pprof/growth";
 my $CONTENTION_PAGE = "/pprof/contention";
 my $WALL_PAGE = "/pprof/wall(?:\\?.*)?";  # accepts options like namefilter
 my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?";
-my $CENSUSPROFILE_PAGE = "/pprof/censusprofile";  # must support "?seconds=#"
+my $CENSUSPROFILE_PAGE = "/pprof/censusprofile(?:\\?.*)?"; # must support cgi-param
+                                                       # "?seconds=#",
+                                                       # "?tags_regexp=#" and
+                                                       # "?type=#".
 my $SYMBOL_PAGE = "/pprof/symbol";     # must support symbol lookup via POST
 my $PROGRAM_NAME_PAGE = "/pprof/cmdline";
 
@@ -122,6 +126,11 @@ my $UNKNOWN_BINARY = "(unknown)";
 # 64-bit profiles.  To err on the safe size, default to 64-bit here:
 my $address_length = 16;
 
+my $dev_null = "/dev/null";
+if (! -e $dev_null && $^O =~ /MSWin/) {    # $^O is the OS perl was built for
+  $dev_null = "nul";
+}
+
 # A list of paths to search for shared object files
 my @prefix_list = ();
 
@@ -151,7 +160,8 @@ pprof [options] <profile>
    The /<service> can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile,
                          $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall,
                          $CENSUSPROFILE_PAGE, or /pprof/filteredprofile.
-   For instance: "pprof http://myserver.com:80$HEAP_PAGE".
+   For instance:
+     pprof http://myserver.com:80$HEAP_PAGE
    If /<service> is omitted, the service defaults to $PROFILE_PAGE (cpu profiling).
 pprof --symbols <program>
    Maps addresses to symbol names.  In this mode, stdin should be a
@@ -162,7 +172,7 @@ pprof --symbols <program>
    For more help with querying remote servers, including how to add the
    necessary server-side support code, see this filename (or one like it):
 
-   /usr/doc/google-perftools-$PPROF_VERSION/pprof_remote_servers.html
+   /usr/doc/gperftools-$PPROF_VERSION/pprof_remote_servers.html
 
 Options:
    --cum               Sort by cumulative data
@@ -260,7 +270,7 @@ EOF
 
 sub version_string {
   return <<EOF
-pprof (part of google-perftools $PPROF_VERSION)
+pprof (part of gperftools $PPROF_VERSION)
 
 Copyright 1998-2007 Google Inc.
 
@@ -492,11 +502,13 @@ sub Init() {
   @main::pfile_args = ();
 
   # Remote profiling without a binary (using $SYMBOL_PAGE instead)
-  if (IsProfileURL($ARGV[0])) {
-    $main::use_symbol_page = 1;
-  } elsif (IsSymbolizedProfileFile($ARGV[0])) {
-    $main::use_symbolized_profile = 1;
-    $main::prog = $UNKNOWN_BINARY;  # will be set later from the profile file
+  if (@ARGV > 0) {
+    if (IsProfileURL($ARGV[0])) {
+      $main::use_symbol_page = 1;
+    } elsif (IsSymbolizedProfileFile($ARGV[0])) {
+      $main::use_symbolized_profile = 1;
+      $main::prog = $UNKNOWN_BINARY;  # will be set later from the profile file
+    }
   }
 
   if ($main::use_symbol_page || $main::use_symbolized_profile) {
@@ -540,7 +552,7 @@ sub Init() {
     ConfigureObjTools($main::prog)
   }
 
-  # Break the opt_list_prefix into the prefix_list array
+  # Break the opt_lib_prefix into the prefix_list array
   @prefix_list = split (',', $main::opt_lib_prefix);
 
   # Remove trailing / from the prefixes, in the list to prevent
@@ -636,9 +648,9 @@ sub Main() {
   # Print
   if (!$main::opt_interactive) {
     if ($main::opt_disasm) {
-      PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm, $total);
+      PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm);
     } elsif ($main::opt_list) {
-      PrintListing($libs, $flat, $cumulative, $main::opt_list);
+      PrintListing($total, $libs, $flat, $cumulative, $main::opt_list, 0);
     } elsif ($main::opt_text) {
       # Make sure the output is empty when have nothing to report
       # (only matters when --heapcheck is given but we must be
@@ -646,7 +658,7 @@ sub Main() {
       if ($total != 0) {
         printf("Total: %s %s\n", Unparse($total), Units());
       }
-      PrintText($symbols, $flat, $cumulative, $total, -1);
+      PrintText($symbols, $flat, $cumulative, -1);
     } elsif ($main::opt_raw) {
       PrintSymbolizedProfile($symbols, $profile, $main::prog);
     } elsif ($main::opt_callgrind) {
@@ -656,7 +668,7 @@ sub Main() {
         if ($main::opt_gv) {
           RunGV(TempName($main::next_tmpfile, "ps"), "");
         } elsif ($main::opt_evince) {
-	  RunEvince(TempName($main::next_tmpfile, "pdf"), "");
+          RunEvince(TempName($main::next_tmpfile, "pdf"), "");
         } elsif ($main::opt_web) {
           my $tmp = TempName($main::next_tmpfile, "svg");
           RunWeb($tmp);
@@ -705,24 +717,25 @@ sub ReadlineMightFail {
 sub RunGV {
   my $fname = shift;
   my $bg = shift;       # "" or " &" if we should run in background
-  if (!system("$GV --version >/dev/null 2>&1")) {
+  if (!system(ShellEscape(@GV, "--version") . " >$dev_null 2>&1")) {
     # Options using double dash are supported by this gv version.
     # Also, turn on noantialias to better handle bug in gv for
     # postscript files with large dimensions.
     # TODO: Maybe we should not pass the --noantialias flag
     # if the gv version is known to work properly without the flag.
-    system("$GV --scale=$main::opt_scale --noantialias " . $fname . $bg);
+    system(ShellEscape(@GV, "--scale=$main::opt_scale", "--noantialias", $fname)
+           . $bg);
   } else {
     # Old gv version - only supports options that use single dash.
-    print STDERR "$GV -scale $main::opt_scale\n";
-    system("$GV -scale $main::opt_scale " . $fname . $bg);
+    print STDERR ShellEscape(@GV, "-scale", $main::opt_scale) . "\n";
+    system(ShellEscape(@GV, "-scale", "$main::opt_scale", $fname) . $bg);
   }
 }
 
 sub RunEvince {
   my $fname = shift;
   my $bg = shift;       # "" or " &" if we should run in background
-  system("$EVINCE " . $fname . $bg);
+  system(ShellEscape(@EVINCE, $fname) . $bg);
 }
 
 sub RunWeb {
@@ -756,8 +769,8 @@ sub RunWeb {
 sub RunKcachegrind {
   my $fname = shift;
   my $bg = shift;       # "" or " &" if we should run in background
-  print STDERR "Starting '$KCACHEGRIND " . $fname . $bg . "'\n";
-  system("$KCACHEGRIND " . $fname . $bg);
+  print STDERR "Starting '@KCACHEGRIND " . $fname . $bg . "'\n";
+  system(ShellEscape(@KCACHEGRIND, $fname) . $bg);
 }
 
 
@@ -834,14 +847,14 @@ sub InteractiveCommand {
     my $ignore;
     ($routine, $ignore) = ParseInteractiveArgs($3);
 
-    my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore);
+    my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore);
     my $reduced = ReduceProfile($symbols, $profile);
 
     # Get derived profiles
     my $flat = FlatProfile($reduced);
     my $cumulative = CumulativeProfile($reduced);
 
-    PrintText($symbols, $flat, $cumulative, $total, $line_limit);
+    PrintText($symbols, $flat, $cumulative, $line_limit);
     return 1;
   }
   if (m/^\s*callgrind\s*([^ \n]*)/) {
@@ -861,21 +874,22 @@ sub InteractiveCommand {
 
     return 1;
   }
-  if (m/^\s*list\s*(.+)/) {
+  if (m/^\s*(web)?list\s*(.+)/) {
+    my $html = (defined($1) && ($1 eq "web"));
     $main::opt_list = 1;
 
     my $routine;
     my $ignore;
-    ($routine, $ignore) = ParseInteractiveArgs($1);
+    ($routine, $ignore) = ParseInteractiveArgs($2);
 
-    my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore);
+    my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore);
     my $reduced = ReduceProfile($symbols, $profile);
 
     # Get derived profiles
     my $flat = FlatProfile($reduced);
     my $cumulative = CumulativeProfile($reduced);
 
-    PrintListing($libs, $flat, $cumulative, $routine);
+    PrintListing($total, $libs, $flat, $cumulative, $routine, $html);
     return 1;
   }
   if (m/^\s*disasm\s*(.+)/) {
@@ -886,14 +900,14 @@ sub InteractiveCommand {
     ($routine, $ignore) = ParseInteractiveArgs($1);
 
     # Process current profile to account for various settings
-    my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore);
+    my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore);
     my $reduced = ReduceProfile($symbols, $profile);
 
     # Get derived profiles
     my $flat = FlatProfile($reduced);
     my $cumulative = CumulativeProfile($reduced);
 
-    PrintDisassembly($libs, $flat, $cumulative, $routine, $total);
+    PrintDisassembly($libs, $flat, $cumulative, $routine);
     return 1;
   }
   if (m/^\s*(gv|web|evince)\s*(.*)/) {
@@ -913,7 +927,8 @@ sub InteractiveCommand {
     ($focus, $ignore) = ParseInteractiveArgs($2);
 
     # Process current profile to account for various settings
-    my $profile = ProcessProfile($orig_profile, $symbols, $focus, $ignore);
+    my $profile = ProcessProfile($total, $orig_profile, $symbols,
+                                 $focus, $ignore);
     my $reduced = ReduceProfile($symbols, $profile);
 
     # Get derived profiles
@@ -941,6 +956,7 @@ sub InteractiveCommand {
 
 
 sub ProcessProfile {
+  my $total_count = shift;
   my $orig_profile = shift;
   my $symbols = shift;
   my $focus = shift;
@@ -948,7 +964,6 @@ sub ProcessProfile {
 
   # Process current profile to account for various settings
   my $profile = $orig_profile;
-  my $total_count = TotalProfile($profile);
   printf("Total: %s %s\n", Unparse($total_count), Units());
   if ($focus ne '') {
     $profile = FocusProfile($symbols, $profile, $focus);
@@ -995,6 +1010,11 @@ Commands:
   list [routine_regexp] [-ignore1] [-ignore2]
       Show source listing of routines whose names match "routine_regexp"
 
+  weblist [routine_regexp] [-ignore1] [-ignore2]
+     Displays a source listing of routines whose names match "routine_regexp"
+     in a web browser.  You can click on source lines to view the
+     corresponding disassembly.
+
   top [--cum] [-ignore1] [-ignore2]
   top20 [--cum] [-ignore1] [-ignore2]
   top37 [--cum] [-ignore1] [-ignore2]
@@ -1019,8 +1039,8 @@ parameters will be ignored.
 
 Further pprof details are available at this location (or one similar):
 
- /usr/doc/google-perftools-$PPROF_VERSION/cpu_profiler.html
- /usr/doc/google-perftools-$PPROF_VERSION/heap_profiler.html
+ /usr/doc/gperftools-$PPROF_VERSION/cpu_profiler.html
+ /usr/doc/gperftools-$PPROF_VERSION/heap_profiler.html
 
 ENDOFHELP
 }
@@ -1137,9 +1157,10 @@ sub PrintText {
   my $symbols = shift;
   my $flat = shift;
   my $cumulative = shift;
-  my $total = shift;
   my $line_limit = shift;
 
+  my $total = TotalProfile($flat);
+
   # Which profile to sort by?
   my $s = $main::opt_cum ? $cumulative : $flat;
 
@@ -1169,7 +1190,29 @@ sub PrintText {
              $sym);
     }
     $lines++;
-    last if ($line_limit >= 0 && $lines > $line_limit);
+    last if ($line_limit >= 0 && $lines >= $line_limit);
+  }
+}
+
+# Callgrind format has a compression for repeated function and file
+# names.  You show the name the first time, and just use its number
+# subsequently.  This can cut down the file to about a third or a
+# quarter of its uncompressed size.  $key and $val are the key/value
+# pair that would normally be printed by callgrind; $map is a hash ref
+# from value to number, updated in place.  Returns the line to print.
+sub CompressedCGName {
+  my($key, $val, $map) = @_;
+  my $idx = $map->{$val};
+  # For very short values (3 characters or fewer), an index hurts rather than helps.
+  if (length($val) <= 3) {
+    return "$key=$val\n";
+  } elsif (defined($idx)) {
+    return "$key=($idx)\n";
+  } else {
+    # scalar(keys(%{$map})) is the number of items already in the map, so indices start at 1.
+    $idx = scalar(keys(%{$map})) + 1;
+    $map->{$val} = $idx;
+    return "$key=($idx) $val\n";
   }
 }
 
@@ -1177,13 +1220,16 @@ sub PrintText {
 sub PrintCallgrind {
   my $calls = shift;
   my $filename;
+  my %filename_to_index_map;
+  my %fnname_to_index_map;
+
   if ($main::opt_interactive) {
     $filename = shift;
     print STDERR "Writing callgrind file to '$filename'.\n"
   } else {
     $filename = "&STDOUT";
   }
-  open(CG, ">".$filename );
+  open(CG, ">$filename");
   printf CG ("events: Hits\n\n");
   foreach my $call ( map { $_->[0] }
                      sort { $a->[1] cmp $b ->[1] ||
@@ -1197,11 +1243,14 @@ sub PrintCallgrind {
          $callee_file, $callee_line, $callee_function ) =
        ( $1, $2, $3, $5, $6, $7 );
 
-      
-    printf CG ("fl=$caller_file\nfn=$caller_function\n");
+    # TODO(csilvers): for better compression, collect all the
+    # caller/callee_files and functions first, before printing
+    # anything, and only compress those referenced more than once.
+    printf CG CompressedCGName("fl", $caller_file, \%filename_to_index_map);
+    printf CG CompressedCGName("fn", $caller_function, \%fnname_to_index_map);
     if (defined $6) {
-      printf CG ("cfl=$callee_file\n");
-      printf CG ("cfn=$callee_function\n");
+      printf CG CompressedCGName("cfl", $callee_file, \%filename_to_index_map);
+      printf CG CompressedCGName("cfn", $callee_function, \%fnname_to_index_map);
       printf CG ("calls=$count $callee_line\n");
     }
     printf CG ("$caller_line $count\n\n");
@@ -1214,7 +1263,8 @@ sub PrintDisassembly {
   my $flat = shift;
   my $cumulative = shift;
   my $disasm_opts = shift;
-  my $total = shift;
+
+  my $total = TotalProfile($flat);
 
   foreach my $lib (@{$libs}) {
     my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts);
@@ -1249,10 +1299,10 @@ sub Disassemble {
   my $end_addr = shift;
 
   my $objdump = $obj_tool_map{"objdump"};
-  my $cmd = sprintf("$objdump -C -d -l --no-show-raw-insn " .
-                    "--start-address=0x$start_addr " .
-                    "--stop-address=0x$end_addr $prog");
-  open(OBJDUMP, "$cmd |") || error("$objdump: $!\n");
+  my $cmd = ShellEscape($objdump, "-C", "-d", "-l", "--no-show-raw-insn",
+                        "--start-address=0x$start_addr",
+                        "--stop-address=0x$end_addr", $prog);
+  open(OBJDUMP, "$cmd |") || error("$cmd: $!\n");
   my @result = ();
   my $filename = "";
   my $linenumber = -1;
@@ -1315,13 +1365,33 @@ sub ByName {
   return ShortFunctionName($a) cmp ShortFunctionName($b);
 }
 
-# Print source-listing for all all routines that match $main::opt_list
+# Print source-listing for all routines that match $list_opts
 sub PrintListing {
+  my $total = shift;
   my $libs = shift;
   my $flat = shift;
   my $cumulative = shift;
   my $list_opts = shift;
+  my $html = shift;
+
+  my $output = \*STDOUT;
+  my $fname = "";
 
+  if ($html) {
+    # Arrange to write the output to a temporary file
+    $fname = TempName($main::next_tmpfile, "html");
+    $main::next_tmpfile++;
+    if (!open(TEMP, ">$fname")) {
+      print STDERR "$fname: $!\n";
+      return;
+    }
+    $output = \*TEMP;
+    print $output HtmlListingHeader();
+    printf $output ("<div class=\"legend\">%s<br>Total: %s %s</div>\n",
+                    $main::prog, Unparse($total), Units());
+  }
+
+  my $listed = 0;
   foreach my $lib (@{$libs}) {
     my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts);
     my $offset = AddressSub($lib->[1], $lib->[3]);
@@ -1333,15 +1403,113 @@ sub PrintListing {
       my $addr = AddressAdd($start_addr, $offset);
       for (my $i = 0; $i < $length; $i++) {
         if (defined($cumulative->{$addr})) {
-          PrintSource($lib->[0], $offset,
-                      $routine, $flat, $cumulative,
-                      $start_addr, $end_addr);
+          $listed += PrintSource(
+            $lib->[0], $offset,
+            $routine, $flat, $cumulative,
+            $start_addr, $end_addr,
+            $html,
+            $output);
           last;
         }
         $addr = AddressInc($addr);
       }
     }
   }
+
+  if ($html) {
+    if ($listed > 0) {
+      print $output HtmlListingFooter();
+      close($output);
+      RunWeb($fname);
+    } else {
+      close($output);
+      unlink($fname);
+    }
+  }
+}
+
+sub HtmlListingHeader {  # Returns the fixed HTML preamble (doctype, CSS, asm-toggle script, opening <body>) of a listing page.
+  return <<'EOF';
+<!DOCTYPE html>
+<html>
+<head>
+<title>Pprof listing</title>
+<style type="text/css">
+body {
+  font-family: sans-serif;
+}
+h1 {
+  font-size: 1.5em;
+  margin-bottom: 4px;
+}
+.legend {
+  font-size: 1.25em;
+}
+.line {
+  color: #aaaaaa;
+}
+.nop {
+  color: #aaaaaa;
+}
+.unimportant {
+  color: #cccccc;
+}
+.disasmloc {
+  color: #000000;
+}
+.deadsrc {
+  cursor: pointer;
+}
+.deadsrc:hover {
+  background-color: #eeeeee;
+}
+.livesrc {
+  color: #0000ff;
+  cursor: pointer;
+}
+.livesrc:hover {
+  background-color: #eeeeee;
+}
+.asm {
+  color: #008800;
+  display: none;
+}
+</style>
+<script type="text/javascript">
+function pprof_toggle_asm(e) {
+  var target;
+  if (!e) e = window.event;
+  if (e.target) target = e.target;
+  else if (e.srcElement) target = e.srcElement;
+
+  if (target) {
+    var asm = target.nextSibling;
+    if (asm && asm.className == "asm") {
+      asm.style.display = (asm.style.display == "block" ? "" : "block");
+      e.preventDefault();
+      return false;
+    }
+  }
+}
+</script>
+</head>
+<body>
+EOF
+}
+
+sub HtmlListingFooter {  # Returns the fixed closing markup (</body> and </html>) of a listing page.
+  return <<'EOF';
+</body>
+</html>
+EOF
+}
+
+sub HtmlEscape {  # Returns a copy of the argument with &, < and > replaced by HTML entities.
+  my $text = shift;
+  $text =~ s/&/&amp;/g;  # '&' goes first so the '&' of the entities added below is not escaped again
+  $text =~ s/</&lt;/g;
+  $text =~ s/>/&gt;/g;
+  return $text;
 }
 
 # Returns the indentation of the line, if it has any non-whitespace
@@ -1355,6 +1523,45 @@ sub Indentation {
   }
 }
 
+# If the symbol table contains inlining info, Disassemble() may tag an
+# instruction with a location inside an inlined function.  But for
+# source listings, we prefer to use the location in the function we
+# are listing.  So use MapToSymbols() to fetch full location
+# information for each instruction and then pick out the first
+# location from a location list (location list contains callers before
+# callees in case of inlining).
+#
+# After this routine has run, each entry in $instructions contains:
+#   [0] start address
+#   [1] filename for function we are listing
+#   [2] line number for function we are listing
+#   [3] disassembly
+#   [4] limit address
+#   [5] most specific filename (may be different from [1] due to inlining)
+#   [6] most specific line number (may be different from [2] due to inlining)
+sub GetTopLevelLineNumbers {
+  my ($lib, $offset, $instructions) = @_;  # $instructions is an array ref whose entries are modified in place
+  my $pcs = [];  # start address of every instruction, in order
+  for (my $i = 0; $i <= $#{$instructions}; $i++) {
+    push(@{$pcs}, $instructions->[$i]->[0]);
+  }
+  my $symbols = {};  # passed to MapToSymbols() to be filled; looked up by start address below
+  MapToSymbols($lib, $offset, $pcs, $symbols);
+  for (my $i = 0; $i <= $#{$instructions}; $i++) {
+    my $e = $instructions->[$i];
+    push(@{$e}, $e->[1]);  # append the current filename (becomes [5] per the layout above)
+    push(@{$e}, $e->[2]);  # append the current line number (becomes [6] per the layout above)
+    my $addr = $e->[0];
+    my $sym = $symbols->{$addr};
+    if (defined($sym)) {
+      if ($#{$sym} >= 2 && $sym->[1] =~ m/^(.*):(\d+)$/) {  # needs 3+ elements and a "file:line" in slot 1
+        $e->[1] = $1;  # File name
+        $e->[2] = $2;  # Line number
+      }
+    }
+  }
+}
+
 # Print source-listing for one routine
 sub PrintSource {
   my $prog = shift;
@@ -1364,9 +1571,12 @@ sub PrintSource {
   my $cumulative = shift;
   my $start_addr = shift;
   my $end_addr = shift;
+  my $html = shift;
+  my $output = shift;
 
   # Disassemble all instructions (just to get line numbers)
   my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr);
+  GetTopLevelLineNumbers($prog, $offset, \@instructions);
 
   # Hack 1: assume that the first source file encountered in the
   # disassembly contains the routine
@@ -1379,7 +1589,7 @@ sub PrintSource {
   }
   if (!defined($filename)) {
     print STDERR "no filename found in $routine\n";
-    return;
+    return 0;
   }
 
   # Hack 2: assume that the largest line number from $filename is the
@@ -1412,7 +1622,7 @@ sub PrintSource {
   {
     if (!open(FILE, "<$filename")) {
       print STDERR "$filename: $!\n";
-      return;
+      return 0;
     }
     my $l = 0;
     my $first_indentation = -1;
@@ -1440,12 +1650,24 @@ sub PrintSource {
   # Assign all samples to the range $firstline,$lastline,
   # Hack 4: If an instruction does not occur in the range, its samples
   # are moved to the next instruction that occurs in the range.
-  my $samples1 = {};
-  my $samples2 = {};
-  my $running1 = 0;     # Unassigned flat counts
-  my $running2 = 0;     # Unassigned cumulative counts
-  my $total1 = 0;       # Total flat counts
-  my $total2 = 0;       # Total cumulative counts
+  my $samples1 = {};        # Map from line number to flat count
+  my $samples2 = {};        # Map from line number to cumulative count
+  my $running1 = 0;         # Unassigned flat counts
+  my $running2 = 0;         # Unassigned cumulative counts
+  my $total1 = 0;           # Total flat counts
+  my $total2 = 0;           # Total cumulative counts
+  my %disasm = ();          # Map from line number to disassembly
+  my $running_disasm = "";  # Unassigned disassembly
+  my $skip_marker = "---\n";
+  if ($html) {
+    $skip_marker = "";
+    for (my $l = $firstline; $l <= $lastline; $l++) {
+      $disasm{$l} = "";
+    }
+  }
+  my $last_dis_filename = '';
+  my $last_dis_linenum = -1;
+  my $last_touched_line = -1;  # To detect gaps in disassembly for a line
   foreach my $e (@instructions) {
     # Add up counts for all address that fall inside this instruction
     my $c1 = 0;
@@ -1454,6 +1676,38 @@ sub PrintSource {
       $c1 += GetEntry($flat, $a);
       $c2 += GetEntry($cumulative, $a);
     }
+
+    if ($html) {
+      my $dis = sprintf("      %6s %6s \t\t%8s: %s ",
+                        HtmlPrintNumber($c1),
+                        HtmlPrintNumber($c2),
+                        UnparseAddress($offset, $e->[0]),
+                        CleanDisassembly($e->[3]));
+      
+      # Append the most specific source line associated with this instruction
+      if (length($dis) < 80) { $dis .= (' ' x (80 - length($dis))) };
+      $dis = HtmlEscape($dis);
+      my $f = $e->[5];
+      my $l = $e->[6];
+      if ($f ne $last_dis_filename) {
+        $dis .= sprintf("<span class=disasmloc>%s:%d</span>", 
+                        HtmlEscape(CleanFileName($f)), $l);
+      } elsif ($l ne $last_dis_linenum) {
+        # De-emphasize the unchanged file name portion
+        $dis .= sprintf("<span class=unimportant>%s</span>" .
+                        "<span class=disasmloc>:%d</span>", 
+                        HtmlEscape(CleanFileName($f)), $l);
+      } else {
+        # De-emphasize the entire location
+        $dis .= sprintf("<span class=unimportant>%s:%d</span>", 
+                        HtmlEscape(CleanFileName($f)), $l);
+      }
+      $last_dis_filename = $f;
+      $last_dis_linenum = $l;
+      $running_disasm .= $dis;
+      $running_disasm .= "\n";
+    }
+
     $running1 += $c1;
     $running2 += $c2;
     $total1 += $c1;
@@ -1468,23 +1722,49 @@ sub PrintSource {
       AddEntry($samples2, $line, $running2);
       $running1 = 0;
       $running2 = 0;
+      if ($html) {
+        if ($line != $last_touched_line && $disasm{$line} ne '') {
+          $disasm{$line} .= "\n";
+        }
+        $disasm{$line} .= $running_disasm;
+        $running_disasm = '';
+        $last_touched_line = $line;
+      }
     }
   }
 
   # Assign any leftover samples to $lastline
   AddEntry($samples1, $lastline, $running1);
   AddEntry($samples2, $lastline, $running2);
-
-  printf("ROUTINE ====================== %s in %s\n" .
-         "%6s %6s Total %s (flat / cumulative)\n",
-         ShortFunctionName($routine),
-         $filename,
-         Units(),
-         Unparse($total1),
-         Unparse($total2));
+  if ($html) {
+    if ($lastline != $last_touched_line && $disasm{$lastline} ne '') {
+      $disasm{$lastline} .= "\n";
+    }
+    $disasm{$lastline} .= $running_disasm;
+  }
+
+  if ($html) {
+    printf $output (
+      "<h1>%s</h1>%s\n<pre onClick=\"pprof_toggle_asm()\">\n" .
+      "Total:%6s %6s (flat / cumulative %s)\n",
+      HtmlEscape(ShortFunctionName($routine)),
+      HtmlEscape(CleanFileName($filename)),
+      Unparse($total1),
+      Unparse($total2),
+      Units());
+  } else {
+    printf $output (
+      "ROUTINE ====================== %s in %s\n" .
+      "%6s %6s Total %s (flat / cumulative)\n",
+      ShortFunctionName($routine),
+      CleanFileName($filename),
+      Unparse($total1),
+      Unparse($total2),
+      Units());
+  }
   if (!open(FILE, "<$filename")) {
     print STDERR "$filename: $!\n";
-    return;
+    return 0;
   }
   my $l = 0;
   while (<FILE>) {
@@ -1494,16 +1774,47 @@ sub PrintSource {
         (($l <= $oldlastline + 5) || ($l <= $lastline))) {
       chop;
       my $text = $_;
-      if ($l == $firstline) { printf("---\n"); }
-      printf("%6s %6s %4d: %s\n",
-             UnparseAlt(GetEntry($samples1, $l)),
-             UnparseAlt(GetEntry($samples2, $l)),
-             $l,
-             $text);
-      if ($l == $lastline)  { printf("---\n"); }
+      if ($l == $firstline) { print $output $skip_marker; }
+      my $n1 = GetEntry($samples1, $l);
+      my $n2 = GetEntry($samples2, $l);
+      if ($html) {
+        # Emit a span that has one of the following classes:
+        #    livesrc -- has samples
+        #    deadsrc -- has disassembly, but with no samples
+        #    nop     -- has no matching disassembly
+        # Also emit an optional span containing disassembly.
+        my $dis = $disasm{$l};
+        my $asm = "";
+        if (defined($dis) && $dis ne '') {
+          $asm = "<span class=\"asm\">" . $dis . "</span>";
+        }
+        my $source_class = (($n1 + $n2 > 0) 
+                            ? "livesrc" 
+                            : (($asm ne "") ? "deadsrc" : "nop"));
+        printf $output (
+          "<span class=\"line\">%5d</span> " .
+          "<span class=\"%s\">%6s %6s %s</span>%s\n",
+          $l, $source_class,
+          HtmlPrintNumber($n1),
+          HtmlPrintNumber($n2),
+          HtmlEscape($text),
+          $asm);
+      } else {
+        printf $output(
+          "%6s %6s %4d: %s\n",
+          UnparseAlt($n1),
+          UnparseAlt($n2),
+          $l,
+          $text);
+      }
+      if ($l == $lastline)  { print $output $skip_marker; }
     };
   }
   close(FILE);
+  if ($html) {
+    print $output "</pre>\n";
+  }
+  return 1;
 }
 
 # Return the source line for the specified file/linenumber.
@@ -1646,21 +1957,11 @@ sub PrintDisassembledFunction {
     # Print disassembly
     for (my $x = $first_inst; $x <= $last_inst; $x++) {
       my $e = $instructions[$x];
-      my $address = $e->[0];
-      $address = AddressSub($address, $offset);  # Make relative to section
-      $address =~ s/^0x//;
-      $address =~ s/^0*//;
-
-      # Trim symbols
-      my $d = $e->[3];
-      while ($d =~ s/\([^()%]*\)(\s*const)?//g) { } # Argument types, not (%rax)
-      while ($d =~ s/(\w+)<[^<>]*>/$1/g)  { }       # Remove template arguments
-
       printf("%6s %6s    %8s: %6s\n",
              UnparseAlt($flat_count[$x]),
              UnparseAlt($cum_count[$x]),
-             $address,
-             $d);
+             UnparseAddress($offset, $e->[0]),
+             CleanDisassembly($e->[3]));
     }
   }
 }
@@ -1706,19 +2007,24 @@ sub PrintDot {
 
   # Open DOT output file
   my $output;
+  my $escaped_dot = ShellEscape(@DOT);
+  my $escaped_ps2pdf = ShellEscape(@PS2PDF);
   if ($main::opt_gv) {
-    $output = "| $DOT -Tps2 >" . TempName($main::next_tmpfile, "ps");
+    my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "ps"));
+    $output = "| $escaped_dot -Tps2 >$escaped_outfile";
   } elsif ($main::opt_evince) {
-    $output = "| $DOT -Tps2 | $PS2PDF - " . TempName($main::next_tmpfile, "pdf");
+    my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "pdf"));
+    $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - $escaped_outfile";
   } elsif ($main::opt_ps) {
-    $output = "| $DOT -Tps2";
+    $output = "| $escaped_dot -Tps2";
   } elsif ($main::opt_pdf) {
-    $output = "| $DOT -Tps2 | $PS2PDF - -";
+    $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - -";
   } elsif ($main::opt_web || $main::opt_svg) {
     # We need to post-process the SVG, so write to a temporary file always.
-    $output = "| $DOT -Tsvg >" . TempName($main::next_tmpfile, "svg");
+    my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "svg"));
+    $output = "| $escaped_dot -Tsvg >$escaped_outfile";
   } elsif ($main::opt_gif) {
-    $output = "| $DOT -Tgif";
+    $output = "| $escaped_dot -Tgif";
   } else {
     $output = ">&STDOUT";
   }
@@ -1770,7 +2076,7 @@ sub PrintDot {
     if ($f != $c) {
       $extra = sprintf("\\rof %s (%s)",
                        Unparse($c),
-                       Percent($c, $overall_total));
+                       Percent($c, $local_total));
     }
     my $style = "";
     if ($main::opt_heapcheck) {
@@ -1789,7 +2095,7 @@ sub PrintDot {
                 $node{$a},
                 $sym,
                 Unparse($f),
-                Percent($f, $overall_total),
+                Percent($f, $local_total),
                 $extra,
                 $fs,
                 $style,
@@ -1799,10 +2105,12 @@ sub PrintDot {
   # Get edges and counts per edge
   my %edge = ();
   my $n;
+  my $fullname_to_shortname_map = {};
+  FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map);
   foreach my $k (keys(%{$raw})) {
     # TODO: omit low %age edges
     $n = $raw->{$k};
-    my @translated = TranslateStack($symbols, $k);
+    my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k);
     for (my $i = 1; $i <= $#translated; $i++) {
       my $src = $translated[$i];
       my $dst = $translated[$i-1];
@@ -2186,6 +2494,50 @@ function handleMouseUp(evt) {
 EOF
 }
 
+# Provides a map from fullname to shortname for cases where the
+# shortname is ambiguous.  The symlist has both the fullname and
+# shortname for all symbols, which is usually fine, but sometimes --
+# such as overloaded functions -- two different fullnames can map to
+# the same shortname.  In that case, we use the address of the
+# function to disambiguate the two.  This function fills in a map that
+# maps fullnames to modified shortnames in such cases.  If a fullname
+# is not present in the map, the 'normal' shortname provided by the
+# symlist is the appropriate one to use.
+sub FillFullnameToShortnameMap {
+  my $symbols = shift;
+  my $fullname_to_shortname_map = shift;  # hash ref, filled in place
+  my $shortnames_seen_once = {};  # shortname -> first address-suffixed fullname recorded for it
+  my $shortnames_seen_more_than_once = {};  # shortname -> 1 once a different fullname shares it
+  # Pass 1: find shortnames shared by different address-suffixed fullnames.
+  foreach my $symlist (values(%{$symbols})) {
+    # TODO(csilvers): deal with inlined symbols too.
+    my $shortname = $symlist->[0];
+    my $fullname = $symlist->[2];
+    if ($fullname !~ /<[0-9a-fA-F]+>$/) {  # fullname doesn't end in an address
+      next;       # the only collisions we care about are when addresses differ
+    }
+    if (defined($shortnames_seen_once->{$shortname}) &&
+        $shortnames_seen_once->{$shortname} ne $fullname) {
+      $shortnames_seen_more_than_once->{$shortname} = 1;
+    } else {
+      $shortnames_seen_once->{$shortname} = $fullname;
+    }
+  }
+  # Pass 2: give each fullname with an ambiguous shortname an address-qualified shortname.
+  foreach my $symlist (values(%{$symbols})) {
+    my $shortname = $symlist->[0];
+    my $fullname = $symlist->[2];
+    # TODO(csilvers): take in a list of addresses we care about, and only
+    # store in the map if $symlist->[1] is in that list.  Saves space.
+    next if defined($fullname_to_shortname_map->{$fullname});
+    if (defined($shortnames_seen_more_than_once->{$shortname})) {
+      if ($fullname =~ /<0*([^>]*)>$/) {   # fullname has address at end of it
+        $fullname_to_shortname_map->{$fullname} = "$shortname\@$1";  # shortname@address, leading zeros dropped
+      }
+    }
+  }
+}
+
 # Return a small number that identifies the argument.
 # Multiple calls with the same argument will return the same number.
 # Calls with different arguments will return different numbers.
@@ -2202,6 +2554,7 @@ sub ShortIdFor {
 # Translate a stack of addresses into a stack of symbols
 sub TranslateStack {
   my $symbols = shift;
+  my $fullname_to_shortname_map = shift;
   my $k = shift;
 
   my @addrs = split(/\n/, $k);
@@ -2233,6 +2586,9 @@ sub TranslateStack {
       my $func = $symlist->[$j-2];
       my $fileline = $symlist->[$j-1];
       my $fullfunc = $symlist->[$j];
+      if (defined($fullname_to_shortname_map->{$fullfunc})) {
+        $func = $fullname_to_shortname_map->{$fullfunc};
+      }
       if ($j > 2) {
         $func = "$func (inline)";
       }
@@ -2319,6 +2675,16 @@ sub UnparseAlt {
   }
 }
 
+# Alternate pretty-printed form for numbers.
+# A value numerically equal to zero is rendered as the empty string;
+# every other value is formatted by Unparse().
+sub HtmlPrintNumber {
+  my ($value) = @_;
+  my $is_zero = ($value == 0);
+  return "" if $is_zero;
+  return Unparse($value);
+}
+
 # Return output units
 sub Units {
   if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') {
@@ -2475,6 +2841,13 @@ sub RemoveUninterestingFrames {
                       '__builtin_vec_new',
                       'operator new',
                       'operator new[]',
+                      # The entry to our memory-allocation routines on OS X
+                      'malloc_zone_malloc',
+                      'malloc_zone_calloc',
+                      'malloc_zone_valloc',
+                      'malloc_zone_realloc',
+                      'malloc_zone_memalign',
+                      'malloc_zone_free',
                       # These mark the beginning/end of our custom sections
                       '__start_google_malloc',
                       '__stop_google_malloc',
@@ -2566,9 +2939,11 @@ sub ReduceProfile {
   my $symbols = shift;
   my $profile = shift;
   my $result = {};
+  my $fullname_to_shortname_map = {};
+  FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map);
   foreach my $k (keys(%{$profile})) {
     my $count = $profile->{$k};
-    my @translated = TranslateStack($symbols, $k);
+    my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k);
     my @path = ();
     my %seen = ();
     $seen{''} = 1;      # So that empty keys are skipped
@@ -2775,7 +3150,8 @@ sub AddEntries {
 
 sub CheckSymbolPage {
   my $url = SymbolPageURL();
-  open(SYMBOL, "$URL_FETCHER '$url' |");
+  my $command = ShellEscape(@URL_FETCHER, $url);
+  open(SYMBOL, "$command |") or error($command);
   my $line = <SYMBOL>;
   $line =~ s/\r//g;         # turn windows-looking lines into unix-looking lines
   close(SYMBOL);
@@ -2832,7 +3208,7 @@ sub SymbolPageURL {
 sub FetchProgramName() {
   my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
   my $url = "$baseURL$PROGRAM_NAME_PAGE";
-  my $command_line = "$URL_FETCHER '$url'";
+  my $command_line = ShellEscape(@URL_FETCHER, $url);
   open(CMDLINE, "$command_line |") or error($command_line);
   my $cmdline = <CMDLINE>;
   $cmdline =~ s/\r//g;   # turn windows-looking lines into unix-looking lines
@@ -2849,7 +3225,7 @@ sub FetchProgramName() {
 # curl.  Redirection happens on borg hosts.
 sub ResolveRedirectionForCurl {
   my $url = shift;
-  my $command_line = "$URL_FETCHER --head '$url'";
+  my $command_line = ShellEscape(@URL_FETCHER, "--head", $url);
   open(CMDLINE, "$command_line |") or error($command_line);
   while (<CMDLINE>) {
     s/\r//g;         # turn windows-looking lines into unix-looking lines
@@ -2861,18 +3237,18 @@ sub ResolveRedirectionForCurl {
   return $url;
 }
 
-# Add a timeout flat to URL_FETCHER
+# Add a timeout flag to URL_FETCHER.  Returns a new list.
 sub AddFetchTimeout {
-  my $fetcher = shift;
   my $timeout = shift;
+  my @fetcher = @_;     # all remaining args: the fetcher command words
   if (defined($timeout)) {
-    if ($fetcher =~ m/\bcurl -s/) {
-      $fetcher .= sprintf(" --max-time %d", $timeout);
-    } elsif ($fetcher =~ m/\brpcget\b/) {
-      $fetcher .= sprintf(" --deadline=%d", $timeout);
+    if (join(" ", @fetcher) =~ m/\bcurl -s/) {
+      push(@fetcher, "--max-time", sprintf("%d", $timeout));
+    } elsif (join(" ", @fetcher) =~ m/\brpcget\b/) {
+      push(@fetcher, sprintf("--deadline=%d", $timeout));
     }
   }
-  return $fetcher;
+  return @fetcher;      # unchanged copy when no timeout or unknown fetcher
 }
 
 # Reads a symbol map from the file handle name given as $1, returning
@@ -2932,15 +3308,17 @@ sub FetchSymbols {
     my $url = SymbolPageURL();
 
     my $command_line;
-    if ($URL_FETCHER =~ m/\bcurl -s/) {
+    if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) {
       $url = ResolveRedirectionForCurl($url);
-      $command_line = "$URL_FETCHER -d '\@$main::tmpfile_sym' '$url'";
+      $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym",
+                                  $url);
     } else {
-      $command_line = "$URL_FETCHER --post '$url' < '$main::tmpfile_sym'";
+      $command_line = (ShellEscape(@URL_FETCHER, "--post", $url)
+                       . " < " . ShellEscape($main::tmpfile_sym));
     }
     # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols.
-    my $cppfilt = $obj_tool_map{"c++filt"};
-    open(SYMBOL, "$command_line | $cppfilt |") or error($command_line);
+    my $escaped_cppfilt = ShellEscape($obj_tool_map{"c++filt"});
+    open(SYMBOL, "$command_line | $escaped_cppfilt |") or error($command_line);
     $symbol_map = ReadSymbols(*SYMBOL{IO});
     close(SYMBOL);
   }
@@ -2956,8 +3334,8 @@ sub FetchSymbols {
     my $shortpc = $pc;
     $shortpc =~ s/^0*//;
     # Each line may have a list of names, which includes the function
-    # and also other functions it has inlined.  They are separated
-    # (in PrintSymbolizedFile), by --, which is illegal in function names.
+    # and also other functions it has inlined.  They are separated (in
+    # PrintSymbolizedProfile), by --, which is illegal in function names.
     my $fullnames;
     if (defined($symbol_map->{$shortpc})) {
       $fullnames = $symbol_map->{$shortpc};
@@ -3035,8 +3413,8 @@ sub FetchDynamicProfile {
       return $real_profile;
     }
 
-    my $fetcher = AddFetchTimeout($URL_FETCHER, $fetch_timeout);
-    my $cmd = "$fetcher '$url' > '$tmp_profile'";
+    my @fetcher = AddFetchTimeout($fetch_timeout, @URL_FETCHER);
+    my $cmd = ShellEscape(@fetcher, $url) . " > " . ShellEscape($tmp_profile);
     if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){
       print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n  ${real_profile}\n";
       if ($encourage_patience) {
@@ -3047,7 +3425,7 @@ sub FetchDynamicProfile {
     }
 
     (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n");
-    (system("mv $tmp_profile $real_profile") == 0) || error("Unable to rename profile\n");
+    (system("mv", $tmp_profile, $real_profile) == 0) || error("Unable to rename profile\n");
     print STDERR "Wrote profile to $real_profile\n";
     $main::collected_profile = $real_profile;
     return $main::collected_profile;
@@ -3161,7 +3539,7 @@ BEGIN {
       my $has_q = 0;
       eval { $has_q = pack("Q", "1") ? 1 : 1; };
       if (!$has_q) {
-	$self->{perl_is_64bit} = 0;
+        $self->{perl_is_64bit} = 0;
       }
       read($self->{file}, $str, 8);
       if (substr($str, 4, 4) eq chr(0)x4) {
@@ -3197,17 +3575,17 @@ BEGIN {
         # TODO(csilvers): if this is a 32-bit perl, the math below
         #    could end up in a too-large int, which perl will promote
         #    to a double, losing necessary precision.  Deal with that.
-	#    Right now, we just die.
-	my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]);
+        #    Right now, we just die.
+        my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]);
         if ($self->{unpack_code} eq 'N') {    # big-endian
-	  ($lo, $hi) = ($hi, $lo);
-	}
-	my $value = $lo + $hi * (2**32);
-	if (!$self->{perl_is_64bit} &&   # check value is exactly represented
-	    (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) {
-	  ::error("Need a 64-bit perl to process this 64-bit profile.\n");
-	}
-	push(@b64_values, $value);
+          ($lo, $hi) = ($hi, $lo);
+        }
+        my $value = $lo + $hi * (2**32);
+        if (!$self->{perl_is_64bit} &&   # check value is exactly represented
+            (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) {
+          ::error("Need a 64-bit perl to process this 64-bit profile.\n");
+        }
+        push(@b64_values, $value);
       }
       @$slots = @b64_values;
     }
@@ -3335,7 +3713,7 @@ sub ReadProfile {
     if (!$main::use_symbolized_profile) {
       # we have both a binary and symbolized profiles, abort
       error("FATAL ERROR: Symbolized profile\n   $fname\ncannot be used with " .
-	    "a binary arg. Try again without passing\n   $prog\n");
+            "a binary arg. Try again without passing\n   $prog\n");
     }
     # Read the symbol section of the symbolized profile file.
     $symbols = ReadSymbols(*PROFILE{IO});
@@ -3636,18 +4014,18 @@ sub ReadHeapProfile {
           # The sampling frequency is the rate of a Poisson process.
           # This means that the probability of sampling an allocation of
           # size X with sampling rate Y is 1 - exp(-X/Y)
-	  if ($n1 != 0) {
-	    my $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
-	    my $scale_factor = 1/(1 - exp(-$ratio));
-	    $n1 *= $scale_factor;
-	    $s1 *= $scale_factor;
-	  }
-	  if ($n2 != 0) {
-	    my $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
-	    my $scale_factor = 1/(1 - exp(-$ratio));
-	    $n2 *= $scale_factor;
-	    $s2 *= $scale_factor;
-	  }
+          if ($n1 != 0) {
+            my $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
+            my $scale_factor = 1/(1 - exp(-$ratio));
+            $n1 *= $scale_factor;
+            $s1 *= $scale_factor;
+          }
+          if ($n2 != 0) {
+            my $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
+            my $scale_factor = 1/(1 - exp(-$ratio));
+            $n2 *= $scale_factor;
+            $s2 *= $scale_factor;
+          }
         } else {
           # Remote-heap version 1
           my $ratio;
@@ -3771,19 +4149,19 @@ sub ReadSynchProfile {
   return $r;
 }
 
-# Given a hex value in the form "0x1abcd" return "0001abcd" or
-# "000000000001abcd", depending on the current address length.
-# There's probably a more idiomatic (or faster) way to do this...
+# Given a hex value in the form "0x1abcd" or "1abcd", return either
+# "0001abcd" or "000000000001abcd", depending on the current (global)
+# address length.  A longer value is returned unpadded, with a warning.
 sub HexExtend {
   my $addr = shift;
 
-  $addr =~ s/^0x//;
-
-  if (length $addr > $address_length) {
-    printf STDERR "Warning:  address $addr is longer than address length $address_length\n";
+  $addr =~ s/^(0x)?0*//;   # drop an optional "0x" and all leading zeros
+  my $zeros_needed = $address_length - length($addr);
+  if ($zeros_needed < 0) {
+    printf STDERR "Warning: address $addr is longer than address length $address_length\n";
+    return $addr;          # already stripped of "0x" and leading zeros
   }
-
-  return substr("000000000000000".$addr, -$address_length);
+  return ("0" x $zeros_needed) . $addr;   # left-pad with zeros to full width
 }
 
 ##### Symbol extraction #####
@@ -3834,9 +4212,8 @@ sub ParseTextSectionHeaderFromObjdump {
   my $file_offset;
   # Get objdump output from the library file to figure out how to
   # map between mapped addresses and addresses in the library.
-  my $objdump = $obj_tool_map{"objdump"};
-  open(OBJDUMP, "$objdump -h $lib |")
-                || error("$objdump $lib: $!\n");
+  my $cmd = ShellEscape($obj_tool_map{"objdump"}, "-h", $lib);
+  open(OBJDUMP, "$cmd |") || error("$cmd: $!\n");
   while (<OBJDUMP>) {
     s/\r//g;         # turn windows-looking lines into unix-looking lines
     # Idx Name          Size      VMA       LMA       File off  Algn
@@ -3874,9 +4251,8 @@ sub ParseTextSectionHeaderFromOtool {
   my $file_offset = undef;
   # Get otool output from the library file to figure out how to
   # map between mapped addresses and addresses in the library.
-  my $otool = $obj_tool_map{"otool"};
-  open(OTOOL, "$otool -l $lib |")
-                || error("$otool $lib: $!\n");
+  my $command = ShellEscape($obj_tool_map{"otool"}, "-l", $lib);
+  open(OTOOL, "$command |") || error("$command: $!\n");
   my $cmd = "";
   my $sectname = "";
   my $segname = "";
@@ -4218,18 +4594,18 @@ sub ExtractSymbols {
     my ($start_pc_index, $finish_pc_index);
     # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index].
     for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0;
-	 $finish_pc_index--) {
+         $finish_pc_index--) {
       last if $pcs[$finish_pc_index - 1] le $finish;
     }
     # Find smallest start_pc_index such that $start <= $pc[$start_pc_index].
     for ($start_pc_index = $finish_pc_index; $start_pc_index > 0;
-	 $start_pc_index--) {
+         $start_pc_index--) {
       last if $pcs[$start_pc_index - 1] lt $start;
     }
     # This keeps PC values higher than $pc[$finish_pc_index] in @pcs,
     # in case there are overlaps in libraries and the main binary.
     @{$contained} = splice(@pcs, $start_pc_index,
-			   $finish_pc_index - $start_pc_index);
+                           $finish_pc_index - $start_pc_index);
     # Map to symbols
     MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols);
   }
@@ -4251,15 +4627,15 @@ sub MapToSymbols {
 
   # Figure out the addr2line command to use
   my $addr2line = $obj_tool_map{"addr2line"};
-  my $cmd = "$addr2line -f -C -e $image";
+  my $cmd = ShellEscape($addr2line, "-f", "-C", "-e", $image);
   if (exists $obj_tool_map{"addr2line_pdb"}) {
     $addr2line = $obj_tool_map{"addr2line_pdb"};
-    $cmd = "$addr2line --demangle -f -C -e $image";
+    $cmd = ShellEscape($addr2line, "--demangle", "-f", "-C", "-e", $image);
   }
 
   # If "addr2line" isn't installed on the system at all, just use
   # nm to get what info we can (function names, but not line numbers).
-  if (system("$addr2line --help >/dev/null 2>&1") != 0) {
+  if (system(ShellEscape($addr2line, "--help") . " >$dev_null 2>&1") != 0) {
     MapSymbolsWithNM($image, $offset, $pclist, $symbols);
     return;
   }
@@ -4273,11 +4649,10 @@ sub MapToSymbols {
   $sep_address = undef;  # May be filled in by MapSymbolsWithNM()
   my $nm_symbols = {};
   MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols);
-  # TODO(csilvers): only add '-i' if addr2line supports it.
   if (defined($sep_address)) {
     # Only add " -i" to addr2line if the binary supports it.
     # addr2line --help returns 0, but not if it sees an unknown flag first.
-    if (system("$cmd -i --help >/dev/null 2>&1") == 0) {
+    if (system("$cmd -i --help >$dev_null 2>&1") == 0) {
       $cmd .= " -i";
     } else {
       $sep_address = undef;   # no need for sep_address if we don't support -i
@@ -4299,13 +4674,14 @@ sub MapToSymbols {
   close(ADDRESSES);
   if ($debug) {
     print("----\n");
-    system("cat $main::tmpfile_sym");
+    system("cat", $main::tmpfile_sym);
     print("----\n");
-    system("$cmd <$main::tmpfile_sym");
+    system("$cmd < " . ShellEscape($main::tmpfile_sym));
     print("----\n");
   }
 
-  open(SYMBOLS, "$cmd <$main::tmpfile_sym |") || error("$cmd: $!\n");
+  open(SYMBOLS, "$cmd <" . ShellEscape($main::tmpfile_sym) . " |")
+      || error("$cmd: $!\n");
   my $count = 0;   # Index in pclist
   while (<SYMBOLS>) {
     # Read fullfunction and filelineinfo from next pair of lines
@@ -4325,15 +4701,29 @@ sub MapToSymbols {
 
     my $pcstr = $pclist->[$count];
     my $function = ShortFunctionName($fullfunction);
-    if ($fullfunction eq '??') {
-      # See if nm found a symbol
-      my $nms = $nm_symbols->{$pcstr};
-      if (defined($nms)) {
+    my $nms = $nm_symbols->{$pcstr};
+    if (defined($nms)) {
+      if ($fullfunction eq '??') {
+        # nm found a symbol for us.
         $function = $nms->[0];
         $fullfunction = $nms->[2];
+      } else {
+	# MapSymbolsWithNM tags each routine with its starting address,
+	# useful in case the image has multiple occurrences of this
+	# routine.  (It uses a syntax that resembles template parameters,
+	# that are automatically stripped out by ShortFunctionName().)
+	# addr2line does not provide the same information.  So we check
+	# if nm disambiguated our symbol, and if so take the annotated
+	# (nm) version of the routine-name.  TODO(csilvers): this won't
+	# catch overloaded, inlined symbols, which nm doesn't see.
+	# Better would be to do a check similar to nm's, in this fn.
+	if ($nms->[2] =~ m/^\Q$function\E/) {  # sanity check it's the right fn
+	  $function = $nms->[0];
+	  $fullfunction = $nms->[2];
+	}
       }
     }
-
+    
     # Prepend to accumulated symbols for pcstr
     # (so that caller comes before callee)
     my $sym = $symbols->{$pcstr};
@@ -4344,7 +4734,7 @@ sub MapToSymbols {
     unshift(@{$sym}, $function, $filelinenum, $fullfunction);
     if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); }
     if (!defined($sep_address)) {
-      # Inlining is off, se this entry ends immediately
+      # Inlining is off, so this entry ends immediately
       $count++;
     }
   }
@@ -4407,6 +4797,31 @@ sub ShortFunctionName {
   return $function;
 }
 
+# Shorten disassembler text by dropping argument lists and template arguments.
+sub CleanDisassembly {
+  my ($text) = @_;
+  1 while $text =~ s/\([^()%]*\)(\s*const)?//g;  # "(...)" args; '%' spares (%rax)
+  1 while $text =~ s/(\w+)<[^<>]*>/$1/g;         # "<...>" template arguments
+  return $text;
+}
+
+# Strip a leading "/proc/self/cwd/" and then a leading "./" for display.
+sub CleanFileName {
+  my $path = shift;
+  $path =~ s{^/proc/self/cwd/}{};
+  $path =~ s{^\./}{};
+  return $path;
+}
+
+# Express $address relative to $offset, without "0x" prefix or leading zeros.
+sub UnparseAddress {
+  my ($offset, $address) = @_;
+  my $relative = AddressSub($address, $offset);
+  $relative =~ s/^0x//;
+  $relative =~ s/^0*//;
+  return $relative;
+}
+
 ##### Miscellaneous #####
 
 # Find the right versions of the above object tools to use.  The
@@ -4423,8 +4838,18 @@ sub ConfigureObjTools {
   # predictably return error status in prod.
   (-e $prog_file)  || error("$prog_file does not exist.\n");
 
-  # Follow symlinks (at least for systems where "file" supports that)
-  my $file_type = `/usr/bin/file -L $prog_file 2>/dev/null || /usr/bin/file $prog_file`;
+  my $file_type = undef;
+  if (-e "/usr/bin/file") {
+    # Follow symlinks (at least for systems where "file" supports that).
+    my $escaped_prog_file = ShellEscape($prog_file);
+    $file_type = `/usr/bin/file -L $escaped_prog_file 2>$dev_null ||
+                  /usr/bin/file $escaped_prog_file`;
+  } elsif ($^O eq "MSWin32") {   # string compare; "==" was true on every OS
+    $file_type = "MS Windows";
+  } else {
+    print STDERR "WARNING: Can't determine the file type of $prog_file\n";
+  }
+
   if ($file_type =~ /64-bit/) {
     # Change $address_length to 16 if the program file is ELF 64-bit.
     # We can't detect this from many (most?) heap or lock contention
@@ -4500,6 +4925,19 @@ sub ConfigureTool {
   return $path;
 }
 
+# Quote each argument for the shell and return them joined by single spaces.
+# Words made only of [a-zA-Z0-9/.,_=-] (the empty string included) are left
+# as they are; anything else is wrapped in single quotes.
+sub ShellEscape {
+  my @escaped_words = @_;
+  foreach my $escaped_word (@escaped_words) {
+    next unless $escaped_word =~ m![^a-zA-Z0-9/.,_=-]!;  # already shell-safe
+    $escaped_word =~ s/'/'\\''/g;   # every ' becomes '\'' (was: only the first)
+    $escaped_word = "'$escaped_word'";
+  }
+  return join(" ", @escaped_words);
+}
+
 sub cleanup {
   unlink($main::tmpfile_sym);
   unlink(keys %main::tempnames);
@@ -4537,11 +4975,11 @@ sub error {
 # names match "$regexp" and returns them in a hashtable mapping from
 # procedure name to a two-element vector of [start address, end address]
 sub GetProcedureBoundariesViaNm {
-  my $nm_command = shift;
+  my $escaped_nm_command = shift;    # shell-escaped
   my $regexp = shift;
 
   my $symbol_table = {};
-  open(NM, "$nm_command |") || error("$nm_command: $!\n");
+  open(NM, "$escaped_nm_command |") || error("$escaped_nm_command: $!\n");
   my $last_start = "0";
   my $routine = "";
   while (<NM>) {
@@ -4619,6 +5057,21 @@ sub GetProcedureBoundaries {
   my $image = shift;
   my $regexp = shift;
 
+  # If $image doesn't start with /, then put ./ in front of it.  This works
+  # around an obnoxious bug in our probing of nm -f behavior.
+  # "nm -f $image" is supposed to fail on GNU nm, but if:
+  #
+  # a. $image starts with [BbSsPp] (for example, bin/foo/bar), AND
+  # b. you have a.out in your current directory (a not uncommon occurrence)
+  #
+  # then "nm -f $image" succeeds because -f only looks at the first letter of
+  # the argument, which looks valid because it's [BbSsPp], and then since
+  # there's no image provided, it looks for a.out and finds it.
+  #
+  # This regex makes sure that $image starts with . or /, forcing the -f
+  # parsing to fail since . and / are not valid formats.
+  $image =~ s#^[^/]#./$&#;
+
   # For libc libraries, the copy in /usr/lib/debug contains debugging symbols
   my $debugging = DebuggingLibrary($image);
   if ($debugging) {
@@ -4636,28 +5089,29 @@ sub GetProcedureBoundaries {
   # --demangle and -f.
   my $demangle_flag = "";
   my $cppfilt_flag = "";
-  if (system("$nm --demangle $image >/dev/null 2>&1") == 0) {
+  my $to_devnull = ">$dev_null 2>&1";   # appended to each probe command below
+  if (system(ShellEscape($nm, "--demangle", $image) . $to_devnull) == 0) {
     # In this mode, we do "nm --demangle <foo>"
     $demangle_flag = "--demangle";
     $cppfilt_flag = "";
-  } elsif (system("$cppfilt $image >/dev/null 2>&1") == 0) {
+  } elsif (system(ShellEscape($cppfilt, $image) . $to_devnull) == 0) {
     # In this mode, we do "nm <foo> | c++filt"
-    $cppfilt_flag = " | $cppfilt";
+    $cppfilt_flag = " | " . ShellEscape($cppfilt);
   };
   my $flatten_flag = "";
-  if (system("$nm -f $image >/dev/null 2>&1") == 0) {
+  if (system(ShellEscape($nm, "-f", $image) . $to_devnull) == 0) {
     $flatten_flag = "-f";
   }
 
   # Finally, in the case $imagie isn't a debug library, we try again with
   # -D to at least get *exported* symbols.  If we can't use --demangle,
   # we use c++filt instead, if it exists on this system.
-  my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" .
-                     " $image 2>/dev/null $cppfilt_flag",
-                     "$nm -D -n $flatten_flag $demangle_flag" .
-                     " $image 2>/dev/null $cppfilt_flag",
+  my @nm_commands = (ShellEscape($nm, "-n", $flatten_flag, $demangle_flag,
+                                 $image) . " 2>$dev_null $cppfilt_flag",
+                     ShellEscape($nm, "-D", "-n", $flatten_flag, $demangle_flag,
+                                 $image) . " 2>$dev_null $cppfilt_flag",
                      # 6nm is for Go binaries
-		     "6nm $image 2>/dev/null | sort",
+                     ShellEscape("6nm", "$image") . " 2>$dev_null | sort",
                      );
 
   # If the executable is an MS Windows PDB-format executable, we'll
@@ -4665,8 +5119,9 @@ sub GetProcedureBoundaries {
   # want to use both unix nm and windows-specific nm_pdb, since
   # PDB-format executables can apparently include dwarf .o files.
   if (exists $obj_tool_map{"nm_pdb"}) {
-    my $nm_pdb = $obj_tool_map{"nm_pdb"};
-    push(@nm_commands, "$nm_pdb --demangle $image 2>/dev/null");
+    push(@nm_commands,
+         ShellEscape($obj_tool_map{"nm_pdb"}, "--demangle", $image)
+         . " 2>$dev_null");
   }
 
   foreach my $nm_command (@nm_commands) {
diff --git a/deps/jemalloc/config.guess b/deps/jemalloc/config.guess
index 0773d0f6..d622a44e 100755
--- a/deps/jemalloc/config.guess
+++ b/deps/jemalloc/config.guess
@@ -1,9 +1,10 @@
 #! /bin/sh
 # Attempt to guess a canonical system name.
 #   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-#   2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+#   2011, 2012 Free Software Foundation, Inc.
 
-timestamp='2004-03-03'
+timestamp='2012-02-10'
 
 # This file is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by
@@ -16,24 +17,24 @@ timestamp='2004-03-03'
 # General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
 #
 # As a special exception to the GNU General Public License, if you
 # distribute this file as part of a program that contains a
 # configuration script generated by Autoconf, you may include it under
 # the same distribution terms that you use for the rest of that program.
 
-# Originally written by Per Bothner <per@bothner.com>.
-# Please send patches to <config-patches@gnu.org>.  Submit a context
-# diff and a properly formatted ChangeLog entry.
+
+# Originally written by Per Bothner.  Please send patches (context
+# diff format) to <config-patches@gnu.org> and include a ChangeLog
+# entry.
 #
 # This script attempts to guess a canonical system name similar to
 # config.sub.  If it succeeds, it prints the system name on stdout, and
 # exits with 0.  Otherwise, it exits with 1.
 #
-# The plan is that this can be called by configure scripts if you
-# don't specify an explicit build system type.
+# You can get the latest version of this script from:
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
 
 me=`echo "$0" | sed -e 's,.*/,,'`
 
@@ -53,7 +54,8 @@ version="\
 GNU config.guess ($timestamp)
 
 Originally written by Per Bothner.
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
 Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
@@ -66,11 +68,11 @@ Try \`$me --help' for more information."
 while test $# -gt 0 ; do
   case $1 in
     --time-stamp | --time* | -t )
-       echo "$timestamp" ; exit 0 ;;
+       echo "$timestamp" ; exit ;;
     --version | -v )
-       echo "$version" ; exit 0 ;;
+       echo "$version" ; exit ;;
     --help | --h* | -h )
-       echo "$usage"; exit 0 ;;
+       echo "$usage"; exit ;;
     -- )     # Stop option processing
        shift; break ;;
     - )	# Use stdin as input.
@@ -104,7 +106,7 @@ set_cc_for_build='
 trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
 trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
 : ${TMPDIR=/tmp} ;
- { tmp=`(umask 077 && mktemp -d -q "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
+ { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
  { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
  { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
  { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
@@ -123,7 +125,7 @@ case $CC_FOR_BUILD,$HOST_CC,$CC in
 	;;
  ,,*)   CC_FOR_BUILD=$CC ;;
  ,*,*)  CC_FOR_BUILD=$HOST_CC ;;
-esac ;'
+esac ; set_cc_for_build= ;'
 
 # This is needed to find uname on a Pyramid OSx when run in the BSD universe.
 # (ghazi@noc.rutgers.edu 1994-08-24)
@@ -141,7 +143,7 @@ UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
 case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
     *:NetBSD:*:*)
 	# NetBSD (nbsd) targets should (where applicable) match one or
-	# more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*,
+	# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
 	# *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
 	# switched to ELF, *-*-netbsd* would select the old
 	# object file format.  This provides both forward
@@ -158,6 +160,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	    arm*) machine=arm-unknown ;;
 	    sh3el) machine=shl-unknown ;;
 	    sh3eb) machine=sh-unknown ;;
+	    sh5el) machine=sh5le-unknown ;;
 	    *) machine=${UNAME_MACHINE_ARCH}-unknown ;;
 	esac
 	# The Operating System including object format, if it has switched
@@ -166,7 +169,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	    arm*|i386|m68k|ns32k|sh3*|sparc|vax)
 		eval $set_cc_for_build
 		if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
-			| grep __ELF__ >/dev/null
+			| grep -q __ELF__
 		then
 		    # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
 		    # Return netbsd for either.  FIX?
@@ -176,7 +179,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 		fi
 		;;
 	    *)
-	        os=netbsd
+		os=netbsd
 		;;
 	esac
 	# The OS release
@@ -196,71 +199,30 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	# contains redundant information, the shorter form:
 	# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
 	echo "${machine}-${os}${release}"
-	exit 0 ;;
-    amd64:OpenBSD:*:*)
-	echo x86_64-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    amiga:OpenBSD:*:*)
-	echo m68k-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    arc:OpenBSD:*:*)
-	echo mipsel-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    cats:OpenBSD:*:*)
-	echo arm-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    hp300:OpenBSD:*:*)
-	echo m68k-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    mac68k:OpenBSD:*:*)
-	echo m68k-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    macppc:OpenBSD:*:*)
-	echo powerpc-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    mvme68k:OpenBSD:*:*)
-	echo m68k-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    mvme88k:OpenBSD:*:*)
-	echo m88k-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    mvmeppc:OpenBSD:*:*)
-	echo powerpc-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    pegasos:OpenBSD:*:*)
-	echo powerpc-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    pmax:OpenBSD:*:*)
-	echo mipsel-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    sgi:OpenBSD:*:*)
-	echo mipseb-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    sun3:OpenBSD:*:*)
-	echo m68k-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
-    wgrisc:OpenBSD:*:*)
-	echo mipsel-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     *:OpenBSD:*:*)
-	echo ${UNAME_MACHINE}-unknown-openbsd${UNAME_RELEASE}
-	exit 0 ;;
+	UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
+	echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
+	exit ;;
     *:ekkoBSD:*:*)
 	echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
+    *:SolidBSD:*:*)
+	echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE}
+	exit ;;
     macppc:MirBSD:*:*)
-	echo powerppc-unknown-mirbsd${UNAME_RELEASE}
-	exit 0 ;;
+	echo powerpc-unknown-mirbsd${UNAME_RELEASE}
+	exit ;;
     *:MirBSD:*:*)
 	echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     alpha:OSF1:*:*)
 	case $UNAME_RELEASE in
 	*4.0)
 		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
 		;;
 	*5.*)
-	        UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
 		;;
 	esac
 	# According to Compaq, /usr/sbin/psrinfo has been available on
@@ -306,40 +268,46 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	# A Xn.n version is an unreleased experimental baselevel.
 	# 1.2 uses "1.2" for uname -r.
 	echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
-	exit 0 ;;
-    Alpha*:OpenVMS:*:*)
-	echo alpha-hp-vms
-	exit 0 ;;
+	# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
+	exitcode=$?
+	trap '' 0
+	exit $exitcode ;;
     Alpha\ *:Windows_NT*:*)
 	# How do we know it's Interix rather than the generic POSIX subsystem?
 	# Should we change UNAME_MACHINE based on the output of uname instead
 	# of the specific Alpha model?
 	echo alpha-pc-interix
-	exit 0 ;;
+	exit ;;
     21064:Windows_NT:50:3)
 	echo alpha-dec-winnt3.5
-	exit 0 ;;
+	exit ;;
     Amiga*:UNIX_System_V:4.0:*)
 	echo m68k-unknown-sysv4
-	exit 0;;
+	exit ;;
     *:[Aa]miga[Oo][Ss]:*:*)
 	echo ${UNAME_MACHINE}-unknown-amigaos
-	exit 0 ;;
+	exit ;;
     *:[Mm]orph[Oo][Ss]:*:*)
 	echo ${UNAME_MACHINE}-unknown-morphos
-	exit 0 ;;
+	exit ;;
     *:OS/390:*:*)
 	echo i370-ibm-openedition
-	exit 0 ;;
+	exit ;;
+    *:z/VM:*:*)
+	echo s390-ibm-zvmoe
+	exit ;;
     *:OS400:*:*)
-        echo powerpc-ibm-os400
-	exit 0 ;;
+	echo powerpc-ibm-os400
+	exit ;;
     arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
 	echo arm-acorn-riscix${UNAME_RELEASE}
-	exit 0;;
+	exit ;;
+    arm:riscos:*:*|arm:RISCOS:*:*)
+	echo arm-unknown-riscos
+	exit ;;
     SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
 	echo hppa1.1-hitachi-hiuxmpp
-	exit 0;;
+	exit ;;
     Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
 	# akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
 	if test "`(/bin/universe) 2>/dev/null`" = att ; then
@@ -347,32 +315,51 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	else
 		echo pyramid-pyramid-bsd
 	fi
-	exit 0 ;;
+	exit ;;
     NILE*:*:*:dcosx)
 	echo pyramid-pyramid-svr4
-	exit 0 ;;
+	exit ;;
     DRS?6000:unix:4.0:6*)
 	echo sparc-icl-nx6
-	exit 0 ;;
-    DRS?6000:UNIX_SV:4.2*:7*)
+	exit ;;
+    DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
 	case `/usr/bin/uname -p` in
-	    sparc) echo sparc-icl-nx7 && exit 0 ;;
+	    sparc) echo sparc-icl-nx7; exit ;;
 	esac ;;
+    s390x:SunOS:*:*)
+	echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+	exit ;;
     sun4H:SunOS:5.*:*)
 	echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-	exit 0 ;;
+	exit ;;
     sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
 	echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-	exit 0 ;;
-    i86pc:SunOS:5.*:*)
-	echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-	exit 0 ;;
+	exit ;;
+    i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
+	echo i386-pc-auroraux${UNAME_RELEASE}
+	exit ;;
+    i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
+	eval $set_cc_for_build
+	SUN_ARCH="i386"
+	# If there is a compiler, see if it is configured for 64-bit objects.
+	# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
+	# This test works for both compilers.
+	if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+	    if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
+		(CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+		grep IS_64BIT_ARCH >/dev/null
+	    then
+		SUN_ARCH="x86_64"
+	    fi
+	fi
+	echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+	exit ;;
     sun4*:SunOS:6*:*)
 	# According to config.sub, this is the proper way to canonicalize
 	# SunOS6.  Hard to guess exactly what SunOS6 will be like, but
 	# it's likely to be more like Solaris than SunOS4.
 	echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-	exit 0 ;;
+	exit ;;
     sun4*:SunOS:*:*)
 	case "`/usr/bin/arch -k`" in
 	    Series*|S4*)
@@ -381,10 +368,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	esac
 	# Japanese Language versions have a version number like `4.1.3-JL'.
 	echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
-	exit 0 ;;
+	exit ;;
     sun3*:SunOS:*:*)
 	echo m68k-sun-sunos${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     sun*:*:4.2BSD:*)
 	UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
 	test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
@@ -396,10 +383,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 		echo sparc-sun-sunos${UNAME_RELEASE}
 		;;
 	esac
-	exit 0 ;;
+	exit ;;
     aushp:SunOS:*:*)
 	echo sparc-auspex-sunos${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     # The situation for MiNT is a little confusing.  The machine name
     # can be virtually everything (everything which is not
     # "atarist" or "atariste" at least should have a processor
@@ -409,41 +396,41 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
     # MiNT.  But MiNT is downward compatible to TOS, so this should
     # be no problem.
     atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
-        echo m68k-atari-mint${UNAME_RELEASE}
-	exit 0 ;;
+	echo m68k-atari-mint${UNAME_RELEASE}
+	exit ;;
     atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
 	echo m68k-atari-mint${UNAME_RELEASE}
-        exit 0 ;;
+	exit ;;
     *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
-        echo m68k-atari-mint${UNAME_RELEASE}
-	exit 0 ;;
+	echo m68k-atari-mint${UNAME_RELEASE}
+	exit ;;
     milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
-        echo m68k-milan-mint${UNAME_RELEASE}
-        exit 0 ;;
+	echo m68k-milan-mint${UNAME_RELEASE}
+	exit ;;
     hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
-        echo m68k-hades-mint${UNAME_RELEASE}
-        exit 0 ;;
+	echo m68k-hades-mint${UNAME_RELEASE}
+	exit ;;
     *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
-        echo m68k-unknown-mint${UNAME_RELEASE}
-        exit 0 ;;
+	echo m68k-unknown-mint${UNAME_RELEASE}
+	exit ;;
     m68k:machten:*:*)
 	echo m68k-apple-machten${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     powerpc:machten:*:*)
 	echo powerpc-apple-machten${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     RISC*:Mach:*:*)
 	echo mips-dec-mach_bsd4.3
-	exit 0 ;;
+	exit ;;
     RISC*:ULTRIX:*:*)
 	echo mips-dec-ultrix${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     VAX*:ULTRIX*:*:*)
 	echo vax-dec-ultrix${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     2020:CLIX:*:* | 2430:CLIX:*:*)
 	echo clipper-intergraph-clix${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     mips:*:*:UMIPS | mips:*:*:RISCos)
 	eval $set_cc_for_build
 	sed 's/^	//' << EOF >$dummy.c
@@ -467,35 +454,36 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
 	  exit (-1);
 	}
 EOF
-	$CC_FOR_BUILD -o $dummy $dummy.c \
-	  && $dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \
-	  && exit 0
+	$CC_FOR_BUILD -o $dummy $dummy.c &&
+	  dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` &&
+	  SYSTEM_NAME=`$dummy $dummyarg` &&
+	    { echo "$SYSTEM_NAME"; exit; }
 	echo mips-mips-riscos${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     Motorola:PowerMAX_OS:*:*)
 	echo powerpc-motorola-powermax
-	exit 0 ;;
+	exit ;;
     Motorola:*:4.3:PL8-*)
 	echo powerpc-harris-powermax
-	exit 0 ;;
+	exit ;;
     Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
 	echo powerpc-harris-powermax
-	exit 0 ;;
+	exit ;;
     Night_Hawk:Power_UNIX:*:*)
 	echo powerpc-harris-powerunix
-	exit 0 ;;
+	exit ;;
     m88k:CX/UX:7*:*)
 	echo m88k-harris-cxux7
-	exit 0 ;;
+	exit ;;
     m88k:*:4*:R4*)
 	echo m88k-motorola-sysv4
-	exit 0 ;;
+	exit ;;
     m88k:*:3*:R3*)
 	echo m88k-motorola-sysv3
-	exit 0 ;;
+	exit ;;
     AViiON:dgux:*:*)
-        # DG/UX returns AViiON for all architectures
-        UNAME_PROCESSOR=`/usr/bin/uname -p`
+	# DG/UX returns AViiON for all architectures
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
 	if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
 	then
 	    if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
@@ -508,29 +496,29 @@ EOF
 	else
 	    echo i586-dg-dgux${UNAME_RELEASE}
 	fi
- 	exit 0 ;;
+	exit ;;
     M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
 	echo m88k-dolphin-sysv3
-	exit 0 ;;
+	exit ;;
     M88*:*:R3*:*)
 	# Delta 88k system running SVR3
 	echo m88k-motorola-sysv3
-	exit 0 ;;
+	exit ;;
     XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
 	echo m88k-tektronix-sysv3
-	exit 0 ;;
+	exit ;;
     Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
 	echo m68k-tektronix-bsd
-	exit 0 ;;
+	exit ;;
     *:IRIX*:*:*)
 	echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
-	exit 0 ;;
+	exit ;;
     ????????:AIX?:[12].1:2)   # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
-	echo romp-ibm-aix      # uname -m gives an 8 hex-code CPU id
-	exit 0 ;;              # Note that: echo "'`uname -s`'" gives 'AIX '
+	echo romp-ibm-aix     # uname -m gives an 8 hex-code CPU id
+	exit ;;               # Note that: echo "'`uname -s`'" gives 'AIX '
     i*86:AIX:*:*)
 	echo i386-ibm-aix
-	exit 0 ;;
+	exit ;;
     ia64:AIX:*:*)
 	if [ -x /usr/bin/oslevel ] ; then
 		IBM_REV=`/usr/bin/oslevel`
@@ -538,7 +526,7 @@ EOF
 		IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
 	fi
 	echo ${UNAME_MACHINE}-ibm-aix${IBM_REV}
-	exit 0 ;;
+	exit ;;
     *:AIX:2:3)
 	if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
 		eval $set_cc_for_build
@@ -553,15 +541,19 @@ EOF
 			exit(0);
 			}
 EOF
-		$CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0
-		echo rs6000-ibm-aix3.2.5
+		if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy`
+		then
+			echo "$SYSTEM_NAME"
+		else
+			echo rs6000-ibm-aix3.2.5
+		fi
 	elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
 		echo rs6000-ibm-aix3.2.4
 	else
 		echo rs6000-ibm-aix3.2
 	fi
-	exit 0 ;;
-    *:AIX:*:[45])
+	exit ;;
+    *:AIX:*:[4567])
 	IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
 	if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
 		IBM_ARCH=rs6000
@@ -574,28 +566,28 @@ EOF
 		IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
 	fi
 	echo ${IBM_ARCH}-ibm-aix${IBM_REV}
-	exit 0 ;;
+	exit ;;
     *:AIX:*:*)
 	echo rs6000-ibm-aix
-	exit 0 ;;
+	exit ;;
     ibmrt:4.4BSD:*|romp-ibm:BSD:*)
 	echo romp-ibm-bsd4.4
-	exit 0 ;;
+	exit ;;
     ibmrt:*BSD:*|romp-ibm:BSD:*)            # covers RT/PC BSD and
 	echo romp-ibm-bsd${UNAME_RELEASE}   # 4.3 with uname added to
-	exit 0 ;;                           # report: romp-ibm BSD 4.3
+	exit ;;                             # report: romp-ibm BSD 4.3
     *:BOSX:*:*)
 	echo rs6000-bull-bosx
-	exit 0 ;;
+	exit ;;
     DPX/2?00:B.O.S.:*:*)
 	echo m68k-bull-sysv3
-	exit 0 ;;
+	exit ;;
     9000/[34]??:4.3bsd:1.*:*)
 	echo m68k-hp-bsd
-	exit 0 ;;
+	exit ;;
     hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
 	echo m68k-hp-bsd4.4
-	exit 0 ;;
+	exit ;;
     9000/[34678]??:HP-UX:*:*)
 	HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
 	case "${UNAME_MACHINE}" in
@@ -604,52 +596,52 @@ EOF
 	    9000/[678][0-9][0-9])
 		if [ -x /usr/bin/getconf ]; then
 		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
-                    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
-                    case "${sc_cpu_version}" in
-                      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
-                      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
-                      532)                      # CPU_PA_RISC2_0
-                        case "${sc_kernel_bits}" in
-                          32) HP_ARCH="hppa2.0n" ;;
-                          64) HP_ARCH="hppa2.0w" ;;
+		    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+		    case "${sc_cpu_version}" in
+		      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
+		      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
+		      532)                      # CPU_PA_RISC2_0
+			case "${sc_kernel_bits}" in
+			  32) HP_ARCH="hppa2.0n" ;;
+			  64) HP_ARCH="hppa2.0w" ;;
 			  '') HP_ARCH="hppa2.0" ;;   # HP-UX 10.20
-                        esac ;;
-                    esac
+			esac ;;
+		    esac
 		fi
 		if [ "${HP_ARCH}" = "" ]; then
 		    eval $set_cc_for_build
-		    sed 's/^              //' << EOF >$dummy.c
+		    sed 's/^		//' << EOF >$dummy.c
 
-              #define _HPUX_SOURCE
-              #include <stdlib.h>
-              #include <unistd.h>
+		#define _HPUX_SOURCE
+		#include <stdlib.h>
+		#include <unistd.h>
 
-              int main ()
-              {
-              #if defined(_SC_KERNEL_BITS)
-                  long bits = sysconf(_SC_KERNEL_BITS);
-              #endif
-                  long cpu  = sysconf (_SC_CPU_VERSION);
+		int main ()
+		{
+		#if defined(_SC_KERNEL_BITS)
+		    long bits = sysconf(_SC_KERNEL_BITS);
+		#endif
+		    long cpu  = sysconf (_SC_CPU_VERSION);
 
-                  switch (cpu)
-              	{
-              	case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
-              	case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
-              	case CPU_PA_RISC2_0:
-              #if defined(_SC_KERNEL_BITS)
-              	    switch (bits)
-              		{
-              		case 64: puts ("hppa2.0w"); break;
-              		case 32: puts ("hppa2.0n"); break;
-              		default: puts ("hppa2.0"); break;
-              		} break;
-              #else  /* !defined(_SC_KERNEL_BITS) */
-              	    puts ("hppa2.0"); break;
-              #endif
-              	default: puts ("hppa1.0"); break;
-              	}
-                  exit (0);
-              }
+		    switch (cpu)
+			{
+			case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+			case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+			case CPU_PA_RISC2_0:
+		#if defined(_SC_KERNEL_BITS)
+			    switch (bits)
+				{
+				case 64: puts ("hppa2.0w"); break;
+				case 32: puts ("hppa2.0n"); break;
+				default: puts ("hppa2.0"); break;
+				} break;
+		#else  /* !defined(_SC_KERNEL_BITS) */
+			    puts ("hppa2.0"); break;
+		#endif
+			default: puts ("hppa1.0"); break;
+			}
+		    exit (0);
+		}
 EOF
 		    (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
 		    test -z "$HP_ARCH" && HP_ARCH=hppa
@@ -657,9 +649,19 @@ EOF
 	esac
 	if [ ${HP_ARCH} = "hppa2.0w" ]
 	then
-	    # avoid double evaluation of $set_cc_for_build
-	    test -n "$CC_FOR_BUILD" || eval $set_cc_for_build
-	    if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E -) | grep __LP64__ >/dev/null
+	    eval $set_cc_for_build
+
+	    # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
+	    # 32-bit code.  hppa64-hp-hpux* has the same kernel and a compiler
+	    # generating 64-bit code.  GNU and HP use different nomenclature:
+	    #
+	    # $ CC_FOR_BUILD=cc ./config.guess
+	    # => hppa2.0w-hp-hpux11.23
+	    # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
+	    # => hppa64-hp-hpux11.23
+
+	    if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
+		grep -q __LP64__
 	    then
 		HP_ARCH="hppa2.0w"
 	    else
@@ -667,11 +669,11 @@ EOF
 	    fi
 	fi
 	echo ${HP_ARCH}-hp-hpux${HPUX_REV}
-	exit 0 ;;
+	exit ;;
     ia64:HP-UX:*:*)
 	HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
 	echo ia64-hp-hpux${HPUX_REV}
-	exit 0 ;;
+	exit ;;
     3050*:HI-UX:*:*)
 	eval $set_cc_for_build
 	sed 's/^	//' << EOF >$dummy.c
@@ -699,221 +701,266 @@ EOF
 	  exit (0);
 	}
 EOF
-	$CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0
+	$CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` &&
+		{ echo "$SYSTEM_NAME"; exit; }
 	echo unknown-hitachi-hiuxwe2
-	exit 0 ;;
+	exit ;;
     9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
 	echo hppa1.1-hp-bsd
-	exit 0 ;;
+	exit ;;
     9000/8??:4.3bsd:*:*)
 	echo hppa1.0-hp-bsd
-	exit 0 ;;
+	exit ;;
     *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
 	echo hppa1.0-hp-mpeix
-	exit 0 ;;
+	exit ;;
     hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
 	echo hppa1.1-hp-osf
-	exit 0 ;;
+	exit ;;
     hp8??:OSF1:*:*)
 	echo hppa1.0-hp-osf
-	exit 0 ;;
+	exit ;;
     i*86:OSF1:*:*)
 	if [ -x /usr/sbin/sysversion ] ; then
 	    echo ${UNAME_MACHINE}-unknown-osf1mk
 	else
 	    echo ${UNAME_MACHINE}-unknown-osf1
 	fi
-	exit 0 ;;
+	exit ;;
     parisc*:Lites*:*:*)
 	echo hppa1.1-hp-lites
-	exit 0 ;;
+	exit ;;
     C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
 	echo c1-convex-bsd
-        exit 0 ;;
+	exit ;;
     C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
 	if getsysinfo -f scalar_acc
 	then echo c32-convex-bsd
 	else echo c2-convex-bsd
 	fi
-        exit 0 ;;
+	exit ;;
     C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
 	echo c34-convex-bsd
-        exit 0 ;;
+	exit ;;
     C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
 	echo c38-convex-bsd
-        exit 0 ;;
+	exit ;;
     C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
 	echo c4-convex-bsd
-        exit 0 ;;
+	exit ;;
     CRAY*Y-MP:*:*:*)
 	echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
-	exit 0 ;;
+	exit ;;
     CRAY*[A-Z]90:*:*:*)
 	echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \
 	| sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
 	      -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
 	      -e 's/\.[^.]*$/.X/'
-	exit 0 ;;
+	exit ;;
     CRAY*TS:*:*:*)
 	echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
-	exit 0 ;;
+	exit ;;
     CRAY*T3E:*:*:*)
 	echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
-	exit 0 ;;
+	exit ;;
     CRAY*SV1:*:*:*)
 	echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
-	exit 0 ;;
+	exit ;;
     *:UNICOS/mp:*:*)
-	echo nv1-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
-	exit 0 ;;
+	echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+	exit ;;
     F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
 	FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'`
-        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
-        FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
-        echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
-        exit 0 ;;
+	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+	FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
+	echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	exit ;;
     5000:UNIX_System_V:4.*:*)
-        FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
-        FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
-        echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
-	exit 0 ;;
+	FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'`
+	FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'`
+	echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
+	exit ;;
     i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
 	echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     sparc*:BSD/OS:*:*)
 	echo sparc-unknown-bsdi${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     *:BSD/OS:*:*)
 	echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     *:FreeBSD:*:*)
-	# Determine whether the default compiler uses glibc.
-	eval $set_cc_for_build
-	sed 's/^	//' << EOF >$dummy.c
-	#include <features.h>
-	#if __GLIBC__ >= 2
-	LIBC=gnu
-	#else
-	LIBC=
-	#endif
-EOF
-	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=`
-	# GNU/KFreeBSD systems have a "k" prefix to indicate we are using
-	# FreeBSD's kernel, but not the complete OS.
-	case ${LIBC} in gnu) kernel_only='k' ;; esac
-	echo ${UNAME_MACHINE}-unknown-${kernel_only}freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`${LIBC:+-$LIBC}
-	exit 0 ;;
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
+	case ${UNAME_PROCESSOR} in
+	    amd64)
+		echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+	    *)
+		echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+	esac
+	exit ;;
     i*:CYGWIN*:*)
 	echo ${UNAME_MACHINE}-pc-cygwin
-	exit 0 ;;
-    i*:MINGW*:*)
+	exit ;;
+    *:MINGW*:*)
 	echo ${UNAME_MACHINE}-pc-mingw32
-	exit 0 ;;
+	exit ;;
+    i*:MSYS*:*)
+	echo ${UNAME_MACHINE}-pc-msys
+	exit ;;
+    i*:windows32*:*)
+	# uname -m includes "-pc" on this system.
+	echo ${UNAME_MACHINE}-mingw32
+	exit ;;
     i*:PW*:*)
 	echo ${UNAME_MACHINE}-pc-pw32
-	exit 0 ;;
-    x86:Interix*:[34]*)
-	echo i586-pc-interix${UNAME_RELEASE}|sed -e 's/\..*//'
-	exit 0 ;;
+	exit ;;
+    *:Interix*:*)  # Interix: CPU and vendor fields are chosen from ${UNAME_MACHINE}
+	case ${UNAME_MACHINE} in
+	    x86)
+		echo i586-pc-interix${UNAME_RELEASE}
+		exit ;;
+	    authenticamd | genuineintel | EM64T)
+		echo x86_64-unknown-interix${UNAME_RELEASE}
+		exit ;;
+	    IA64)
+		echo ia64-unknown-interix${UNAME_RELEASE}
+		exit ;;
+	esac ;;  # unlisted machine: nothing is printed and no exit happens in this arm
     [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
 	echo i${UNAME_MACHINE}-pc-mks
-	exit 0 ;;
+	exit ;;
+    8664:Windows_NT:*)
+	echo x86_64-pc-mks
+	exit ;;
     i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
 	# How do we know it's Interix rather than the generic POSIX subsystem?
 	# It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
 	# UNAME_MACHINE based on the output of uname instead of i386?
 	echo i586-pc-interix
-	exit 0 ;;
+	exit ;;
     i*:UWIN*:*)
 	echo ${UNAME_MACHINE}-pc-uwin
-	exit 0 ;;
+	exit ;;
+    amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
+	echo x86_64-unknown-cygwin
+	exit ;;
     p*:CYGWIN*:*)
 	echo powerpcle-unknown-cygwin
-	exit 0 ;;
+	exit ;;
     prep*:SunOS:5.*:*)
 	echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
-	exit 0 ;;
+	exit ;;
     *:GNU:*:*)
 	# the GNU system
 	echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
-	exit 0 ;;
+	exit ;;
     *:GNU/*:*:*)
 	# other systems with GNU libc and userland
 	echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu
-	exit 0 ;;
+	exit ;;
     i*86:Minix:*:*)
 	echo ${UNAME_MACHINE}-pc-minix
-	exit 0 ;;
+	exit ;;
+    aarch64:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    aarch64_be:Linux:*:*)
+	UNAME_MACHINE=aarch64_be
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    alpha:Linux:*:*)  # refine the CPU name from the "cpu model" line of /proc/cpuinfo
+	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+	  EV5)   UNAME_MACHINE=alphaev5 ;;
+	  EV56)  UNAME_MACHINE=alphaev56 ;;
+	  PCA56) UNAME_MACHINE=alphapca56 ;;
+	  PCA57) UNAME_MACHINE=alphapca56 ;;  # same result as PCA56
+	  EV6)   UNAME_MACHINE=alphaev6 ;;
+	  EV67)  UNAME_MACHINE=alphaev67 ;;
+	  EV68*) UNAME_MACHINE=alphaev68 ;;
+	esac  # any other model string leaves UNAME_MACHINE as it was
+	objdump --private-headers /bin/sh | grep -q ld.so.1  # only the exit status is used
+	if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi  # ld.so.1 found => "libc1" suffix
+	echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
+	exit ;;
     arm*:Linux:*:*)
+	eval $set_cc_for_build  # presumably sets $CC_FOR_BUILD (defined outside this hunk)
+	if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
+	    | grep -q __ARM_EABI__
+	then  # token survived preprocessing, so __ARM_EABI__ is NOT defined
+	    echo ${UNAME_MACHINE}-unknown-linux-gnu
+	else  # token absent: macro was expanded, or the compiler produced no output
+	    if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
+		| grep -q __ARM_PCS_VFP
+	    then  # token survived, so __ARM_PCS_VFP is NOT defined
+		echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+	    else  # __ARM_PCS_VFP expanded; also reached when the compiler fails
+		echo ${UNAME_MACHINE}-unknown-linux-gnueabihf
+	    fi
+	fi
+	exit ;;
+    avr32*:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
-	exit 0 ;;
+	exit ;;
     cris:Linux:*:*)
-	echo cris-axis-linux-gnu
-	exit 0 ;;
-    ia64:Linux:*:*)
+	echo ${UNAME_MACHINE}-axis-linux-gnu
+	exit ;;
+    crisv32:Linux:*:*)
+	echo ${UNAME_MACHINE}-axis-linux-gnu
+	exit ;;
+    frv:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
-	exit 0 ;;
-    m68*:Linux:*:*)
+	exit ;;
+    hexagon:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
-	exit 0 ;;
-    mips:Linux:*:*)
+	exit ;;
+    i*86:Linux:*:*)
+	LIBC=gnu
 	eval $set_cc_for_build
 	sed 's/^	//' << EOF >$dummy.c
-	#undef CPU
-	#undef mips
-	#undef mipsel
-	#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
-	CPU=mipsel
-	#else
-	#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
-	CPU=mips
-	#else
-	CPU=
-	#endif
+	#ifdef __dietlibc__
+	LIBC=dietlibc
 	#endif
 EOF
-	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=`
-	test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0
-	;;
-    mips64:Linux:*:*)
+	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC'`
+	echo "${UNAME_MACHINE}-pc-linux-${LIBC}"
+	exit ;;
+    ia64:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    m32r*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    m68*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    mips:Linux:*:* | mips64:Linux:*:*)
 	eval $set_cc_for_build
 	sed 's/^	//' << EOF >$dummy.c
 	#undef CPU
-	#undef mips64
-	#undef mips64el
+	#undef ${UNAME_MACHINE}
+	#undef ${UNAME_MACHINE}el
 	#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
-	CPU=mips64el
+	CPU=${UNAME_MACHINE}el
 	#else
 	#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
-	CPU=mips64
+	CPU=${UNAME_MACHINE}
 	#else
 	CPU=
 	#endif
 	#endif
 EOF
-	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=`
-	test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0
+	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
+	test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
 	;;
-    ppc:Linux:*:*)
-	echo powerpc-unknown-linux-gnu
-	exit 0 ;;
-    ppc64:Linux:*:*)
-	echo powerpc64-unknown-linux-gnu
-	exit 0 ;;
-    alpha:Linux:*:*)
-	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
-	  EV5)   UNAME_MACHINE=alphaev5 ;;
-	  EV56)  UNAME_MACHINE=alphaev56 ;;
-	  PCA56) UNAME_MACHINE=alphapca56 ;;
-	  PCA57) UNAME_MACHINE=alphapca56 ;;
-	  EV6)   UNAME_MACHINE=alphaev6 ;;
-	  EV67)  UNAME_MACHINE=alphaev67 ;;
-	  EV68*) UNAME_MACHINE=alphaev68 ;;
-        esac
-	objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null
-	if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi
-	echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC}
-	exit 0 ;;
+    or32:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    padre:Linux:*:*)
+	echo sparc-unknown-linux-gnu
+	exit ;;
+    parisc64:Linux:*:* | hppa64:Linux:*:*)
+	echo hppa64-unknown-linux-gnu
+	exit ;;
     parisc:Linux:*:* | hppa:Linux:*:*)
 	# Look for CPU level
 	case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
@@ -921,115 +968,71 @@ EOF
 	  PA8*) echo hppa2.0-unknown-linux-gnu ;;
 	  *)    echo hppa-unknown-linux-gnu ;;
 	esac
-	exit 0 ;;
-    parisc64:Linux:*:* | hppa64:Linux:*:*)
-	echo hppa64-unknown-linux-gnu
-	exit 0 ;;
+	exit ;;
+    ppc64:Linux:*:*)
+	echo powerpc64-unknown-linux-gnu
+	exit ;;
+    ppc:Linux:*:*)
+	echo powerpc-unknown-linux-gnu
+	exit ;;
     s390:Linux:*:* | s390x:Linux:*:*)
 	echo ${UNAME_MACHINE}-ibm-linux
-	exit 0 ;;
+	exit ;;
     sh64*:Linux:*:*)
-    	echo ${UNAME_MACHINE}-unknown-linux-gnu
-	exit 0 ;;
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
     sh*:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
-	exit 0 ;;
+	exit ;;
     sparc:Linux:*:* | sparc64:Linux:*:*)
 	echo ${UNAME_MACHINE}-unknown-linux-gnu
-	exit 0 ;;
+	exit ;;
+    tile*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    vax:Linux:*:*)
+	echo ${UNAME_MACHINE}-dec-linux-gnu
+	exit ;;
     x86_64:Linux:*:*)
-	echo x86_64-unknown-linux-gnu
-	exit 0 ;;
-    i*86:Linux:*:*)
-	# The BFD linker knows what the default object file format is, so
-	# first see if it will tell us. cd to the root directory to prevent
-	# problems with other programs or directories called `ld' in the path.
-	# Set LC_ALL=C to ensure ld outputs messages in English.
-	ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \
-			 | sed -ne '/supported targets:/!d
-				    s/[ 	][ 	]*/ /g
-				    s/.*supported targets: *//
-				    s/ .*//
-				    p'`
-        case "$ld_supported_targets" in
-	  elf32-i386)
-		TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu"
-		;;
-	  a.out-i386-linux)
-		echo "${UNAME_MACHINE}-pc-linux-gnuaout"
-		exit 0 ;;
-	  coff-i386)
-		echo "${UNAME_MACHINE}-pc-linux-gnucoff"
-		exit 0 ;;
-	  "")
-		# Either a pre-BFD a.out linker (linux-gnuoldld) or
-		# one that does not give us useful --help.
-		echo "${UNAME_MACHINE}-pc-linux-gnuoldld"
-		exit 0 ;;
-	esac
-	# Determine whether the default compiler is a.out or elf
-	eval $set_cc_for_build
-	sed 's/^	//' << EOF >$dummy.c
-	#include <features.h>
-	#ifdef __ELF__
-	# ifdef __GLIBC__
-	#  if __GLIBC__ >= 2
-	LIBC=gnu
-	#  else
-	LIBC=gnulibc1
-	#  endif
-	# else
-	LIBC=gnulibc1
-	# endif
-	#else
-	#ifdef __INTEL_COMPILER
-	LIBC=gnu
-	#else
-	LIBC=gnuaout
-	#endif
-	#endif
-	#ifdef __dietlibc__
-	LIBC=dietlibc
-	#endif
-EOF
-	eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=`
-	test x"${LIBC}" != x && echo "${UNAME_MACHINE}-pc-linux-${LIBC}" && exit 0
-	test x"${TENTATIVE}" != x && echo "${TENTATIVE}" && exit 0
-	;;
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
+    xtensa*:Linux:*:*)
+	echo ${UNAME_MACHINE}-unknown-linux-gnu
+	exit ;;
     i*86:DYNIX/ptx:4*:*)
 	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
 	# earlier versions are messed up and put the nodename in both
 	# sysname and nodename.
 	echo i386-sequent-sysv4
-	exit 0 ;;
+	exit ;;
     i*86:UNIX_SV:4.2MP:2.*)
-        # Unixware is an offshoot of SVR4, but it has its own version
-        # number series starting with 2...
-        # I am not positive that other SVR4 systems won't match this,
+	# Unixware is an offshoot of SVR4, but it has its own version
+	# number series starting with 2...
+	# I am not positive that other SVR4 systems won't match this,
 	# I just have to hope.  -- rms.
-        # Use sysv4.2uw... so that sysv4* matches it.
+	# Use sysv4.2uw... so that sysv4* matches it.
 	echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
-	exit 0 ;;
+	exit ;;
     i*86:OS/2:*:*)
 	# If we were able to find `uname', then EMX Unix compatibility
 	# is probably installed.
 	echo ${UNAME_MACHINE}-pc-os2-emx
-	exit 0 ;;
+	exit ;;
     i*86:XTS-300:*:STOP)
 	echo ${UNAME_MACHINE}-unknown-stop
-	exit 0 ;;
+	exit ;;
     i*86:atheos:*:*)
 	echo ${UNAME_MACHINE}-unknown-atheos
-	exit 0 ;;
-	i*86:syllable:*:*)
+	exit ;;
+    i*86:syllable:*:*)
 	echo ${UNAME_MACHINE}-pc-syllable
-	exit 0 ;;
-    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*)
+	exit ;;
+    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
 	echo i386-unknown-lynxos${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     i*86:*DOS:*:*)
 	echo ${UNAME_MACHINE}-pc-msdosdjgpp
-	exit 0 ;;
+	exit ;;
     i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
 	UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
 	if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
@@ -1037,15 +1040,16 @@ EOF
 	else
 		echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL}
 	fi
-	exit 0 ;;
-    i*86:*:5:[78]*)
+	exit ;;
+    i*86:*:5:[678]*)
+	# UnixWare 7.x, OpenUNIX and OpenServer 6.
 	case `/bin/uname -X | grep "^Machine"` in
 	    *486*)	     UNAME_MACHINE=i486 ;;
 	    *Pentium)	     UNAME_MACHINE=i586 ;;
 	    *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
 	esac
 	echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
-	exit 0 ;;
+	exit ;;
     i*86:*:3.2:*)
 	if test -f /usr/options/cb.name; then
 		UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
@@ -1063,73 +1067,86 @@ EOF
 	else
 		echo ${UNAME_MACHINE}-pc-sysv32
 	fi
-	exit 0 ;;
+	exit ;;
     pc:*:*:*)
 	# Left here for compatibility:
-        # uname -m prints for DJGPP always 'pc', but it prints nothing about
-        # the processor, so we play safe by assuming i386.
-	echo i386-pc-msdosdjgpp
-        exit 0 ;;
+	# For DJGPP, uname -m always prints 'pc' and reveals nothing about
+	# the processor, so we play safe by assuming i586.
+	# Note: whatever this is, it MUST be the same as what config.sub
+	# prints for the "djgpp" host, or else GDB configury will decide that
+	# this is a cross-build.
+	echo i586-pc-msdosdjgpp
+	exit ;;
     Intel:Mach:3*:*)
 	echo i386-pc-mach3
-	exit 0 ;;
+	exit ;;
     paragon:*:*:*)
 	echo i860-intel-osf1
-	exit 0 ;;
+	exit ;;
     i860:*:4.*:*) # i860-SVR4
 	if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
 	  echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
 	else # Add other i860-SVR4 vendors below as they are discovered.
 	  echo i860-unknown-sysv${UNAME_RELEASE}  # Unknown i860-SVR4
 	fi
-	exit 0 ;;
+	exit ;;
     mini*:CTIX:SYS*5:*)
 	# "miniframe"
 	echo m68010-convergent-sysv
-	exit 0 ;;
+	exit ;;
     mc68k:UNIX:SYSTEM5:3.51m)
 	echo m68k-convergent-sysv
-	exit 0 ;;
+	exit ;;
     M680?0:D-NIX:5.3:*)
 	echo m68k-diab-dnix
-	exit 0 ;;
-    M68*:*:R3V[567]*:*)
-	test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;;
-    3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0)
+	exit ;;
+    M68*:*:R3V[5678]*:*)
+	test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
+    3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
 	OS_REL=''
 	test -r /etc/.relid \
 	&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
 	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-	  && echo i486-ncr-sysv4.3${OS_REL} && exit 0
+	  && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
 	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
-	  && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;;
+	  && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
     3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
-        /bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-          && echo i486-ncr-sysv4 && exit 0 ;;
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	  && { echo i486-ncr-sysv4; exit; } ;;
+    NCR*:*:4.2:* | MPRAS*:*:4.2:*)
+	OS_REL='.3'
+	test -r /etc/.relid \
+	    && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
+	    && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
+	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
+	    && { echo i586-ncr-sysv4.3${OS_REL}; exit; }
+	/bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
+	    && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
     m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
 	echo m68k-unknown-lynxos${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     mc68030:UNIX_System_V:4.*:*)
 	echo m68k-atari-sysv4
-	exit 0 ;;
+	exit ;;
     TSUNAMI:LynxOS:2.*:*)
 	echo sparc-unknown-lynxos${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     rs6000:LynxOS:2.*:*)
 	echo rs6000-unknown-lynxos${UNAME_RELEASE}
-	exit 0 ;;
-    PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*)
+	exit ;;
+    PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
 	echo powerpc-unknown-lynxos${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     SM[BE]S:UNIX_SV:*:*)
 	echo mips-dde-sysv${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     RM*:ReliantUNIX-*:*:*)
 	echo mips-sni-sysv4
-	exit 0 ;;
+	exit ;;
     RM*:SINIX-*:*:*)
 	echo mips-sni-sysv4
-	exit 0 ;;
+	exit ;;
     *:SINIX-*:*:*)
 	if uname -p 2>/dev/null >/dev/null ; then
 		UNAME_MACHINE=`(uname -p) 2>/dev/null`
@@ -1137,68 +1154,94 @@ EOF
 	else
 		echo ns32k-sni-sysv
 	fi
-	exit 0 ;;
-    PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort
-                      # says <Richard.M.Bartel@ccMail.Census.GOV>
-        echo i586-unisys-sysv4
-        exit 0 ;;
+	exit ;;
+    PENTIUM:*:4.0*:*)	# Unisys `ClearPath HMP IX 4000' SVR4/MP effort
+			# says <Richard.M.Bartel@ccMail.Census.GOV>
+	echo i586-unisys-sysv4
+	exit ;;
     *:UNIX_System_V:4*:FTX*)
 	# From Gerald Hewes <hewes@openmarket.com>.
 	# How about differentiating between stratus architectures? -djm
 	echo hppa1.1-stratus-sysv4
-	exit 0 ;;
+	exit ;;
     *:*:*:FTX*)
 	# From seanf@swdc.stratus.com.
 	echo i860-stratus-sysv4
-	exit 0 ;;
+	exit ;;
+    i*86:VOS:*:*)
+	# From Paul.Green@stratus.com.
+	echo ${UNAME_MACHINE}-stratus-vos
+	exit ;;
     *:VOS:*:*)
 	# From Paul.Green@stratus.com.
 	echo hppa1.1-stratus-vos
-	exit 0 ;;
+	exit ;;
     mc68*:A/UX:*:*)
 	echo m68k-apple-aux${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     news*:NEWS-OS:6*:*)
 	echo mips-sony-newsos6
-	exit 0 ;;
+	exit ;;
     R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
 	if [ -d /usr/nec ]; then
-	        echo mips-nec-sysv${UNAME_RELEASE}
+		echo mips-nec-sysv${UNAME_RELEASE}
 	else
-	        echo mips-unknown-sysv${UNAME_RELEASE}
+		echo mips-unknown-sysv${UNAME_RELEASE}
 	fi
-        exit 0 ;;
+	exit ;;
     BeBox:BeOS:*:*)	# BeOS running on hardware made by Be, PPC only.
 	echo powerpc-be-beos
-	exit 0 ;;
+	exit ;;
     BeMac:BeOS:*:*)	# BeOS running on Mac or Mac clone, PPC only.
 	echo powerpc-apple-beos
-	exit 0 ;;
+	exit ;;
     BePC:BeOS:*:*)	# BeOS running on Intel PC compatible.
 	echo i586-pc-beos
-	exit 0 ;;
+	exit ;;
+    BePC:Haiku:*:*)	# Haiku running on Intel PC compatible.
+	echo i586-pc-haiku
+	exit ;;
     SX-4:SUPER-UX:*:*)
 	echo sx4-nec-superux${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     SX-5:SUPER-UX:*:*)
 	echo sx5-nec-superux${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     SX-6:SUPER-UX:*:*)
 	echo sx6-nec-superux${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
+    SX-7:SUPER-UX:*:*)
+	echo sx7-nec-superux${UNAME_RELEASE}
+	exit ;;
+    SX-8:SUPER-UX:*:*)
+	echo sx8-nec-superux${UNAME_RELEASE}
+	exit ;;
+    SX-8R:SUPER-UX:*:*)
+	echo sx8r-nec-superux${UNAME_RELEASE}
+	exit ;;
     Power*:Rhapsody:*:*)
 	echo powerpc-apple-rhapsody${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     *:Rhapsody:*:*)
 	echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     *:Darwin:*:*)
-	case `uname -p` in
-	    *86) UNAME_PROCESSOR=i686 ;;
-	    powerpc) UNAME_PROCESSOR=powerpc ;;
+	UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
+	case $UNAME_PROCESSOR in
+	    i386)
+		eval $set_cc_for_build
+		if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+		  if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+		      (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+		      grep IS_64BIT_ARCH >/dev/null
+		  then
+		      UNAME_PROCESSOR="x86_64"
+		  fi
+		fi ;;
+	    unknown) UNAME_PROCESSOR=powerpc ;;
 	esac
 	echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     *:procnto*:*:* | *:QNX:[0123456789]*:*)
 	UNAME_PROCESSOR=`uname -p`
 	if test "$UNAME_PROCESSOR" = "x86"; then
@@ -1206,22 +1249,28 @@ EOF
 		UNAME_MACHINE=pc
 	fi
 	echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     *:QNX:*:4*)
 	echo i386-pc-qnx
-	exit 0 ;;
+	exit ;;
+    NEO-?:NONSTOP_KERNEL:*:*)
+	echo neo-tandem-nsk${UNAME_RELEASE}
+	exit ;;
+    NSE-?:NONSTOP_KERNEL:*:*)
+	echo nse-tandem-nsk${UNAME_RELEASE}
+	exit ;;
     NSR-?:NONSTOP_KERNEL:*:*)
 	echo nsr-tandem-nsk${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     *:NonStop-UX:*:*)
 	echo mips-compaq-nonstopux
-	exit 0 ;;
+	exit ;;
     BS2000:POSIX*:*:*)
 	echo bs2000-siemens-sysv
-	exit 0 ;;
+	exit ;;
     DS/*:UNIX_System_V:*:*)
 	echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE}
-	exit 0 ;;
+	exit ;;
     *:Plan9:*:*)
 	# "uname -m" is not consistent, so use $cputype instead. 386
 	# is converted to i386 for consistency with other x86
@@ -1232,31 +1281,53 @@ EOF
 	    UNAME_MACHINE="$cputype"
 	fi
 	echo ${UNAME_MACHINE}-unknown-plan9
-	exit 0 ;;
+	exit ;;
     *:TOPS-10:*:*)
 	echo pdp10-unknown-tops10
-	exit 0 ;;
+	exit ;;
     *:TENEX:*:*)
 	echo pdp10-unknown-tenex
-	exit 0 ;;
+	exit ;;
     KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
 	echo pdp10-dec-tops20
-	exit 0 ;;
+	exit ;;
     XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
 	echo pdp10-xkl-tops20
-	exit 0 ;;
+	exit ;;
     *:TOPS-20:*:*)
 	echo pdp10-unknown-tops20
-	exit 0 ;;
+	exit ;;
     *:ITS:*:*)
 	echo pdp10-unknown-its
-	exit 0 ;;
+	exit ;;
     SEI:*:*:SEIUX)
-        echo mips-sei-seiux${UNAME_RELEASE}
-	exit 0 ;;
+	echo mips-sei-seiux${UNAME_RELEASE}
+	exit ;;
     *:DragonFly:*:*)
 	echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
-	exit 0 ;;
+	exit ;;
+    *:*VMS:*:*)
+	UNAME_MACHINE=`(uname -p) 2>/dev/null`
+	case "${UNAME_MACHINE}" in
+	    A*) echo alpha-dec-vms ; exit ;;
+	    I*) echo ia64-dec-vms ; exit ;;
+	    V*) echo vax-dec-vms ; exit ;;
+	esac ;;
+    *:XENIX:*:SysV)
+	echo i386-pc-xenix
+	exit ;;
+    i*86:skyos:*:*)
+	echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'
+	exit ;;
+    i*86:rdos:*:*)
+	echo ${UNAME_MACHINE}-pc-rdos
+	exit ;;
+    i*86:AROS:*:*)
+	echo ${UNAME_MACHINE}-pc-aros
+	exit ;;
+    x86_64:VMkernel:*:*)
+	echo ${UNAME_MACHINE}-unknown-esx
+	exit ;;
 esac
 
 #echo '(No uname command or uname output not recognized.)' 1>&2
@@ -1279,16 +1350,16 @@ main ()
 #include <sys/param.h>
   printf ("m68k-sony-newsos%s\n",
 #ifdef NEWSOS4
-          "4"
+	"4"
 #else
-	  ""
+	""
 #endif
-         ); exit (0);
+	); exit (0);
 #endif
 #endif
 
 #if defined (__arm) && defined (__acorn) && defined (__unix)
-  printf ("arm-acorn-riscix"); exit (0);
+  printf ("arm-acorn-riscix\n"); exit (0);
 #endif
 
 #if defined (hp300) && !defined (hpux)
@@ -1377,11 +1448,12 @@ main ()
 }
 EOF
 
-$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && $dummy && exit 0
+$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` &&
+	{ echo "$SYSTEM_NAME"; exit; }
 
 # Apollos put the system type in the environment.
 
-test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; }
+test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; }
 
 # Convex versions that predate uname can use getsysinfo(1)
 
@@ -1390,22 +1462,22 @@ then
     case `getsysinfo -f cpu_type` in
     c1*)
 	echo c1-convex-bsd
-	exit 0 ;;
+	exit ;;
     c2*)
 	if getsysinfo -f scalar_acc
 	then echo c32-convex-bsd
 	else echo c2-convex-bsd
 	fi
-	exit 0 ;;
+	exit ;;
     c34*)
 	echo c34-convex-bsd
-	exit 0 ;;
+	exit ;;
     c38*)
 	echo c38-convex-bsd
-	exit 0 ;;
+	exit ;;
     c4*)
 	echo c4-convex-bsd
-	exit 0 ;;
+	exit ;;
     esac
 fi
 
@@ -1416,7 +1488,9 @@ This script, last modified $timestamp, has failed to recognize
 the operating system you are using. It is advised that you
 download the most up to date version of the config scripts from
 
-    ftp://ftp.gnu.org/pub/gnu/config/
+  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
+and
+  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
 
 If the version you run ($0) is already up to date, please
 send the following data and any information you think might be
diff --git a/deps/jemalloc/config.sub b/deps/jemalloc/config.sub
index 264f820a..c894da45 100755
--- a/deps/jemalloc/config.sub
+++ b/deps/jemalloc/config.sub
@@ -1,9 +1,10 @@
 #! /bin/sh
 # Configuration validation subroutine script.
 #   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-#   2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
+#   2011, 2012 Free Software Foundation, Inc.
 
-timestamp='2004-02-23'
+timestamp='2012-02-10'
 
 # This file is (in principle) common to ALL GNU software.
 # The presence of a machine in this file suggests that SOME GNU software
@@ -20,23 +21,25 @@ timestamp='2004-02-23'
 # GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330,
-# Boston, MA 02111-1307, USA.
-
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+#
 # As a special exception to the GNU General Public License, if you
 # distribute this file as part of a program that contains a
 # configuration script generated by Autoconf, you may include it under
 # the same distribution terms that you use for the rest of that program.
 
+
 # Please send patches to <config-patches@gnu.org>.  Submit a context
-# diff and a properly formatted ChangeLog entry.
+# diff and a properly formatted GNU ChangeLog entry.
 #
 # Configuration subroutine to validate and canonicalize a configuration type.
 # Supply the specified configuration type as an argument.
 # If it is invalid, we print an error message on stderr and exit with code 1.
 # Otherwise, we print the canonical config type on stdout and succeed.
 
+# You can get the latest version of this script from:
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
+
 # This file is supposed to be the same for all GNU packages
 # and recognize all the CPU types, system types and aliases
 # that are meaningful with *any* GNU software.
@@ -70,7 +73,8 @@ Report bugs and patches to <config-patches@gnu.org>."
 version="\
 GNU config.sub ($timestamp)
 
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
 Free Software Foundation, Inc.
 
 This is free software; see the source for copying conditions.  There is NO
@@ -83,11 +87,11 @@ Try \`$me --help' for more information."
 while test $# -gt 0 ; do
   case $1 in
     --time-stamp | --time* | -t )
-       echo "$timestamp" ; exit 0 ;;
+       echo "$timestamp" ; exit ;;
     --version | -v )
-       echo "$version" ; exit 0 ;;
+       echo "$version" ; exit ;;
     --help | --h* | -h )
-       echo "$usage"; exit 0 ;;
+       echo "$usage"; exit ;;
     -- )     # Stop option processing
        shift; break ;;
     - )	# Use stdin as input.
@@ -99,7 +103,7 @@ while test $# -gt 0 ; do
     *local*)
        # First pass through any local machine types.
        echo $1
-       exit 0;;
+       exit ;;
 
     * )
        break ;;
@@ -118,11 +122,18 @@ esac
 # Here we must recognize all the valid KERNEL-OS combinations.
 maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
 case $maybe_os in
-  nto-qnx* | linux-gnu* | linux-dietlibc | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | \
-  kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*)
+  nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
+  linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
+  knetbsd*-gnu* | netbsd*-gnu* | \
+  kopensolaris*-gnu* | \
+  storm-chaos* | os2-emx* | rtmk-nova*)
     os=-$maybe_os
     basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
     ;;
+  android-linux)
+    os=-linux-android
+    basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
+    ;;
   *)
     basic_machine=`echo $1 | sed 's/-[^-]*$//'`
     if [ $basic_machine != $1 ]
@@ -145,10 +156,13 @@ case $os in
 	-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
 	-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
 	-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
-	-apple | -axis)
+	-apple | -axis | -knuth | -cray | -microblaze)
 		os=
 		basic_machine=$1
 		;;
+	-bluegene*)
+		os=-cnk
+		;;
 	-sim | -cisco | -oki | -wec | -winbond)
 		os=
 		basic_machine=$1
@@ -163,13 +177,17 @@ case $os in
 		os=-chorusos
 		basic_machine=$1
 		;;
- 	-chorusrdb)
- 		os=-chorusrdb
+	-chorusrdb)
+		os=-chorusrdb
 		basic_machine=$1
- 		;;
+		;;
 	-hiux*)
 		os=-hiuxwe2
 		;;
+	-sco6)
+		os=-sco5v6
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
 	-sco5)
 		os=-sco3.2v5
 		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
@@ -186,6 +204,10 @@ case $os in
 		# Don't forget version if it is 3.2v4 or newer.
 		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
 		;;
+	-sco5v6*)
+		# Don't forget version if it is 3.2v4 or newer.
+		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
+		;;
 	-sco*)
 		os=-sco3.2v2
 		basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
@@ -227,25 +249,36 @@ case $basic_machine in
 	# Some are omitted here because they have special meanings below.
 	1750a | 580 \
 	| a29k \
+	| aarch64 | aarch64_be \
 	| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
 	| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
 	| am33_2.0 \
-	| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr \
+	| arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
+        | be32 | be64 \
+	| bfin \
 	| c4x | clipper \
 	| d10v | d30v | dlx | dsp16xx \
-	| fr30 | frv \
+	| epiphany \
+	| fido | fr30 | frv \
 	| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+	| hexagon \
 	| i370 | i860 | i960 | ia64 \
 	| ip2k | iq2000 \
-	| m32r | m68000 | m68k | m88k | mcore \
+	| le32 | le64 \
+	| lm32 \
+	| m32c | m32r | m32rle | m68000 | m68k | m88k \
+	| maxq | mb | microblaze | mcore | mep | metag \
 	| mips | mipsbe | mipseb | mipsel | mipsle \
 	| mips16 \
 	| mips64 | mips64el \
-	| mips64vr | mips64vrel \
+	| mips64octeon | mips64octeonel \
 	| mips64orion | mips64orionel \
+	| mips64r5900 | mips64r5900el \
+	| mips64vr | mips64vrel \
 	| mips64vr4100 | mips64vr4100el \
 	| mips64vr4300 | mips64vr4300el \
 	| mips64vr5000 | mips64vr5000el \
+	| mips64vr5900 | mips64vr5900el \
 	| mipsisa32 | mipsisa32el \
 	| mipsisa32r2 | mipsisa32r2el \
 	| mipsisa64 | mipsisa64el \
@@ -254,30 +287,65 @@ case $basic_machine in
 	| mipsisa64sr71k | mipsisa64sr71kel \
 	| mipstx39 | mipstx39el \
 	| mn10200 | mn10300 \
+	| moxie \
+	| mt \
 	| msp430 \
+	| nds32 | nds32le | nds32be \
+	| nios | nios2 \
 	| ns16k | ns32k \
-	| openrisc | or32 \
+	| open8 \
+	| or32 \
 	| pdp10 | pdp11 | pj | pjl \
-	| powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \
+	| powerpc | powerpc64 | powerpc64le | powerpcle \
 	| pyramid \
-	| sh | sh[1234] | sh[23]e | sh[34]eb | shbe | shle | sh[1234]le | sh3ele \
+	| rl78 | rx \
+	| score \
+	| sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
 	| sh64 | sh64le \
-	| sparc | sparc64 | sparc86x | sparclet | sparclite | sparcv9 | sparcv9b \
-	| strongarm \
-	| tahoe | thumb | tic4x | tic80 | tron \
-	| v850 | v850e \
+	| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
+	| sparcv8 | sparcv9 | sparcv9b | sparcv9v \
+	| spu \
+	| tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
+	| ubicom32 \
+	| v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
 	| we32k \
-	| x86 | xscale | xstormy16 | xtensa \
-	| z8k)
+	| x86 | xc16x | xstormy16 | xtensa \
+	| z8k | z80)
 		basic_machine=$basic_machine-unknown
 		;;
-	m6811 | m68hc11 | m6812 | m68hc12)
-		# Motorola 68HC11/12.
+	c54x)
+		basic_machine=tic54x-unknown
+		;;
+	c55x)
+		basic_machine=tic55x-unknown
+		;;
+	c6x)
+		basic_machine=tic6x-unknown
+		;;
+	m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
 		basic_machine=$basic_machine-unknown
 		os=-none
 		;;
 	m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)
 		;;
+	ms1)
+		basic_machine=mt-unknown
+		;;
+
+	strongarm | thumb | xscale)
+		basic_machine=arm-unknown
+		;;
+	xgate)
+		basic_machine=$basic_machine-unknown
+		os=-none
+		;;
+	xscaleeb)
+		basic_machine=armeb-unknown
+		;;
+
+	xscaleel)
+		basic_machine=armel-unknown
+		;;
 
 	# We use `pc' rather than `unknown'
 	# because (1) that's what they normally are, and
@@ -293,32 +361,40 @@ case $basic_machine in
 	# Recognize the basic CPU types with company name.
 	580-* \
 	| a29k-* \
+	| aarch64-* | aarch64_be-* \
 	| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
 	| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
 	| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
 	| arm-*  | armbe-* | armle-* | armeb-* | armv*-* \
-	| avr-* \
-	| bs2000-* \
-	| c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \
-	| clipper-* | cydra-* \
+	| avr-* | avr32-* \
+	| be32-* | be64-* \
+	| bfin-* | bs2000-* \
+	| c[123]* | c30-* | [cjt]90-* | c4x-* \
+	| clipper-* | craynv-* | cydra-* \
 	| d10v-* | d30v-* | dlx-* \
 	| elxsi-* \
-	| f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \
+	| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
 	| h8300-* | h8500-* \
 	| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
+	| hexagon-* \
 	| i*86-* | i860-* | i960-* | ia64-* \
 	| ip2k-* | iq2000-* \
-	| m32r-* \
+	| le32-* | le64-* \
+	| lm32-* \
+	| m32c-* | m32r-* | m32rle-* \
 	| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
-	| m88110-* | m88k-* | mcore-* \
+	| m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \
 	| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
 	| mips16-* \
 	| mips64-* | mips64el-* \
-	| mips64vr-* | mips64vrel-* \
+	| mips64octeon-* | mips64octeonel-* \
 	| mips64orion-* | mips64orionel-* \
+	| mips64r5900-* | mips64r5900el-* \
+	| mips64vr-* | mips64vrel-* \
 	| mips64vr4100-* | mips64vr4100el-* \
 	| mips64vr4300-* | mips64vr4300el-* \
 	| mips64vr5000-* | mips64vr5000el-* \
+	| mips64vr5900-* | mips64vr5900el-* \
 	| mipsisa32-* | mipsisa32el-* \
 	| mipsisa32r2-* | mipsisa32r2el-* \
 	| mipsisa64-* | mipsisa64el-* \
@@ -326,26 +402,39 @@ case $basic_machine in
 	| mipsisa64sb1-* | mipsisa64sb1el-* \
 	| mipsisa64sr71k-* | mipsisa64sr71kel-* \
 	| mipstx39-* | mipstx39el-* \
+	| mmix-* \
+	| mt-* \
 	| msp430-* \
-	| none-* | np1-* | nv1-* | ns16k-* | ns32k-* \
+	| nds32-* | nds32le-* | nds32be-* \
+	| nios-* | nios2-* \
+	| none-* | np1-* | ns16k-* | ns32k-* \
+	| open8-* \
 	| orion-* \
 	| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
-	| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \
+	| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
 	| pyramid-* \
-	| romp-* | rs6000-* \
-	| sh-* | sh[1234]-* | sh[23]e-* | sh[34]eb-* | shbe-* \
+	| rl78-* | romp-* | rs6000-* | rx-* \
+	| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
 	| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
-	| sparc-* | sparc64-* | sparc86x-* | sparclet-* | sparclite-* \
-	| sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \
-	| tahoe-* | thumb-* \
+	| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
+	| sparclite-* \
+	| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \
+	| tahoe-* \
 	| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
+	| tile*-* \
 	| tron-* \
-	| v850-* | v850e-* | vax-* \
+	| ubicom32-* \
+	| v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
+	| vax-* \
 	| we32k-* \
-	| x86-* | x86_64-* | xps100-* | xscale-* | xstormy16-* \
-	| xtensa-* \
+	| x86-* | x86_64-* | xc16x-* | xps100-* \
+	| xstormy16-* | xtensa*-* \
 	| ymp-* \
-	| z8k-*)
+	| z8k-* | z80-*)
+		;;
+	# Recognize the basic CPU types without company name, with glob match.
+	xtensa*)
+		basic_machine=$basic_machine-unknown
 		;;
 	# Recognize the various machine names and aliases which stand
 	# for a CPU type and a company and sometimes even an OS.
@@ -363,7 +452,7 @@ case $basic_machine in
 		basic_machine=a29k-amd
 		os=-udi
 		;;
-    	abacus)
+	abacus)
 		basic_machine=abacus-unknown
 		;;
 	adobe68k)
@@ -409,6 +498,10 @@ case $basic_machine in
 		basic_machine=m68k-apollo
 		os=-bsd
 		;;
+	aros)
+		basic_machine=i386-pc
+		os=-aros
+		;;
 	aux)
 		basic_machine=m68k-apple
 		os=-aux
@@ -417,10 +510,35 @@ case $basic_machine in
 		basic_machine=ns32k-sequent
 		os=-dynix
 		;;
+	blackfin)
+		basic_machine=bfin-unknown
+		os=-linux
+		;;
+	blackfin-*)
+		basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
+	bluegene*)
+		basic_machine=powerpc-ibm
+		os=-cnk
+		;;
+	c54x-*)
+		basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	c55x-*)
+		basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
+	c6x-*)
+		basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
 	c90)
 		basic_machine=c90-cray
 		os=-unicos
 		;;
+	cegcc)
+		basic_machine=arm-unknown
+		os=-cegcc
+		;;
 	convex-c1)
 		basic_machine=c1-convex
 		os=-bsd
@@ -445,13 +563,20 @@ case $basic_machine in
 		basic_machine=j90-cray
 		os=-unicos
 		;;
-	cr16c)
-		basic_machine=cr16c-unknown
+	craynv)
+		basic_machine=craynv-cray
+		os=-unicosmp
+		;;
+	cr16 | cr16-*)
+		basic_machine=cr16-unknown
 		os=-elf
 		;;
 	crds | unos)
 		basic_machine=m68k-crds
 		;;
+	crisv32 | crisv32-* | etraxfs*)
+		basic_machine=crisv32-axis
+		;;
 	cris | cris-* | etrax*)
 		basic_machine=cris-axis
 		;;
@@ -481,6 +606,14 @@ case $basic_machine in
 		basic_machine=m88k-motorola
 		os=-sysv3
 		;;
+	dicos)
+		basic_machine=i686-pc
+		os=-dicos
+		;;
+	djgpp)
+		basic_machine=i586-pc
+		os=-msdosdjgpp
+		;;
 	dpx20 | dpx20-*)
 		basic_machine=rs6000-bull
 		os=-bosx
@@ -592,7 +725,6 @@ case $basic_machine in
 	i370-ibm* | ibm*)
 		basic_machine=i370-ibm
 		;;
-# I'm not sure what "Sysv32" means.  Should this be sysv3.2?
 	i*86v32)
 		basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
 		os=-sysv32
@@ -631,6 +763,14 @@ case $basic_machine in
 		basic_machine=m68k-isi
 		os=-sysv
 		;;
+	m68knommu)
+		basic_machine=m68k-unknown
+		os=-linux
+		;;
+	m68knommu-*)
+		basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
 	m88k-omron*)
 		basic_machine=m88k-omron
 		;;
@@ -642,10 +782,17 @@ case $basic_machine in
 		basic_machine=ns32k-utek
 		os=-sysv
 		;;
+	microblaze)
+		basic_machine=microblaze-xilinx
+		;;
 	mingw32)
 		basic_machine=i386-pc
 		os=-mingw32
 		;;
+	mingw32ce)
+		basic_machine=arm-unknown
+		os=-mingw32ce
+		;;
 	miniframe)
 		basic_machine=m68000-convergent
 		;;
@@ -659,10 +806,6 @@ case $basic_machine in
 	mips3*)
 		basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
 		;;
-	mmix*)
-		basic_machine=mmix-knuth
-		os=-mmixware
-		;;
 	monitor)
 		basic_machine=m68k-rom68k
 		os=-coff
@@ -675,10 +818,21 @@ case $basic_machine in
 		basic_machine=i386-pc
 		os=-msdos
 		;;
+	ms1-*)
+		basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
+		;;
+	msys)
+		basic_machine=i386-pc
+		os=-msys
+		;;
 	mvs)
 		basic_machine=i370-ibm
 		os=-mvs
 		;;
+	nacl)
+		basic_machine=le32-unknown
+		os=-nacl
+		;;
 	ncr3000)
 		basic_machine=i486-ncr
 		os=-sysv4
@@ -743,9 +897,11 @@ case $basic_machine in
 	np1)
 		basic_machine=np1-gould
 		;;
-	nv1)
-		basic_machine=nv1-cray
-		os=-unicosmp
+	neo-tandem)
+		basic_machine=neo-tandem
+		;;
+	nse-tandem)
+		basic_machine=nse-tandem
 		;;
 	nsr-tandem)
 		basic_machine=nsr-tandem
@@ -754,9 +910,8 @@ case $basic_machine in
 		basic_machine=hppa1.1-oki
 		os=-proelf
 		;;
-	or32 | or32-*)
+	openrisc | openrisc-*)
 		basic_machine=or32-unknown
-		os=-coff
 		;;
 	os400)
 		basic_machine=powerpc-ibm
@@ -778,6 +933,14 @@ case $basic_machine in
 		basic_machine=i860-intel
 		os=-osf
 		;;
+	parisc)
+		basic_machine=hppa-unknown
+		os=-linux
+		;;
+	parisc-*)
+		basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'`
+		os=-linux
+		;;
 	pbd)
 		basic_machine=sparc-tti
 		;;
@@ -787,6 +950,12 @@ case $basic_machine in
 	pc532 | pc532-*)
 		basic_machine=ns32k-pc532
 		;;
+	pc98)
+		basic_machine=i386-pc
+		;;
+	pc98-*)
+		basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
 	pentium | p5 | k5 | k6 | nexgen | viac3)
 		basic_machine=i586-pc
 		;;
@@ -816,9 +985,10 @@ case $basic_machine in
 		;;
 	power)	basic_machine=power-ibm
 		;;
-	ppc)	basic_machine=powerpc-unknown
+	ppc | ppcbe)	basic_machine=powerpc-unknown
 		;;
-	ppc-*)	basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
+	ppc-* | ppcbe-*)
+		basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
 		;;
 	ppcle | powerpclittle | ppc-le | powerpc-little)
 		basic_machine=powerpcle-unknown
@@ -843,6 +1013,10 @@ case $basic_machine in
 		basic_machine=i586-unknown
 		os=-pw32
 		;;
+	rdos)
+		basic_machine=i386-pc
+		os=-rdos
+		;;
 	rom68k)
 		basic_machine=m68k-rom68k
 		os=-coff
@@ -869,6 +1043,10 @@ case $basic_machine in
 	sb1el)
 		basic_machine=mipsisa64sb1el-unknown
 		;;
+	sde)
+		basic_machine=mipsisa32-sde
+		os=-elf
+		;;
 	sei)
 		basic_machine=mips-sei
 		os=-seiux
@@ -880,6 +1058,9 @@ case $basic_machine in
 		basic_machine=sh-hitachi
 		os=-hms
 		;;
+	sh5el)
+		basic_machine=sh5le-unknown
+		;;
 	sh64)
 		basic_machine=sh64-unknown
 		;;
@@ -901,6 +1082,9 @@ case $basic_machine in
 		basic_machine=i860-stratus
 		os=-sysv4
 		;;
+	strongarm-* | thumb-*)
+		basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'`
+		;;
 	sun2)
 		basic_machine=m68000-sun
 		;;
@@ -957,17 +1141,9 @@ case $basic_machine in
 		basic_machine=t90-cray
 		os=-unicos
 		;;
-	tic54x | c54x*)
-		basic_machine=tic54x-unknown
-		os=-coff
-		;;
-	tic55x | c55x*)
-		basic_machine=tic55x-unknown
-		os=-coff
-		;;
-	tic6x | c6x*)
-		basic_machine=tic6x-unknown
-		os=-coff
+	tile*)
+		basic_machine=$basic_machine-unknown
+		os=-linux-gnu
 		;;
 	tx39)
 		basic_machine=mipstx39-unknown
@@ -1029,9 +1205,16 @@ case $basic_machine in
 		basic_machine=hppa1.1-winbond
 		os=-proelf
 		;;
+	xbox)
+		basic_machine=i686-pc
+		os=-mingw32
+		;;
 	xps | xps100)
 		basic_machine=xps100-honeywell
 		;;
+	xscale-* | xscalee[bl]-*)
+		basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'`
+		;;
 	ymp)
 		basic_machine=ymp-cray
 		os=-unicos
@@ -1040,6 +1223,10 @@ case $basic_machine in
 		basic_machine=z8k-unknown
 		os=-sim
 		;;
+	z80-*-coff)
+		basic_machine=z80-unknown
+		os=-sim
+		;;
 	none)
 		basic_machine=none-none
 		os=-none
@@ -1059,6 +1246,9 @@ case $basic_machine in
 	romp)
 		basic_machine=romp-ibm
 		;;
+	mmix)
+		basic_machine=mmix-knuth
+		;;
 	rs6000)
 		basic_machine=rs6000-ibm
 		;;
@@ -1075,13 +1265,10 @@ case $basic_machine in
 	we32k)
 		basic_machine=we32k-att
 		;;
-	sh3 | sh4 | sh[34]eb | sh[1234]le | sh[23]ele)
+	sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
 		basic_machine=sh-unknown
 		;;
-	sh64)
-		basic_machine=sh64-unknown
-		;;
-	sparc | sparcv9 | sparcv9b)
+	sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
 		basic_machine=sparc-sun
 		;;
 	cydra)
@@ -1125,9 +1312,12 @@ esac
 if [ x"$os" != x"" ]
 then
 case $os in
-        # First match some system type aliases
-        # that might get confused with valid system types.
+	# First match some system type aliases
+	# that might get confused with valid system types.
 	# -solaris* is a basic system type, with this one exception.
+	-auroraux)
+		os=-auroraux
+		;;
 	-solaris1 | -solaris1.*)
 		os=`echo $os | sed -e 's|solaris1|sunos4|'`
 		;;
@@ -1148,26 +1338,31 @@ case $os in
 	# Each alternative MUST END IN A *, to match a version number.
 	# -sysv* is not here because it comes later, after sysvr4.
 	-gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
-	      | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\
-	      | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \
+	      | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
+	      | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
+	      | -sym* | -kopensolaris* \
 	      | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
-	      | -aos* \
+	      | -aos* | -aros* \
 	      | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
 	      | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
-	      | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* | -openbsd* \
+	      | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
+	      | -openbsd* | -solidbsd* \
 	      | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
 	      | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
 	      | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
 	      | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
-	      | -chorusos* | -chorusrdb* \
-	      | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
-	      | -mingw32* | -linux-gnu* | -linux-uclibc* | -uxpv* | -beos* | -mpeix* | -udk* \
+	      | -chorusos* | -chorusrdb* | -cegcc* \
+	      | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+	      | -mingw32* | -linux-gnu* | -linux-android* \
+	      | -linux-newlib* | -linux-uclibc* \
+	      | -uxpv* | -beos* | -mpeix* | -udk* \
 	      | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
 	      | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
 	      | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
 	      | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
 	      | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
-	      | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly*)
+	      | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
+	      | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es*)
 	# Remember, each alternative MUST END IN *, to match a version number.
 		;;
 	-qnx*)
@@ -1185,7 +1380,7 @@ case $os in
 		os=`echo $os | sed -e 's|nto|nto-qnx|'`
 		;;
 	-sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
-	      | -windows* | -osx | -abug | -netware* | -os9* | -beos* \
+	      | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \
 	      | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
 		;;
 	-mac*)
@@ -1206,7 +1401,7 @@ case $os in
 	-opened*)
 		os=-openedition
 		;;
-        -os400*)
+	-os400*)
 		os=-os400
 		;;
 	-wince*)
@@ -1255,7 +1450,7 @@ case $os in
 	-sinix*)
 		os=-sysv4
 		;;
-        -tpf*)
+	-tpf*)
 		os=-tpf
 		;;
 	-triton*)
@@ -1294,6 +1489,14 @@ case $os in
 	-kaos*)
 		os=-kaos
 		;;
+	-zvmoe)
+		os=-zvmoe
+		;;
+	-dicos*)
+		os=-dicos
+		;;
+	-nacl*)
+		;;
 	-none)
 		;;
 	*)
@@ -1316,6 +1519,12 @@ else
 # system, and we'll never get to this point.
 
 case $basic_machine in
+	score-*)
+		os=-elf
+		;;
+	spu-*)
+		os=-elf
+		;;
 	*-acorn)
 		os=-riscix1.2
 		;;
@@ -1325,9 +1534,18 @@ case $basic_machine in
 	arm*-semi)
 		os=-aout
 		;;
-    c4x-* | tic4x-*)
-        os=-coff
-        ;;
+	c4x-* | tic4x-*)
+		os=-coff
+		;;
+	tic54x-*)
+		os=-coff
+		;;
+	tic55x-*)
+		os=-coff
+		;;
+	tic6x-*)
+		os=-coff
+		;;
 	# This must come before the *-dec entry.
 	pdp10-*)
 		os=-tops20
@@ -1346,13 +1564,13 @@ case $basic_machine in
 		;;
 	m68000-sun)
 		os=-sunos3
-		# This also exists in the configure program, but was not the
-		# default.
-		# os=-sunos4
 		;;
 	m68*-cisco)
 		os=-aout
 		;;
+	mep-*)
+		os=-elf
+		;;
 	mips*-cisco)
 		os=-elf
 		;;
@@ -1371,9 +1589,15 @@ case $basic_machine in
 	*-be)
 		os=-beos
 		;;
+	*-haiku)
+		os=-haiku
+		;;
 	*-ibm)
 		os=-aix
 		;;
+	*-knuth)
+		os=-mmixware
+		;;
 	*-wec)
 		os=-proelf
 		;;
@@ -1476,7 +1700,7 @@ case $basic_machine in
 			-sunos*)
 				vendor=sun
 				;;
-			-aix*)
+			-cnk*|-aix*)
 				vendor=ibm
 				;;
 			-beos*)
@@ -1539,7 +1763,7 @@ case $basic_machine in
 esac
 
 echo $basic_machine$os
-exit 0
+exit
 
 # Local variables:
 # eval: (add-hook 'write-file-hooks 'time-stamp)
diff --git a/deps/jemalloc/configure b/deps/jemalloc/configure
index 610884fb..1d2b8bee 100755
--- a/deps/jemalloc/configure
+++ b/deps/jemalloc/configure
@@ -1,11 +1,11 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.65.
+# Generated by GNU Autoconf 2.68.
 #
 #
 # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
-# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software
+# Foundation, Inc.
 #
 #
 # This configure script is free software; the Free Software Foundation
@@ -89,6 +89,7 @@ fi
 IFS=" ""	$as_nl"
 
 # Find who we are.  Look in the path if we contain no directory separator.
+as_myself=
 case $0 in #((
   *[\\/]* ) as_myself=$0 ;;
   *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
@@ -214,11 +215,18 @@ IFS=$as_save_IFS
   # We cannot yet assume a decent shell, so we have to provide a
 	# neutralization value for shells without unset; and this also
 	# works around shells that cannot unset nonexistent variables.
+	# Preserve -v and -x to the replacement shell.
 	BASH_ENV=/dev/null
 	ENV=/dev/null
 	(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
 	export CONFIG_SHELL
-	exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"}
+	case $- in # ((((
+	  *v*x* | *x*v* ) as_opts=-vx ;;
+	  *v* ) as_opts=-v ;;
+	  *x* ) as_opts=-x ;;
+	  * ) as_opts= ;;
+	esac
+	exec "$CONFIG_SHELL" $as_opts "$as_myself" ${1+"$@"}
 fi
 
     if test x$as_have_required = xno; then :
@@ -316,7 +324,7 @@ $as_echo X"$as_dir" |
       test -d "$as_dir" && break
     done
     test -z "$as_dirs" || eval "mkdir $as_dirs"
-  } || test -d "$as_dir" || as_fn_error "cannot create directory $as_dir"
+  } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
 
 
 } # as_fn_mkdir_p
@@ -356,19 +364,19 @@ else
 fi # as_fn_arith
 
 
-# as_fn_error ERROR [LINENO LOG_FD]
-# ---------------------------------
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
 # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
 # provided, also output the error to LOG_FD, referencing LINENO. Then exit the
-# script with status $?, using 1 if that was 0.
+# script with STATUS, using 1 if that was 0.
 as_fn_error ()
 {
-  as_status=$?; test $as_status -eq 0 && as_status=1
-  if test "$3"; then
-    as_lineno=${as_lineno-"$2"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
-    $as_echo "$as_me:${as_lineno-$LINENO}: error: $1" >&$3
+  as_status=$1; test $as_status -eq 0 && as_status=1
+  if test "$4"; then
+    as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+    $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
   fi
-  $as_echo "$as_me: error: $1" >&2
+  $as_echo "$as_me: error: $2" >&2
   as_fn_exit $as_status
 } # as_fn_error
 
@@ -530,7 +538,7 @@ test -n "$DJDIR" || exec 7<&0 </dev/null
 exec 6>&1
 
 # Name of the host.
-# hostname on some systems (SVR3.2, Linux) returns a bogus exit status,
+# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status,
 # so uname gets run too.
 ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
 
@@ -605,18 +613,19 @@ jemalloc_version_bugfix
 jemalloc_version_minor
 jemalloc_version_major
 jemalloc_version
-enable_dynamic_page_shift
-enable_sysv
 enable_xmalloc
+enable_valgrind
+enable_utrace
 enable_fill
 enable_dss
-enable_swap
+enable_munmap
+enable_mremap
 enable_tcache
-enable_tiny
 enable_prof
 enable_stats
 enable_debug
 install_suffix
+enable_experimental
 AUTOCONF
 LD
 AR
@@ -626,6 +635,21 @@ INSTALL_SCRIPT
 INSTALL_PROGRAM
 enable_autogen
 RPATH_EXTRA
+CC_MM
+MKLIB
+LDTARGET
+CTARGET
+PIC_CFLAGS
+SOREV
+EXTRA_LDFLAGS
+DSO_LDFLAGS
+libprefix
+exe
+a
+o
+importlib
+so
+LD_PRELOAD_VAR
 RPATH
 abi
 host_os
@@ -658,6 +682,7 @@ abs_objroot
 objroot
 abs_srcroot
 srcroot
+rev
 target_alias
 host_alias
 build_alias
@@ -702,6 +727,8 @@ enable_option_checking
 with_xslroot
 with_rpath
 enable_autogen
+enable_experimental
+with_mangling
 with_jemalloc_prefix
 with_private_namespace
 with_install_suffix
@@ -713,14 +740,14 @@ enable_prof_libunwind
 with_static_libunwind
 enable_prof_libgcc
 enable_prof_gcc
-enable_tiny
 enable_tcache
-enable_swap
+enable_mremap
+enable_munmap
 enable_dss
 enable_fill
+enable_utrace
+enable_valgrind
 enable_xmalloc
-enable_sysv
-enable_dynamic_page_shift
 enable_lazy_lock
 enable_tls
 '
@@ -795,8 +822,9 @@ do
   fi
 
   case $ac_option in
-  *=*)	ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
-  *)	ac_optarg=yes ;;
+  *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
+  *=)   ac_optarg= ;;
+  *)    ac_optarg=yes ;;
   esac
 
   # Accept the important Cygnus configure options, so we can diagnose typos.
@@ -841,7 +869,7 @@ do
     ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
     # Reject names that are not valid shell variable names.
     expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
-      as_fn_error "invalid feature name: $ac_useropt"
+      as_fn_error $? "invalid feature name: $ac_useropt"
     ac_useropt_orig=$ac_useropt
     ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
     case $ac_user_opts in
@@ -867,7 +895,7 @@ do
     ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
     # Reject names that are not valid shell variable names.
     expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
-      as_fn_error "invalid feature name: $ac_useropt"
+      as_fn_error $? "invalid feature name: $ac_useropt"
     ac_useropt_orig=$ac_useropt
     ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
     case $ac_user_opts in
@@ -1071,7 +1099,7 @@ do
     ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
     # Reject names that are not valid shell variable names.
     expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
-      as_fn_error "invalid package name: $ac_useropt"
+      as_fn_error $? "invalid package name: $ac_useropt"
     ac_useropt_orig=$ac_useropt
     ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
     case $ac_user_opts in
@@ -1087,7 +1115,7 @@ do
     ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`
     # Reject names that are not valid shell variable names.
     expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
-      as_fn_error "invalid package name: $ac_useropt"
+      as_fn_error $? "invalid package name: $ac_useropt"
     ac_useropt_orig=$ac_useropt
     ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
     case $ac_user_opts in
@@ -1117,8 +1145,8 @@ do
   | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
     x_libraries=$ac_optarg ;;
 
-  -*) as_fn_error "unrecognized option: \`$ac_option'
-Try \`$0 --help' for more information."
+  -*) as_fn_error $? "unrecognized option: \`$ac_option'
+Try \`$0 --help' for more information"
     ;;
 
   *=*)
@@ -1126,7 +1154,7 @@ Try \`$0 --help' for more information."
     # Reject names that are not valid shell variable names.
     case $ac_envvar in #(
       '' | [0-9]* | *[!_$as_cr_alnum]* )
-      as_fn_error "invalid variable name: \`$ac_envvar'" ;;
+      as_fn_error $? "invalid variable name: \`$ac_envvar'" ;;
     esac
     eval $ac_envvar=\$ac_optarg
     export $ac_envvar ;;
@@ -1136,7 +1164,7 @@ Try \`$0 --help' for more information."
     $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2
     expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
       $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2
-    : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}
+    : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}"
     ;;
 
   esac
@@ -1144,13 +1172,13 @@ done
 
 if test -n "$ac_prev"; then
   ac_option=--`echo $ac_prev | sed 's/_/-/g'`
-  as_fn_error "missing argument to $ac_option"
+  as_fn_error $? "missing argument to $ac_option"
 fi
 
 if test -n "$ac_unrecognized_opts"; then
   case $enable_option_checking in
     no) ;;
-    fatal) as_fn_error "unrecognized options: $ac_unrecognized_opts" ;;
+    fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;;
     *)     $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;
   esac
 fi
@@ -1173,7 +1201,7 @@ do
     [\\/$]* | ?:[\\/]* )  continue;;
     NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
   esac
-  as_fn_error "expected an absolute directory name for --$ac_var: $ac_val"
+  as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val"
 done
 
 # There might be people who depend on the old broken behavior: `$host'
@@ -1187,8 +1215,8 @@ target=$target_alias
 if test "x$host_alias" != x; then
   if test "x$build_alias" = x; then
     cross_compiling=maybe
-    $as_echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host.
-    If a cross compiler is detected then cross compile mode will be used." >&2
+    $as_echo "$as_me: WARNING: if you wanted to set the --build type, don't use --host.
+    If a cross compiler is detected then cross compile mode will be used" >&2
   elif test "x$build_alias" != "x$host_alias"; then
     cross_compiling=yes
   fi
@@ -1203,9 +1231,9 @@ test "$silent" = yes && exec 6>/dev/null
 ac_pwd=`pwd` && test -n "$ac_pwd" &&
 ac_ls_di=`ls -di .` &&
 ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
-  as_fn_error "working directory cannot be determined"
+  as_fn_error $? "working directory cannot be determined"
 test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
-  as_fn_error "pwd does not report name of working directory"
+  as_fn_error $? "pwd does not report name of working directory"
 
 
 # Find the source files, if location was not specified.
@@ -1244,11 +1272,11 @@ else
 fi
 if test ! -r "$srcdir/$ac_unique_file"; then
   test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
-  as_fn_error "cannot find sources ($ac_unique_file) in $srcdir"
+  as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir"
 fi
 ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
 ac_abs_confdir=`(
-	cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error "$ac_msg"
+	cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg"
 	pwd)`
 # When building in place, set srcdir=.
 if test "$ac_abs_confdir" = "$ac_pwd"; then
@@ -1288,7 +1316,7 @@ Configuration:
       --help=short        display options specific to this package
       --help=recursive    display the short help of all the included packages
   -V, --version           display version information and exit
-  -q, --quiet, --silent   do not print \`checking...' messages
+  -q, --quiet, --silent   do not print \`checking ...' messages
       --cache-file=FILE   cache test results in FILE [disabled]
   -C, --config-cache      alias for \`--cache-file=config.cache'
   -n, --no-create         do not create output files
@@ -1346,25 +1374,24 @@ Optional Features:
   --disable-FEATURE       do not include FEATURE (same as --enable-FEATURE=no)
   --enable-FEATURE[=ARG]  include FEATURE [ARG=yes]
   --enable-autogen        Automatically regenerate configure output
+  --disable-experimental  Disable support for the experimental API
   --enable-cc-silence     Silence irrelevant compiler warnings
   --enable-debug          Build debugging code
-  --enable-stats          Enable statistics calculation/reporting
+  --disable-stats         Disable statistics calculation/reporting
   --enable-prof           Enable allocation profiling
   --enable-prof-libunwind Use libunwind for backtracing
   --disable-prof-libgcc   Do not use libgcc for backtracing
   --disable-prof-gcc      Do not use gcc intrinsics for backtracing
-  --disable-tiny          Disable tiny (sub-quantum) allocations
   --disable-tcache        Disable per thread caches
-  --enable-swap           Enable mmap()ped swap files
+  --enable-mremap         Enable mremap(2) for huge realloc()
+  --disable-munmap        Disable VM deallocation via munmap(2)
   --enable-dss            Enable allocation from DSS
-  --enable-fill           Support junk/zero filling option
+  --disable-fill          Disable support for junk/zero filling, quarantine,
+                          and redzones
+  --enable-utrace         Enable utrace(2)-based tracing
+  --disable-valgrind      Disable support for Valgrind
   --enable-xmalloc        Support xmalloc option
-  --enable-sysv           Support SYSV semantics option
-  --enable-dynamic-page-shift
-                          Determine page size at run time (don't trust
-                          configure result)
-  --disable-lazy-lock     Disable lazy locking (always lock, even when
-                          single-threaded)
+  --enable-lazy-lock      Enable lazy locking (only lock when multi-threaded)
   --disable-tls           Disable thread-local storage (__thread keyword)
 
 Optional Packages:
@@ -1372,6 +1399,7 @@ Optional Packages:
   --without-PACKAGE       do not use PACKAGE (same as --with-PACKAGE=no)
   --with-xslroot=<path>   XSL stylesheet root path
   --with-rpath=<rpath>    Colon-separated rpath (ELF systems only)
+  --with-mangling=<map>   Mangle symbols in <map>
   --with-jemalloc-prefix=<prefix>
                           Prefix to prepend to all public APIs
   --with-private-namespace=<prefix>
@@ -1459,9 +1487,9 @@ test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
 configure
-generated by GNU Autoconf 2.65
+generated by GNU Autoconf 2.68
 
-Copyright (C) 2009 Free Software Foundation, Inc.
+Copyright (C) 2010 Free Software Foundation, Inc.
 This configure script is free software; the Free Software Foundation
 gives unlimited permission to copy, distribute and modify it.
 _ACEOF
@@ -1505,11 +1533,48 @@ sed 's/^/| /' conftest.$ac_ext >&5
 
 	ac_retval=1
 fi
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
   as_fn_set_status $ac_retval
 
 } # ac_fn_c_try_compile
 
+# ac_fn_c_try_cpp LINENO
+# ----------------------
+# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_cpp ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if { { ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } > conftest.i && {
+	 test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+	 test ! -s conftest.err
+       }; then :
+  ac_retval=0
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+    ac_retval=1
+fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_cpp
+
 # ac_fn_c_try_run LINENO
 # ----------------------
 # Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
@@ -1547,48 +1612,11 @@ sed 's/^/| /' conftest.$ac_ext >&5
        ac_retval=$ac_status
 fi
   rm -rf conftest.dSYM conftest_ipa8_conftest.oo
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
   as_fn_set_status $ac_retval
 
 } # ac_fn_c_try_run
 
-# ac_fn_c_try_cpp LINENO
-# ----------------------
-# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
-ac_fn_c_try_cpp ()
-{
-  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
-  if { { ac_try="$ac_cpp conftest.$ac_ext"
-case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
-  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
-  ac_status=$?
-  if test -s conftest.err; then
-    grep -v '^ *+' conftest.err >conftest.er1
-    cat conftest.er1 >&5
-    mv -f conftest.er1 conftest.err
-  fi
-  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; } >/dev/null && {
-	 test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
-	 test ! -s conftest.err
-       }; then :
-  ac_retval=0
-else
-  $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-    ac_retval=1
-fi
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
-  as_fn_set_status $ac_retval
-
-} # ac_fn_c_try_cpp
-
 # ac_fn_c_compute_int LINENO EXPR VAR INCLUDES
 # --------------------------------------------
 # Tries to find the compile-time value of EXPR in a program that includes
@@ -1762,7 +1790,7 @@ rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
 rm -f conftest.val
 
   fi
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
   as_fn_set_status $ac_retval
 
 } # ac_fn_c_compute_int
@@ -1776,7 +1804,7 @@ ac_fn_c_check_header_compile ()
   as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
 $as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${$3+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -1794,97 +1822,10 @@ fi
 eval ac_res=\$$3
 	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
 $as_echo "$ac_res" >&6; }
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
 
 } # ac_fn_c_check_header_compile
 
-# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
-# -------------------------------------------------------
-# Tests whether HEADER exists, giving a warning if it cannot be compiled using
-# the include files in INCLUDES and setting the cache variable VAR
-# accordingly.
-ac_fn_c_check_header_mongrel ()
-{
-  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
-  if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
-  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
-  $as_echo_n "(cached) " >&6
-fi
-eval ac_res=\$$3
-	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
-else
-  # Is the header compilable?
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
-$as_echo_n "checking $2 usability... " >&6; }
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-$4
-#include <$2>
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
-  ac_header_compiler=yes
-else
-  ac_header_compiler=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
-$as_echo "$ac_header_compiler" >&6; }
-
-# Is the header present?
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
-$as_echo_n "checking $2 presence... " >&6; }
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-#include <$2>
-_ACEOF
-if ac_fn_c_try_cpp "$LINENO"; then :
-  ac_header_preproc=yes
-else
-  ac_header_preproc=no
-fi
-rm -f conftest.err conftest.$ac_ext
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
-$as_echo "$ac_header_preproc" >&6; }
-
-# So?  What about this header?
-case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
-  yes:no: )
-    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
-$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
-    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
-$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
-    ;;
-  no:yes:* )
-    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
-$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
-    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     check for missing prerequisite headers?" >&5
-$as_echo "$as_me: WARNING: $2:     check for missing prerequisite headers?" >&2;}
-    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
-$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
-    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&5
-$as_echo "$as_me: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&2;}
-    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
-$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
-    ;;
-esac
-  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
-  $as_echo_n "(cached) " >&6
-else
-  eval "$3=\$ac_header_compiler"
-fi
-eval ac_res=\$$3
-	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
-fi
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
-
-} # ac_fn_c_check_header_mongrel
-
 # ac_fn_c_try_link LINENO
 # -----------------------
 # Try to link conftest.$ac_ext, and return whether this succeeded.
@@ -1926,7 +1867,7 @@ fi
   # interfere with the next link command; also delete a directory that is
   # left behind by Apple's compiler.  We do this before executing the actions.
   rm -rf conftest.dSYM conftest_ipa8_conftest.oo
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
   as_fn_set_status $ac_retval
 
 } # ac_fn_c_try_link
@@ -1939,7 +1880,7 @@ ac_fn_c_check_func ()
   as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
 $as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${$3+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -1994,10 +1935,97 @@ fi
 eval ac_res=\$$3
 	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
 $as_echo "$ac_res" >&6; }
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
 
 } # ac_fn_c_check_func
 
+# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists, giving a warning if it cannot be compiled using
+# the include files in INCLUDES and setting the cache variable VAR
+# accordingly.
+ac_fn_c_check_header_mongrel ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if eval \${$3+:} false; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+fi
+eval ac_res=\$$3
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+else
+  # Is the header compilable?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
+$as_echo_n "checking $2 usability... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  ac_header_compiler=yes
+else
+  ac_header_compiler=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
+$as_echo "$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
+$as_echo_n "checking $2 presence... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <$2>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+  ac_header_preproc=yes
+else
+  ac_header_preproc=no
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
+$as_echo "$ac_header_preproc" >&6; }
+
+# So?  What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
+  yes:no: )
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
+$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+    ;;
+  no:yes:* )
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
+$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     check for missing prerequisite headers?" >&5
+$as_echo "$as_me: WARNING: $2:     check for missing prerequisite headers?" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
+$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&5
+$as_echo "$as_me: WARNING: $2:     section \"Present But Cannot Be Compiled\"" >&2;}
+    { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+    ;;
+esac
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  eval "$3=\$ac_header_compiler"
+fi
+eval ac_res=\$$3
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_header_mongrel
+
 # ac_fn_c_check_type LINENO TYPE VAR INCLUDES
 # -------------------------------------------
 # Tests whether TYPE exists after having included INCLUDES, setting cache
@@ -2007,7 +2035,7 @@ ac_fn_c_check_type ()
   as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
 $as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${$3+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   eval "$3=no"
@@ -2048,7 +2076,7 @@ fi
 eval ac_res=\$$3
 	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
 $as_echo "$ac_res" >&6; }
-  eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
 
 } # ac_fn_c_check_type
 cat >config.log <<_ACEOF
@@ -2056,7 +2084,7 @@ This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
 It was created by $as_me, which was
-generated by GNU Autoconf 2.65.  Invocation command line was
+generated by GNU Autoconf 2.68.  Invocation command line was
 
   $ $0 $@
 
@@ -2166,11 +2194,9 @@ trap 'exit_status=$?
   {
     echo
 
-    cat <<\_ASBOX
-## ---------------- ##
+    $as_echo "## ---------------- ##
 ## Cache variables. ##
-## ---------------- ##
-_ASBOX
+## ---------------- ##"
     echo
     # The following way of writing the cache mishandles newlines in values,
 (
@@ -2204,11 +2230,9 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
 )
     echo
 
-    cat <<\_ASBOX
-## ----------------- ##
+    $as_echo "## ----------------- ##
 ## Output variables. ##
-## ----------------- ##
-_ASBOX
+## ----------------- ##"
     echo
     for ac_var in $ac_subst_vars
     do
@@ -2221,11 +2245,9 @@ _ASBOX
     echo
 
     if test -n "$ac_subst_files"; then
-      cat <<\_ASBOX
-## ------------------- ##
+      $as_echo "## ------------------- ##
 ## File substitutions. ##
-## ------------------- ##
-_ASBOX
+## ------------------- ##"
       echo
       for ac_var in $ac_subst_files
       do
@@ -2239,11 +2261,9 @@ _ASBOX
     fi
 
     if test -s confdefs.h; then
-      cat <<\_ASBOX
-## ----------- ##
+      $as_echo "## ----------- ##
 ## confdefs.h. ##
-## ----------- ##
-_ASBOX
+## ----------- ##"
       echo
       cat confdefs.h
       echo
@@ -2298,7 +2318,12 @@ _ACEOF
 ac_site_file1=NONE
 ac_site_file2=NONE
 if test -n "$CONFIG_SITE"; then
-  ac_site_file1=$CONFIG_SITE
+  # We do not want a PATH search for config.site.
+  case $CONFIG_SITE in #((
+    -*)  ac_site_file1=./$CONFIG_SITE;;
+    */*) ac_site_file1=$CONFIG_SITE;;
+    *)   ac_site_file1=./$CONFIG_SITE;;
+  esac
 elif test "x$prefix" != xNONE; then
   ac_site_file1=$prefix/share/config.site
   ac_site_file2=$prefix/etc/config.site
@@ -2313,7 +2338,11 @@ do
     { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5
 $as_echo "$as_me: loading site script $ac_site_file" >&6;}
     sed 's/^/| /' "$ac_site_file" >&5
-    . "$ac_site_file"
+    . "$ac_site_file" \
+      || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "failed to load site script $ac_site_file
+See \`config.log' for more details" "$LINENO" 5; }
   fi
 done
 
@@ -2389,7 +2418,7 @@ if $ac_cache_corrupted; then
 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
   { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5
 $as_echo "$as_me: error: changes in the environment can compromise the build" >&2;}
-  as_fn_error "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5
+  as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5
 fi
 ## -------------------- ##
 ## Main body of script. ##
@@ -2409,6 +2438,9 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 
+rev=1
+
+
 srcroot=$srcdir
 if test "x${srcroot}" = "x." ; then
   srcroot=""
@@ -2452,7 +2484,7 @@ MANDIR=`eval echo $MANDIR`
 set dummy xsltproc; ac_word=$2
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
 $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_path_XSLTPROC+set}" = set; then :
+if ${ac_cv_path_XSLTPROC+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   case $XSLTPROC in
@@ -2488,16 +2520,25 @@ $as_echo "no" >&6; }
 fi
 
 
+if test -d "/usr/share/xml/docbook/stylesheet/docbook-xsl" ; then
+  DEFAULT_XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl"
+elif test -d "/usr/share/sgml/docbook/xsl-stylesheets" ; then
+  DEFAULT_XSLROOT="/usr/share/sgml/docbook/xsl-stylesheets"
+else
+    DEFAULT_XSLROOT=""
+fi
 
 # Check whether --with-xslroot was given.
 if test "${with_xslroot+set}" = set; then :
-  withval=$with_xslroot; if test "x$with_xslroot" = "xno" ; then
-  XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl"
+  withval=$with_xslroot;
+if test "x$with_xslroot" = "xno" ; then
+  XSLROOT="${DEFAULT_XSLROOT}"
 else
   XSLROOT="${with_xslroot}"
 fi
+
 else
-  XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl"
+  XSLROOT="${DEFAULT_XSLROOT}"
 
 fi
 
@@ -2514,7 +2555,7 @@ if test -n "$ac_tool_prefix"; then
 set dummy ${ac_tool_prefix}gcc; ac_word=$2
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
 $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if ${ac_cv_prog_CC+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if test -n "$CC"; then
@@ -2554,7 +2595,7 @@ if test -z "$ac_cv_prog_CC"; then
 set dummy gcc; ac_word=$2
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
 $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_CC+set}" = set; then :
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if test -n "$ac_ct_CC"; then
@@ -2607,7 +2648,7 @@ if test -z "$CC"; then
 set dummy ${ac_tool_prefix}cc; ac_word=$2
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
 $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if ${ac_cv_prog_CC+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if test -n "$CC"; then
@@ -2647,7 +2688,7 @@ if test -z "$CC"; then
 set dummy cc; ac_word=$2
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
 $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if ${ac_cv_prog_CC+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if test -n "$CC"; then
@@ -2706,7 +2747,7 @@ if test -z "$CC"; then
 set dummy $ac_tool_prefix$ac_prog; ac_word=$2
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
 $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if ${ac_cv_prog_CC+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if test -n "$CC"; then
@@ -2750,7 +2791,7 @@ do
 set dummy $ac_prog; ac_word=$2
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
 $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_CC+set}" = set; then :
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if test -n "$ac_ct_CC"; then
@@ -2804,8 +2845,8 @@ fi
 
 test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "no acceptable C compiler found in \$PATH
-See \`config.log' for more details." "$LINENO" 5; }
+as_fn_error $? "no acceptable C compiler found in \$PATH
+See \`config.log' for more details" "$LINENO" 5; }
 
 # Provide some information about the compiler.
 $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5
@@ -2919,9 +2960,8 @@ sed 's/^/| /' conftest.$ac_ext >&5
 
 { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-{ as_fn_set_status 77
-as_fn_error "C compiler cannot create executables
-See \`config.log' for more details." "$LINENO" 5; }; }
+as_fn_error 77 "C compiler cannot create executables
+See \`config.log' for more details" "$LINENO" 5; }
 else
   { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
@@ -2963,8 +3003,8 @@ done
 else
   { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot compute suffix of executables: cannot compile and link
-See \`config.log' for more details." "$LINENO" 5; }
+as_fn_error $? "cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details" "$LINENO" 5; }
 fi
 rm -f conftest conftest$ac_cv_exeext
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5
@@ -3021,9 +3061,9 @@ $as_echo "$ac_try_echo"; } >&5
     else
 	{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run C compiled programs.
+as_fn_error $? "cannot run C compiled programs.
 If you meant to cross compile, use \`--host'.
-See \`config.log' for more details." "$LINENO" 5; }
+See \`config.log' for more details" "$LINENO" 5; }
     fi
   fi
 fi
@@ -3034,7 +3074,7 @@ rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out
 ac_clean_files=$ac_clean_files_save
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5
 $as_echo_n "checking for suffix of object files... " >&6; }
-if test "${ac_cv_objext+set}" = set; then :
+if ${ac_cv_objext+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -3074,8 +3114,8 @@ sed 's/^/| /' conftest.$ac_ext >&5
 
 { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot compute suffix of object files: cannot compile
-See \`config.log' for more details." "$LINENO" 5; }
+as_fn_error $? "cannot compute suffix of object files: cannot compile
+See \`config.log' for more details" "$LINENO" 5; }
 fi
 rm -f conftest.$ac_cv_objext conftest.$ac_ext
 fi
@@ -3085,7 +3125,7 @@ OBJEXT=$ac_cv_objext
 ac_objext=$OBJEXT
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5
 $as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
-if test "${ac_cv_c_compiler_gnu+set}" = set; then :
+if ${ac_cv_c_compiler_gnu+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -3122,7 +3162,7 @@ ac_test_CFLAGS=${CFLAGS+set}
 ac_save_CFLAGS=$CFLAGS
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5
 $as_echo_n "checking whether $CC accepts -g... " >&6; }
-if test "${ac_cv_prog_cc_g+set}" = set; then :
+if ${ac_cv_prog_cc_g+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   ac_save_c_werror_flag=$ac_c_werror_flag
@@ -3200,7 +3240,7 @@ else
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
 $as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
-if test "${ac_cv_prog_cc_c89+set}" = set; then :
+if ${ac_cv_prog_cc_c89+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   ac_cv_prog_cc_c89=no
@@ -3295,12 +3335,44 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
 ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
 ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
-if test "x$CFLAGS" = "x" ; then
-  no_CFLAGS="yes"
-  if test "x$GCC" = "xyes" ; then
+if test "x$GCC" != "xyes" ; then
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler is MSVC" >&5
+$as_echo_n "checking whether compiler is MSVC... " >&6; }
+if ${je_cv_msvc+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
 
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -std=gnu99" >&5
+int
+main ()
+{
+
+#ifndef _MSC_VER
+  int fail-1;
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  je_cv_msvc=yes
+else
+  je_cv_msvc=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_msvc" >&5
+$as_echo "$je_cv_msvc" >&6; }
+fi
+
+if test "x$CFLAGS" = "x" ; then
+  no_CFLAGS="yes"
+  if test "x$GCC" = "xyes" ; then
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -std=gnu99" >&5
 $as_echo_n "checking whether compiler supports -std=gnu99... " >&6; }
 TCFLAGS="${CFLAGS}"
 if test "x${CFLAGS}" = "x" ; then
@@ -3308,13 +3380,7 @@ if test "x${CFLAGS}" = "x" ; then
 else
   CFLAGS="${CFLAGS} -std=gnu99"
 fi
-if test "$cross_compiling" = yes; then :
-  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run test program while cross compiling
-See \`config.log' for more details." "$LINENO" 5; }
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
 
@@ -3328,7 +3394,7 @@ main ()
   return 0;
 }
 _ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
+if ac_fn_c_try_compile "$LINENO"; then :
   { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
 else
@@ -3337,10 +3403,7 @@ $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Wall" >&5
@@ -3351,13 +3414,7 @@ if test "x${CFLAGS}" = "x" ; then
 else
   CFLAGS="${CFLAGS} -Wall"
 fi
-if test "$cross_compiling" = yes; then :
-  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run test program while cross compiling
-See \`config.log' for more details." "$LINENO" 5; }
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
 
@@ -3371,7 +3428,7 @@ main ()
   return 0;
 }
 _ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
+if ac_fn_c_try_compile "$LINENO"; then :
   { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
 else
@@ -3380,10 +3437,7 @@ $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -pipe" >&5
@@ -3394,13 +3448,7 @@ if test "x${CFLAGS}" = "x" ; then
 else
   CFLAGS="${CFLAGS} -pipe"
 fi
-if test "$cross_compiling" = yes; then :
-  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run test program while cross compiling
-See \`config.log' for more details." "$LINENO" 5; }
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
 
@@ -3414,7 +3462,7 @@ main ()
   return 0;
 }
 _ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
+if ac_fn_c_try_compile "$LINENO"; then :
   { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
 else
@@ -3423,10 +3471,7 @@ $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -g3" >&5
@@ -3437,13 +3482,43 @@ if test "x${CFLAGS}" = "x" ; then
 else
   CFLAGS="${CFLAGS} -g3"
 fi
-if test "$cross_compiling" = yes; then :
-  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run test program while cross compiling
-See \`config.log' for more details." "$LINENO" 5; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
 else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+  elif test "x$je_cv_msvc" = "xyes" ; then
+    CC="$CC -nologo"
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Zi" >&5
+$as_echo_n "checking whether compiler supports -Zi... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-Zi"
+else
+  CFLAGS="${CFLAGS} -Zi"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
 
@@ -3457,7 +3532,7 @@ main ()
   return 0;
 }
 _ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
+if ac_fn_c_try_compile "$LINENO"; then :
   { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
 else
@@ -3466,11 +3541,77 @@ $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -MT" >&5
+$as_echo_n "checking whether compiler supports -MT... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-MT"
+else
+  CFLAGS="${CFLAGS} -MT"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -W3" >&5
+$as_echo_n "checking whether compiler supports -W3... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-W3"
+else
+  CFLAGS="${CFLAGS} -W3"
 fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
 
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
+    CPPFLAGS="$CPPFLAGS -I${srcroot}/include/msvc_compat"
   fi
 fi
 if test "x$EXTRA_CFLAGS" != "x" ; then
@@ -3483,13 +3624,7 @@ if test "x${CFLAGS}" = "x" ; then
 else
   CFLAGS="${CFLAGS} $EXTRA_CFLAGS"
 fi
-if test "$cross_compiling" = yes; then :
-  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run test program while cross compiling
-See \`config.log' for more details." "$LINENO" 5; }
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
 
@@ -3503,7 +3638,7 @@ main ()
   return 0;
 }
 _ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
+if ac_fn_c_try_compile "$LINENO"; then :
   { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
 else
@@ -3512,10 +3647,7 @@ $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
 fi
 ac_ext=c
@@ -3530,7 +3662,7 @@ if test -n "$CPP" && test -d "$CPP"; then
   CPP=
 fi
 if test -z "$CPP"; then
-  if test "${ac_cv_prog_CPP+set}" = set; then :
+  if ${ac_cv_prog_CPP+:} false; then :
   $as_echo_n "(cached) " >&6
 else
       # Double quotes because CPP needs to be expanded
@@ -3560,7 +3692,7 @@ else
   # Broken: fails on valid input.
 continue
 fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
 
   # OK, works on sane cases.  Now check whether nonexistent headers
   # can be detected and how.
@@ -3576,11 +3708,11 @@ else
 ac_preproc_ok=:
 break
 fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
 
 done
 # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.i conftest.err conftest.$ac_ext
 if $ac_preproc_ok; then :
   break
 fi
@@ -3619,7 +3751,7 @@ else
   # Broken: fails on valid input.
 continue
 fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
 
   # OK, works on sane cases.  Now check whether nonexistent headers
   # can be detected and how.
@@ -3635,18 +3767,18 @@ else
 ac_preproc_ok=:
 break
 fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
 
 done
 # Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.i conftest.err conftest.$ac_ext
 if $ac_preproc_ok; then :
 
 else
   { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "C preprocessor \"$CPP\" fails sanity check
-See \`config.log' for more details." "$LINENO" 5; }
+as_fn_error $? "C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details" "$LINENO" 5; }
 fi
 
 ac_ext=c
@@ -3659,7 +3791,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
 $as_echo_n "checking for grep that handles long lines and -e... " >&6; }
-if test "${ac_cv_path_GREP+set}" = set; then :
+if ${ac_cv_path_GREP+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if test -z "$GREP"; then
@@ -3708,7 +3840,7 @@ esac
   done
 IFS=$as_save_IFS
   if test -z "$ac_cv_path_GREP"; then
-    as_fn_error "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+    as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
   fi
 else
   ac_cv_path_GREP=$GREP
@@ -3722,7 +3854,7 @@ $as_echo "$ac_cv_path_GREP" >&6; }
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
 $as_echo_n "checking for egrep... " >&6; }
-if test "${ac_cv_path_EGREP+set}" = set; then :
+if ${ac_cv_path_EGREP+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
@@ -3774,7 +3906,7 @@ esac
   done
 IFS=$as_save_IFS
   if test -z "$ac_cv_path_EGREP"; then
-    as_fn_error "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+    as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
   fi
 else
   ac_cv_path_EGREP=$EGREP
@@ -3789,7 +3921,7 @@ $as_echo "$ac_cv_path_EGREP" >&6; }
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
 $as_echo_n "checking for ANSI C header files... " >&6; }
-if test "${ac_cv_header_stdc+set}" = set; then :
+if ${ac_cv_header_stdc+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -3906,8 +4038,7 @@ do :
   as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
 ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default
 "
-eval as_val=\$$as_ac_Header
-   if test "x$as_val" = x""yes; then :
+if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
   cat >>confdefs.h <<_ACEOF
 #define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
 _ACEOF
@@ -3923,7 +4054,7 @@ done
 # This bug is HP SR number 8606223364.
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking size of void *" >&5
 $as_echo_n "checking size of void *... " >&6; }
-if test "${ac_cv_sizeof_void_p+set}" = set; then :
+if ${ac_cv_sizeof_void_p+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (void *))" "ac_cv_sizeof_void_p"        "$ac_includes_default"; then :
@@ -3932,9 +4063,8 @@ else
   if test "$ac_cv_type_void_p" = yes; then
      { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-{ as_fn_set_status 77
-as_fn_error "cannot compute sizeof (void *)
-See \`config.log' for more details." "$LINENO" 5; }; }
+as_fn_error 77 "cannot compute sizeof (void *)
+See \`config.log' for more details" "$LINENO" 5; }
    else
      ac_cv_sizeof_void_p=0
    fi
@@ -3956,7 +4086,7 @@ if test "x${ac_cv_sizeof_void_p}" = "x8" ; then
 elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then
   LG_SIZEOF_PTR=2
 else
-  as_fn_error "Unsupported pointer size: ${ac_cv_sizeof_void_p}" "$LINENO" 5
+  as_fn_error $? "Unsupported pointer size: ${ac_cv_sizeof_void_p}" "$LINENO" 5
 fi
 cat >>confdefs.h <<_ACEOF
 #define LG_SIZEOF_PTR $LG_SIZEOF_PTR
@@ -3969,7 +4099,7 @@ _ACEOF
 # This bug is HP SR number 8606223364.
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking size of int" >&5
 $as_echo_n "checking size of int... " >&6; }
-if test "${ac_cv_sizeof_int+set}" = set; then :
+if ${ac_cv_sizeof_int+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (int))" "ac_cv_sizeof_int"        "$ac_includes_default"; then :
@@ -3978,9 +4108,8 @@ else
   if test "$ac_cv_type_int" = yes; then
      { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-{ as_fn_set_status 77
-as_fn_error "cannot compute sizeof (int)
-See \`config.log' for more details." "$LINENO" 5; }; }
+as_fn_error 77 "cannot compute sizeof (int)
+See \`config.log' for more details" "$LINENO" 5; }
    else
      ac_cv_sizeof_int=0
    fi
@@ -4002,7 +4131,7 @@ if test "x${ac_cv_sizeof_int}" = "x8" ; then
 elif test "x${ac_cv_sizeof_int}" = "x4" ; then
   LG_SIZEOF_INT=2
 else
-  as_fn_error "Unsupported int size: ${ac_cv_sizeof_int}" "$LINENO" 5
+  as_fn_error $? "Unsupported int size: ${ac_cv_sizeof_int}" "$LINENO" 5
 fi
 cat >>confdefs.h <<_ACEOF
 #define LG_SIZEOF_INT $LG_SIZEOF_INT
@@ -4015,7 +4144,7 @@ _ACEOF
 # This bug is HP SR number 8606223364.
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking size of long" >&5
 $as_echo_n "checking size of long... " >&6; }
-if test "${ac_cv_sizeof_long+set}" = set; then :
+if ${ac_cv_sizeof_long+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (long))" "ac_cv_sizeof_long"        "$ac_includes_default"; then :
@@ -4024,9 +4153,8 @@ else
   if test "$ac_cv_type_long" = yes; then
      { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-{ as_fn_set_status 77
-as_fn_error "cannot compute sizeof (long)
-See \`config.log' for more details." "$LINENO" 5; }; }
+as_fn_error 77 "cannot compute sizeof (long)
+See \`config.log' for more details" "$LINENO" 5; }
    else
      ac_cv_sizeof_long=0
    fi
@@ -4048,25 +4176,78 @@ if test "x${ac_cv_sizeof_long}" = "x8" ; then
 elif test "x${ac_cv_sizeof_long}" = "x4" ; then
   LG_SIZEOF_LONG=2
 else
-  as_fn_error "Unsupported long size: ${ac_cv_sizeof_long}" "$LINENO" 5
+  as_fn_error $? "Unsupported long size: ${ac_cv_sizeof_long}" "$LINENO" 5
 fi
 cat >>confdefs.h <<_ACEOF
 #define LG_SIZEOF_LONG $LG_SIZEOF_LONG
 _ACEOF
 
 
+# The cast to long int works around a bug in the HP C Compiler
+# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
+# This bug is HP SR number 8606223364.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of intmax_t" >&5
+$as_echo_n "checking size of intmax_t... " >&6; }
+if ${ac_cv_sizeof_intmax_t+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (intmax_t))" "ac_cv_sizeof_intmax_t"        "$ac_includes_default"; then :
+
+else
+  if test "$ac_cv_type_intmax_t" = yes; then
+     { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "cannot compute sizeof (intmax_t)
+See \`config.log' for more details" "$LINENO" 5; }
+   else
+     ac_cv_sizeof_intmax_t=0
+   fi
+fi
+
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_intmax_t" >&5
+$as_echo "$ac_cv_sizeof_intmax_t" >&6; }
+
+
+
+cat >>confdefs.h <<_ACEOF
+#define SIZEOF_INTMAX_T $ac_cv_sizeof_intmax_t
+_ACEOF
+
+
+if test "x${ac_cv_sizeof_intmax_t}" = "x16" ; then
+  LG_SIZEOF_INTMAX_T=4
+elif test "x${ac_cv_sizeof_intmax_t}" = "x8" ; then
+  LG_SIZEOF_INTMAX_T=3
+elif test "x${ac_cv_sizeof_intmax_t}" = "x4" ; then
+  LG_SIZEOF_INTMAX_T=2
+else
+  as_fn_error $? "Unsupported intmax_t size: ${ac_cv_sizeof_intmax_t}" "$LINENO" 5
+fi
+cat >>confdefs.h <<_ACEOF
+#define LG_SIZEOF_INTMAX_T $LG_SIZEOF_INTMAX_T
+_ACEOF
+
+
 ac_aux_dir=
 for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do
-  for ac_t in install-sh install.sh shtool; do
-    if test -f "$ac_dir/$ac_t"; then
-      ac_aux_dir=$ac_dir
-      ac_install_sh="$ac_aux_dir/$ac_t -c"
-      break 2
-    fi
-  done
+  if test -f "$ac_dir/install-sh"; then
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/install-sh -c"
+    break
+  elif test -f "$ac_dir/install.sh"; then
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/install.sh -c"
+    break
+  elif test -f "$ac_dir/shtool"; then
+    ac_aux_dir=$ac_dir
+    ac_install_sh="$ac_aux_dir/shtool install -c"
+    break
+  fi
 done
 if test -z "$ac_aux_dir"; then
-  as_fn_error "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5
+  as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5
 fi
 
 # These three variables are undocumented and unsupported,
@@ -4080,27 +4261,27 @@ ac_configure="$SHELL $ac_aux_dir/configure"  # Please don't use this var.
 
 # Make sure we can run config.sub.
 $SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 ||
-  as_fn_error "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5
+  as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5
 $as_echo_n "checking build system type... " >&6; }
-if test "${ac_cv_build+set}" = set; then :
+if ${ac_cv_build+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   ac_build_alias=$build_alias
 test "x$ac_build_alias" = x &&
   ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"`
 test "x$ac_build_alias" = x &&
-  as_fn_error "cannot guess build type; you must specify one" "$LINENO" 5
+  as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5
 ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` ||
-  as_fn_error "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5
+  as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5
 
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5
 $as_echo "$ac_cv_build" >&6; }
 case $ac_cv_build in
 *-*-*) ;;
-*) as_fn_error "invalid value of canonical build" "$LINENO" 5;;
+*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;;
 esac
 build=$ac_cv_build
 ac_save_IFS=$IFS; IFS='-'
@@ -4118,14 +4299,14 @@ case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5
 $as_echo_n "checking host system type... " >&6; }
-if test "${ac_cv_host+set}" = set; then :
+if ${ac_cv_host+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if test "x$host_alias" = x; then
   ac_cv_host=$ac_cv_build
 else
   ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` ||
-    as_fn_error "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5
+    as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5
 fi
 
 fi
@@ -4133,7 +4314,7 @@ fi
 $as_echo "$ac_cv_host" >&6; }
 case $ac_cv_host in
 *-*-*) ;;
-*) as_fn_error "invalid value of canonical host" "$LINENO" 5;;
+*) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;;
 esac
 host=$ac_cv_host
 ac_save_IFS=$IFS; IFS='-'
@@ -4157,11 +4338,8 @@ case "${host_cpu}" in
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __asm__ is compilable" >&5
 $as_echo_n "checking whether __asm__ is compilable... " >&6; }
-if test "$cross_compiling" = yes; then :
-  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run test program while cross compiling
-See \`config.log' for more details." "$LINENO" 5; }
+if ${je_cv_asm+:} false; then :
+  $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
@@ -4174,22 +4352,18 @@ __asm__ volatile("pause"); return 0;
   return 0;
 }
 _ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-              asm="yes"
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_asm=yes
 else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-              asm="no"
-
+  je_cv_asm=no
 fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
 fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_asm" >&5
+$as_echo "$je_cv_asm" >&6; }
 
-
-	if test "x${asm}" = "xyes" ; then
+	if test "x${je_cv_asm}" = "xyes" ; then
 	    CPU_SPINWAIT='__asm__ volatile("pause")'
 	fi
 	;;
@@ -4197,11 +4371,8 @@ fi
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __asm__ syntax is compilable" >&5
 $as_echo_n "checking whether __asm__ syntax is compilable... " >&6; }
-if test "$cross_compiling" = yes; then :
-  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run test program while cross compiling
-See \`config.log' for more details." "$LINENO" 5; }
+if ${je_cv_asm+:} false; then :
+  $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
@@ -4214,22 +4385,18 @@ __asm__ volatile("pause"); return 0;
   return 0;
 }
 _ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-              asm="yes"
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_asm=yes
 else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-              asm="no"
-
+  je_cv_asm=no
 fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
 fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_asm" >&5
+$as_echo "$je_cv_asm" >&6; }
 
-
-	if test "x${asm}" = "xyes" ; then
+	if test "x${je_cv_asm}" = "xyes" ; then
 	    CPU_SPINWAIT='__asm__ volatile("pause")'
 	fi
 	;;
@@ -4241,28 +4408,54 @@ cat >>confdefs.h <<_ACEOF
 _ACEOF
 
 
+LD_PRELOAD_VAR="LD_PRELOAD"
+so="so"
+importlib="${so}"
+o="$ac_objext"
+a="a"
+exe="$ac_exeext"
+libprefix="lib"
+DSO_LDFLAGS='-shared -Wl,-soname,$(@F)'
+RPATH='-Wl,-rpath,$(1)'
+SOREV="${so}.${rev}"
+PIC_CFLAGS='-fPIC -DPIC'
+CTARGET='-o $@'
+LDTARGET='-o $@'
+EXTRA_LDFLAGS=
+MKLIB='ar crus $@'
+CC_MM=1
+
+default_munmap="1"
 case "${host}" in
   *-*-darwin*)
-	CFLAGS="$CFLAGS -fno-common -no-cpp-precomp"
+	CFLAGS="$CFLAGS"
 	abi="macho"
-	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE 1" >>confdefs.h
+	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE  " >>confdefs.h
 
 	RPATH=""
+	LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES"
+	so="dylib"
+	importlib="${so}"
+	force_tls="0"
+	DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)'
+	SOREV="${rev}.${so}"
 	;;
   *-*-freebsd*)
 	CFLAGS="$CFLAGS"
 	abi="elf"
-	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE 1" >>confdefs.h
+	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE  " >>confdefs.h
 
-	RPATH="-Wl,-rpath,"
+	force_lazy_lock="1"
 	;;
   *-*-linux*)
 	CFLAGS="$CFLAGS"
 	CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
 	abi="elf"
-	$as_echo "#define JEMALLOC_PURGE_MADVISE_DONTNEED 1" >>confdefs.h
+	$as_echo "#define JEMALLOC_PURGE_MADVISE_DONTNEED  " >>confdefs.h
+
+	$as_echo "#define JEMALLOC_THREADED_INIT  " >>confdefs.h
 
-	RPATH="-Wl,-rpath,"
+	default_munmap="0"
 	;;
   *-*-netbsd*)
 	{ $as_echo "$as_me:${as_lineno-$LINENO}: checking ABI" >&5
@@ -4291,35 +4484,79 @@ fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $abi" >&5
 $as_echo "$abi" >&6; }
-	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE 1" >>confdefs.h
+	$as_echo "#define JEMALLOC_PURGE_MADVISE_FREE  " >>confdefs.h
 
-	RPATH="-Wl,-rpath,"
 	;;
   *-*-solaris2*)
 	CFLAGS="$CFLAGS"
 	abi="elf"
-	RPATH="-Wl,-R,"
+	RPATH='-Wl,-R,$(1)'
 		CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS"
 	LIBS="$LIBS -lposix4 -lsocket -lnsl"
 	;;
+  *-ibm-aix*)
+	if test "x${LG_SIZEOF_PTR}" = "x3"; then # 64-bit AIX; NOTE(review): LG_SIZEOF_PTR is taken to be log2 of the pointer size (3 means 8 bytes) - confirm against its definition earlier in configure
+	  	  LD_PRELOAD_VAR="LDR_PRELOAD64"
+	else
+	  	  LD_PRELOAD_VAR="LDR_PRELOAD"
+	fi
+	abi="xcoff"
+	;;
+  *-*-mingw*)
+	abi="pecoff"
+	force_tls="0"
+	RPATH=""
+	so="dll"
+	if test "x$je_cv_msvc" = "xyes" ; then
+	  importlib="lib"
+	  DSO_LDFLAGS="-LD"
+	  EXTRA_LDFLAGS="-link -DEBUG"
+	  CTARGET='-Fo$@'
+	  LDTARGET='-Fe$@'
+	  MKLIB='lib -nologo -out:$@'
+	  CC_MM=
+        else
+	  importlib="${so}"
+	  DSO_LDFLAGS="-shared"
+	fi
+	a="lib"
+	libprefix=""
+	SOREV="${so}"
+	PIC_CFLAGS=""
+	;;
   *)
 	{ $as_echo "$as_me:${as_lineno-$LINENO}: result: Unsupported operating system: ${host}" >&5
 $as_echo "Unsupported operating system: ${host}" >&6; }
 	abi="elf"
-	RPATH="-Wl,-rpath,"
 	;;
 esac
 
 
 
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+if test "x$abi" != "xpecoff"; then
+    LIBS="$LIBS -lm"
+fi
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether __attribute__ syntax is compilable" >&5
 $as_echo_n "checking whether __attribute__ syntax is compilable... " >&6; }
-if test "$cross_compiling" = yes; then :
-  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run test program while cross compiling
-See \`config.log' for more details." "$LINENO" 5; }
+if ${je_cv_attribute+:} false; then :
+  $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
@@ -4332,22 +4569,18 @@ main ()
   return 0;
 }
 _ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-              attribute="yes"
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_attribute=yes
 else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-              attribute="no"
-
+  je_cv_attribute=no
 fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
 fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_attribute" >&5
+$as_echo "$je_cv_attribute" >&6; }
 
-
-if test "x${attribute}" = "xyes" ; then
+if test "x${je_cv_attribute}" = "xyes" ; then
   $as_echo "#define JEMALLOC_HAVE_ATTR  " >>confdefs.h
 
   if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then
@@ -4360,13 +4593,7 @@ if test "x${CFLAGS}" = "x" ; then
 else
   CFLAGS="${CFLAGS} -fvisibility=hidden"
 fi
-if test "$cross_compiling" = yes; then :
-  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run test program while cross compiling
-See \`config.log' for more details." "$LINENO" 5; }
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
 
@@ -4380,7 +4607,7 @@ main ()
   return 0;
 }
 _ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
+if ac_fn_c_try_compile "$LINENO"; then :
   { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
 else
@@ -4389,56 +4616,81 @@ $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
   fi
 fi
+SAVED_CFLAGS="${CFLAGS}"
 
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether mremap(...MREMAP_FIXED...) is compilable" >&5
-$as_echo_n "checking whether mremap(...MREMAP_FIXED...) is compilable... " >&6; }
-if test "$cross_compiling" = yes; then :
-  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run test program while cross compiling
-See \`config.log' for more details." "$LINENO" 5; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -Werror" >&5
+$as_echo_n "checking whether compiler supports -Werror... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-Werror"
 else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+  CFLAGS="${CFLAGS} -Werror"
+fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
-#define _GNU_SOURCE
-#include <sys/mman.h>
 
 int
 main ()
 {
 
-void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0);
+    return 0;
 
   ;
   return 0;
 }
 _ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
+if ac_fn_c_try_compile "$LINENO"; then :
   { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
-              mremap_fixed="yes"
 else
   { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
 $as_echo "no" >&6; }
-              mremap_fixed="no"
+              CFLAGS="${TCFLAGS}"
 
 fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether tls_model attribute is compilable" >&5
+$as_echo_n "checking whether tls_model attribute is compilable... " >&6; }
+if ${je_cv_tls_model+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+static __thread int
+               __attribute__((tls_model("initial-exec"))) foo;
+               foo = 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_tls_model=yes
+else
+  je_cv_tls_model=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
 fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_tls_model" >&5
+$as_echo "$je_cv_tls_model" >&6; }
 
+CFLAGS="${SAVED_CFLAGS}"
+if test "x${je_cv_tls_model}" = "xyes" ; then
+  $as_echo "#define JEMALLOC_TLS_MODEL __attribute__((tls_model(\"initial-exec\")))" >>confdefs.h
 
-if test "x${mremap_fixed}" = "xyes" ; then
-  $as_echo "#define JEMALLOC_MREMAP_FIXED 1" >>confdefs.h
+else
+  $as_echo "#define JEMALLOC_TLS_MODEL  " >>confdefs.h
 
 fi
 
@@ -4489,7 +4741,7 @@ fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5
 $as_echo_n "checking for a BSD-compatible install... " >&6; }
 if test -z "$INSTALL"; then
-if test "${ac_cv_path_install+set}" = set; then :
+if ${ac_cv_path_install+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
@@ -4570,7 +4822,7 @@ if test -n "$ac_tool_prefix"; then
 set dummy ${ac_tool_prefix}ranlib; ac_word=$2
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
 $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_RANLIB+set}" = set; then :
+if ${ac_cv_prog_RANLIB+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if test -n "$RANLIB"; then
@@ -4610,7 +4862,7 @@ if test -z "$ac_cv_prog_RANLIB"; then
 set dummy ranlib; ac_word=$2
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
 $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_RANLIB+set}" = set; then :
+if ${ac_cv_prog_ac_ct_RANLIB+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   if test -n "$ac_ct_RANLIB"; then
@@ -4661,7 +4913,7 @@ fi
 set dummy ar; ac_word=$2
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
 $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_path_AR+set}" = set; then :
+if ${ac_cv_path_AR+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   case $AR in
@@ -4701,7 +4953,7 @@ fi
 set dummy ld; ac_word=$2
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
 $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_path_LD+set}" = set; then :
+if ${ac_cv_path_LD+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   case $LD in
@@ -4741,7 +4993,7 @@ fi
 set dummy autoconf; ac_word=$2
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
 $as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_path_AUTOCONF+set}" = set; then :
+if ${ac_cv_path_AUTOCONF+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   case $AUTOCONF in
@@ -4778,12 +5030,68 @@ fi
 
 
 
+public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib"
+
+ac_fn_c_check_func "$LINENO" "memalign" "ac_cv_func_memalign"
+if test "x$ac_cv_func_memalign" = xyes; then :
+  $as_echo "#define JEMALLOC_OVERRIDE_MEMALIGN  " >>confdefs.h
+
+	       public_syms="${public_syms} memalign"
+fi
+
+ac_fn_c_check_func "$LINENO" "valloc" "ac_cv_func_valloc"
+if test "x$ac_cv_func_valloc" = xyes; then :
+  $as_echo "#define JEMALLOC_OVERRIDE_VALLOC  " >>confdefs.h
+
+	       public_syms="${public_syms} valloc"
+fi
+
+
+# Check whether --enable-experimental was given.
+if test "${enable_experimental+set}" = set; then :
+  enableval=$enable_experimental; if test "x$enable_experimental" = "xno" ; then
+  enable_experimental="0"
+else
+  enable_experimental="1"
+fi
+
+else
+  enable_experimental="1"
+
+fi
+
+if test "x$enable_experimental" = "x1" ; then
+  $as_echo "#define JEMALLOC_EXPERIMENTAL  " >>confdefs.h
+
+  public_syms="${public_syms} allocm dallocm nallocm rallocm sallocm"
+fi
+
+
+
+# Check whether --with-mangling was given.
+if test "${with_mangling+set}" = set; then :
+  withval=$with_mangling; mangling_map="$with_mangling"
+else
+  mangling_map=""
+fi
+
+for nm in `echo ${mangling_map} |tr ',' ' '` ; do
+  k="`echo ${nm} |tr ':' ' ' |awk '{print $1}'`"
+  n="je_${k}"
+  m=`echo ${nm} |tr ':' ' ' |awk '{print $2}'`
+  cat >>confdefs.h <<_ACEOF
+#define ${n} ${m}
+_ACEOF
+
+    public_syms=`for sym in ${public_syms}; do echo "${sym}"; done |grep -v "^${k}\$" |tr '\n' ' '`
+done
+
 
 # Check whether --with-jemalloc_prefix was given.
 if test "${with_jemalloc_prefix+set}" = set; then :
   withval=$with_jemalloc_prefix; JEMALLOC_PREFIX="$with_jemalloc_prefix"
 else
-  if test "x$abi" != "xmacho" ; then
+  if test "x$abi" != "xmacho" -a "x$abi" != "xpecoff"; then
   JEMALLOC_PREFIX=""
 else
   JEMALLOC_PREFIX="je_"
@@ -4801,11 +5109,15 @@ _ACEOF
 #define JEMALLOC_CPREFIX "$JEMALLOC_CPREFIX"
 _ACEOF
 
+fi
+for stem in ${public_syms}; do
+  n="je_${stem}"
+  m="${JEMALLOC_PREFIX}${stem}"
   cat >>confdefs.h <<_ACEOF
-#define JEMALLOC_P(string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix) ${JEMALLOC_PREFIX}##string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix
+#define ${n} ${m}
 _ACEOF
 
-fi
+done
 
 
 # Check whether --with-private_namespace was given.
@@ -4869,8 +5181,10 @@ cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h"
 cfgoutputs_tup="${cfgoutputs_tup} test/jemalloc_test.h:test/jemalloc_test.h.in"
 
 cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in"
+cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/size_classes.sh"
 
 cfghdrs_out="include/jemalloc/jemalloc_defs${install_suffix}.h"
+cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/size_classes.h"
 
 cfghdrs_tup="include/jemalloc/jemalloc_defs${install_suffix}.h:include/jemalloc/jemalloc_defs.h.in"
 
@@ -4888,7 +5202,7 @@ else
 fi
 
 if test "x$enable_cc_silence" = "x1" ; then
-  $as_echo "#define JEMALLOC_CC_SILENCE 1" >>confdefs.h
+  $as_echo "#define JEMALLOC_CC_SILENCE  " >>confdefs.h
 
 fi
 
@@ -4927,13 +5241,7 @@ if test "x${CFLAGS}" = "x" ; then
 else
   CFLAGS="${CFLAGS} -O3"
 fi
-if test "$cross_compiling" = yes; then :
-  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run test program while cross compiling
-See \`config.log' for more details." "$LINENO" 5; }
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
 
@@ -4947,7 +5255,7 @@ main ()
   return 0;
 }
 _ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
+if ac_fn_c_try_compile "$LINENO"; then :
   { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
 else
@@ -4956,10 +5264,7 @@ $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -funroll-loops" >&5
@@ -4970,13 +5275,7 @@ if test "x${CFLAGS}" = "x" ; then
 else
   CFLAGS="${CFLAGS} -funroll-loops"
 fi
-if test "$cross_compiling" = yes; then :
-  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run test program while cross compiling
-See \`config.log' for more details." "$LINENO" 5; }
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
 
@@ -4990,7 +5289,7 @@ main ()
   return 0;
 }
 _ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
+if ac_fn_c_try_compile "$LINENO"; then :
   { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
 else
@@ -4999,11 +5298,43 @@ $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+    elif test "x$je_cv_msvc" = "xyes" ; then
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -O2" >&5
+$as_echo_n "checking whether compiler supports -O2... " >&6; }
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="-O2"
+else
+  CFLAGS="${CFLAGS} -O2"
 fi
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
 
 
+int
+main ()
+{
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              CFLAGS="${TCFLAGS}"
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
     else
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -O" >&5
@@ -5014,13 +5345,7 @@ if test "x${CFLAGS}" = "x" ; then
 else
   CFLAGS="${CFLAGS} -O"
 fi
-if test "$cross_compiling" = yes; then :
-  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run test program while cross compiling
-See \`config.log' for more details." "$LINENO" 5; }
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
 
@@ -5034,7 +5359,7 @@ main ()
   return 0;
 }
 _ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
+if ac_fn_c_try_compile "$LINENO"; then :
   { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
 else
@@ -5043,10 +5368,7 @@ $as_echo "no" >&6; }
               CFLAGS="${TCFLAGS}"
 
 fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
     fi
   fi
@@ -5061,7 +5383,7 @@ else
 fi
 
 else
-  enable_stats="0"
+  enable_stats="1"
 
 fi
 
@@ -5110,7 +5432,7 @@ if test "${with_static_libunwind+set}" = set; then :
   LUNWIND="-lunwind"
 else
   if test ! -f "$with_static_libunwind" ; then
-    as_fn_error "Static libunwind not found: $with_static_libunwind" "$LINENO" 5
+    as_fn_error $? "Static libunwind not found: $with_static_libunwind" "$LINENO" 5
   fi
   LUNWIND="$with_static_libunwind"
 fi
@@ -5123,7 +5445,7 @@ if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then
   for ac_header in libunwind.h
 do :
   ac_fn_c_check_header_mongrel "$LINENO" "libunwind.h" "ac_cv_header_libunwind_h" "$ac_includes_default"
-if test "x$ac_cv_header_libunwind_h" = x""yes; then :
+if test "x$ac_cv_header_libunwind_h" = xyes; then :
   cat >>confdefs.h <<_ACEOF
 #define HAVE_LIBUNWIND_H 1
 _ACEOF
@@ -5137,7 +5459,7 @@ done
   if test "x$LUNWIND" = "x-lunwind" ; then
     { $as_echo "$as_me:${as_lineno-$LINENO}: checking for backtrace in -lunwind" >&5
 $as_echo_n "checking for backtrace in -lunwind... " >&6; }
-if test "${ac_cv_lib_unwind_backtrace+set}" = set; then :
+if ${ac_cv_lib_unwind_backtrace+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   ac_check_lib_save_LIBS=$LIBS
@@ -5171,7 +5493,7 @@ LIBS=$ac_check_lib_save_LIBS
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_unwind_backtrace" >&5
 $as_echo "$ac_cv_lib_unwind_backtrace" >&6; }
-if test "x$ac_cv_lib_unwind_backtrace" = x""yes; then :
+if test "x$ac_cv_lib_unwind_backtrace" = xyes; then :
   LIBS="$LIBS $LUNWIND"
 else
   enable_prof_libunwind="0"
@@ -5205,7 +5527,7 @@ if test "x$backtrace_method" = "x" -a "x$enable_prof_libgcc" = "x1" \
   for ac_header in unwind.h
 do :
   ac_fn_c_check_header_mongrel "$LINENO" "unwind.h" "ac_cv_header_unwind_h" "$ac_includes_default"
-if test "x$ac_cv_header_unwind_h" = x""yes; then :
+if test "x$ac_cv_header_unwind_h" = xyes; then :
   cat >>confdefs.h <<_ACEOF
 #define HAVE_UNWIND_H 1
 _ACEOF
@@ -5218,7 +5540,7 @@ done
 
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _Unwind_Backtrace in -lgcc" >&5
 $as_echo_n "checking for _Unwind_Backtrace in -lgcc... " >&6; }
-if test "${ac_cv_lib_gcc__Unwind_Backtrace+set}" = set; then :
+if ${ac_cv_lib_gcc__Unwind_Backtrace+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   ac_check_lib_save_LIBS=$LIBS
@@ -5252,7 +5574,7 @@ LIBS=$ac_check_lib_save_LIBS
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_gcc__Unwind_Backtrace" >&5
 $as_echo "$ac_cv_lib_gcc__Unwind_Backtrace" >&6; }
-if test "x$ac_cv_lib_gcc__Unwind_Backtrace" = x""yes; then :
+if test "x$ac_cv_lib_gcc__Unwind_Backtrace" = xyes; then :
   LIBS="$LIBS -lgcc"
 else
   enable_prof_libgcc="0"
@@ -5316,65 +5638,106 @@ $as_echo_n "checking configured backtracing method... " >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $backtrace_method" >&5
 $as_echo "$backtrace_method" >&6; }
 if test "x$enable_prof" = "x1" ; then
-  LIBS="$LIBS -lm"
+  if test "x${force_tls}" = "x0" ; then
+    as_fn_error $? "Heap profiling requires TLS" "$LINENO" 5;
+  fi
+  force_tls="1"
   $as_echo "#define JEMALLOC_PROF  " >>confdefs.h
 
 fi
 
 
-# Check whether --enable-tiny was given.
-if test "${enable_tiny+set}" = set; then :
-  enableval=$enable_tiny; if test "x$enable_tiny" = "xno" ; then
-  enable_tiny="0"
+# Check whether --enable-tcache was given.
+if test "${enable_tcache+set}" = set; then :
+  enableval=$enable_tcache; if test "x$enable_tcache" = "xno" ; then
+  enable_tcache="0"
 else
-  enable_tiny="1"
+  enable_tcache="1"
 fi
 
 else
-  enable_tiny="1"
+  enable_tcache="1"
 
 fi
 
-if test "x$enable_tiny" = "x1" ; then
-  $as_echo "#define JEMALLOC_TINY  " >>confdefs.h
+if test "x$enable_tcache" = "x1" ; then
+  $as_echo "#define JEMALLOC_TCACHE  " >>confdefs.h
 
 fi
 
 
-# Check whether --enable-tcache was given.
-if test "${enable_tcache+set}" = set; then :
-  enableval=$enable_tcache; if test "x$enable_tcache" = "xno" ; then
-  enable_tcache="0"
+# Check whether --enable-mremap was given.
+if test "${enable_mremap+set}" = set; then :
+  enableval=$enable_mremap; if test "x$enable_mremap" = "xno" ; then
+  enable_mremap="0"
 else
-  enable_tcache="1"
+  enable_mremap="1"
 fi
 
 else
-  enable_tcache="1"
+  enable_mremap="0"
 
 fi
 
-if test "x$enable_tcache" = "x1" ; then
-  $as_echo "#define JEMALLOC_TCACHE  " >>confdefs.h
+if test "x$enable_mremap" = "x1" ; then
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether mremap(...MREMAP_FIXED...) is compilable" >&5
+$as_echo_n "checking whether mremap(...MREMAP_FIXED...) is compilable... " >&6; }
+if ${je_cv_mremap_fixed+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#define _GNU_SOURCE
+#include <sys/mman.h>
+
+int
+main ()
+{
+
+void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0);
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_mremap_fixed=yes
+else
+  je_cv_mremap_fixed=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_mremap_fixed" >&5
+$as_echo "$je_cv_mremap_fixed" >&6; }
+
+  if test "x${je_cv_mremap_fixed}" = "xno" ; then
+    enable_mremap="0"
+  fi
+fi
+if test "x$enable_mremap" = "x1" ; then
+  $as_echo "#define JEMALLOC_MREMAP  " >>confdefs.h
 
 fi
 
 
-# Check whether --enable-swap was given.
-if test "${enable_swap+set}" = set; then :
-  enableval=$enable_swap; if test "x$enable_swap" = "xno" ; then
-  enable_swap="0"
+# Check whether --enable-munmap was given.
+if test "${enable_munmap+set}" = set; then :
+  enableval=$enable_munmap; if test "x$enable_munmap" = "xno" ; then
+  enable_munmap="0"
 else
-  enable_swap="1"
+  enable_munmap="1"
 fi
 
 else
-  enable_swap="0"
+  enable_munmap="${default_munmap}"
 
 fi
 
-if test "x$enable_swap" = "x1" ; then
-  $as_echo "#define JEMALLOC_SWAP  " >>confdefs.h
+if test "x$enable_munmap" = "x1" ; then
+  $as_echo "#define JEMALLOC_MUNMAP  " >>confdefs.h
 
 fi
 
@@ -5392,6 +5755,20 @@ else
 
 fi
 
+ac_fn_c_check_func "$LINENO" "sbrk" "ac_cv_func_sbrk"
+if test "x$ac_cv_func_sbrk" = xyes; then :
+  have_sbrk="1"
+else
+  have_sbrk="0"
+fi
+
+if test "x$have_sbrk" = "x1" ; then
+  $as_echo "#define JEMALLOC_HAVE_SBRK  " >>confdefs.h
+
+else
+  enable_dss="0"
+fi
+
 if test "x$enable_dss" = "x1" ; then
   $as_echo "#define JEMALLOC_DSS  " >>confdefs.h
 
@@ -5407,7 +5784,7 @@ else
 fi
 
 else
-  enable_fill="0"
+  enable_fill="1"
 
 fi
 
@@ -5417,76 +5794,163 @@ if test "x$enable_fill" = "x1" ; then
 fi
 
 
-# Check whether --enable-xmalloc was given.
-if test "${enable_xmalloc+set}" = set; then :
-  enableval=$enable_xmalloc; if test "x$enable_xmalloc" = "xno" ; then
-  enable_xmalloc="0"
+# Check whether --enable-utrace was given.
+if test "${enable_utrace+set}" = set; then :
+  enableval=$enable_utrace; if test "x$enable_utrace" = "xno" ; then
+  enable_utrace="0"
 else
-  enable_xmalloc="1"
+  enable_utrace="1"
 fi
 
 else
-  enable_xmalloc="0"
+  enable_utrace="0"
 
 fi
 
-if test "x$enable_xmalloc" = "x1" ; then
-  $as_echo "#define JEMALLOC_XMALLOC  " >>confdefs.h
 
-fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether utrace(2) is compilable" >&5
+$as_echo_n "checking whether utrace(2) is compilable... " >&6; }
+if ${je_cv_utrace+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
 
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/uio.h>
+#include <sys/ktrace.h>
 
-# Check whether --enable-sysv was given.
-if test "${enable_sysv+set}" = set; then :
-  enableval=$enable_sysv; if test "x$enable_sysv" = "xno" ; then
-  enable_sysv="0"
-else
-  enable_sysv="1"
-fi
+int
+main ()
+{
 
-else
-  enable_sysv="0"
+	utrace((void *)0, 0);
 
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_utrace=yes
+else
+  je_cv_utrace=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
 fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_utrace" >&5
+$as_echo "$je_cv_utrace" >&6; }
 
-if test "x$enable_sysv" = "x1" ; then
-  $as_echo "#define JEMALLOC_SYSV  " >>confdefs.h
+if test "x${je_cv_utrace}" = "xno" ; then
+  enable_utrace="0"
+fi
+if test "x$enable_utrace" = "x1" ; then
+  $as_echo "#define JEMALLOC_UTRACE  " >>confdefs.h
 
 fi
 
 
-# Check whether --enable-dynamic_page_shift was given.
-if test "${enable_dynamic_page_shift+set}" = set; then :
-  enableval=$enable_dynamic_page_shift; if test "x$enable_dynamic_page_shift" = "xno" ; then
-  enable_dynamic_page_shift="0"
+# Check whether --enable-valgrind was given.
+if test "${enable_valgrind+set}" = set; then :
+  enableval=$enable_valgrind; if test "x$enable_valgrind" = "xno" ; then
+  enable_valgrind="0"
 else
-  enable_dynamic_page_shift="1"
+  enable_valgrind="1"
 fi
 
 else
-  enable_dynamic_page_shift="0"
+  enable_valgrind="1"
 
 fi
 
-if test "x$enable_dynamic_page_shift" = "x1" ; then
-  $as_echo "#define DYNAMIC_PAGE_SHIFT  " >>confdefs.h
+if test "x$enable_valgrind" = "x1" ; then
 
-fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether valgrind is compilable" >&5
+$as_echo_n "checking whether valgrind is compilable... " >&6; }
+if ${je_cv_valgrind+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
 
+#include <valgrind/valgrind.h>
+#include <valgrind/memcheck.h>
 
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking STATIC_PAGE_SHIFT" >&5
+#if !defined(VALGRIND_RESIZEINPLACE_BLOCK)
+#  error "Incompatible Valgrind version"
+#endif
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_valgrind=yes
+else
+  je_cv_valgrind=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_valgrind" >&5
+$as_echo "$je_cv_valgrind" >&6; }
+
+  if test "x${je_cv_valgrind}" = "xno" ; then
+    enable_valgrind="0"
+  fi
+  if test "x$enable_valgrind" = "x1" ; then
+    $as_echo "#define JEMALLOC_VALGRIND  " >>confdefs.h
+
+  fi
+fi
+
+
+# Check whether --enable-xmalloc was given.
+if test "${enable_xmalloc+set}" = set; then :
+  enableval=$enable_xmalloc; if test "x$enable_xmalloc" = "xno" ; then
+  enable_xmalloc="0"
+else
+  enable_xmalloc="1"
+fi
+
+else
+  enable_xmalloc="0"
+
+fi
+
+if test "x$enable_xmalloc" = "x1" ; then
+  $as_echo "#define JEMALLOC_XMALLOC  " >>confdefs.h
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking STATIC_PAGE_SHIFT" >&5
 $as_echo_n "checking STATIC_PAGE_SHIFT... " >&6; }
-if test "$cross_compiling" = yes; then :
+if ${je_cv_static_page_shift+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  if test "$cross_compiling" = yes; then :
   { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run test program while cross compiling
-See \`config.log' for more details." "$LINENO" 5; }
+as_fn_error $? "cannot run test program while cross compiling
+See \`config.log' for more details" "$LINENO" 5; }
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
-#include <stdio.h>
-#include <unistd.h>
+
 #include <strings.h>
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <unistd.h>
+#endif
+#include <stdio.h>
 
 int
 main ()
@@ -5495,16 +5959,24 @@ main ()
     long result;
     FILE *f;
 
+#ifdef _WIN32
+    SYSTEM_INFO si;
+    GetSystemInfo(&si);
+    result = si.dwPageSize;
+#else
     result = sysconf(_SC_PAGESIZE);
+#endif
     if (result == -1) {
 	return 1;
     }
+    result = ffsl(result) - 1;
+
     f = fopen("conftest.out", "w");
     if (f == NULL) {
 	return 1;
     }
-    fprintf(f, "%u\n", ffs((int)result) - 1);
-    close(f);
+    fprintf(f, "%ld\n", result); /* result is a long, so the conversion must be %ld */
+    fclose(f);
 
     return 0;
 
@@ -5513,21 +5985,26 @@ main ()
 }
 _ACEOF
 if ac_fn_c_try_run "$LINENO"; then :
-  STATIC_PAGE_SHIFT=`cat conftest.out`
-              { $as_echo "$as_me:${as_lineno-$LINENO}: result: $STATIC_PAGE_SHIFT" >&5
-$as_echo "$STATIC_PAGE_SHIFT" >&6; }
-              cat >>confdefs.h <<_ACEOF
-#define STATIC_PAGE_SHIFT $STATIC_PAGE_SHIFT
-_ACEOF
-
+  je_cv_static_page_shift=`cat conftest.out`
 else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: error" >&5
-$as_echo "error" >&6; }
+  je_cv_static_page_shift=undefined
 fi
 rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
   conftest.$ac_objext conftest.beam conftest.$ac_ext
 fi
 
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_static_page_shift" >&5
+$as_echo "$je_cv_static_page_shift" >&6; }
+
+if test "x$je_cv_static_page_shift" != "xundefined"; then
+   cat >>confdefs.h <<_ACEOF
+#define STATIC_PAGE_SHIFT $je_cv_static_page_shift
+_ACEOF
+
+else
+   as_fn_error $? "cannot determine value for STATIC_PAGE_SHIFT" "$LINENO" 5
+fi
 
 
 if test -d "${srcroot}.git" ; then
@@ -5547,23 +6024,24 @@ jemalloc_version_gid=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print $5}'
 
 
 
-for ac_header in pthread.h
+if test "x$abi" != "xpecoff" ; then
+  for ac_header in pthread.h
 do :
   ac_fn_c_check_header_mongrel "$LINENO" "pthread.h" "ac_cv_header_pthread_h" "$ac_includes_default"
-if test "x$ac_cv_header_pthread_h" = x""yes; then :
+if test "x$ac_cv_header_pthread_h" = xyes; then :
   cat >>confdefs.h <<_ACEOF
 #define HAVE_PTHREAD_H 1
 _ACEOF
 
 else
-  as_fn_error "pthread.h is missing" "$LINENO" 5
+  as_fn_error $? "pthread.h is missing" "$LINENO" 5
 fi
 
 done
 
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5
+      { $as_echo "$as_me:${as_lineno-$LINENO}: checking for pthread_create in -lpthread" >&5
 $as_echo_n "checking for pthread_create in -lpthread... " >&6; }
-if test "${ac_cv_lib_pthread_pthread_create+set}" = set; then :
+if ${ac_cv_lib_pthread_pthread_create+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   ac_check_lib_save_LIBS=$LIBS
@@ -5597,15 +6075,100 @@ LIBS=$ac_check_lib_save_LIBS
 fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_pthread_pthread_create" >&5
 $as_echo "$ac_cv_lib_pthread_pthread_create" >&6; }
-if test "x$ac_cv_lib_pthread_pthread_create" = x""yes; then :
+if test "x$ac_cv_lib_pthread_pthread_create" = xyes; then :
   LIBS="$LIBS -lpthread"
 else
-  as_fn_error "libpthread is missing" "$LINENO" 5
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing pthread_create" >&5
+$as_echo_n "checking for library containing pthread_create... " >&6; }
+if ${ac_cv_search_pthread_create+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char pthread_create ();
+int
+main ()
+{
+return pthread_create ();
+  ;
+  return 0;
+}
+_ACEOF
+for ac_lib in '' ; do
+  if test -z "$ac_lib"; then
+    ac_res="none required"
+  else
+    ac_res=-l$ac_lib
+    LIBS="-l$ac_lib  $ac_func_search_save_LIBS"
+  fi
+  if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_search_pthread_create=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext
+  if ${ac_cv_search_pthread_create+:} false; then :
+  break
+fi
+done
+if ${ac_cv_search_pthread_create+:} false; then :
+
+else
+  ac_cv_search_pthread_create=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
 fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_pthread_create" >&5
+$as_echo "$ac_cv_search_pthread_create" >&6; }
+ac_res=$ac_cv_search_pthread_create
+if test "$ac_res" != no; then :
+  test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
 
+else
+  as_fn_error $? "libpthread is missing" "$LINENO" 5
+fi
+
+fi
+
+fi
 
 CPPFLAGS="$CPPFLAGS -D_REENTRANT"
 
+ac_fn_c_check_func "$LINENO" "_malloc_thread_cleanup" "ac_cv_func__malloc_thread_cleanup"
+if test "x$ac_cv_func__malloc_thread_cleanup" = xyes; then :
+  have__malloc_thread_cleanup="1"
+else
+  have__malloc_thread_cleanup="0"
+
+fi
+
+if test "x$have__malloc_thread_cleanup" = "x1" ; then
+  $as_echo "#define JEMALLOC_MALLOC_THREAD_CLEANUP  " >>confdefs.h
+
+  force_tls="1"
+fi
+
+ac_fn_c_check_func "$LINENO" "_pthread_mutex_init_calloc_cb" "ac_cv_func__pthread_mutex_init_calloc_cb"
+if test "x$ac_cv_func__pthread_mutex_init_calloc_cb" = xyes; then :
+  have__pthread_mutex_init_calloc_cb="1"
+else
+  have__pthread_mutex_init_calloc_cb="0"
+
+fi
+
+if test "x$have__pthread_mutex_init_calloc_cb" = "x1" ; then
+  $as_echo "#define JEMALLOC_MUTEX_INIT_CB 1" >>confdefs.h
+
+fi
+
 # Check whether --enable-lazy_lock was given.
 if test "${enable_lazy_lock+set}" = set; then :
   enableval=$enable_lazy_lock; if test "x$enable_lazy_lock" = "xno" ; then
@@ -5615,193 +6178,378 @@ else
 fi
 
 else
-  enable_lazy_lock="1"
+  enable_lazy_lock="0"
 
 fi
 
+if test "x$enable_lazy_lock" = "x0" -a "x${force_lazy_lock}" = "x1" ; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&5
+$as_echo "Forcing lazy-lock to avoid allocator/threading bootstrap issues" >&6; }
+  enable_lazy_lock="1"
+fi
 if test "x$enable_lazy_lock" = "x1" ; then
-  for ac_header in dlfcn.h
+  if test "x$abi" != "xpecoff" ; then
+    for ac_header in dlfcn.h
 do :
   ac_fn_c_check_header_mongrel "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default"
-if test "x$ac_cv_header_dlfcn_h" = x""yes; then :
+if test "x$ac_cv_header_dlfcn_h" = xyes; then :
   cat >>confdefs.h <<_ACEOF
 #define HAVE_DLFCN_H 1
 _ACEOF
 
-else
-  as_fn_error "dlfcn.h is missing" "$LINENO" 5
+else
+  as_fn_error $? "dlfcn.h is missing" "$LINENO" 5
+fi
+
+done
+
+    ac_fn_c_check_func "$LINENO" "dlsym" "ac_cv_func_dlsym"
+if test "x$ac_cv_func_dlsym" = xyes; then :
+
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlsym in -ldl" >&5
+$as_echo_n "checking for dlsym in -ldl... " >&6; }
+if ${ac_cv_lib_dl_dlsym+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldl  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dlsym ();
+int
+main ()
+{
+return dlsym ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_dl_dlsym=yes
+else
+  ac_cv_lib_dl_dlsym=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlsym" >&5
+$as_echo "$ac_cv_lib_dl_dlsym" >&6; }
+if test "x$ac_cv_lib_dl_dlsym" = xyes; then :
+  LIBS="$LIBS -ldl"
+else
+  as_fn_error $? "libdl is missing" "$LINENO" 5
+fi
+
+
+fi
+
+  fi
+  $as_echo "#define JEMALLOC_LAZY_LOCK  " >>confdefs.h
+
+fi
+
+
+# Check whether --enable-tls was given.
+if test "${enable_tls+set}" = set; then :
+  enableval=$enable_tls; if test "x$enable_tls" = "xno" ; then
+  enable_tls="0"
+else
+  enable_tls="1"
+fi
+
+else
+  enable_tls="1"
+
+fi
+
+if test "x${enable_tls}" = "x0" -a "x${force_tls}" = "x1" ; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing TLS to avoid allocator/threading bootstrap issues" >&5
+$as_echo "Forcing TLS to avoid allocator/threading bootstrap issues" >&6; }
+  enable_tls="1"
+fi
+if test "x${enable_tls}" = "x1" -a "x${force_tls}" = "x0" ; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: Forcing no TLS to avoid allocator/threading bootstrap issues" >&5
+$as_echo "Forcing no TLS to avoid allocator/threading bootstrap issues" >&6; }
+  enable_tls="0"
+fi
+if test "x${enable_tls}" = "x1" ; then
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for TLS" >&5
+$as_echo_n "checking for TLS... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+    __thread int x;
+
+int
+main ()
+{
+
+    x = 42;
+
+    return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+              enable_tls="0"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+if test "x${enable_tls}" = "x1" ; then
+  cat >>confdefs.h <<_ACEOF
+#define JEMALLOC_TLS
+_ACEOF
+
+elif test "x${force_tls}" = "x1" ; then
+  as_fn_error $? "Failed to configure TLS, which is mandatory for correct function" "$LINENO" 5
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program using ffsl is compilable" >&5
+$as_echo_n "checking whether a program using ffsl is compilable... " >&6; }
+if ${je_cv_function_ffsl+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include <strings.h>
+#include <string.h>
+
+int
+main ()
+{
+
+	{
+		int rv = ffsl(0x08);
+	}
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_function_ffsl=yes
+else
+  je_cv_function_ffsl=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_function_ffsl" >&5
+$as_echo "$je_cv_function_ffsl" >&6; }
+
+if test "x${je_cv_function_ffsl}" != "xyes" ; then
+   as_fn_error $? "Cannot build without ffsl(3)" "$LINENO" 5
+fi
+
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether atomic(9) is compilable" >&5
+$as_echo_n "checking whether atomic(9) is compilable... " >&6; }
+if ${je_cv_atomic9+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include <sys/types.h>
+#include <machine/atomic.h>
+#include <inttypes.h>
+
+int
+main ()
+{
+
+	{
+		uint32_t x32 = 0;
+		volatile uint32_t *x32p = &x32;
+		atomic_fetchadd_32(x32p, 1);
+	}
+	{
+		unsigned long xlong = 0;
+		volatile unsigned long *xlongp = &xlong;
+		atomic_fetchadd_long(xlongp, 1);
+	}
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_atomic9=yes
+else
+  je_cv_atomic9=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_atomic9" >&5
+$as_echo "$je_cv_atomic9" >&6; }
+
+if test "x${je_cv_atomic9}" = "xyes" ; then
+  $as_echo "#define JEMALLOC_ATOMIC9 1" >>confdefs.h
+
 fi
 
-done
 
-  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5
-$as_echo_n "checking for dlopen in -ldl... " >&6; }
-if test "${ac_cv_lib_dl_dlopen+set}" = set; then :
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Darwin OSAtomic*() is compilable" >&5
+$as_echo_n "checking whether Darwin OSAtomic*() is compilable... " >&6; }
+if ${je_cv_osatomic+:} false; then :
   $as_echo_n "(cached) " >&6
 else
-  ac_check_lib_save_LIBS=$LIBS
-LIBS="-ldl  $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
-/* Override any GCC internal prototype to avoid an error.
-   Use char because int might match the return type of a GCC
-   builtin and then its argument prototype would still apply.  */
-#ifdef __cplusplus
-extern "C"
-#endif
-char dlopen ();
+#include <libkern/OSAtomic.h>
+#include <inttypes.h>
+
 int
 main ()
 {
-return dlopen ();
+
+	{
+		int32_t x32 = 0;
+		volatile int32_t *x32p = &x32;
+		OSAtomicAdd32(1, x32p);
+	}
+	{
+		int64_t x64 = 0;
+		volatile int64_t *x64p = &x64;
+		OSAtomicAdd64(1, x64p);
+	}
+
   ;
   return 0;
 }
 _ACEOF
 if ac_fn_c_try_link "$LINENO"; then :
-  ac_cv_lib_dl_dlopen=yes
+  je_cv_osatomic=yes
 else
-  ac_cv_lib_dl_dlopen=no
+  je_cv_osatomic=no
 fi
 rm -f core conftest.err conftest.$ac_objext \
     conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5
-$as_echo "$ac_cv_lib_dl_dlopen" >&6; }
-if test "x$ac_cv_lib_dl_dlopen" = x""yes; then :
-  LIBS="$LIBS -ldl"
-else
-  as_fn_error "libdl is missing" "$LINENO" 5
 fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_osatomic" >&5
+$as_echo "$je_cv_osatomic" >&6; }
 
-  $as_echo "#define JEMALLOC_LAZY_LOCK  " >>confdefs.h
+if test "x${je_cv_osatomic}" = "xyes" ; then
+  $as_echo "#define JEMALLOC_OSATOMIC  " >>confdefs.h
 
 fi
 
 
-# Check whether --enable-tls was given.
-if test "${enable_tls+set}" = set; then :
-  enableval=$enable_tls; if test "x$enable_tls" = "xno" ; then
-  enable_tls="0"
-else
-  enable_tls="1"
-fi
 
-else
-  enable_tls="1"
 
-fi
+if test "x${je_cv_atomic9}" != "xyes" -a "x${je_cv_osatomic}" != "xyes" ; then
 
-if test "x${enable_tls}" = "x1" ; then
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for TLS" >&5
-$as_echo_n "checking for TLS... " >&6; }
-if test "$cross_compiling" = yes; then :
-  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run test program while cross compiling
-See \`config.log' for more details." "$LINENO" 5; }
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to force 32-bit __sync_{add,sub}_and_fetch()" >&5
+$as_echo_n "checking whether to force 32-bit __sync_{add,sub}_and_fetch()... " >&6; }
+if ${je_cv_sync_compare_and_swap_4+:} false; then :
+  $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
-    __thread int x;
+                                                 #include <stdint.h>
 
 int
 main ()
 {
 
-    x = 42;
-
-    return 0;
+                                                 #ifndef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4
+                                                 {
+                                                    uint32_t x32 = 0;
+                                                    __sync_add_and_fetch(&x32, 42);
+                                                    __sync_sub_and_fetch(&x32, 1);
+                                                 }
+                                                 #else
+                                                 #error __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 is defined, no need to force
+                                                 #endif
 
   ;
   return 0;
 }
 _ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_sync_compare_and_swap_4=yes
 else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-              enable_tls="0"
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-fi
-
-if test "x${enable_tls}" = "x0" ; then
-  cat >>confdefs.h <<_ACEOF
-#define NO_TLS
-_ACEOF
-
+  je_cv_sync_compare_and_swap_4=no
 fi
-
-
-ac_fn_c_check_func "$LINENO" "ffsl" "ac_cv_func_ffsl"
-if test "x$ac_cv_func_ffsl" = x""yes; then :
-
-else
-  as_fn_error "Cannot build without ffsl(3)" "$LINENO" 5
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
 fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_sync_compare_and_swap_4" >&5
+$as_echo "$je_cv_sync_compare_and_swap_4" >&6; }
 
+  if test "x${je_cv_sync_compare_and_swap_4}" = "xyes" ; then
+    $as_echo "#define JE_FORCE_SYNC_COMPARE_AND_SWAP_4  " >>confdefs.h
 
+  fi
 
 
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Darwin OSAtomic*() is compilable" >&5
-$as_echo_n "checking whether Darwin OSAtomic*() is compilable... " >&6; }
-if test "$cross_compiling" = yes; then :
-  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run test program while cross compiling
-See \`config.log' for more details." "$LINENO" 5; }
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to force 64-bit __sync_{add,sub}_and_fetch()" >&5
+$as_echo_n "checking whether to force 64-bit __sync_{add,sub}_and_fetch()... " >&6; }
+if ${je_cv_sync_compare_and_swap_8+:} false; then :
+  $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 
-#include <libkern/OSAtomic.h>
-#include <inttypes.h>
+                                                 #include <stdint.h>
 
 int
 main ()
 {
 
-	{
-		int32_t x32 = 0;
-		volatile int32_t *x32p = &x32;
-		OSAtomicAdd32(1, x32p);
-	}
-	{
-		int64_t x64 = 0;
-		volatile int64_t *x64p = &x64;
-		OSAtomicAdd64(1, x64p);
-	}
+                                                 #ifndef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
+                                                 {
+                                                    uint64_t x64 = 0;
+                                                    __sync_add_and_fetch(&x64, 42);
+                                                    __sync_sub_and_fetch(&x64, 1);
+                                                 }
+                                                 #else
+                                                 #error __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 is defined, no need to force
+                                                 #endif
 
   ;
   return 0;
 }
 _ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-              osatomic="yes"
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_sync_compare_and_swap_8=yes
 else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-              osatomic="no"
-
+  je_cv_sync_compare_and_swap_8=no
 fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
 fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_sync_compare_and_swap_8" >&5
+$as_echo "$je_cv_sync_compare_and_swap_8" >&6; }
 
+  if test "x${je_cv_sync_compare_and_swap_8}" = "xyes" ; then
+    $as_echo "#define JE_FORCE_SYNC_COMPARE_AND_SWAP_8  " >>confdefs.h
 
-if test "x${osatomic}" = "xyes" ; then
-  $as_echo "#define JEMALLOC_OSATOMIC 1" >>confdefs.h
+  fi
 
 fi
 
@@ -5809,11 +6557,8 @@ fi
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether Darwin OSSpin*() is compilable" >&5
 $as_echo_n "checking whether Darwin OSSpin*() is compilable... " >&6; }
-if test "$cross_compiling" = yes; then :
-  { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run test program while cross compiling
-See \`config.log' for more details." "$LINENO" 5; }
+if ${je_cv_osspin+:} false; then :
+  $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
@@ -5833,110 +6578,183 @@ main ()
   return 0;
 }
 _ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-              osspin="yes"
+if ac_fn_c_try_link "$LINENO"; then :
+  je_cv_osspin=yes
 else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-              osspin="no"
-
+  je_cv_osspin=no
 fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
-  conftest.$ac_objext conftest.beam conftest.$ac_ext
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
 fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $je_cv_osspin" >&5
+$as_echo "$je_cv_osspin" >&6; }
 
-
-if test "x${osspin}" = "xyes" ; then
-  $as_echo "#define JEMALLOC_OSSPIN 1" >>confdefs.h
+if test "x${je_cv_osspin}" = "xyes" ; then
+  $as_echo "#define JEMALLOC_OSSPIN  " >>confdefs.h
 
 fi
 
 
-ac_fn_c_check_func "$LINENO" "memalign" "ac_cv_func_memalign"
-if test "x$ac_cv_func_memalign" = x""yes; then :
-  $as_echo "#define JEMALLOC_OVERRIDE_MEMALIGN 1" >>confdefs.h
+if test "x${abi}" = "xmacho" ; then
+  $as_echo "#define JEMALLOC_IVSALLOC  " >>confdefs.h
 
-fi
+  $as_echo "#define JEMALLOC_ZONE  " >>confdefs.h
 
-ac_fn_c_check_func "$LINENO" "valloc" "ac_cv_func_valloc"
-if test "x$ac_cv_func_valloc" = x""yes; then :
-  $as_echo "#define JEMALLOC_OVERRIDE_VALLOC 1" >>confdefs.h
 
-fi
+        { $as_echo "$as_me:${as_lineno-$LINENO}: checking malloc zone version" >&5
+$as_echo_n "checking malloc zone version... " >&6; }
 
 
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <malloc/malloc.h>
+int
+main ()
+{
+static foo[sizeof(malloc_zone_t) == sizeof(void *) * 14 ? 1 : -1]
 
-if test "x${abi}" = "xmacho" ; then
-  $as_echo "#define JEMALLOC_IVSALLOC 1" >>confdefs.h
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  JEMALLOC_ZONE_VERSION=3
+else
 
-  $as_echo "#define JEMALLOC_ZONE 1" >>confdefs.h
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <malloc/malloc.h>
+int
+main ()
+{
+static foo[sizeof(malloc_zone_t) == sizeof(void *) * 15 ? 1 : -1]
 
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  JEMALLOC_ZONE_VERSION=5
+else
 
-        { $as_echo "$as_me:${as_lineno-$LINENO}: checking malloc zone version" >&5
-$as_echo_n "checking malloc zone version... " >&6; }
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
-#include <stdlib.h>
 #include <malloc/malloc.h>
 int
 main ()
 {
+static foo[sizeof(malloc_zone_t) == sizeof(void *) * 16 ? 1 : -1]
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
 
-	static malloc_zone_t zone;
-	static struct malloc_introspection_t zone_introspect;
-
-	zone.size = NULL;
-	zone.malloc = NULL;
-	zone.calloc = NULL;
-	zone.valloc = NULL;
-	zone.free = NULL;
-	zone.realloc = NULL;
-	zone.destroy = NULL;
-	zone.zone_name = "jemalloc_zone";
-	zone.batch_malloc = NULL;
-	zone.batch_free = NULL;
-	zone.introspect = &zone_introspect;
-	zone.version = 6;
-	zone.memalign = NULL;
-	zone.free_definite_size = NULL;
-
-	zone_introspect.enumerator = NULL;
-	zone_introspect.good_size = NULL;
-	zone_introspect.check = NULL;
-	zone_introspect.print = NULL;
-	zone_introspect.log = NULL;
-	zone_introspect.force_lock = NULL;
-	zone_introspect.force_unlock = NULL;
-	zone_introspect.statistics = NULL;
-	zone_introspect.zone_locked = NULL;
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <malloc/malloc.h>
+int
+main ()
+{
+static foo[sizeof(malloc_introspection_t) == sizeof(void *) * 9 ? 1 : -1]
 
   ;
   return 0;
 }
 _ACEOF
 if ac_fn_c_try_compile "$LINENO"; then :
-  cat >>confdefs.h <<_ACEOF
-#define JEMALLOC_ZONE_VERSION 6
+  JEMALLOC_ZONE_VERSION=6
+else
+
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <malloc/malloc.h>
+int
+main ()
+{
+static foo[sizeof(malloc_introspection_t) == sizeof(void *) * 13 ? 1 : -1]
+
+  ;
+  return 0;
+}
 _ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  JEMALLOC_ZONE_VERSION=7
+else
+  JEMALLOC_ZONE_VERSION=
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+else
+
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <malloc/malloc.h>
+int
+main ()
+{
+static foo[sizeof(malloc_zone_t) == sizeof(void *) * 17 ? 1 : -1]
 
-    { $as_echo "$as_me:${as_lineno-$LINENO}: result: 6" >&5
-$as_echo "6" >&6; }
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  JEMALLOC_ZONE_VERSION=8
 else
-  cat >>confdefs.h <<_ACEOF
-#define JEMALLOC_ZONE_VERSION 3
+
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <malloc/malloc.h>
+int
+main ()
+{
+static foo[sizeof(malloc_zone_t) > sizeof(void *) * 17 ? 1 : -1]
+
+  ;
+  return 0;
+}
 _ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  JEMALLOC_ZONE_VERSION=9
+else
+  JEMALLOC_ZONE_VERSION=
 
-   { $as_echo "$as_me:${as_lineno-$LINENO}: result: 3" >&5
-$as_echo "3" >&6; }
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  if test "x${JEMALLOC_ZONE_VERSION}" = "x"; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+$as_echo "unsupported" >&6; }
+    as_fn_error $? "Unsupported malloc zone version" "$LINENO" 5
+  fi
+  if test "${JEMALLOC_ZONE_VERSION}" = 9; then
+    JEMALLOC_ZONE_VERSION=8
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: > 8" >&5
+$as_echo "> 8" >&6; }
+  else
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $JEMALLOC_ZONE_VERSION" >&5
+$as_echo "$JEMALLOC_ZONE_VERSION" >&6; }
+  fi
+  cat >>confdefs.h <<_ACEOF
+#define JEMALLOC_ZONE_VERSION $JEMALLOC_ZONE_VERSION
+_ACEOF
+
 fi
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for stdbool.h that conforms to C99" >&5
 $as_echo_n "checking for stdbool.h that conforms to C99... " >&6; }
-if test "${ac_cv_header_stdbool_h+set}" = set; then :
+if ${ac_cv_header_stdbool_h+:} false; then :
   $as_echo_n "(cached) " >&6
 else
   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
@@ -5968,7 +6786,7 @@ else
 	char b[false == 0 ? 1 : -1];
 	char c[__bool_true_false_are_defined == 1 ? 1 : -1];
 	char d[(bool) 0.5 == true ? 1 : -1];
-	bool e = &s;
+	/* See body of main program for 'e'.  */
 	char f[(_Bool) 0.0 == false ? 1 : -1];
 	char g[true];
 	char h[sizeof (_Bool)];
@@ -5979,25 +6797,6 @@ else
 	_Bool n[m];
 	char o[sizeof n == m * sizeof n[0] ? 1 : -1];
 	char p[-1 - (_Bool) 0 < 0 && -1 - (bool) 0 < 0 ? 1 : -1];
-#	if defined __xlc__ || defined __GNUC__
-	 /* Catch a bug in IBM AIX xlc compiler version 6.0.0.0
-	    reported by James Lemley on 2005-10-05; see
-	    http://lists.gnu.org/archive/html/bug-coreutils/2005-10/msg00086.html
-	    This test is not quite right, since xlc is allowed to
-	    reject this program, as the initializer for xlcbug is
-	    not one of the forms that C requires support for.
-	    However, doing the test right would require a runtime
-	    test, and that would make cross-compilation harder.
-	    Let us hope that IBM fixes the xlc bug, and also adds
-	    support for this kind of constant expression.  In the
-	    meantime, this test will reject xlc, which is OK, since
-	    our stdbool.h substitute should suffice.  We also test
-	    this with GCC, where it should work, to detect more
-	    quickly whether someone messes up the test in the
-	    future.  */
-	 char digs[] = "0123456789";
-	 int xlcbug = 1 / (&(digs + 5)[-2 + (bool) 1] == &digs[4] ? 1 : -1);
-#	endif
 	/* Catch a bug in an HP-UX C compiler.  See
 	   http://gcc.gnu.org/ml/gcc-patches/2003-12/msg02303.html
 	   http://lists.gnu.org/archive/html/bug-coreutils/2005-11/msg00161.html
@@ -6009,6 +6808,7 @@ int
 main ()
 {
 
+	bool e = &s;
 	*pq |= q;
 	*pq |= ! q;
 	/* Refer to every declared value, to avoid compiler optimizations.  */
@@ -6029,7 +6829,7 @@ fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdbool_h" >&5
 $as_echo "$ac_cv_header_stdbool_h" >&6; }
 ac_fn_c_check_type "$LINENO" "_Bool" "ac_cv_type__Bool" "$ac_includes_default"
-if test "x$ac_cv_type__Bool" = x""yes; then :
+if test "x$ac_cv_type__Bool" = xyes; then :
 
 cat >>confdefs.h <<_ACEOF
 #define HAVE__BOOL 1
@@ -6045,12 +6845,15 @@ $as_echo "#define HAVE_STDBOOL_H 1" >>confdefs.h
 fi
 
 
+ac_config_commands="$ac_config_commands include/jemalloc/internal/size_classes.h"
+
+
 
 
 ac_config_headers="$ac_config_headers $cfghdrs_tup"
 
 
-ac_config_files="$ac_config_files $cfgoutputs_tup config.stamp"
+ac_config_files="$ac_config_files $cfgoutputs_tup config.stamp bin/jemalloc.sh"
 
 
 
@@ -6118,10 +6921,21 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;;
      :end' >>confcache
 if diff "$cache_file" confcache >/dev/null 2>&1; then :; else
   if test -w "$cache_file"; then
-    test "x$cache_file" != "x/dev/null" &&
+    if test "x$cache_file" != "x/dev/null"; then
       { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5
 $as_echo "$as_me: updating cache $cache_file" >&6;}
-    cat confcache >$cache_file
+      if test ! -f "$cache_file" || test -h "$cache_file"; then
+	cat confcache >"$cache_file"
+      else
+        case $cache_file in #(
+        */* | ?:*)
+	  mv -f confcache "$cache_file"$$ &&
+	  mv -f "$cache_file"$$ "$cache_file" ;; #(
+        *)
+	  mv -f confcache "$cache_file" ;;
+	esac
+      fi
+    fi
   else
     { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5
 $as_echo "$as_me: not updating unwritable cache $cache_file" >&6;}
@@ -6137,6 +6951,7 @@ DEFS=-DHAVE_CONFIG_H
 
 ac_libobjs=
 ac_ltlibobjs=
+U=
 for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
   # 1. Remove the extension, and $U if already installed.
   ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
@@ -6152,7 +6967,7 @@ LTLIBOBJS=$ac_ltlibobjs
 
 
 
-: ${CONFIG_STATUS=./config.status}
+: "${CONFIG_STATUS=./config.status}"
 ac_write_fail=0
 ac_clean_files_save=$ac_clean_files
 ac_clean_files="$ac_clean_files $CONFIG_STATUS"
@@ -6253,6 +7068,7 @@ fi
 IFS=" ""	$as_nl"
 
 # Find who we are.  Look in the path if we contain no directory separator.
+as_myself=
 case $0 in #((
   *[\\/]* ) as_myself=$0 ;;
   *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
@@ -6298,19 +7114,19 @@ export LANGUAGE
 (unset CDPATH) >/dev/null 2>&1 && unset CDPATH
 
 
-# as_fn_error ERROR [LINENO LOG_FD]
-# ---------------------------------
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
 # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
 # provided, also output the error to LOG_FD, referencing LINENO. Then exit the
-# script with status $?, using 1 if that was 0.
+# script with STATUS, using 1 if that was 0.
 as_fn_error ()
 {
-  as_status=$?; test $as_status -eq 0 && as_status=1
-  if test "$3"; then
-    as_lineno=${as_lineno-"$2"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
-    $as_echo "$as_me:${as_lineno-$LINENO}: error: $1" >&$3
+  as_status=$1; test $as_status -eq 0 && as_status=1
+  if test "$4"; then
+    as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+    $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
   fi
-  $as_echo "$as_me: error: $1" >&2
+  $as_echo "$as_me: error: $2" >&2
   as_fn_exit $as_status
 } # as_fn_error
 
@@ -6506,7 +7322,7 @@ $as_echo X"$as_dir" |
       test -d "$as_dir" && break
     done
     test -z "$as_dirs" || eval "mkdir $as_dirs"
-  } || test -d "$as_dir" || as_fn_error "cannot create directory $as_dir"
+  } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
 
 
 } # as_fn_mkdir_p
@@ -6560,7 +7376,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # values after options handling.
 ac_log="
 This file was extended by $as_me, which was
-generated by GNU Autoconf 2.65.  Invocation command line was
+generated by GNU Autoconf 2.68.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
   CONFIG_HEADERS  = $CONFIG_HEADERS
@@ -6586,6 +7402,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 # Files that config.status was made for.
 config_files="$ac_config_files"
 config_headers="$ac_config_headers"
+config_commands="$ac_config_commands"
 
 _ACEOF
 
@@ -6615,6 +7432,9 @@ $config_files
 Configuration headers:
 $config_headers
 
+Configuration commands:
+$config_commands
+
 Report bugs to the package provider."
 
 _ACEOF
@@ -6622,10 +7442,10 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
 config.status
-configured by $0, generated by GNU Autoconf 2.65,
+configured by $0, generated by GNU Autoconf 2.68,
   with options \\"\$ac_cs_config\\"
 
-Copyright (C) 2009 Free Software Foundation, Inc.
+Copyright (C) 2010 Free Software Foundation, Inc.
 This config.status script is free software; the Free Software Foundation
 gives unlimited permission to copy, distribute and modify it."
 
@@ -6641,11 +7461,16 @@ ac_need_defaults=:
 while test $# != 0
 do
   case $1 in
-  --*=*)
+  --*=?*)
     ac_option=`expr "X$1" : 'X\([^=]*\)='`
     ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'`
     ac_shift=:
     ;;
+  --*=)
+    ac_option=`expr "X$1" : 'X\([^=]*\)='`
+    ac_optarg=
+    ac_shift=:
+    ;;
   *)
     ac_option=$1
     ac_optarg=$2
@@ -6667,6 +7492,7 @@ do
     $ac_shift
     case $ac_optarg in
     *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    '') as_fn_error $? "missing file argument" ;;
     esac
     as_fn_append CONFIG_FILES " '$ac_optarg'"
     ac_need_defaults=false;;
@@ -6679,7 +7505,7 @@ do
     ac_need_defaults=false;;
   --he | --h)
     # Conflict between --help and --header
-    as_fn_error "ambiguous option: \`$1'
+    as_fn_error $? "ambiguous option: \`$1'
 Try \`$0 --help' for more information.";;
   --help | --hel | -h )
     $as_echo "$ac_cs_usage"; exit ;;
@@ -6688,7 +7514,7 @@ Try \`$0 --help' for more information.";;
     ac_cs_silent=: ;;
 
   # This is an error.
-  -*) as_fn_error "unrecognized option: \`$1'
+  -*) as_fn_error $? "unrecognized option: \`$1'
 Try \`$0 --help' for more information." ;;
 
   *) as_fn_append ac_config_targets " $1"
@@ -6737,11 +7563,13 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 for ac_config_target in $ac_config_targets
 do
   case $ac_config_target in
+    "include/jemalloc/internal/size_classes.h") CONFIG_COMMANDS="$CONFIG_COMMANDS include/jemalloc/internal/size_classes.h" ;;
     "$cfghdrs_tup") CONFIG_HEADERS="$CONFIG_HEADERS $cfghdrs_tup" ;;
     "$cfgoutputs_tup") CONFIG_FILES="$CONFIG_FILES $cfgoutputs_tup" ;;
     "config.stamp") CONFIG_FILES="$CONFIG_FILES config.stamp" ;;
+    "bin/jemalloc.sh") CONFIG_FILES="$CONFIG_FILES bin/jemalloc.sh" ;;
 
-  *) as_fn_error "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
+  *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
   esac
 done
 
@@ -6753,6 +7581,7 @@ done
 if $ac_need_defaults; then
   test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
   test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
+  test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands
 fi
 
 # Have a temporary directory for convenience.  Make it in the build tree
@@ -6763,9 +7592,10 @@ fi
 # after its creation but before its name has been assigned to `$tmp'.
 $debug ||
 {
-  tmp=
+  tmp= ac_tmp=
   trap 'exit_status=$?
-  { test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status
+  : "${ac_tmp:=$tmp}"
+  { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status
 ' 0
   trap 'as_fn_exit 1' 1 2 13 15
 }
@@ -6773,12 +7603,13 @@ $debug ||
 
 {
   tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` &&
-  test -n "$tmp" && test -d "$tmp"
+  test -d "$tmp"
 }  ||
 {
   tmp=./conf$$-$RANDOM
   (umask 077 && mkdir "$tmp")
-} || as_fn_error "cannot create a temporary directory in ." "$LINENO" 5
+} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5
+ac_tmp=$tmp
 
 # Set up the scripts for CONFIG_FILES section.
 # No need to generate them if there are no CONFIG_FILES.
@@ -6795,12 +7626,12 @@ if test "x$ac_cr" = x; then
 fi
 ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' </dev/null 2>/dev/null`
 if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then
-  ac_cs_awk_cr='\r'
+  ac_cs_awk_cr='\\r'
 else
   ac_cs_awk_cr=$ac_cr
 fi
 
-echo 'BEGIN {' >"$tmp/subs1.awk" &&
+echo 'BEGIN {' >"$ac_tmp/subs1.awk" &&
 _ACEOF
 
 
@@ -6809,18 +7640,18 @@ _ACEOF
   echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' &&
   echo "_ACEOF"
 } >conf$$subs.sh ||
-  as_fn_error "could not make $CONFIG_STATUS" "$LINENO" 5
-ac_delim_num=`echo "$ac_subst_vars" | grep -c '$'`
+  as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'`
 ac_delim='%!_!# '
 for ac_last_try in false false false false false :; do
   . ./conf$$subs.sh ||
-    as_fn_error "could not make $CONFIG_STATUS" "$LINENO" 5
+    as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
 
   ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X`
   if test $ac_delim_n = $ac_delim_num; then
     break
   elif $ac_last_try; then
-    as_fn_error "could not make $CONFIG_STATUS" "$LINENO" 5
+    as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
   else
     ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
   fi
@@ -6828,7 +7659,7 @@ done
 rm -f conf$$subs.sh
 
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
-cat >>"\$tmp/subs1.awk" <<\\_ACAWK &&
+cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK &&
 _ACEOF
 sed -n '
 h
@@ -6876,7 +7707,7 @@ t delim
 rm -f conf$$subs.awk
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 _ACAWK
-cat >>"\$tmp/subs1.awk" <<_ACAWK &&
+cat >>"\$ac_tmp/subs1.awk" <<_ACAWK &&
   for (key in S) S_is_set[key] = 1
   FS = ""
 
@@ -6908,21 +7739,29 @@ if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then
   sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g"
 else
   cat
-fi < "$tmp/subs1.awk" > "$tmp/subs.awk" \
-  || as_fn_error "could not setup config files machinery" "$LINENO" 5
+fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \
+  || as_fn_error $? "could not setup config files machinery" "$LINENO" 5
 _ACEOF
 
-# VPATH may cause trouble with some makes, so we remove $(srcdir),
-# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and
+# VPATH may cause trouble with some makes, so we remove sole $(srcdir),
+# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and
 # trailing colons and then remove the whole line if VPATH becomes empty
 # (actually we leave an empty line to preserve line numbers).
 if test "x$srcdir" = x.; then
-  ac_vpsub='/^[	 ]*VPATH[	 ]*=/{
-s/:*\$(srcdir):*/:/
-s/:*\${srcdir}:*/:/
-s/:*@srcdir@:*/:/
-s/^\([^=]*=[	 ]*\):*/\1/
+  ac_vpsub='/^[	 ]*VPATH[	 ]*=[	 ]*/{
+h
+s///
+s/^/:/
+s/[	 ]*$/:/
+s/:\$(srcdir):/:/g
+s/:\${srcdir}:/:/g
+s/:@srcdir@:/:/g
+s/^:*//
 s/:*$//
+x
+s/\(=[	 ]*\).*/\1/
+G
+s/\n//
 s/^[^=]*=[	 ]*$//
 }'
 fi
@@ -6934,7 +7773,7 @@ fi # test -n "$CONFIG_FILES"
 # No need to generate them if there are no CONFIG_HEADERS.
 # This happens for instance with `./config.status Makefile'.
 if test -n "$CONFIG_HEADERS"; then
-cat >"$tmp/defines.awk" <<\_ACAWK ||
+cat >"$ac_tmp/defines.awk" <<\_ACAWK ||
 BEGIN {
 _ACEOF
 
@@ -6946,11 +7785,11 @@ _ACEOF
 # handling of long lines.
 ac_delim='%!_!# '
 for ac_last_try in false false :; do
-  ac_t=`sed -n "/$ac_delim/p" confdefs.h`
-  if test -z "$ac_t"; then
+  ac_tt=`sed -n "/$ac_delim/p" confdefs.h`
+  if test -z "$ac_tt"; then
     break
   elif $ac_last_try; then
-    as_fn_error "could not make $CONFIG_HEADERS" "$LINENO" 5
+    as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5
   else
     ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
   fi
@@ -7035,11 +7874,11 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 _ACAWK
 _ACEOF
 cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
-  as_fn_error "could not setup config headers machinery" "$LINENO" 5
+  as_fn_error $? "could not setup config headers machinery" "$LINENO" 5
 fi # test -n "$CONFIG_HEADERS"
 
 
-eval set X "  :F $CONFIG_FILES  :H $CONFIG_HEADERS    "
+eval set X "  :F $CONFIG_FILES  :H $CONFIG_HEADERS    :C $CONFIG_COMMANDS"
 shift
 for ac_tag
 do
@@ -7048,7 +7887,7 @@ do
   esac
   case $ac_mode$ac_tag in
   :[FHL]*:*);;
-  :L* | :C*:*) as_fn_error "invalid tag \`$ac_tag'" "$LINENO" 5;;
+  :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;;
   :[FH]-) ac_tag=-:-;;
   :[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
   esac
@@ -7067,7 +7906,7 @@ do
     for ac_f
     do
       case $ac_f in
-      -) ac_f="$tmp/stdin";;
+      -) ac_f="$ac_tmp/stdin";;
       *) # Look for the file first in the build tree, then in the source tree
 	 # (if the path is not absolute).  The absolute path cannot be DOS-style,
 	 # because $ac_f cannot contain `:'.
@@ -7076,7 +7915,7 @@ do
 	   [\\/$]*) false;;
 	   *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
 	   esac ||
-	   as_fn_error "cannot find input file: \`$ac_f'" "$LINENO" 5;;
+	   as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;;
       esac
       case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac
       as_fn_append ac_file_inputs " '$ac_f'"
@@ -7102,8 +7941,8 @@ $as_echo "$as_me: creating $ac_file" >&6;}
     esac
 
     case $ac_tag in
-    *:-:* | *:-) cat >"$tmp/stdin" \
-      || as_fn_error "could not create $ac_file" "$LINENO" 5 ;;
+    *:-:* | *:-) cat >"$ac_tmp/stdin" \
+      || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;;
     esac
     ;;
   esac
@@ -7233,23 +8072,24 @@ s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
 s&@INSTALL@&$ac_INSTALL&;t t
 $ac_datarootdir_hack
 "
-eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$tmp/subs.awk" >$tmp/out \
-  || as_fn_error "could not create $ac_file" "$LINENO" 5
+eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \
+  >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5
 
 test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
-  { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } &&
-  { ac_out=`sed -n '/^[	 ]*datarootdir[	 ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } &&
+  { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } &&
+  { ac_out=`sed -n '/^[	 ]*datarootdir[	 ]*:*=/p' \
+      "$ac_tmp/out"`; test -z "$ac_out"; } &&
   { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir'
-which seems to be undefined.  Please make sure it is defined." >&5
+which seems to be undefined.  Please make sure it is defined" >&5
 $as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir'
-which seems to be undefined.  Please make sure it is defined." >&2;}
+which seems to be undefined.  Please make sure it is defined" >&2;}
 
-  rm -f "$tmp/stdin"
+  rm -f "$ac_tmp/stdin"
   case $ac_file in
-  -) cat "$tmp/out" && rm -f "$tmp/out";;
-  *) rm -f "$ac_file" && mv "$tmp/out" "$ac_file";;
+  -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";;
+  *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";;
   esac \
-  || as_fn_error "could not create $ac_file" "$LINENO" 5
+  || as_fn_error $? "could not create $ac_file" "$LINENO" 5
  ;;
   :H)
   #
@@ -7258,27 +8098,37 @@ which seems to be undefined.  Please make sure it is defined." >&2;}
   if test x"$ac_file" != x-; then
     {
       $as_echo "/* $configure_input  */" \
-      && eval '$AWK -f "$tmp/defines.awk"' "$ac_file_inputs"
-    } >"$tmp/config.h" \
-      || as_fn_error "could not create $ac_file" "$LINENO" 5
-    if diff "$ac_file" "$tmp/config.h" >/dev/null 2>&1; then
+      && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs"
+    } >"$ac_tmp/config.h" \
+      || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+    if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then
       { $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5
 $as_echo "$as_me: $ac_file is unchanged" >&6;}
     else
       rm -f "$ac_file"
-      mv "$tmp/config.h" "$ac_file" \
-	|| as_fn_error "could not create $ac_file" "$LINENO" 5
+      mv "$ac_tmp/config.h" "$ac_file" \
+	|| as_fn_error $? "could not create $ac_file" "$LINENO" 5
     fi
   else
     $as_echo "/* $configure_input  */" \
-      && eval '$AWK -f "$tmp/defines.awk"' "$ac_file_inputs" \
-      || as_fn_error "could not create -" "$LINENO" 5
+      && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \
+      || as_fn_error $? "could not create -" "$LINENO" 5
   fi
  ;;
 
-
+  :C)  { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5
+$as_echo "$as_me: executing $ac_file commands" >&6;}
+ ;;
   esac
 
+
+  case $ac_file$ac_mode in
+    "include/jemalloc/internal/size_classes.h":C)
+  mkdir -p "include/jemalloc/internal"
+  "${srcdir}/include/jemalloc/internal/size_classes.sh" > "${objroot}include/jemalloc/internal/size_classes.h"
+ ;;
+
+  esac
 done # for ac_tag
 
 
@@ -7287,7 +8137,7 @@ _ACEOF
 ac_clean_files=$ac_clean_files_save
 
 test $ac_write_fail = 0 ||
-  as_fn_error "write failure creating $CONFIG_STATUS" "$LINENO" 5
+  as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5
 
 
 # configure is writing to config.log, and then calls config.status.
@@ -7308,7 +8158,7 @@ if test "$no_create" != yes; then
   exec 5>>config.log
   # Use ||, not &&, to avoid exiting from the if with $? = 1, which
   # would make configure fail if this is the last instruction.
-  $ac_cs_success || as_fn_exit $?
+  $ac_cs_success || as_fn_exit 1
 fi
 if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
   { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
@@ -7318,8 +8168,10 @@ fi
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: ===============================================================================" >&5
 $as_echo "===============================================================================" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: jemalloc version   : $jemalloc_version" >&5
-$as_echo "jemalloc version   : $jemalloc_version" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: jemalloc version   : ${jemalloc_version}" >&5
+$as_echo "jemalloc version   : ${jemalloc_version}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: library revision   : ${rev}" >&5
+$as_echo "library revision   : ${rev}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5
 $as_echo "" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: CC                 : ${CC}" >&5
@@ -7376,6 +8228,8 @@ $as_echo "                   : ${JEMALLOC_PRIVATE_NAMESPACE}" >&6; }
 $as_echo "install_suffix     : ${install_suffix}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: autogen            : ${enable_autogen}" >&5
 $as_echo "autogen            : ${enable_autogen}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: experimental       : ${enable_experimental}" >&5
+$as_echo "experimental       : ${enable_experimental}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: cc-silence         : ${enable_cc_silence}" >&5
 $as_echo "cc-silence         : ${enable_cc_silence}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: debug              : ${enable_debug}" >&5
@@ -7390,22 +8244,22 @@ $as_echo "prof-libunwind     : ${enable_prof_libunwind}" >&6; }
 $as_echo "prof-libgcc        : ${enable_prof_libgcc}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: prof-gcc           : ${enable_prof_gcc}" >&5
 $as_echo "prof-gcc           : ${enable_prof_gcc}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: tiny               : ${enable_tiny}" >&5
-$as_echo "tiny               : ${enable_tiny}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: tcache             : ${enable_tcache}" >&5
 $as_echo "tcache             : ${enable_tcache}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: fill               : ${enable_fill}" >&5
 $as_echo "fill               : ${enable_fill}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: utrace             : ${enable_utrace}" >&5
+$as_echo "utrace             : ${enable_utrace}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: valgrind           : ${enable_valgrind}" >&5
+$as_echo "valgrind           : ${enable_valgrind}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: xmalloc            : ${enable_xmalloc}" >&5
 $as_echo "xmalloc            : ${enable_xmalloc}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: sysv               : ${enable_sysv}" >&5
-$as_echo "sysv               : ${enable_sysv}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: swap               : ${enable_swap}" >&5
-$as_echo "swap               : ${enable_swap}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: mremap             : ${enable_mremap}" >&5
+$as_echo "mremap             : ${enable_mremap}" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: munmap             : ${enable_munmap}" >&5
+$as_echo "munmap             : ${enable_munmap}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: dss                : ${enable_dss}" >&5
 $as_echo "dss                : ${enable_dss}" >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: dynamic_page_shift : ${enable_dynamic_page_shift}" >&5
-$as_echo "dynamic_page_shift : ${enable_dynamic_page_shift}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: lazy_lock          : ${enable_lazy_lock}" >&5
 $as_echo "lazy_lock          : ${enable_lazy_lock}" >&6; }
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: tls                : ${enable_tls}" >&5
diff --git a/deps/jemalloc/configure.ac b/deps/jemalloc/configure.ac
index b58aa520..a72019e5 100644
--- a/deps/jemalloc/configure.ac
+++ b/deps/jemalloc/configure.ac
@@ -14,7 +14,7 @@ if test "x${CFLAGS}" = "x" ; then
 else
   CFLAGS="${CFLAGS} $1"
 fi
-AC_RUN_IFELSE([AC_LANG_PROGRAM(
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
 [[
 ]], [[
     return 0;
@@ -26,20 +26,25 @@ AC_RUN_IFELSE([AC_LANG_PROGRAM(
 ])
 
 dnl JE_COMPILABLE(label, hcode, mcode, rvar)
+dnl 
+dnl Use AC_LINK_IFELSE() rather than AC_COMPILE_IFELSE() so that linker errors
+dnl cause failure.
 AC_DEFUN([JE_COMPILABLE],
 [
-AC_MSG_CHECKING([whether $1 is compilable])
-AC_RUN_IFELSE([AC_LANG_PROGRAM(
-[$2], [$3])],
-              AC_MSG_RESULT([yes])
-              [$4="yes"],
-              AC_MSG_RESULT([no])
-              [$4="no"]
-)
+AC_CACHE_CHECK([whether $1 is compilable],
+               [$4],
+               [AC_LINK_IFELSE([AC_LANG_PROGRAM([$2],
+                                                [$3])],
+                               [$4=yes],
+                               [$4=no])])
 ])
 
 dnl ============================================================================
 
+dnl Library revision.
+rev=1
+AC_SUBST([rev])
+
 srcroot=$srcdir
 if test "x${srcroot}" = "x." ; then
   srcroot=""
@@ -82,14 +87,23 @@ AC_SUBST([MANDIR])
 
 dnl Support for building documentation.
 AC_PATH_PROG([XSLTPROC], [xsltproc], , [$PATH])
+if test -d "/usr/share/xml/docbook/stylesheet/docbook-xsl" ; then
+  DEFAULT_XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl"
+elif test -d "/usr/share/sgml/docbook/xsl-stylesheets" ; then
+  DEFAULT_XSLROOT="/usr/share/sgml/docbook/xsl-stylesheets"
+else
+  dnl Documentation building will fail if this default gets used.
+  DEFAULT_XSLROOT=""
+fi
 AC_ARG_WITH([xslroot],
-  [AS_HELP_STRING([--with-xslroot=<path>], [XSL stylesheet root path])],
+  [AS_HELP_STRING([--with-xslroot=<path>], [XSL stylesheet root path])], [
 if test "x$with_xslroot" = "xno" ; then
-  XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl"
+  XSLROOT="${DEFAULT_XSLROOT}"
 else
   XSLROOT="${with_xslroot}"
-fi,
-  XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl"
+fi
+],
+  XSLROOT="${DEFAULT_XSLROOT}"
 )
 AC_SUBST([XSLROOT])
 
@@ -97,6 +111,19 @@ dnl If CFLAGS isn't defined, set CFLAGS to something reasonable.  Otherwise,
 dnl just prevent autoconf from molesting CFLAGS.
 CFLAGS=$CFLAGS
 AC_PROG_CC
+if test "x$GCC" != "xyes" ; then
+  AC_CACHE_CHECK([whether compiler is MSVC],
+                 [je_cv_msvc],
+                 [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
+                                                     [
+#ifndef _MSC_VER
+  int fail[-1];
+#endif
+])],
+                               [je_cv_msvc=yes],
+                               [je_cv_msvc=no])])
+fi
+
 if test "x$CFLAGS" = "x" ; then
   no_CFLAGS="yes"
   if test "x$GCC" = "xyes" ; then
@@ -104,6 +131,12 @@ if test "x$CFLAGS" = "x" ; then
     JE_CFLAGS_APPEND([-Wall])
     JE_CFLAGS_APPEND([-pipe])
     JE_CFLAGS_APPEND([-g3])
+  elif test "x$je_cv_msvc" = "xyes" ; then
+    CC="$CC -nologo"
+    JE_CFLAGS_APPEND([-Zi])
+    JE_CFLAGS_APPEND([-MT])
+    JE_CFLAGS_APPEND([-W3])
+    CPPFLAGS="$CPPFLAGS -I${srcroot}/include/msvc_compat"
   fi
 fi
 dnl Append EXTRA_CFLAGS to CFLAGS, if defined.
@@ -142,6 +175,18 @@ else
 fi
 AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG])
 
+AC_CHECK_SIZEOF([intmax_t])
+if test "x${ac_cv_sizeof_intmax_t}" = "x16" ; then
+  LG_SIZEOF_INTMAX_T=4
+elif test "x${ac_cv_sizeof_intmax_t}" = "x8" ; then
+  LG_SIZEOF_INTMAX_T=3
+elif test "x${ac_cv_sizeof_intmax_t}" = "x4" ; then
+  LG_SIZEOF_INTMAX_T=2
+else
+  AC_MSG_ERROR([Unsupported intmax_t size: ${ac_cv_sizeof_intmax_t}])
+fi
+AC_DEFINE_UNQUOTED([LG_SIZEOF_INTMAX_T], [$LG_SIZEOF_INTMAX_T])
+
 AC_CANONICAL_HOST
 dnl CPU-specific settings.
 CPU_SPINWAIT=""
@@ -150,15 +195,15 @@ case "${host_cpu}" in
 	;;
   i686)
 	JE_COMPILABLE([__asm__], [], [[__asm__ volatile("pause"); return 0;]],
-	              [asm])
-	if test "x${asm}" = "xyes" ; then
+	              [je_cv_asm])
+	if test "x${je_cv_asm}" = "xyes" ; then
 	    CPU_SPINWAIT='__asm__ volatile("pause")'
 	fi
 	;;
   x86_64)
 	JE_COMPILABLE([__asm__ syntax], [],
-	              [[__asm__ volatile("pause"); return 0;]], [asm])
-	if test "x${asm}" = "xyes" ; then
+	              [[__asm__ volatile("pause"); return 0;]], [je_cv_asm])
+	if test "x${je_cv_asm}" = "xyes" ; then
 	    CPU_SPINWAIT='__asm__ volatile("pause")'
 	fi
 	;;
@@ -167,6 +212,23 @@ case "${host_cpu}" in
 esac
 AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT])
 
+LD_PRELOAD_VAR="LD_PRELOAD"
+so="so"
+importlib="${so}"
+o="$ac_objext"
+a="a"
+exe="$ac_exeext"
+libprefix="lib"
+DSO_LDFLAGS='-shared -Wl,-soname,$(@F)'
+RPATH='-Wl,-rpath,$(1)'
+SOREV="${so}.${rev}"
+PIC_CFLAGS='-fPIC -DPIC'
+CTARGET='-o $@'
+LDTARGET='-o $@'
+EXTRA_LDFLAGS=
+MKLIB='ar crus $@'
+CC_MM=1
+
 dnl Platform-specific settings.  abi and RPATH can probably be determined
 dnl programmatically, but doing so is error-prone, which makes it generally
 dnl not worth the trouble.
@@ -174,25 +236,33 @@ dnl
 dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the
 dnl definitions need to be seen before any headers are included, which is a pain
 dnl to make happen otherwise.
+default_munmap="1"
 case "${host}" in
   *-*-darwin*)
-	CFLAGS="$CFLAGS -fno-common -no-cpp-precomp"
+	CFLAGS="$CFLAGS"
 	abi="macho"
-	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE])
+	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
 	RPATH=""
+	LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES"
+	so="dylib"
+	importlib="${so}"
+	force_tls="0"
+	DSO_LDFLAGS='-shared -Wl,-dylib_install_name,$(@F)'
+	SOREV="${rev}.${so}"
 	;;
   *-*-freebsd*)
 	CFLAGS="$CFLAGS"
 	abi="elf"
-	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE])
-	RPATH="-Wl,-rpath,"
+	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
+	force_lazy_lock="1"
 	;;
   *-*-linux*)
 	CFLAGS="$CFLAGS"
 	CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
 	abi="elf"
-	AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED])
-	RPATH="-Wl,-rpath,"
+	AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ])
+	AC_DEFINE([JEMALLOC_THREADED_INIT], [ ])
+	default_munmap="0"
 	;;
   *-*-netbsd*)
 	AC_MSG_CHECKING([ABI])
@@ -206,45 +276,100 @@ case "${host}" in
                           [CFLAGS="$CFLAGS"; abi="elf"],
                           [abi="aout"])
 	AC_MSG_RESULT([$abi])
-	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE])
-	RPATH="-Wl,-rpath,"
+	AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
 	;;
   *-*-solaris2*)
 	CFLAGS="$CFLAGS"
 	abi="elf"
-	RPATH="-Wl,-R,"
+	RPATH='-Wl,-R,$(1)'
 	dnl Solaris needs this for sigwait().
 	CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS"
 	LIBS="$LIBS -lposix4 -lsocket -lnsl"
 	;;
+  *-ibm-aix*)
+	if test "x${LG_SIZEOF_PTR}" = "x3"; then
+	  dnl 64bit AIX: LG_SIZEOF_PTR is lg(sizeof(void *)), so 8 bytes -> 3.
+	  LD_PRELOAD_VAR="LDR_PRELOAD64"
+	else
+	  dnl 32bit AIX
+	  LD_PRELOAD_VAR="LDR_PRELOAD"
+	fi
+	abi="xcoff"
+	;;
+  *-*-mingw*)
+	abi="pecoff"
+	force_tls="0"
+	RPATH=""
+	so="dll"
+	if test "x$je_cv_msvc" = "xyes" ; then
+	  importlib="lib"
+	  DSO_LDFLAGS="-LD"
+	  EXTRA_LDFLAGS="-link -DEBUG"
+	  CTARGET='-Fo$@'
+	  LDTARGET='-Fe$@'
+	  MKLIB='lib -nologo -out:$@'
+	  CC_MM=
+        else
+	  importlib="${so}"
+	  DSO_LDFLAGS="-shared"
+	fi
+	a="lib"
+	libprefix=""
+	SOREV="${so}"
+	PIC_CFLAGS=""
+	;;
   *)
 	AC_MSG_RESULT([Unsupported operating system: ${host}])
 	abi="elf"
-	RPATH="-Wl,-rpath,"
 	;;
 esac
 AC_SUBST([abi])
 AC_SUBST([RPATH])
+AC_SUBST([LD_PRELOAD_VAR])
+AC_SUBST([so])
+AC_SUBST([importlib])
+AC_SUBST([o])
+AC_SUBST([a])
+AC_SUBST([exe])
+AC_SUBST([libprefix])
+AC_SUBST([DSO_LDFLAGS])
+AC_SUBST([EXTRA_LDFLAGS])
+AC_SUBST([SOREV])
+AC_SUBST([PIC_CFLAGS])
+AC_SUBST([CTARGET])
+AC_SUBST([LDTARGET])
+AC_SUBST([MKLIB])
+AC_SUBST([CC_MM])
+
+if test "x$abi" != "xpecoff"; then
+  dnl Heap profiling uses the log(3) function.
+  LIBS="$LIBS -lm"
+fi
 
 JE_COMPILABLE([__attribute__ syntax],
               [static __attribute__((unused)) void foo(void){}],
               [],
-              [attribute])
-if test "x${attribute}" = "xyes" ; then
+              [je_cv_attribute])
+if test "x${je_cv_attribute}" = "xyes" ; then
   AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ])
   if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then
     JE_CFLAGS_APPEND([-fvisibility=hidden])
   fi
 fi
-
-JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [
-#define _GNU_SOURCE
-#include <sys/mman.h>
-], [
-void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0);
-], [mremap_fixed])
-if test "x${mremap_fixed}" = "xyes" ; then
-  AC_DEFINE([JEMALLOC_MREMAP_FIXED])
+dnl Check for tls_model attribute support (clang 3.0 still lacks support).
+SAVED_CFLAGS="${CFLAGS}"
+JE_CFLAGS_APPEND([-Werror])
+JE_COMPILABLE([tls_model attribute], [],
+              [static __thread int
+               __attribute__((tls_model("initial-exec"))) foo;
+               foo = 0;],
+              [je_cv_tls_model])
+CFLAGS="${SAVED_CFLAGS}"
+if test "x${je_cv_tls_model}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_TLS_MODEL],
+            [__attribute__((tls_model("initial-exec")))])
+else
+  AC_DEFINE([JEMALLOC_TLS_MODEL], [ ])
 fi
 
 dnl Support optional additions to rpath.
@@ -278,11 +403,52 @@ AC_PATH_PROG([AR], [ar], , [$PATH])
 AC_PATH_PROG([LD], [ld], , [$PATH])
 AC_PATH_PROG([AUTOCONF], [autoconf], , [$PATH])
 
+public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free malloc_usable_size malloc_stats_print mallctl mallctlnametomib mallctlbymib"
+
+dnl Check for allocator-related functions that should be wrapped.
+AC_CHECK_FUNC([memalign],
+	      [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ])
+	       public_syms="${public_syms} memalign"])
+AC_CHECK_FUNC([valloc],
+	      [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ])
+	       public_syms="${public_syms} valloc"])
+
+dnl Support the experimental API by default.
+AC_ARG_ENABLE([experimental],
+  [AS_HELP_STRING([--disable-experimental],
+   [Disable support for the experimental API])],
+[if test "x$enable_experimental" = "xno" ; then
+  enable_experimental="0"
+else
+  enable_experimental="1"
+fi
+],
+[enable_experimental="1"]
+)
+if test "x$enable_experimental" = "x1" ; then
+  AC_DEFINE([JEMALLOC_EXPERIMENTAL], [ ])
+  public_syms="${public_syms} allocm dallocm nallocm rallocm sallocm"
+fi
+AC_SUBST([enable_experimental])
+
+dnl Perform no name mangling by default.
+AC_ARG_WITH([mangling],
+  [AS_HELP_STRING([--with-mangling=<map>], [Mangle symbols in <map>])],
+  [mangling_map="$with_mangling"], [mangling_map=""])
+for nm in `echo ${mangling_map} |tr ',' ' '` ; do
+  k="`echo ${nm} |tr ':' ' ' |awk '{print $1}'`"
+  n="je_${k}"
+  m=`echo ${nm} |tr ':' ' ' |awk '{print $2}'`
+  AC_DEFINE_UNQUOTED([${n}], [${m}])
+  dnl Remove key from public_syms so that it isn't redefined later.
+  public_syms=`for sym in ${public_syms}; do echo "${sym}"; done |grep -v "^${k}\$" |tr '\n' ' '`
+done
+
 dnl Do not prefix public APIs by default.
 AC_ARG_WITH([jemalloc_prefix],
   [AS_HELP_STRING([--with-jemalloc-prefix=<prefix>], [Prefix to prepend to all public APIs])],
   [JEMALLOC_PREFIX="$with_jemalloc_prefix"],
-  [if test "x$abi" != "xmacho" ; then
+  [if test "x$abi" != "xmacho" -a "x$abi" != "xpecoff"; then
   JEMALLOC_PREFIX=""
 else
   JEMALLOC_PREFIX="je_"
@@ -292,8 +458,15 @@ if test "x$JEMALLOC_PREFIX" != "x" ; then
   JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"`
   AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"])
   AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"])
-  AC_DEFINE_UNQUOTED([JEMALLOC_P(string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix)], [${JEMALLOC_PREFIX}##string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix])
 fi
+dnl Generate macros to rename public symbols.  All public symbols are prefixed
+dnl with je_ in the source code, so these macro definitions are needed even if
+dnl --with-jemalloc-prefix wasn't specified.
+for stem in ${public_syms}; do
+  n="je_${stem}"
+  m="${JEMALLOC_PREFIX}${stem}"
+  AC_DEFINE_UNQUOTED([${n}], [${m}])
+done
 
 dnl Do not mangle library-private APIs by default.
 AC_ARG_WITH([private_namespace],
@@ -342,8 +515,10 @@ cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h"
 cfgoutputs_tup="${cfgoutputs_tup} test/jemalloc_test.h:test/jemalloc_test.h.in"
 
 cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in"
+cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/size_classes.sh"
 
 cfghdrs_out="include/jemalloc/jemalloc_defs${install_suffix}.h"
+cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/size_classes.h"
 
 cfghdrs_tup="include/jemalloc/jemalloc_defs${install_suffix}.h:include/jemalloc/jemalloc_defs.h.in"
 
@@ -361,7 +536,7 @@ fi
 [enable_cc_silence="0"]
 )
 if test "x$enable_cc_silence" = "x1" ; then
-  AC_DEFINE([JEMALLOC_CC_SILENCE])
+  AC_DEFINE([JEMALLOC_CC_SILENCE], [ ])
 fi
 
 dnl Do not compile with debugging by default.
@@ -390,22 +565,25 @@ if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then
     if test "x$GCC" = "xyes" ; then
       JE_CFLAGS_APPEND([-O3])
       JE_CFLAGS_APPEND([-funroll-loops])
+    elif test "x$je_cv_msvc" = "xyes" ; then
+      JE_CFLAGS_APPEND([-O2])
     else
       JE_CFLAGS_APPEND([-O])
     fi
   fi
 fi
 
-dnl Do not enable statistics calculation by default.
+dnl Enable statistics calculation by default.
 AC_ARG_ENABLE([stats],
-  [AS_HELP_STRING([--enable-stats], [Enable statistics calculation/reporting])],
+  [AS_HELP_STRING([--disable-stats],
+                  [Disable statistics calculation/reporting])],
 [if test "x$enable_stats" = "xno" ; then
   enable_stats="0"
 else
   enable_stats="1"
 fi
 ],
-[enable_stats="0"]
+[enable_stats="1"]
 )
 if test "x$enable_stats" = "x1" ; then
   AC_DEFINE([JEMALLOC_STATS], [ ])
@@ -531,27 +709,14 @@ fi
 AC_MSG_CHECKING([configured backtracing method])
 AC_MSG_RESULT([$backtrace_method])
 if test "x$enable_prof" = "x1" ; then
-  LIBS="$LIBS -lm"
+  if test "x${force_tls}" = "x0" ; then
+    AC_MSG_ERROR([Heap profiling requires TLS]);
+  fi
+  force_tls="1"
   AC_DEFINE([JEMALLOC_PROF], [ ])
 fi
 AC_SUBST([enable_prof])
 
-dnl Enable tiny allocations by default.
-AC_ARG_ENABLE([tiny],
-  [AS_HELP_STRING([--disable-tiny], [Disable tiny (sub-quantum) allocations])],
-[if test "x$enable_tiny" = "xno" ; then
-  enable_tiny="0"
-else
-  enable_tiny="1"
-fi
-],
-[enable_tiny="1"]
-)
-if test "x$enable_tiny" = "x1" ; then
-  AC_DEFINE([JEMALLOC_TINY], [ ])
-fi
-AC_SUBST([enable_tiny])
-
 dnl Enable thread-specific caching by default.
 AC_ARG_ENABLE([tcache],
   [AS_HELP_STRING([--disable-tcache], [Disable per thread caches])],
@@ -568,21 +733,48 @@ if test "x$enable_tcache" = "x1" ; then
 fi
 AC_SUBST([enable_tcache])
 
-dnl Do not enable mmap()ped swap files by default.
-AC_ARG_ENABLE([swap],
-  [AS_HELP_STRING([--enable-swap], [Enable mmap()ped swap files])],
-[if test "x$enable_swap" = "xno" ; then
-  enable_swap="0"
+dnl Disable mremap() for huge realloc() by default.
+AC_ARG_ENABLE([mremap],
+  [AS_HELP_STRING([--enable-mremap], [Enable mremap(2) for huge realloc()])],
+[if test "x$enable_mremap" = "xno" ; then
+  enable_mremap="0"
 else
-  enable_swap="1"
+  enable_mremap="1"
 fi
 ],
-[enable_swap="0"]
+[enable_mremap="0"]
 )
-if test "x$enable_swap" = "x1" ; then
-  AC_DEFINE([JEMALLOC_SWAP], [ ])
+if test "x$enable_mremap" = "x1" ; then
+  JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [
+#define _GNU_SOURCE
+#include <sys/mman.h>
+], [
+void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0);
+], [je_cv_mremap_fixed])
+  if test "x${je_cv_mremap_fixed}" = "xno" ; then
+    enable_mremap="0"
+  fi
+fi
+if test "x$enable_mremap" = "x1" ; then
+  AC_DEFINE([JEMALLOC_MREMAP], [ ])
 fi
-AC_SUBST([enable_swap])
+AC_SUBST([enable_mremap])
+
+dnl Enable VM deallocation via munmap() by default.
+AC_ARG_ENABLE([munmap],
+  [AS_HELP_STRING([--disable-munmap], [Disable VM deallocation via munmap(2)])],
+[if test "x$enable_munmap" = "xno" ; then
+  enable_munmap="0"
+else
+  enable_munmap="1"
+fi
+],
+[enable_munmap="${default_munmap}"]
+)
+if test "x$enable_munmap" = "x1" ; then
+  AC_DEFINE([JEMALLOC_MUNMAP], [ ])
+fi
+AC_SUBST([enable_munmap])
 
 dnl Do not enable allocation from DSS by default.
 AC_ARG_ENABLE([dss],
@@ -595,102 +787,154 @@ fi
 ],
 [enable_dss="0"]
 )
+dnl Check whether the BSD/SUSv1 sbrk() exists.  If not, disable DSS support.
+AC_CHECK_FUNC([sbrk], [have_sbrk="1"], [have_sbrk="0"])
+if test "x$have_sbrk" = "x1" ; then
+  AC_DEFINE([JEMALLOC_HAVE_SBRK], [ ])
+else
+  enable_dss="0"
+fi
+
 if test "x$enable_dss" = "x1" ; then
   AC_DEFINE([JEMALLOC_DSS], [ ])
 fi
 AC_SUBST([enable_dss])
 
-dnl Do not support the junk/zero filling option by default.
+dnl Support the junk/zero filling option by default.
 AC_ARG_ENABLE([fill],
-  [AS_HELP_STRING([--enable-fill], [Support junk/zero filling option])],
+  [AS_HELP_STRING([--disable-fill],
+                  [Disable support for junk/zero filling, quarantine, and redzones])],
 [if test "x$enable_fill" = "xno" ; then
   enable_fill="0"
 else
   enable_fill="1"
 fi
 ],
-[enable_fill="0"]
+[enable_fill="1"]
 )
 if test "x$enable_fill" = "x1" ; then
   AC_DEFINE([JEMALLOC_FILL], [ ])
 fi
 AC_SUBST([enable_fill])
 
-dnl Do not support the xmalloc option by default.
-AC_ARG_ENABLE([xmalloc],
-  [AS_HELP_STRING([--enable-xmalloc], [Support xmalloc option])],
-[if test "x$enable_xmalloc" = "xno" ; then
-  enable_xmalloc="0"
+dnl Disable utrace(2)-based tracing by default.
+AC_ARG_ENABLE([utrace],
+  [AS_HELP_STRING([--enable-utrace], [Enable utrace(2)-based tracing])],
+[if test "x$enable_utrace" = "xno" ; then
+  enable_utrace="0"
 else
-  enable_xmalloc="1"
+  enable_utrace="1"
 fi
 ],
-[enable_xmalloc="0"]
+[enable_utrace="0"]
 )
-if test "x$enable_xmalloc" = "x1" ; then
-  AC_DEFINE([JEMALLOC_XMALLOC], [ ])
+JE_COMPILABLE([utrace(2)], [
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/uio.h>
+#include <sys/ktrace.h>
+], [
+	utrace((void *)0, 0);
+], [je_cv_utrace])
+if test "x${je_cv_utrace}" = "xno" ; then
+  enable_utrace="0"
 fi
-AC_SUBST([enable_xmalloc])
+if test "x$enable_utrace" = "x1" ; then
+  AC_DEFINE([JEMALLOC_UTRACE], [ ])
+fi
+AC_SUBST([enable_utrace])
 
-dnl Do not support the SYSV option by default.
-AC_ARG_ENABLE([sysv],
-  [AS_HELP_STRING([--enable-sysv], [Support SYSV semantics option])],
-[if test "x$enable_sysv" = "xno" ; then
-  enable_sysv="0"
+dnl Support Valgrind by default.
+AC_ARG_ENABLE([valgrind],
+  [AS_HELP_STRING([--disable-valgrind], [Disable support for Valgrind])],
+[if test "x$enable_valgrind" = "xno" ; then
+  enable_valgrind="0"
 else
-  enable_sysv="1"
+  enable_valgrind="1"
 fi
 ],
-[enable_sysv="0"]
+[enable_valgrind="1"]
 )
-if test "x$enable_sysv" = "x1" ; then
-  AC_DEFINE([JEMALLOC_SYSV], [ ])
+if test "x$enable_valgrind" = "x1" ; then
+  JE_COMPILABLE([valgrind], [
+#include <valgrind/valgrind.h>
+#include <valgrind/memcheck.h>
+
+#if !defined(VALGRIND_RESIZEINPLACE_BLOCK)
+#  error "Incompatible Valgrind version"
+#endif
+], [], [je_cv_valgrind])
+  if test "x${je_cv_valgrind}" = "xno" ; then
+    enable_valgrind="0"
+  fi
+  if test "x$enable_valgrind" = "x1" ; then
+    AC_DEFINE([JEMALLOC_VALGRIND], [ ])
+  fi
 fi
-AC_SUBST([enable_sysv])
+AC_SUBST([enable_valgrind])
 
-dnl Do not determine page shift at run time by default.
-AC_ARG_ENABLE([dynamic_page_shift],
-  [AS_HELP_STRING([--enable-dynamic-page-shift],
-  [Determine page size at run time (don't trust configure result)])],
-[if test "x$enable_dynamic_page_shift" = "xno" ; then
-  enable_dynamic_page_shift="0"
+dnl Do not support the xmalloc option by default.
+AC_ARG_ENABLE([xmalloc],
+  [AS_HELP_STRING([--enable-xmalloc], [Support xmalloc option])],
+[if test "x$enable_xmalloc" = "xno" ; then
+  enable_xmalloc="0"
 else
-  enable_dynamic_page_shift="1"
+  enable_xmalloc="1"
 fi
 ],
-[enable_dynamic_page_shift="0"]
+[enable_xmalloc="0"]
 )
-if test "x$enable_dynamic_page_shift" = "x1" ; then
-  AC_DEFINE([DYNAMIC_PAGE_SHIFT], [ ])
+if test "x$enable_xmalloc" = "x1" ; then
+  AC_DEFINE([JEMALLOC_XMALLOC], [ ])
 fi
-AC_SUBST([enable_dynamic_page_shift])
+AC_SUBST([enable_xmalloc])
 
-AC_MSG_CHECKING([STATIC_PAGE_SHIFT])
-AC_RUN_IFELSE([AC_LANG_PROGRAM(
-[[#include <stdio.h>
-#include <unistd.h>
+AC_CACHE_CHECK([STATIC_PAGE_SHIFT],
+               [je_cv_static_page_shift],
+               AC_RUN_IFELSE([AC_LANG_PROGRAM(
+[[
 #include <strings.h>
-]], [[
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <unistd.h>
+#endif
+#include <stdio.h>
+]],
+[[
     long result;
     FILE *f;
 
+#ifdef _WIN32
+    SYSTEM_INFO si;
+    GetSystemInfo(&si);
+    result = si.dwPageSize;
+#else
     result = sysconf(_SC_PAGESIZE);
+#endif
     if (result == -1) {
 	return 1;
     }
+    result = ffsl(result) - 1;
+
     f = fopen("conftest.out", "w");
     if (f == NULL) {
 	return 1;
     }
-    fprintf(f, "%u\n", ffs((int)result) - 1);
-    close(f);
+    fprintf(f, "%ld\n", result);
+    fclose(f);
 
     return 0;
 ]])],
-              [STATIC_PAGE_SHIFT=`cat conftest.out`]
-              AC_MSG_RESULT([$STATIC_PAGE_SHIFT])
-              AC_DEFINE_UNQUOTED([STATIC_PAGE_SHIFT], [$STATIC_PAGE_SHIFT]),
-              AC_MSG_RESULT([error]))
+                             [je_cv_static_page_shift=`cat conftest.out`],
+                             [je_cv_static_page_shift=undefined]))
+
+if test "x$je_cv_static_page_shift" != "xundefined"; then
+   AC_DEFINE_UNQUOTED([STATIC_PAGE_SHIFT], [$je_cv_static_page_shift])
+else
+   AC_MSG_ERROR([cannot determine value for STATIC_PAGE_SHIFT])
+fi
 
 dnl ============================================================================
 dnl jemalloc configuration.
@@ -716,28 +960,65 @@ AC_SUBST([jemalloc_version_gid])
 dnl ============================================================================
 dnl Configure pthreads.
 
-AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])])
-AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"],
-             [AC_MSG_ERROR([libpthread is missing])])
+if test "x$abi" != "xpecoff" ; then
+  AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])])
+  dnl Some systems may embed pthreads functionality in libc; check for libpthread
+  dnl first, but try libc too before failing.
+  AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"],
+               [AC_SEARCH_LIBS([pthread_create], , ,
+                               AC_MSG_ERROR([libpthread is missing]))])
+fi
 
 CPPFLAGS="$CPPFLAGS -D_REENTRANT"
 
-dnl Enable lazy locking by default.
+dnl Check whether the BSD-specific _malloc_thread_cleanup() exists.  If so, use
+dnl it rather than pthreads TSD cleanup functions to support cleanup during
+dnl thread exit, in order to avoid pthreads library recursion during
+dnl bootstrapping.
+AC_CHECK_FUNC([_malloc_thread_cleanup],
+              [have__malloc_thread_cleanup="1"],
+              [have__malloc_thread_cleanup="0"]
+             )
+if test "x$have__malloc_thread_cleanup" = "x1" ; then
+  AC_DEFINE([JEMALLOC_MALLOC_THREAD_CLEANUP], [ ])
+  force_tls="1"
+fi
+
+dnl Check whether the BSD-specific _pthread_mutex_init_calloc_cb() exists.  If
+dnl so, mutex initialization causes allocation, and we need to implement this
+dnl callback function in order to prevent recursive allocation.
+AC_CHECK_FUNC([_pthread_mutex_init_calloc_cb],
+              [have__pthread_mutex_init_calloc_cb="1"],
+              [have__pthread_mutex_init_calloc_cb="0"]
+             )
+if test "x$have__pthread_mutex_init_calloc_cb" = "x1" ; then
+  AC_DEFINE([JEMALLOC_MUTEX_INIT_CB])
+fi
+
+dnl Disable lazy locking by default.
 AC_ARG_ENABLE([lazy_lock],
-  [AS_HELP_STRING([--disable-lazy-lock],
-  [Disable lazy locking (always lock, even when single-threaded)])],
+  [AS_HELP_STRING([--enable-lazy-lock],
+  [Enable lazy locking (only lock when multi-threaded)])],
 [if test "x$enable_lazy_lock" = "xno" ; then
   enable_lazy_lock="0"
 else
   enable_lazy_lock="1"
 fi
 ],
-[enable_lazy_lock="1"]
+[enable_lazy_lock="0"]
 )
+if test "x$enable_lazy_lock" = "x0" -a "x${force_lazy_lock}" = "x1" ; then
+  AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues])
+  enable_lazy_lock="1"
+fi
 if test "x$enable_lazy_lock" = "x1" ; then
-  AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])])
-  AC_CHECK_LIB([dl], [dlopen], [LIBS="$LIBS -ldl"],
-               [AC_MSG_ERROR([libdl is missing])])
+  if test "x$abi" != "xpecoff" ; then
+    AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])])
+    AC_CHECK_FUNC([dlsym], [],
+      [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"],
+                    [AC_MSG_ERROR([libdl is missing])])
+      ])
+  fi
   AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ])
 fi
 AC_SUBST([enable_lazy_lock])
@@ -752,9 +1033,17 @@ fi
 ,
 enable_tls="1"
 )
+if test "x${enable_tls}" = "x0" -a "x${force_tls}" = "x1" ; then
+  AC_MSG_RESULT([Forcing TLS to avoid allocator/threading bootstrap issues])
+  enable_tls="1"
+fi
+if test "x${enable_tls}" = "x1" -a "x${force_tls}" = "x0" ; then
+  AC_MSG_RESULT([Forcing no TLS to avoid allocator/threading bootstrap issues])
+  enable_tls="0"
+fi
 if test "x${enable_tls}" = "x1" ; then
 AC_MSG_CHECKING([for TLS])
-AC_RUN_IFELSE([AC_LANG_PROGRAM(
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
 [[
     __thread int x;
 ]], [[
@@ -767,17 +1056,50 @@ AC_RUN_IFELSE([AC_LANG_PROGRAM(
               enable_tls="0")
 fi
 AC_SUBST([enable_tls])
-if test "x${enable_tls}" = "x0" ; then
-  AC_DEFINE_UNQUOTED([NO_TLS], [ ])
+if test "x${enable_tls}" = "x1" ; then
+  AC_DEFINE_UNQUOTED([JEMALLOC_TLS], [ ])
+elif test "x${force_tls}" = "x1" ; then
+  AC_MSG_ERROR([Failed to configure TLS, which is mandatory for correct function])
 fi
 
 dnl ============================================================================
 dnl Check for ffsl(3), and fail if not found.  This function exists on all
 dnl platforms that jemalloc currently has a chance of functioning on without
 dnl modification.
+JE_COMPILABLE([a program using ffsl], [
+#include <strings.h>
+#include <string.h>
+], [
+	{
+		int rv = ffsl(0x08);
+	}
+], [je_cv_function_ffsl])
+if test "x${je_cv_function_ffsl}" != "xyes" ; then
+   AC_MSG_ERROR([Cannot build without ffsl(3)])
+fi
 
-AC_CHECK_FUNC([ffsl], [],
-	      [AC_MSG_ERROR([Cannot build without ffsl(3)])])
+dnl ============================================================================
+dnl Check for atomic(9) operations as provided on FreeBSD.
+
+JE_COMPILABLE([atomic(9)], [
+#include <sys/types.h>
+#include <machine/atomic.h>
+#include <inttypes.h>
+], [
+	{
+		uint32_t x32 = 0;
+		volatile uint32_t *x32p = &x32;
+		atomic_fetchadd_32(x32p, 1);
+	}
+	{
+		unsigned long xlong = 0;
+		volatile unsigned long *xlongp = &xlong;
+		atomic_fetchadd_long(xlongp, 1);
+	}
+], [je_cv_atomic9])
+if test "x${je_cv_atomic9}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_ATOMIC9])
+fi
 
 dnl ============================================================================
 dnl Check for atomic(3) operations as provided on Darwin.
@@ -796,9 +1118,43 @@ JE_COMPILABLE([Darwin OSAtomic*()], [
 		volatile int64_t *x64p = &x64;
 		OSAtomicAdd64(1, x64p);
 	}
-], [osatomic])
-if test "x${osatomic}" = "xyes" ; then
-  AC_DEFINE([JEMALLOC_OSATOMIC])
+], [je_cv_osatomic])
+if test "x${je_cv_osatomic}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_OSATOMIC], [ ])
+fi
+
+dnl ============================================================================
+dnl Check whether __sync_{add,sub}_and_fetch() are available despite
+dnl __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macros being undefined.
+
+AC_DEFUN([JE_SYNC_COMPARE_AND_SWAP_CHECK],[
+  AC_CACHE_CHECK([whether to force $1-bit __sync_{add,sub}_and_fetch()],
+               [je_cv_sync_compare_and_swap_$2],
+               [AC_LINK_IFELSE([AC_LANG_PROGRAM([
+                                                 #include <stdint.h>
+                                                ],
+                                                [
+                                                 #ifndef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2
+                                                 {
+                                                    uint$1_t x$1 = 0;
+                                                    __sync_add_and_fetch(&x$1, 42);
+                                                    __sync_sub_and_fetch(&x$1, 1);
+                                                 }
+                                                 #else
+                                                 #error __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2 is defined, no need to force
+                                                 #endif
+                                                ])],
+                               [je_cv_sync_compare_and_swap_$2=yes],
+                               [je_cv_sync_compare_and_swap_$2=no])])
+
+  if test "x${je_cv_sync_compare_and_swap_$2}" = "xyes" ; then
+    AC_DEFINE([JE_FORCE_SYNC_COMPARE_AND_SWAP_$2], [ ])
+  fi
+])
+
+if test "x${je_cv_atomic9}" != "xyes" -a "x${je_cv_osatomic}" != "xyes" ; then
+  JE_SYNC_COMPARE_AND_SWAP_CHECK(32, 4)
+  JE_SYNC_COMPARE_AND_SWAP_CHECK(64, 8)
 fi
 
 dnl ============================================================================
@@ -811,69 +1167,59 @@ JE_COMPILABLE([Darwin OSSpin*()], [
 	OSSpinLock lock = 0;
 	OSSpinLockLock(&lock);
 	OSSpinLockUnlock(&lock);
-], [osspin])
-if test "x${osspin}" = "xyes" ; then
-  AC_DEFINE([JEMALLOC_OSSPIN])
+], [je_cv_osspin])
+if test "x${je_cv_osspin}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_OSSPIN], [ ])
 fi
 
-dnl ============================================================================
-dnl Check for allocator-related functions that should be wrapped.
-
-AC_CHECK_FUNC([memalign],
-	      [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN])])
-AC_CHECK_FUNC([valloc],
-	      [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC])])
-
 dnl ============================================================================
 dnl Darwin-related configuration.
 
 if test "x${abi}" = "xmacho" ; then
-  AC_DEFINE([JEMALLOC_IVSALLOC])
-  AC_DEFINE([JEMALLOC_ZONE])
+  AC_DEFINE([JEMALLOC_IVSALLOC], [ ])
+  AC_DEFINE([JEMALLOC_ZONE], [ ])
 
   dnl The szone version jumped from 3 to 6 between the OS X 10.5.x and 10.6
   dnl releases.  malloc_zone_t and malloc_introspection_t have new fields in
   dnl 10.6, which is the only source-level indication of the change.
   AC_MSG_CHECKING([malloc zone version])
-  AC_TRY_COMPILE([#include <stdlib.h>
-#include <malloc/malloc.h>], [
-	static malloc_zone_t zone;
-	static struct malloc_introspection_t zone_introspect;
-
-	zone.size = NULL;
-	zone.malloc = NULL;
-	zone.calloc = NULL;
-	zone.valloc = NULL;
-	zone.free = NULL;
-	zone.realloc = NULL;
-	zone.destroy = NULL;
-	zone.zone_name = "jemalloc_zone";
-	zone.batch_malloc = NULL;
-	zone.batch_free = NULL;
-	zone.introspect = &zone_introspect;
-	zone.version = 6;
-	zone.memalign = NULL;
-	zone.free_definite_size = NULL;
-
-	zone_introspect.enumerator = NULL;
-	zone_introspect.good_size = NULL;
-	zone_introspect.check = NULL;
-	zone_introspect.print = NULL;
-	zone_introspect.log = NULL;
-	zone_introspect.force_lock = NULL;
-	zone_introspect.force_unlock = NULL;
-	zone_introspect.statistics = NULL;
-	zone_introspect.zone_locked = NULL;
-], [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [6])
-    AC_MSG_RESULT([6])],
-   [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [3])
-   AC_MSG_RESULT([3])])
+  AC_DEFUN([JE_ZONE_PROGRAM],
+    [AC_LANG_PROGRAM(
+      [#include <malloc/malloc.h>],
+      [static int foo[[sizeof($1) $2 sizeof(void *) * $3 ? 1 : -1]]]
+    )])
+
+  AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,14)],[JEMALLOC_ZONE_VERSION=3],[
+  AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,15)],[JEMALLOC_ZONE_VERSION=5],[
+  AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,16)],[
+    AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_introspection_t,==,9)],[JEMALLOC_ZONE_VERSION=6],[
+    AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_introspection_t,==,13)],[JEMALLOC_ZONE_VERSION=7],[JEMALLOC_ZONE_VERSION=]
+  )])],[
+  AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,17)],[JEMALLOC_ZONE_VERSION=8],[
+  AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,>,17)],[JEMALLOC_ZONE_VERSION=9],[JEMALLOC_ZONE_VERSION=]
+  )])])])])
+  if test "x${JEMALLOC_ZONE_VERSION}" = "x"; then
+    AC_MSG_RESULT([unsupported])
+    AC_MSG_ERROR([Unsupported malloc zone version])
+  fi
+  if test "${JEMALLOC_ZONE_VERSION}" = 9; then
+    JEMALLOC_ZONE_VERSION=8
+    AC_MSG_RESULT([> 8])
+  else
+    AC_MSG_RESULT([$JEMALLOC_ZONE_VERSION])
+  fi
+  AC_DEFINE_UNQUOTED(JEMALLOC_ZONE_VERSION, [$JEMALLOC_ZONE_VERSION])
 fi
 
 dnl ============================================================================
 dnl Check for typedefs, structures, and compiler characteristics.
 AC_HEADER_STDBOOL
 
+AC_CONFIG_COMMANDS([include/jemalloc/internal/size_classes.h], [
+  mkdir -p "${objroot}include/jemalloc/internal"
+  "${srcdir}/include/jemalloc/internal/size_classes.sh" > "${objroot}include/jemalloc/internal/size_classes.h"
+])
+
 dnl Process .in files.
 AC_SUBST([cfghdrs_in])
 AC_SUBST([cfghdrs_out])
@@ -881,7 +1227,7 @@ AC_CONFIG_HEADERS([$cfghdrs_tup])
 
 dnl ============================================================================
 dnl Generate outputs.
-AC_CONFIG_FILES([$cfgoutputs_tup config.stamp])
+AC_CONFIG_FILES([$cfgoutputs_tup config.stamp bin/jemalloc.sh])
 AC_SUBST([cfgoutputs_in])
 AC_SUBST([cfgoutputs_out])
 AC_OUTPUT
@@ -889,7 +1235,8 @@ AC_OUTPUT
 dnl ============================================================================
 dnl Print out the results of configuration.
 AC_MSG_RESULT([===============================================================================])
-AC_MSG_RESULT([jemalloc version   : $jemalloc_version])
+AC_MSG_RESULT([jemalloc version   : ${jemalloc_version}])
+AC_MSG_RESULT([library revision   : ${rev}])
 AC_MSG_RESULT([])
 AC_MSG_RESULT([CC                 : ${CC}])
 AC_MSG_RESULT([CPPFLAGS           : ${CPPFLAGS}])
@@ -918,6 +1265,7 @@ AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE])
 AC_MSG_RESULT([                   : ${JEMALLOC_PRIVATE_NAMESPACE}])
 AC_MSG_RESULT([install_suffix     : ${install_suffix}])
 AC_MSG_RESULT([autogen            : ${enable_autogen}])
+AC_MSG_RESULT([experimental       : ${enable_experimental}])
 AC_MSG_RESULT([cc-silence         : ${enable_cc_silence}])
 AC_MSG_RESULT([debug              : ${enable_debug}])
 AC_MSG_RESULT([stats              : ${enable_stats}])
@@ -925,14 +1273,14 @@ AC_MSG_RESULT([prof               : ${enable_prof}])
 AC_MSG_RESULT([prof-libunwind     : ${enable_prof_libunwind}])
 AC_MSG_RESULT([prof-libgcc        : ${enable_prof_libgcc}])
 AC_MSG_RESULT([prof-gcc           : ${enable_prof_gcc}])
-AC_MSG_RESULT([tiny               : ${enable_tiny}])
 AC_MSG_RESULT([tcache             : ${enable_tcache}])
 AC_MSG_RESULT([fill               : ${enable_fill}])
+AC_MSG_RESULT([utrace             : ${enable_utrace}])
+AC_MSG_RESULT([valgrind           : ${enable_valgrind}])
 AC_MSG_RESULT([xmalloc            : ${enable_xmalloc}])
-AC_MSG_RESULT([sysv               : ${enable_sysv}])
-AC_MSG_RESULT([swap               : ${enable_swap}])
+AC_MSG_RESULT([mremap             : ${enable_mremap}])
+AC_MSG_RESULT([munmap             : ${enable_munmap}])
 AC_MSG_RESULT([dss                : ${enable_dss}])
-AC_MSG_RESULT([dynamic_page_shift : ${enable_dynamic_page_shift}])
 AC_MSG_RESULT([lazy_lock          : ${enable_lazy_lock}])
 AC_MSG_RESULT([tls                : ${enable_tls}])
 AC_MSG_RESULT([===============================================================================])
diff --git a/deps/jemalloc/doc/jemalloc.3 b/deps/jemalloc/doc/jemalloc.3
index 0401cfe8..5b5c78c4 100644
--- a/deps/jemalloc/doc/jemalloc.3
+++ b/deps/jemalloc/doc/jemalloc.3
@@ -1,13 +1,13 @@
 '\" t
 .\"     Title: JEMALLOC
 .\"    Author: Jason Evans
-.\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
-.\"      Date: 11/14/2011
+.\" Generator: DocBook XSL Stylesheets v1.76.1 <http://docbook.sf.net/>
+.\"      Date: 05/11/2012
 .\"    Manual: User Manual
-.\"    Source: jemalloc 2.2.5-0-gfc1bb70e5f0d9a58b39efa39cc549b5af5104760
+.\"    Source: jemalloc 3.0.0-0-gfc9b1dbf69f59d7ecfc4ac68da9847e017e1d046
 .\"  Language: English
 .\"
-.TH "JEMALLOC" "3" "11/14/2011" "jemalloc 2.2.5-0-gfc1bb70e5f0d" "User Manual"
+.TH "JEMALLOC" "3" "05/11/2012" "jemalloc 3.0.0-0-gfc9b1dbf69f5" "User Manual"
 .\" -----------------------------------------------------------------
 .\" * Define some portability stuff
 .\" -----------------------------------------------------------------
@@ -31,7 +31,7 @@
 jemalloc \- general purpose memory allocation functions
 .SH "LIBRARY"
 .PP
-This manual describes jemalloc 2\&.2\&.5\-0\-gfc1bb70e5f0d9a58b39efa39cc549b5af5104760\&. More information can be found at the
+This manual describes jemalloc 3\&.0\&.0\-0\-gfc9b1dbf69f59d7ecfc4ac68da9847e017e1d046\&. More information can be found at the
 \m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&.
 .SH "SYNOPSIS"
 .sp
@@ -48,6 +48,8 @@ This manual describes jemalloc 2\&.2\&.5\-0\-gfc1bb70e5f0d9a58b39efa39cc549b5af5
 .BI "void *calloc(size_t\ " "number" ", size_t\ " "size" ");"
 .HP \w'int\ posix_memalign('u
 .BI "int posix_memalign(void\ **" "ptr" ", size_t\ " "alignment" ", size_t\ " "size" ");"
+.HP \w'void\ *aligned_alloc('u
+.BI "void *aligned_alloc(size_t\ " "alignment" ", size_t\ " "size" ");"
 .HP \w'void\ *realloc('u
 .BI "void *realloc(void\ *" "ptr" ", size_t\ " "size" ");"
 .HP \w'void\ free('u
@@ -76,6 +78,8 @@ const char *\fImalloc_conf\fR;
 .BI "int sallocm(const\ void\ *" "ptr" ", size_t\ *" "rsize" ", int\ " "flags" ");"
 .HP \w'int\ dallocm('u
 .BI "int dallocm(void\ *" "ptr" ", int\ " "flags" ");"
+.HP \w'int\ nallocm('u
+.BI "int nallocm(size_t\ *" "rsize" ", size_t\ " "size" ", int\ " "flags" ");"
 .SH "DESCRIPTION"
 .SS "Standard API"
 .PP
@@ -110,6 +114,18 @@ must be a power of 2 at least as large as
 sizeof(\fBvoid *\fR)\&.
 .PP
 The
+\fBaligned_alloc\fR\fB\fR
+function allocates
+\fIsize\fR
+bytes of memory such that the allocation\*(Aqs base address is an even multiple of
+\fIalignment\fR\&. The requested
+\fIalignment\fR
+must be a power of 2\&. Behavior is undefined if
+\fIsize\fR
+is not an integral multiple of
+\fIalignment\fR\&.
+.PP
+The
 \fBrealloc\fR\fB\fR
 function changes the size of the previously allocated memory referenced by
 \fIptr\fR
@@ -236,13 +252,16 @@ for (i = 0; i < nbins; i++) {
 .\}
 .SS "Experimental API"
 .PP
-The experimental API is subject to change or removal without regard for backward compatibility\&.
+The experimental API is subject to change or removal without regard for backward compatibility\&. If
+\fB\-\-disable\-experimental\fR
+is specified during configuration, the experimental API is omitted\&.
 .PP
 The
 \fBallocm\fR\fB\fR,
 \fBrallocm\fR\fB\fR,
-\fBsallocm\fR\fB\fR, and
-\fBdallocm\fR\fB\fR
+\fBsallocm\fR\fB\fR,
+\fBdallocm\fR\fB\fR, and
+\fBnallocm\fR\fB\fR
 functions all have a
 \fIflags\fR
 argument that can be used to specify options\&. The functions only check the options that are contextually relevant\&. Use bitwise or (|) operations to specify one or more of the following:
@@ -286,7 +305,10 @@ to the base address of the allocation, and sets
 to the real size of the allocation if
 \fIrsize\fR
 is not
-\fBNULL\fR\&.
+\fBNULL\fR\&. Behavior is undefined if
+\fIsize\fR
+is
+\fB0\fR\&.
 .PP
 The
 \fBrallocm\fR\fB\fR
@@ -306,6 +328,9 @@ is not
 is non\-zero, an attempt is made to resize the allocation to be at least
 \fIsize\fR + \fIextra\fR)
 bytes, though inability to allocate the extra byte(s) will not by itself result in failure\&. Behavior is undefined if
+\fIsize\fR
+is
+\fB0\fR, or if
 (\fIsize\fR + \fIextra\fR > \fBSIZE_T_MAX\fR)\&.
 .PP
 The
@@ -319,6 +344,23 @@ The
 function causes the memory referenced by
 \fIptr\fR
 to be made available for future allocations\&.
+.PP
+The
+\fBnallocm\fR\fB\fR
+function allocates no memory, but it performs the same size computation as the
+\fBallocm\fR\fB\fR
+function, and if
+\fIrsize\fR
+is not
+\fBNULL\fR
+it sets
+\fI*rsize\fR
+to the real size of the allocation that would result from the equivalent
+\fBallocm\fR\fB\fR
+function call\&. Behavior is undefined if
+\fIsize\fR
+is
+\fB0\fR\&.
 .SH "TUNING"
 .PP
 Once, when the first call is made to one of the memory allocation routines, the allocator initializes its internals based in part on various options that can be specified at compile\- or run\-time\&.
@@ -346,9 +388,9 @@ Traditionally, allocators have used
 to obtain memory, which is suboptimal for several reasons, including race conditions, increased fragmentation, and artificial limitations on maximum usable memory\&. If
 \fB\-\-enable\-dss\fR
 is specified during configuration, this allocator uses both
-\fBsbrk\fR(2)
+\fBmmap\fR(2)
 and
-\fBmmap\fR(2), in that order of preference; otherwise only
+\fBsbrk\fR(2), in that order of preference; otherwise only
 \fBmmap\fR(2)
 is used\&.
 .PP
@@ -364,14 +406,8 @@ User objects are broken into three categories according to size: small, large, a
 .PP
 Each chunk that is managed by an arena tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one large object)\&. The combination of chunk alignment and chunk page maps makes it possible to determine all metadata regarding small and large allocations in constant time\&.
 .PP
-Small objects are managed in groups by page runs\&. Each run maintains a frontier and free list to track which regions are in use\&. Unless
-\fB\-\-disable\-tiny\fR
-is specified during configuration, allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least
-sizeof(\fBvoid *\fR)\&. Allocation requests that are more than half the quantum, but no more than the minimum cacheline\-multiple size class (see the
-"opt\&.lg_qspace_max"
-option) are rounded up to the nearest multiple of the quantum\&. Allocation requests that are more than the minimum cacheline\-multiple size class, but no more than the minimum subpage\-multiple size class (see the
-"opt\&.lg_cspace_max"
-option) are rounded up to the nearest multiple of the cacheline size (64)\&. Allocation requests that are more than the minimum subpage\-multiple size class, but no more than the maximum subpage\-multiple size class are rounded up to the nearest multiple of the subpage size (256)\&. Allocation requests that are more than the maximum subpage\-multiple size class, but small enough to fit in an arena\-managed chunk (see the
+Small objects are managed in groups by page runs\&. Each run maintains a frontier and free list to track which regions are in use\&. Allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least
+sizeof(\fBdouble\fR)\&. All other small object size classes are multiples of the quantum, spaced such that internal fragmentation is limited to approximately 25% for all but the smallest size classes\&. Allocation requests that are larger than the maximum small size class, but small enough to fit in an arena\-managed chunk (see the
 "opt\&.lg_chunk"
 option), are rounded up to the nearest run size\&. Allocation requests that are too large to fit in an arena\-managed chunk are rounded up to the nearest multiple of the chunk size\&.
 .PP
@@ -387,51 +423,73 @@ Table 1\&.
 .B Table\ \&1.\ \&Size classes
 .TS
 allbox tab(:);
-lB lB lB.
+lB rB lB.
 T{
 Category
 T}:T{
-Subcategory
+Spacing
 T}:T{
 Size
 T}
 .T&
-l l l
-^ l l
-^ l l
-^ l l
-l s l
-l s l.
+l r l
+^ r l
+^ r l
+^ r l
+^ r l
+^ r l
+^ r l
+l r l
+l r l.
 T{
 Small
 T}:T{
-Tiny
+lg
 T}:T{
 [8]
 T}
 :T{
-Quantum\-spaced
+16
 T}:T{
 [16, 32, 48, \&.\&.\&., 128]
 T}
 :T{
-Cacheline\-spaced
+32
+T}:T{
+[160, 192, 224, 256]
+T}
+:T{
+64
+T}:T{
+[320, 384, 448, 512]
+T}
+:T{
+128
+T}:T{
+[640, 768, 896, 1024]
+T}
+:T{
+256
 T}:T{
-[192, 256, 320, \&.\&.\&., 512]
+[1280, 1536, 1792, 2048]
 T}
 :T{
-Subpage\-spaced
+512
 T}:T{
-[768, 1024, 1280, \&.\&.\&., 3840]
+[2560, 3072, 3584]
 T}
 T{
 Large
 T}:T{
+4 KiB
+T}:T{
 [4 KiB, 8 KiB, 12 KiB, \&.\&.\&., 4072 KiB]
 T}
 T{
 Huge
 T}:T{
+4 MiB
+T}:T{
 [4 MiB, 8 MiB, 12 MiB, \&.\&.\&.]
 T}
 .TE
@@ -481,12 +539,6 @@ was specified during build configuration\&.
 was specified during build configuration\&.
 .RE
 .PP
-"config\&.dynamic_page_shift" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-enable\-dynamic\-page\-shift\fR
-was specified during build configuration\&.
-.RE
-.PP
 "config\&.fill" (\fBbool\fR) r\-
 .RS 4
 \fB\-\-enable\-fill\fR
@@ -499,6 +551,18 @@ was specified during build configuration\&.
 was specified during build configuration\&.
 .RE
 .PP
+"config\&.mremap" (\fBbool\fR) r\-
+.RS 4
+\fB\-\-enable\-mremap\fR
+was specified during build configuration\&.
+.RE
+.PP
+"config\&.munmap" (\fBbool\fR) r\-
+.RS 4
+\fB\-\-enable\-munmap\fR
+was specified during build configuration\&.
+.RE
+.PP
 "config\&.prof" (\fBbool\fR) r\-
 .RS 4
 \fB\-\-enable\-prof\fR
@@ -523,34 +587,28 @@ was specified during build configuration\&.
 was specified during build configuration\&.
 .RE
 .PP
-"config\&.swap" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-enable\-swap\fR
-was specified during build configuration\&.
-.RE
-.PP
-"config\&.sysv" (\fBbool\fR) r\-
-.RS 4
-\fB\-\-enable\-sysv\fR
-was specified during build configuration\&.
-.RE
-.PP
 "config\&.tcache" (\fBbool\fR) r\-
 .RS 4
 \fB\-\-disable\-tcache\fR
 was not specified during build configuration\&.
 .RE
 .PP
-"config\&.tiny" (\fBbool\fR) r\-
+"config\&.tls" (\fBbool\fR) r\-
 .RS 4
-\fB\-\-disable\-tiny\fR
+\fB\-\-disable\-tls\fR
 was not specified during build configuration\&.
 .RE
 .PP
-"config\&.tls" (\fBbool\fR) r\-
+"config\&.utrace" (\fBbool\fR) r\-
 .RS 4
-\fB\-\-disable\-tls\fR
-was not specified during build configuration\&.
+\fB\-\-enable\-utrace\fR
+was specified during build configuration\&.
+.RE
+.PP
+"config\&.valgrind" (\fBbool\fR) r\-
+.RS 4
+\fB\-\-enable\-valgrind\fR
+was specified during build configuration\&.
 .RE
 .PP
 "config\&.xmalloc" (\fBbool\fR) r\-
@@ -568,16 +626,6 @@ in these cases\&. This option is disabled by default unless
 is specified during configuration, in which case it is enabled by default\&.
 .RE
 .PP
-"opt\&.lg_qspace_max" (\fBsize_t\fR) r\-
-.RS 4
-Size (log base 2) of the maximum size class that is a multiple of the quantum (8 or 16 bytes, depending on architecture)\&. Above this size, cacheline spacing is used for size classes\&. The default value is 128 bytes (2^7)\&.
-.RE
-.PP
-"opt\&.lg_cspace_max" (\fBsize_t\fR) r\-
-.RS 4
-Size (log base 2) of the maximum size class that is a multiple of the cacheline size (64)\&. Above this size, subpage spacing (256 bytes) is used for size classes\&. The default value is 512 bytes (2^9)\&.
-.RE
-.PP
 "opt\&.lg_chunk" (\fBsize_t\fR) r\-
 .RS 4
 Virtual memory chunk size (log base 2)\&. The default chunk size is 4 MiB (2^22)\&.
@@ -615,6 +663,22 @@ Junk filling enabled/disabled\&. If enabled, each byte of uninitialized allocate
 is specified during configuration, in which case it is enabled by default\&.
 .RE
 .PP
+"opt\&.quarantine" (\fBsize_t\fR) r\- [\fB\-\-enable\-fill\fR]
+.RS 4
+Per thread quarantine size in bytes\&. If non\-zero, each thread maintains a FIFO object quarantine that stores up to the specified number of bytes of memory\&. The quarantined memory is not freed until it is released from quarantine, though it is immediately junk\-filled if the
+"opt\&.junk"
+option is enabled\&. This feature is of particular use in combination with
+\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, which can detect attempts to access quarantined objects\&. This is intended for debugging and will impact performance negatively\&. The default quarantine size is 0\&.
+.RE
+.PP
+"opt\&.redzone" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR]
+.RS 4
+Redzones enabled/disabled\&. If enabled, small allocations have redzones before and after them\&. Furthermore, if the
+"opt\&.junk"
+option is enabled, the redzones are checked for corruption during deallocation\&. However, the primary intended purpose of this feature is to be used in combination with
+\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2, which needs redzones in order to do effective buffer overflow/underflow detection\&. This option is intended for debugging and will impact performance negatively\&. This option is disabled by default\&.
+.RE
+.PP
 "opt\&.zero" (\fBbool\fR) r\- [\fB\-\-enable\-fill\fR]
 .RS 4
 Zero filling enabled/disabled\&. If enabled, each byte of uninitialized allocated memory will be initialized to 0\&. Note that this initialization only happens once for each byte, so
@@ -624,13 +688,25 @@ and
 calls do not zero memory that was previously allocated\&. This is intended for debugging and will impact performance negatively\&. This option is disabled by default\&.
 .RE
 .PP
-"opt\&.sysv" (\fBbool\fR) r\- [\fB\-\-enable\-sysv\fR]
+"opt\&.utrace" (\fBbool\fR) r\- [\fB\-\-enable\-utrace\fR]
 .RS 4
-If enabled, attempting to allocate zero bytes will return a
-\fBNULL\fR
-pointer instead of a valid pointer\&. (The default behavior is to make a minimal allocation and return a pointer to it\&.) This option is provided for System V compatibility\&. This option is incompatible with the
-"opt\&.xmalloc"
-option\&. This option is disabled by default\&.
+Allocation tracing based on
+\fButrace\fR(2)
+enabled/disabled\&. This option is disabled by default\&.
+.RE
+.PP
+"opt\&.valgrind" (\fBbool\fR) r\- [\fB\-\-enable\-valgrind\fR]
+.RS 4
+\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2
+support enabled/disabled\&. If enabled, several other options are automatically modified during options processing to work well with Valgrind:
+"opt\&.junk"
+and
+"opt\&.zero"
+are set to false,
+"opt\&.quarantine"
+is set to 16 MiB, and
+"opt\&.redzone"
+is set to true\&. This option is disabled by default\&.
 .RE
 .PP
 "opt\&.xmalloc" (\fBbool\fR) r\- [\fB\-\-enable\-xmalloc\fR]
@@ -656,15 +732,8 @@ This option is disabled by default\&.
 "opt\&.tcache" (\fBbool\fR) r\- [\fB\-\-enable\-tcache\fR]
 .RS 4
 Thread\-specific caching enabled/disabled\&. When there are multiple threads, each thread uses a thread\-specific cache for objects up to a certain size\&. Thread\-specific caching allows many allocations to be satisfied without performing any thread synchronization, at the cost of increased memory use\&. See the
-"opt\&.lg_tcache_gc_sweep"
-and
 "opt\&.lg_tcache_max"
-options for related tuning information\&. This option is enabled by default\&.
-.RE
-.PP
-"opt\&.lg_tcache_gc_sweep" (\fBssize_t\fR) r\- [\fB\-\-enable\-tcache\fR]
-.RS 4
-Approximate interval (log base 2) between full thread\-specific cache garbage collection sweeps, counted in terms of thread\-specific cache allocation/deallocation events\&. Garbage collection is actually performed incrementally, one size class at a time, in order to avoid large collection pauses\&. The default sweep interval is 8192 (2^13); setting this option to \-1 will disable garbage collection\&.
+option for related tuning information\&. This option is enabled by default\&.
 .RE
 .PP
 "opt\&.lg_tcache_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR]
@@ -674,31 +743,22 @@ Maximum size class (log base 2) to cache in the thread\-specific cache\&. At a m
 .PP
 "opt\&.prof" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
 .RS 4
-Memory profiling enabled/disabled\&. If enabled, profile memory allocation activity, and use an
-\fBatexit\fR(3)
-function to dump final memory usage to a file named according to the pattern
-<prefix>\&.<pid>\&.<seq>\&.f\&.heap, where
-<prefix>
-is controlled by the
-"opt\&.prof_prefix"
-option\&. See the
-"opt\&.lg_prof_bt_max"
-option for backtrace depth control\&. See the
+Memory profiling enabled/disabled\&. If enabled, profile memory allocation activity\&. See the
 "opt\&.prof_active"
 option for on\-the\-fly activation/deactivation\&. See the
 "opt\&.lg_prof_sample"
 option for probabilistic sampling control\&. See the
 "opt\&.prof_accum"
 option for control of cumulative sample reporting\&. See the
-"opt\&.lg_prof_tcmax"
-option for control of per thread backtrace caching\&. See the
 "opt\&.lg_prof_interval"
-option for information on interval\-triggered profile dumping, and the
+option for information on interval\-triggered profile dumping, the
 "opt\&.prof_gdump"
-option for information on high\-water\-triggered profile dumping\&. Profile output is compatible with the included
+option for information on high\-water\-triggered profile dumping, and the
+"opt\&.prof_final"
+option for final profile dumping\&. Profile output is compatible with the included
 \fBpprof\fR
 Perl script, which originates from the
-\m[blue]\fBgoogle\-perftools package\fR\m[]\&\s-2\u[2]\d\s+2\&.
+\m[blue]\fBgperftools package\fR\m[]\&\s-2\u[3]\d\s+2\&.
 .RE
 .PP
 "opt\&.prof_prefix" (\fBconst char *\fR) r\- [\fB\-\-enable\-prof\fR]
@@ -707,11 +767,6 @@ Filename prefix for profile dumps\&. If the prefix is set to the empty string, n
 jeprof\&.
 .RE
 .PP
-"opt\&.lg_prof_bt_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-prof\fR]
-.RS 4
-Maximum backtrace depth (log base 2) when profiling memory allocation activity\&. The default is 128 (2^7)\&.
-.RE
-.PP
 "opt\&.prof_active" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
 .RS 4
 Profiling activated/deactivated\&. This is a secondary control mechanism that makes it possible to start the application with profiling enabled (see the
@@ -723,21 +778,12 @@ mallctl\&. This option is enabled by default\&.
 .PP
 "opt\&.lg_prof_sample" (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR]
 .RS 4
-Average interval (log base 2) between allocation samples, as measured in bytes of allocation activity\&. Increasing the sampling interval decreases profile fidelity, but also decreases the computational overhead\&. The default sample interval is 1 (2^0) (i\&.e\&. all allocations are sampled)\&.
+Average interval (log base 2) between allocation samples, as measured in bytes of allocation activity\&. Increasing the sampling interval decreases profile fidelity, but also decreases the computational overhead\&. The default sample interval is 512 KiB (2^19 B)\&.
 .RE
 .PP
 "opt\&.prof_accum" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
 .RS 4
-Reporting of cumulative object/byte counts in profile dumps enabled/disabled\&. If this option is enabled, every unique backtrace must be stored for the duration of execution\&. Depending on the application, this can impose a large memory overhead, and the cumulative counts are not always of interest\&. See the
-"opt\&.lg_prof_tcmax"
-option for control of per thread backtrace caching, which has important interactions\&. This option is enabled by default\&.
-.RE
-.PP
-"opt\&.lg_prof_tcmax" (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR]
-.RS 4
-Maximum per thread backtrace cache (log base 2) used for heap profiling\&. A backtrace can only be discarded if the
-"opt\&.prof_accum"
-option is disabled, and no thread caches currently refer to the backtrace\&. Therefore, a backtrace cache limit should be imposed if the intention is to limit how much memory is used by backtraces\&. By default, no limit is imposed (encoded as \-1)\&.
+Reporting of cumulative object/byte counts in profile dumps enabled/disabled\&. If this option is enabled, every unique backtrace must be stored for the duration of execution\&. Depending on the application, this can impose a large memory overhead, and the cumulative counts are not always of interest\&. This option is disabled by default\&.
 .RE
 .PP
 "opt\&.lg_prof_interval" (\fBssize_t\fR) r\- [\fB\-\-enable\-prof\fR]
@@ -760,33 +806,27 @@ is controlled by the
 option\&. This option is disabled by default\&.
 .RE
 .PP
+"opt\&.prof_final" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
+.RS 4
+Use an
+\fBatexit\fR(3)
+function to dump final memory usage to a file named according to the pattern
+<prefix>\&.<pid>\&.<seq>\&.f\&.heap, where
+<prefix>
+is controlled by the
+"opt\&.prof_prefix"
+option\&. This option is enabled by default\&.
+.RE
+.PP
 "opt\&.prof_leak" (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR]
 .RS 4
 Leak reporting enabled/disabled\&. If enabled, use an
 \fBatexit\fR(3)
 function to report memory leaks detected by allocation sampling\&. See the
-"opt\&.lg_prof_bt_max"
-option for backtrace depth control\&. See the
 "opt\&.prof"
 option for information on analyzing heap profile output\&. This option is disabled by default\&.
 .RE
 .PP
-"opt\&.overcommit" (\fBbool\fR) r\- [\fB\-\-enable\-swap\fR]
-.RS 4
-Over\-commit enabled/disabled\&. If enabled, over\-commit memory as a side effect of using anonymous
-\fBmmap\fR(2)
-or
-\fBsbrk\fR(2)
-for virtual memory allocation\&. In order for overcommit to be disabled, the
-"swap\&.fds"
-mallctl must have been successfully written to\&. This option is enabled by default\&.
-.RE
-.PP
-"tcache\&.flush" (\fBvoid\fR) \-\- [\fB\-\-enable\-tcache\fR]
-.RS 4
-Flush calling thread\*(Aqs tcache\&. This interface releases all cached objects and internal data structures associated with the calling thread\*(Aqs thread\-specific cache\&. Ordinarily, this interface need not be called, since automatic periodic incremental garbage collection occurs, and the thread cache is automatically discarded when a thread exits\&. However, garbage collection is triggered by allocation activity, so it is possible for a thread that stops allocating/deallocating to retain its cache indefinitely, in which case the developer may find manual flushing useful\&.
-.RE
-.PP
 "thread\&.arena" (\fBunsigned\fR) rw
 .RS 4
 Get or set the arena associated with the calling thread\&. The arena index must be less than the maximum number of arenas (see the
@@ -824,6 +864,17 @@ mallctl\&. This is useful for avoiding the overhead of repeated
 calls\&.
 .RE
 .PP
+"thread\&.tcache\&.enabled" (\fBbool\fR) rw [\fB\-\-enable\-tcache\fR]
+.RS 4
+Enable/disable calling thread\*(Aqs tcache\&. The tcache is implicitly flushed as a side effect of becoming disabled (see
+"thread\&.tcache\&.flush")\&.
+.RE
+.PP
+"thread\&.tcache\&.flush" (\fBvoid\fR) \-\- [\fB\-\-enable\-tcache\fR]
+.RS 4
+Flush calling thread\*(Aqs tcache\&. This interface releases all cached objects and internal data structures associated with the calling thread\*(Aqs thread\-specific cache\&. Ordinarily, this interface need not be called, since automatic periodic incremental garbage collection occurs, and the thread cache is automatically discarded when a thread exits\&. However, garbage collection is triggered by allocation activity, so it is possible for a thread that stops allocating/deallocating to retain its cache indefinitely, in which case the developer may find manual flushing useful\&.
+.RE
+.PP
 "arenas\&.narenas" (\fBunsigned\fR) r\-
 .RS 4
 Maximum number of arenas\&.
@@ -841,94 +892,19 @@ booleans\&. Each boolean indicates whether the corresponding arena is initialize
 Quantum size\&.
 .RE
 .PP
-"arenas\&.cacheline" (\fBsize_t\fR) r\-
-.RS 4
-Assumed cacheline size\&.
-.RE
-.PP
-"arenas\&.subpage" (\fBsize_t\fR) r\-
-.RS 4
-Subpage size class interval\&.
-.RE
-.PP
-"arenas\&.pagesize" (\fBsize_t\fR) r\-
+"arenas\&.page" (\fBsize_t\fR) r\-
 .RS 4
 Page size\&.
 .RE
 .PP
-"arenas\&.chunksize" (\fBsize_t\fR) r\-
-.RS 4
-Chunk size\&.
-.RE
-.PP
-"arenas\&.tspace_min" (\fBsize_t\fR) r\-
-.RS 4
-Minimum tiny size class\&. Tiny size classes are powers of two\&.
-.RE
-.PP
-"arenas\&.tspace_max" (\fBsize_t\fR) r\-
-.RS 4
-Maximum tiny size class\&. Tiny size classes are powers of two\&.
-.RE
-.PP
-"arenas\&.qspace_min" (\fBsize_t\fR) r\-
-.RS 4
-Minimum quantum\-spaced size class\&.
-.RE
-.PP
-"arenas\&.qspace_max" (\fBsize_t\fR) r\-
-.RS 4
-Maximum quantum\-spaced size class\&.
-.RE
-.PP
-"arenas\&.cspace_min" (\fBsize_t\fR) r\-
-.RS 4
-Minimum cacheline\-spaced size class\&.
-.RE
-.PP
-"arenas\&.cspace_max" (\fBsize_t\fR) r\-
-.RS 4
-Maximum cacheline\-spaced size class\&.
-.RE
-.PP
-"arenas\&.sspace_min" (\fBsize_t\fR) r\-
-.RS 4
-Minimum subpage\-spaced size class\&.
-.RE
-.PP
-"arenas\&.sspace_max" (\fBsize_t\fR) r\-
-.RS 4
-Maximum subpage\-spaced size class\&.
-.RE
-.PP
 "arenas\&.tcache_max" (\fBsize_t\fR) r\- [\fB\-\-enable\-tcache\fR]
 .RS 4
 Maximum thread\-cached size class\&.
 .RE
 .PP
-"arenas\&.ntbins" (\fBunsigned\fR) r\-
-.RS 4
-Number of tiny bin size classes\&.
-.RE
-.PP
-"arenas\&.nqbins" (\fBunsigned\fR) r\-
-.RS 4
-Number of quantum\-spaced bin size classes\&.
-.RE
-.PP
-"arenas\&.ncbins" (\fBunsigned\fR) r\-
-.RS 4
-Number of cacheline\-spaced bin size classes\&.
-.RE
-.PP
-"arenas\&.nsbins" (\fBunsigned\fR) r\-
-.RS 4
-Number of subpage\-spaced bin size classes\&.
-.RE
-.PP
 "arenas\&.nbins" (\fBunsigned\fR) r\-
 .RS 4
-Total number of bin size classes\&.
+Number of bin size classes\&.
 .RE
 .PP
 "arenas\&.nhbins" (\fBunsigned\fR) r\- [\fB\-\-enable\-tcache\fR]
@@ -1011,12 +987,12 @@ Total number of bytes in active pages allocated by the application\&. This is a
 "stats\&.mapped" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
 .RS 4
 Total number of bytes in chunks mapped on behalf of the application\&. This is a multiple of the chunk size, and is at least as large as
-"stats\&.active"\&. This does not include inactive chunks backed by swap files\&. his does not include inactive chunks embedded in the DSS\&.
+"stats\&.active"\&. This does not include inactive chunks\&.
 .RE
 .PP
 "stats\&.chunks\&.current" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
 .RS 4
-Total number of chunks actively mapped on behalf of the application\&. This does not include inactive chunks backed by swap files\&. This does not include inactive chunks embedded in the DSS\&.
+Total number of chunks actively mapped on behalf of the application\&. This does not include inactive chunks\&.
 .RE
 .PP
 "stats\&.chunks\&.total" (\fBuint64_t\fR) r\- [\fB\-\-enable\-stats\fR]
@@ -1163,11 +1139,6 @@ Cumulative number of runs created\&.
 Cumulative number of times the current run from which to allocate changed\&.
 .RE
 .PP
-"stats\&.arenas\&.<i>\&.bins\&.<j>\&.highruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Maximum number of runs at any time thus far\&.
-.RE
-.PP
 "stats\&.arenas\&.<i>\&.bins\&.<j>\&.curruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
 .RS 4
 Current number of runs\&.
@@ -1188,44 +1159,10 @@ Cumulative number of deallocation requests for this size class served directly b
 Cumulative number of allocation requests for this size class\&.
 .RE
 .PP
-"stats\&.arenas\&.<i>\&.lruns\&.<j>\&.highruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
-.RS 4
-Maximum number of runs at any time thus far for this size class\&.
-.RE
-.PP
 "stats\&.arenas\&.<i>\&.lruns\&.<j>\&.curruns" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR]
 .RS 4
 Current number of runs for this size class\&.
 .RE
-.PP
-"swap\&.avail" (\fBsize_t\fR) r\- [\fB\-\-enable\-stats \-\-enable\-swap\fR]
-.RS 4
-Number of swap file bytes that are currently not associated with any chunk (i\&.e\&. mapped, but otherwise completely unmanaged)\&.
-.RE
-.PP
-"swap\&.prezeroed" (\fBbool\fR) rw [\fB\-\-enable\-swap\fR]
-.RS 4
-If true, the allocator assumes that the swap file(s) contain nothing but nil bytes\&. If this assumption is violated, allocator behavior is undefined\&. This value becomes read\-only after
-"swap\&.fds"
-is successfully written to\&.
-.RE
-.PP
-"swap\&.nfds" (\fBsize_t\fR) r\- [\fB\-\-enable\-swap\fR]
-.RS 4
-Number of file descriptors in use for swap\&.
-.RE
-.PP
-"swap\&.fds" (\fBint *\fR) rw [\fB\-\-enable\-swap\fR]
-.RS 4
-When written to, the files associated with the specified file descriptors are contiguously mapped via
-\fBmmap\fR(2)\&. The resulting virtual memory region is preferred over anonymous
-\fBmmap\fR(2)
-and
-\fBsbrk\fR(2)
-memory\&. Note that if a file\*(Aqs size is not a multiple of the page size, it is automatically truncated to the nearest page size multiple\&. See the
-"swap\&.prezeroed"
-mallctl for specifying that the files are pre\-zeroed\&.
-.RE
 .SH "DEBUGGING MALLOC PROBLEMS"
 .PP
 When debugging, it is a good idea to configure/build jemalloc with the
@@ -1240,7 +1177,13 @@ option) tends to expose such bugs in the form of obviously incorrect results and
 "opt\&.zero"
 option) eliminates the symptoms of such bugs\&. Between these two options, it is usually possible to quickly detect, diagnose, and eliminate such bugs\&.
 .PP
-This implementation does not provide much detail about the problems it detects, because the performance impact for storing such information would be prohibitive\&. There are a number of allocator implementations available on the Internet which focus on detecting and pinpointing problems by trading performance for extra sanity checks and detailed diagnostics\&.
+This implementation does not provide much detail about the problems it detects, because the performance impact for storing such information would be prohibitive\&. However, jemalloc does integrate with the most excellent
+\m[blue]\fBValgrind\fR\m[]\&\s-2\u[2]\d\s+2
+tool if the
+\fB\-\-enable\-valgrind\fR
+configuration option is enabled and the
+"opt\&.valgrind"
+option is enabled\&.
 .SH "DIAGNOSTIC MESSAGES"
 .PP
 If any of the memory allocation/deallocation functions detect an error or warning condition, a message will be printed to file descriptor
@@ -1296,6 +1239,28 @@ Memory allocation error\&.
 .RE
 .PP
 The
+\fBaligned_alloc\fR\fB\fR
+function returns a pointer to the allocated memory if successful; otherwise a
+\fBNULL\fR
+pointer is returned and
+\fIerrno\fR
+is set\&. The
+\fBaligned_alloc\fR\fB\fR
+function will fail if:
+.PP
+EINVAL
+.RS 4
+The
+\fIalignment\fR
+parameter is not a power of 2\&.
+.RE
+.PP
+ENOMEM
+.RS 4
+Memory allocation error\&.
+.RE
+.PP
+The
 \fBrealloc\fR\fB\fR
 function returns a pointer, possibly identical to
 \fIptr\fR, to the allocated memory if successful; otherwise a
@@ -1370,14 +1335,15 @@ read/write processing\&.
 The
 \fBallocm\fR\fB\fR,
 \fBrallocm\fR\fB\fR,
-\fBsallocm\fR\fB\fR, and
-\fBdallocm\fR\fB\fR
+\fBsallocm\fR\fB\fR,
+\fBdallocm\fR\fB\fR, and
+\fBnallocm\fR\fB\fR
 functions return
 \fBALLOCM_SUCCESS\fR
 on success; otherwise they return an error value\&. The
-\fBallocm\fR\fB\fR
-and
-\fBrallocm\fR\fB\fR
+\fBallocm\fR\fB\fR,
+\fBrallocm\fR\fB\fR, and
+\fBnallocm\fR\fB\fR
 functions will fail if:
 .PP
 ALLOCM_ERR_OOM
@@ -1442,6 +1408,7 @@ malloc_conf = "lg_chunk:24";
 \fBmadvise\fR(2),
 \fBmmap\fR(2),
 \fBsbrk\fR(2),
+\fButrace\fR(2),
 \fBalloca\fR(3),
 \fBatexit\fR(3),
 \fBgetpagesize\fR(3)
@@ -1469,7 +1436,12 @@ jemalloc website
 \%http://www.canonware.com/jemalloc/
 .RE
 .IP " 2." 4
-google-perftools package
+Valgrind
+.RS 4
+\%http://valgrind.org/
+.RE
+.IP " 3." 4
+gperftools package
 .RS 4
-\%http://code.google.com/p/google-perftools/
+\%http://code.google.com/p/gperftools/
 .RE
diff --git a/deps/jemalloc/doc/jemalloc.html b/deps/jemalloc/doc/jemalloc.html
index fc2ba878..415e298d 100644
--- a/deps/jemalloc/doc/jemalloc.html
+++ b/deps/jemalloc/doc/jemalloc.html
@@ -1,8 +1,8 @@
-<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>JEMALLOC</title><meta name="generator" content="DocBook XSL Stylesheets V1.75.2"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry" title="JEMALLOC"><a name="id2968890"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>jemalloc &#8212; general purpose memory allocation functions</p></div><div class="refsect1" title="LIBRARY"><a name="library"></a><h2>LIBRARY</h2><p>This manual describes jemalloc 2.2.5-0-gfc1bb70e5f0d9a58b39efa39cc549b5af5104760.  More information
+<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>JEMALLOC</title><meta name="generator" content="DocBook XSL Stylesheets V1.76.1"></head><body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF"><div class="refentry" title="JEMALLOC"><a name="id286949398"></a><div class="titlepage"></div><div class="refnamediv"><h2>Name</h2><p>jemalloc &#8212; general purpose memory allocation functions</p></div><div class="refsect1" title="LIBRARY"><a name="library"></a><h2>LIBRARY</h2><p>This manual describes jemalloc 3.0.0-0-gfc9b1dbf69f59d7ecfc4ac68da9847e017e1d046.  More information
     can be found at the <a class="ulink" href="http://www.canonware.com/jemalloc/" target="_top">jemalloc website</a>.</p></div><div class="refsynopsisdiv" title="SYNOPSIS"><h2>SYNOPSIS</h2><div class="funcsynopsis"><pre class="funcsynopsisinfo">#include &lt;<code class="filename">stdlib.h</code>&gt;
-#include &lt;<code class="filename">jemalloc/jemalloc.h</code>&gt;</pre><div class="refsect2" title="Standard API"><a name="id2992781"></a><h3>Standard API</h3><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">void *<b class="fsfunc">malloc</b>(</code></td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">void *<b class="fsfunc">calloc</b>(</code></td><td>size_t <var class="pdparam">number</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">int <b class="fsfunc">posix_memalign</b>(</code></td><td>void **<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">alignment</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">void *<b class="fsfunc">realloc</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">void <b class="fsfunc">free</b>(</code></td><td>void *<var class="pdparam">ptr</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div></div><div class="refsect2" title="Non-standard API"><a 
name="id2998350"></a><h3>Non-standard API</h3><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">size_t <b class="fsfunc">malloc_usable_size</b>(</code></td><td>const void *<var class="pdparam">ptr</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">void <b class="fsfunc">malloc_stats_print</b>(</code></td><td>void <var class="pdparam">(*write_cb)</var>
+#include &lt;<code class="filename">jemalloc/jemalloc.h</code>&gt;</pre><div class="refsect2" title="Standard API"><a name="id286949667"></a><h3>Standard API</h3><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">void *<b class="fsfunc">malloc</b>(</code></td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">void *<b class="fsfunc">calloc</b>(</code></td><td>size_t <var class="pdparam">number</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">int <b class="fsfunc">posix_memalign</b>(</code></td><td>void **<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">alignment</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">void *<b class="fsfunc">aligned_alloc</b>(</code></td><td>size_t <var class="pdparam">alignment</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">void *<b class="fsfunc">realloc</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var><code>)</code>;</td></tr></table><div 
class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">void <b class="fsfunc">free</b>(</code></td><td>void *<var class="pdparam">ptr</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div></div><div class="refsect2" title="Non-standard API"><a name="id286949803"></a><h3>Non-standard API</h3><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">size_t <b class="fsfunc">malloc_usable_size</b>(</code></td><td>const void *<var class="pdparam">ptr</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">void <b class="fsfunc">malloc_stats_print</b>(</code></td><td>void <var class="pdparam">(*write_cb)</var>
             <code>(</code>void *, const char *<code>)</code>
-          , </td></tr><tr><td> </td><td>void *<var class="pdparam">cbopaque</var>, </td></tr><tr><td> </td><td>const char *<var class="pdparam">opts</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">int <b class="fsfunc">mallctl</b>(</code></td><td>const char *<var class="pdparam">name</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">oldp</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">oldlenp</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">newp</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">newlen</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">int <b class="fsfunc">mallctlnametomib</b>(</code></td><td>const char *<var class="pdparam">name</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">mibp</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">miblenp</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">int <b class="fsfunc">mallctlbymib</b>(</code></td><td>const size_t *<var class="pdparam">mib</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">miblen</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">oldp</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">oldlenp</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">newp</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">newlen</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" 
class="funcprototype-table"><tr><td><code class="funcdef">void <b class="fsfunc">(*malloc_message)</b>(</code></td><td>void *<var class="pdparam">cbopaque</var>, </td></tr><tr><td> </td><td>const char *<var class="pdparam">s</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><p><span class="type">const char *</span><code class="varname">malloc_conf</code>;</p></div><div class="refsect2" title="Experimental API"><a name="id3014125"></a><h3>Experimental API</h3><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">int <b class="fsfunc">allocm</b>(</code></td><td>void **<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">rsize</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">int <b class="fsfunc">rallocm</b>(</code></td><td>void **<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">rsize</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">extra</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">int <b class="fsfunc">sallocm</b>(</code></td><td>const void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">rsize</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> 
</div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">int <b class="fsfunc">dallocm</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div></div></div></div><div class="refsect1" title="DESCRIPTION"><a name="description"></a><h2>DESCRIPTION</h2><div class="refsect2" title="Standard API"><a name="id3014924"></a><h3>Standard API</h3><p>The <code class="function">malloc</code>(<em class="parameter"><code></code></em>) function allocates
+          , </td></tr><tr><td> </td><td>void *<var class="pdparam">cbopaque</var>, </td></tr><tr><td> </td><td>const char *<var class="pdparam">opts</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">int <b class="fsfunc">mallctl</b>(</code></td><td>const char *<var class="pdparam">name</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">oldp</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">oldlenp</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">newp</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">newlen</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">int <b class="fsfunc">mallctlnametomib</b>(</code></td><td>const char *<var class="pdparam">name</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">mibp</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">miblenp</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">int <b class="fsfunc">mallctlbymib</b>(</code></td><td>const size_t *<var class="pdparam">mib</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">miblen</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">oldp</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">oldlenp</var>, </td></tr><tr><td> </td><td>void *<var class="pdparam">newp</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">newlen</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" 
class="funcprototype-table"><tr><td><code class="funcdef">void <b class="fsfunc">(*malloc_message)</b>(</code></td><td>void *<var class="pdparam">cbopaque</var>, </td></tr><tr><td> </td><td>const char *<var class="pdparam">s</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><p><span class="type">const char *</span><code class="varname">malloc_conf</code>;</p></div><div class="refsect2" title="Experimental API"><a name="id286950044"></a><h3>Experimental API</h3><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">int <b class="fsfunc">allocm</b>(</code></td><td>void **<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">rsize</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">int <b class="fsfunc">rallocm</b>(</code></td><td>void **<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">rsize</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">extra</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">int <b class="fsfunc">sallocm</b>(</code></td><td>const void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>size_t *<var class="pdparam">rsize</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> 
</div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">int <b class="fsfunc">dallocm</b>(</code></td><td>void *<var class="pdparam">ptr</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div><table border="0" summary="Function synopsis" cellspacing="0" cellpadding="0" class="funcprototype-table"><tr><td><code class="funcdef">int <b class="fsfunc">nallocm</b>(</code></td><td>size_t *<var class="pdparam">rsize</var>, </td></tr><tr><td> </td><td>size_t <var class="pdparam">size</var>, </td></tr><tr><td> </td><td>int <var class="pdparam">flags</var><code>)</code>;</td></tr></table><div class="funcprototype-spacer"> </div></div></div></div><div class="refsect1" title="DESCRIPTION"><a name="description"></a><h2>DESCRIPTION</h2><div class="refsect2" title="Standard API"><a name="id286950218"></a><h3>Standard API</h3><p>The <code class="function">malloc</code>(<em class="parameter"><code></code></em>) function allocates
       <em class="parameter"><code>size</code></em> bytes of uninitialized memory.  The allocated
       space is suitably aligned (after possible pointer coercion) for storage
       of any type of object.</p><p>The <code class="function">calloc</code>(<em class="parameter"><code></code></em>) function allocates
@@ -17,7 +17,13 @@
       <em class="parameter"><code>alignment</code></em>, and returns the allocation in the value
       pointed to by <em class="parameter"><code>ptr</code></em>.  The requested
       <em class="parameter"><code>alignment</code></em> must be a power of 2 at least as large
-      as <code class="code">sizeof(<span class="type">void *</span>)</code>.</p><p>The <code class="function">realloc</code>(<em class="parameter"><code></code></em>) function changes the
+      as <code class="code">sizeof(<span class="type">void *</span>)</code>.</p><p>The <code class="function">aligned_alloc</code>(<em class="parameter"><code></code></em>) function
+      allocates <em class="parameter"><code>size</code></em> bytes of memory such that the
+      allocation's base address is an even multiple of
+      <em class="parameter"><code>alignment</code></em>.  The requested
+      <em class="parameter"><code>alignment</code></em> must be a power of 2.  Behavior is
+      undefined if <em class="parameter"><code>size</code></em> is not an integral multiple of
+      <em class="parameter"><code>alignment</code></em>.</p><p>The <code class="function">realloc</code>(<em class="parameter"><code></code></em>) function changes the
       size of the previously allocated memory referenced by
       <em class="parameter"><code>ptr</code></em> to <em class="parameter"><code>size</code></em> bytes.  The
       contents of the memory are unchanged up to the lesser of the new and old
@@ -32,7 +38,7 @@
       <code class="function">malloc</code>(<em class="parameter"><code></code></em>) for the specified size.</p><p>The <code class="function">free</code>(<em class="parameter"><code></code></em>) function causes the
       allocated memory referenced by <em class="parameter"><code>ptr</code></em> to be made
       available for future allocations.  If <em class="parameter"><code>ptr</code></em> is
-      <code class="constant">NULL</code>, no action occurs.</p></div><div class="refsect2" title="Non-standard API"><a name="id3025603"></a><h3>Non-standard API</h3><p>The <code class="function">malloc_usable_size</code>(<em class="parameter"><code></code></em>) function
+      <code class="constant">NULL</code>, no action occurs.</p></div><div class="refsect2" title="Non-standard API"><a name="id286950483"></a><h3>Non-standard API</h3><p>The <code class="function">malloc_usable_size</code>(<em class="parameter"><code></code></em>) function
       returns the usable size of the allocation pointed to by
       <em class="parameter"><code>ptr</code></em>.  The return value may be larger than the size
       that was requested during allocation.  The
@@ -112,11 +118,14 @@ for (i = 0; i &lt; nbins; i++) {
 	len = sizeof(bin_size);
 	mallctlbymib(mib, miblen, &amp;bin_size, &amp;len, NULL, 0);
 	/* Do something with bin_size... */
-}</pre></div><div class="refsect2" title="Experimental API"><a name="id3013809"></a><h3>Experimental API</h3><p>The experimental API is subject to change or removal without regard
-      for backward compatibility.</p><p>The <code class="function">allocm</code>(<em class="parameter"><code></code></em>),
+}</pre></div><div class="refsect2" title="Experimental API"><a name="id286950724"></a><h3>Experimental API</h3><p>The experimental API is subject to change or removal without regard
+      for backward compatibility.  If <code class="option">--disable-experimental</code>
+      is specified during configuration, the experimental API is
+      omitted.</p><p>The <code class="function">allocm</code>(<em class="parameter"><code></code></em>),
       <code class="function">rallocm</code>(<em class="parameter"><code></code></em>),
-      <code class="function">sallocm</code>(<em class="parameter"><code></code></em>), and
-      <code class="function">dallocm</code>(<em class="parameter"><code></code></em>) functions all have a
+      <code class="function">sallocm</code>(<em class="parameter"><code></code></em>),
+      <code class="function">dallocm</code>(<em class="parameter"><code></code></em>), and
+      <code class="function">nallocm</code>(<em class="parameter"><code></code></em>) functions all have a
       <em class="parameter"><code>flags</code></em> argument that can be used to specify
       options.  The functions only check the options that are contextually
       relevant.  Use bitwise or (<code class="code">|</code>) operations to
@@ -142,7 +151,9 @@ for (i = 0; i &lt; nbins; i++) {
       least <em class="parameter"><code>size</code></em> bytes of memory, sets
       <em class="parameter"><code>*ptr</code></em> to the base address of the allocation, and
       sets <em class="parameter"><code>*rsize</code></em> to the real size of the allocation if
-      <em class="parameter"><code>rsize</code></em> is not <code class="constant">NULL</code>.</p><p>The <code class="function">rallocm</code>(<em class="parameter"><code></code></em>) function resizes the
+      <em class="parameter"><code>rsize</code></em> is not <code class="constant">NULL</code>.  Behavior
+      is undefined if <em class="parameter"><code>size</code></em> is
+      <code class="constant">0</code>.</p><p>The <code class="function">rallocm</code>(<em class="parameter"><code></code></em>) function resizes the
       allocation at <em class="parameter"><code>*ptr</code></em> to be at least
       <em class="parameter"><code>size</code></em> bytes, sets <em class="parameter"><code>*ptr</code></em> to
       the base address of the allocation if it moved, and sets
@@ -152,12 +163,20 @@ for (i = 0; i &lt; nbins; i++) {
       the allocation to be at least <code class="code"><em class="parameter"><code>size</code></em> +
       <em class="parameter"><code>extra</code></em>)</code> bytes, though inability to allocate
       the extra byte(s) will not by itself result in failure.  Behavior is
-      undefined if <code class="code">(<em class="parameter"><code>size</code></em> +
+      undefined if <em class="parameter"><code>size</code></em> is <code class="constant">0</code>, or if
+      <code class="code">(<em class="parameter"><code>size</code></em> +
       <em class="parameter"><code>extra</code></em> &gt;
       <code class="constant">SIZE_T_MAX</code>)</code>.</p><p>The <code class="function">sallocm</code>(<em class="parameter"><code></code></em>) function sets
       <em class="parameter"><code>*rsize</code></em> to the real size of the allocation.</p><p>The <code class="function">dallocm</code>(<em class="parameter"><code></code></em>) function causes the
       memory referenced by <em class="parameter"><code>ptr</code></em> to be made available for
-      future allocations.</p></div></div><div class="refsect1" title="TUNING"><a name="tuning"></a><h2>TUNING</h2><p>Once, when the first call is made to one of the memory allocation
+      future allocations.</p><p>The <code class="function">nallocm</code>(<em class="parameter"><code></code></em>) function allocates no
+      memory, but it performs the same size computation as the
+      <code class="function">allocm</code>(<em class="parameter"><code></code></em>) function, and if
+      <em class="parameter"><code>rsize</code></em> is not <code class="constant">NULL</code> it sets
+      <em class="parameter"><code>*rsize</code></em> to the real size of the allocation that
+      would result from the equivalent <code class="function">allocm</code>(<em class="parameter"><code></code></em>)
+      function call.  Behavior is undefined if
+      <em class="parameter"><code>size</code></em> is <code class="constant">0</code>.</p></div></div><div class="refsect1" title="TUNING"><a name="tuning"></a><h2>TUNING</h2><p>Once, when the first call is made to one of the memory allocation
     routines, the allocator initializes its internals based in part on various
     options that can be specified at compile- or run-time.</p><p>The string pointed to by the global variable
     <code class="varname">malloc_conf</code>, the &#8220;name&#8221; of the file
@@ -180,8 +199,8 @@ for (i = 0; i &lt; nbins; i++) {
     suboptimal for several reasons, including race conditions, increased
     fragmentation, and artificial limitations on maximum usable memory.  If
     <code class="option">--enable-dss</code> is specified during configuration, this
-    allocator uses both <span class="citerefentry"><span class="refentrytitle">sbrk</span>(2)</span> and
-    <span class="citerefentry"><span class="refentrytitle">mmap</span>(2)</span>, in that order of preference;
+    allocator uses both <span class="citerefentry"><span class="refentrytitle">mmap</span>(2)</span> and
+    <span class="citerefentry"><span class="refentrytitle">sbrk</span>(2)</span>, in that order of preference;
     otherwise only <span class="citerefentry"><span class="refentrytitle">mmap</span>(2)</span> is used.</p><p>This allocator uses multiple arenas in order to reduce lock
     contention for threaded programs on multi-processor systems.  This works
     well with regard to threading scalability, but incurs some costs.  There is
@@ -212,26 +231,14 @@ for (i = 0; i &lt; nbins; i++) {
     large object).  The combination of chunk alignment and chunk page maps
     makes it possible to determine all metadata regarding small and large
     allocations in constant time.</p><p>Small objects are managed in groups by page runs.  Each run maintains
-    a frontier and free list to track which regions are in use.  Unless
-    <code class="option">--disable-tiny</code> is specified during configuration,
-    allocation requests that are no more than half the quantum (8 or 16,
-    depending on architecture) are rounded up to the nearest power of two that
-    is at least <code class="code">sizeof(<span class="type">void *</span>)</code>.
-    Allocation requests that are more than half the quantum, but no more than
-    the minimum cacheline-multiple size class (see the <a class="link" href="#opt.lg_qspace_max">
-    "<code class="mallctl">opt.lg_qspace_max</code>"
-  </a>
-    option) are rounded up to the nearest multiple of the quantum.  Allocation
-    requests that are more than the minimum cacheline-multiple size class, but
-    no more than the minimum subpage-multiple size class (see the <a class="link" href="#opt.lg_cspace_max">
-    "<code class="mallctl">opt.lg_cspace_max</code>"
-  </a>
-    option) are rounded up to the nearest multiple of the cacheline size (64).
-    Allocation requests that are more than the minimum subpage-multiple size
-    class, but no more than the maximum subpage-multiple size class are rounded
-    up to the nearest multiple of the subpage size (256).  Allocation requests
-    that are more than the maximum subpage-multiple size class, but small
-    enough to fit in an arena-managed chunk (see the <a class="link" href="#opt.lg_chunk">
+    a frontier and free list to track which regions are in use.  Allocation
+    requests that are no more than half the quantum (8 or 16, depending on
+    architecture) are rounded up to the nearest power of two that is at least
+    <code class="code">sizeof(<span class="type">double</span>)</code>.  All other small
+    object size classes are multiples of the quantum, spaced such that internal
+    fragmentation is limited to approximately 25% for all but the smallest size
+    classes.  Allocation requests that are larger than the maximum small size
+    class, but small enough to fit in an arena-managed chunk (see the <a class="link" href="#opt.lg_chunk">
     "<code class="mallctl">opt.lg_chunk</code>"
   </a> option), are
     rounded up to the nearest run size.  Allocation requests that are too large
@@ -241,7 +248,7 @@ for (i = 0; i &lt; nbins; i++) {
     suffer from cacheline sharing, round your allocation requests up to the
     nearest multiple of the cacheline size, or specify cacheline alignment when
     allocating.</p><p>Assuming 4 MiB chunks, 4 KiB pages, and a 16-byte quantum on a 64-bit
-    system, the size classes in each category are as shown in <a class="xref" href="#size_classes" title="Table 1. Size classes">Table 1</a>.</p><div class="table"><a name="size_classes"></a><p class="title"><b>Table 1. Size classes</b></p><div class="table-contents"><table summary="Size classes" border="1"><colgroup><col align="left"><col align="left"><col align="left"></colgroup><thead><tr><th align="left">Category</th><th align="left">Subcategory</th><th align="left">Size</th></tr></thead><tbody><tr><td rowspan="4" align="left">Small</td><td align="left">Tiny</td><td align="left">[8]</td></tr><tr><td align="left">Quantum-spaced</td><td align="left">[16, 32, 48, ..., 128]</td></tr><tr><td align="left">Cacheline-spaced</td><td align="left">[192, 256, 320, ..., 512]</td></tr><tr><td align="left">Subpage-spaced</td><td align="left">[768, 1024, 1280, ..., 3840]</td></tr><tr><td colspan="2" align="left">Large</td><td align="left">[4 KiB, 8 KiB, 12 KiB, ..., 4072 KiB]</td></tr><tr><td colspan="2" align="left">Huge</td><td align="left">[4 MiB, 8 MiB, 12 MiB, ...]</td></tr></tbody></table></div></div><br class="table-break"></div><div class="refsect1" title="MALLCTL NAMESPACE"><a name="mallctl_namespace"></a><h2>MALLCTL NAMESPACE</h2><p>The following names are defined in the namespace accessible via the
+    system, the size classes in each category are as shown in <a class="xref" href="#size_classes" title="Table 1. Size classes">Table 1</a>.</p><div class="table"><a name="size_classes"></a><p class="title"><b>Table 1. Size classes</b></p><div class="table-contents"><table summary="Size classes" border="1"><colgroup><col align="left" class="c1"><col align="right" class="c2"><col align="left" class="c3"></colgroup><thead><tr><th align="left">Category</th><th align="right">Spacing</th><th align="left">Size</th></tr></thead><tbody><tr><td rowspan="7" align="left">Small</td><td align="right">lg</td><td align="left">[8]</td></tr><tr><td align="right">16</td><td align="left">[16, 32, 48, ..., 128]</td></tr><tr><td align="right">32</td><td align="left">[160, 192, 224, 256]</td></tr><tr><td align="right">64</td><td align="left">[320, 384, 448, 512]</td></tr><tr><td align="right">128</td><td align="left">[640, 768, 896, 1024]</td></tr><tr><td align="right">256</td><td align="left">[1280, 1536, 1792, 2048]</td></tr><tr><td align="right">512</td><td align="left">[2560, 3072, 3584]</td></tr><tr><td align="left">Large</td><td align="right">4 KiB</td><td align="left">[4 KiB, 8 KiB, 12 KiB, ..., 4072 KiB]</td></tr><tr><td align="left">Huge</td><td align="right">4 MiB</td><td align="left">[4 MiB, 8 MiB, 12 MiB, ...]</td></tr></tbody></table></div></div><br class="table-break"></div><div class="refsect1" title="MALLCTL NAMESPACE"><a name="mallctl_namespace"></a><h2>MALLCTL NAMESPACE</h2><p>The following names are defined in the namespace accessible via the
     <code class="function">mallctl*</code>(<em class="parameter"><code></code></em>) functions.  Value types are
     specified in parentheses, their readable/writable statuses are encoded as
     <code class="literal">rw</code>, <code class="literal">r-</code>, <code class="literal">-w</code>, or
@@ -290,13 +297,6 @@ for (i = 0; i &lt; nbins; i++) {
         </span></dt><dd><p><code class="option">--enable-dss</code> was specified during
         build configuration.</p></dd><dt><span class="term">
           
-    "<code class="mallctl">config.dynamic_page_shift</code>"
-  
-          (<span class="type">bool</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p><code class="option">--enable-dynamic-page-shift</code> was
-        specified during build configuration.</p></dd><dt><span class="term">
-          
     "<code class="mallctl">config.fill</code>"
   
           (<span class="type">bool</span>)
@@ -311,6 +311,20 @@ for (i = 0; i &lt; nbins; i++) {
         </span></dt><dd><p><code class="option">--enable-lazy-lock</code> was specified
         during build configuration.</p></dd><dt><span class="term">
           
+    "<code class="mallctl">config.mremap</code>"
+  
+          (<span class="type">bool</span>)
+          <code class="literal">r-</code>
+        </span></dt><dd><p><code class="option">--enable-mremap</code> was specified during
+        build configuration.</p></dd><dt><span class="term">
+          
+    "<code class="mallctl">config.munmap</code>"
+  
+          (<span class="type">bool</span>)
+          <code class="literal">r-</code>
+        </span></dt><dd><p><code class="option">--enable-munmap</code> was specified during
+        build configuration.</p></dd><dt><span class="term">
+          
     "<code class="mallctl">config.prof</code>"
   
           (<span class="type">bool</span>)
@@ -339,39 +353,32 @@ for (i = 0; i &lt; nbins; i++) {
         </span></dt><dd><p><code class="option">--enable-stats</code> was specified during
         build configuration.</p></dd><dt><span class="term">
           
-    "<code class="mallctl">config.swap</code>"
+    "<code class="mallctl">config.tcache</code>"
   
           (<span class="type">bool</span>)
           <code class="literal">r-</code>
-        </span></dt><dd><p><code class="option">--enable-swap</code> was specified during
-        build configuration.</p></dd><dt><span class="term">
+        </span></dt><dd><p><code class="option">--disable-tcache</code> was not specified
+        during build configuration.</p></dd><dt><span class="term">
           
-    "<code class="mallctl">config.sysv</code>"
+    "<code class="mallctl">config.tls</code>"
   
           (<span class="type">bool</span>)
           <code class="literal">r-</code>
-        </span></dt><dd><p><code class="option">--enable-sysv</code> was specified during
+        </span></dt><dd><p><code class="option">--disable-tls</code> was not specified during
         build configuration.</p></dd><dt><span class="term">
           
-    "<code class="mallctl">config.tcache</code>"
-  
-          (<span class="type">bool</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p><code class="option">--disable-tcache</code> was not specified
-        during build configuration.</p></dd><dt><span class="term">
-          
-    "<code class="mallctl">config.tiny</code>"
+    "<code class="mallctl">config.utrace</code>"
   
           (<span class="type">bool</span>)
           <code class="literal">r-</code>
-        </span></dt><dd><p><code class="option">--disable-tiny</code> was not specified
-        during build configuration.</p></dd><dt><span class="term">
+        </span></dt><dd><p><code class="option">--enable-utrace</code> was specified during
+        build configuration.</p></dd><dt><span class="term">
           
-    "<code class="mallctl">config.tls</code>"
+    "<code class="mallctl">config.valgrind</code>"
   
           (<span class="type">bool</span>)
           <code class="literal">r-</code>
-        </span></dt><dd><p><code class="option">--disable-tls</code> was not specified during
+        </span></dt><dd><p><code class="option">--enable-valgrind</code> was specified during
         build configuration.</p></dd><dt><span class="term">
           
     "<code class="mallctl">config.xmalloc</code>"
@@ -390,25 +397,7 @@ for (i = 0; i &lt; nbins; i++) {
         <span class="citerefentry"><span class="refentrytitle">abort</span>(3)</span> in these cases.  This option is
         disabled by default unless <code class="option">--enable-debug</code> is
         specified during configuration, in which case it is enabled by default.
-        </p></dd><dt><a name="opt.lg_qspace_max"></a><span class="term">
-          
-    "<code class="mallctl">opt.lg_qspace_max</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p>Size (log base 2) of the maximum size class that is a
-        multiple of the quantum (8 or 16 bytes, depending on architecture).
-        Above this size, cacheline spacing is used for size classes.  The
-        default value is 128 bytes (2^7).</p></dd><dt><a name="opt.lg_cspace_max"></a><span class="term">
-          
-    "<code class="mallctl">opt.lg_cspace_max</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p>Size (log base 2) of the maximum size class that is a
-        multiple of the cacheline size (64).  Above this size, subpage spacing
-        (256 bytes) is used for size classes.  The default value is 512 bytes
-        (2^9).</p></dd><dt><a name="opt.lg_chunk"></a><span class="term">
+        </p></dd><dt><a name="opt.lg_chunk"></a><span class="term">
           
     "<code class="mallctl">opt.lg_chunk</code>"
   
@@ -465,7 +454,42 @@ for (i = 0; i &lt; nbins; i++) {
         <code class="literal">0x5a</code>.  This is intended for debugging and will
         impact performance negatively.  This option is disabled by default
         unless <code class="option">--enable-debug</code> is specified during
-        configuration, in which case it is enabled by default.</p></dd><dt><a name="opt.zero"></a><span class="term">
+        configuration, in which case it is enabled by default.</p></dd><dt><a name="opt.quarantine"></a><span class="term">
+          
+    "<code class="mallctl">opt.quarantine</code>"
+  
+          (<span class="type">size_t</span>)
+          <code class="literal">r-</code>
+          [<code class="option">--enable-fill</code>]
+        </span></dt><dd><p>Per-thread quarantine size in bytes.  If non-zero, each
+        thread maintains a FIFO object quarantine that stores up to the
+        specified number of bytes of memory.  The quarantined memory is not
+        freed until it is released from quarantine, though it is immediately
+        junk-filled if the <a class="link" href="#opt.junk">
+    "<code class="mallctl">opt.junk</code>"
+  </a> option is
+        enabled.  This feature is of particular use in combination with <a class="ulink" href="http://valgrind.org/" target="_top">Valgrind</a>, which can detect attempts
+        to access quarantined objects.  This is intended for debugging and will
+        impact performance negatively.  The default quarantine size is
+        0.</p></dd><dt><a name="opt.redzone"></a><span class="term">
+          
+    "<code class="mallctl">opt.redzone</code>"
+  
+          (<span class="type">bool</span>)
+          <code class="literal">r-</code>
+          [<code class="option">--enable-fill</code>]
+        </span></dt><dd><p>Redzones enabled/disabled.  If enabled, small
+        allocations have redzones before and after them.  Furthermore, if the
+        <a class="link" href="#opt.junk">
+    "<code class="mallctl">opt.junk</code>"
+  </a> option is
+        enabled, the redzones are checked for corruption during deallocation.
+        However, the primary intended purpose of this feature is to be used in
+        combination with <a class="ulink" href="http://valgrind.org/" target="_top">Valgrind</a>,
+        which needs redzones in order to do effective buffer overflow/underflow
+        detection.  This option is intended for debugging and will impact
+        performance negatively.  This option is disabled by
+        default.</p></dd><dt><a name="opt.zero"></a><span class="term">
           
     "<code class="mallctl">opt.zero</code>"
   
@@ -479,21 +503,38 @@ for (i = 0; i &lt; nbins; i++) {
         <code class="function">rallocm</code>(<em class="parameter"><code></code></em>) calls do not zero memory that
         was previously allocated.  This is intended for debugging and will
         impact performance negatively.  This option is disabled by default.
-        </p></dd><dt><a name="opt.sysv"></a><span class="term">
+        </p></dd><dt><a name="opt.utrace"></a><span class="term">
           
-    "<code class="mallctl">opt.sysv</code>"
+    "<code class="mallctl">opt.utrace</code>"
   
           (<span class="type">bool</span>)
           <code class="literal">r-</code>
-          [<code class="option">--enable-sysv</code>]
-        </span></dt><dd><p>If enabled, attempting to allocate zero bytes will
-        return a <code class="constant">NULL</code> pointer instead of a valid pointer.
-        (The default behavior is to make a minimal allocation and return a
-        pointer to it.) This option is provided for System V compatibility.
-        This option is incompatible with the <a class="link" href="#opt.xmalloc">
-    "<code class="mallctl">opt.xmalloc</code>"
-  </a> option.
-        This option is disabled by default.</p></dd><dt><a name="opt.xmalloc"></a><span class="term">
+          [<code class="option">--enable-utrace</code>]
+        </span></dt><dd><p>Allocation tracing based on
+        <span class="citerefentry"><span class="refentrytitle">utrace</span>(2)</span> enabled/disabled.  This option
+        is disabled by default.</p></dd><dt><a name="opt.valgrind"></a><span class="term">
+          
+    "<code class="mallctl">opt.valgrind</code>"
+  
+          (<span class="type">bool</span>)
+          <code class="literal">r-</code>
+          [<code class="option">--enable-valgrind</code>]
+        </span></dt><dd><p><a class="ulink" href="http://valgrind.org/" target="_top">Valgrind</a>
+        support enabled/disabled.  If enabled, several other options are
+        automatically modified during options processing to work well with
+        Valgrind: <a class="link" href="#opt.junk">
+    "<code class="mallctl">opt.junk</code>"
+  </a>
+        and <a class="link" href="#opt.zero">
+    "<code class="mallctl">opt.zero</code>"
+  </a> are set
+        to false, <a class="link" href="#opt.quarantine">
+    "<code class="mallctl">opt.quarantine</code>"
+  </a> is
+        set to 16 MiB, and <a class="link" href="#opt.redzone">
+    "<code class="mallctl">opt.redzone</code>"
+  </a> is set to
+        true.  This option is disabled by default.</p></dd><dt><a name="opt.xmalloc"></a><span class="term">
           
     "<code class="mallctl">opt.xmalloc</code>"
   
@@ -521,27 +562,11 @@ malloc_conf = "xmalloc:true";</pre><p>
         objects up to a certain size.  Thread-specific caching allows many
         allocations to be satisfied without performing any thread
         synchronization, at the cost of increased memory use.  See the
-        <a class="link" href="#opt.lg_tcache_gc_sweep">
-    "<code class="mallctl">opt.lg_tcache_gc_sweep</code>"
-  </a>
-        and <a class="link" href="#opt.lg_tcache_max">
+        <a class="link" href="#opt.lg_tcache_max">
     "<code class="mallctl">opt.lg_tcache_max</code>"
   </a>
-        options for related tuning information.  This option is enabled by
-        default.</p></dd><dt><a name="opt.lg_tcache_gc_sweep"></a><span class="term">
-          
-    "<code class="mallctl">opt.lg_tcache_gc_sweep</code>"
-  
-          (<span class="type">ssize_t</span>)
-          <code class="literal">r-</code>
-          [<code class="option">--enable-tcache</code>]
-        </span></dt><dd><p>Approximate interval (log base 2) between full
-        thread-specific cache garbage collection sweeps, counted in terms of
-        thread-specific cache allocation/deallocation events.  Garbage
-        collection is actually performed incrementally, one size class at a
-        time, in order to avoid large collection pauses.  The default sweep
-        interval is 8192 (2^13); setting this option to -1 will disable garbage
-        collection.</p></dd><dt><a name="opt.lg_tcache_max"></a><span class="term">
+        option for related tuning information.  This option is enabled by
+        default.</p></dd><dt><a name="opt.lg_tcache_max"></a><span class="term">
           
     "<code class="mallctl">opt.lg_tcache_max</code>"
   
@@ -559,17 +584,7 @@ malloc_conf = "xmalloc:true";</pre><p>
           <code class="literal">r-</code>
           [<code class="option">--enable-prof</code>]
         </span></dt><dd><p>Memory profiling enabled/disabled.  If enabled, profile
-        memory allocation activity, and use an
-        <span class="citerefentry"><span class="refentrytitle">atexit</span>(3)</span> function to dump final memory
-        usage to a file named according to the pattern
-        <code class="filename">&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.f.heap</code>,
-        where <code class="literal">&lt;prefix&gt;</code> is controlled by the <a class="link" href="#opt.prof_prefix">
-    "<code class="mallctl">opt.prof_prefix</code>"
-  </a>
-        option.  See the <a class="link" href="#opt.lg_prof_bt_max">
-    "<code class="mallctl">opt.lg_prof_bt_max</code>"
-  </a>
-        option for backtrace depth control.  See the <a class="link" href="#opt.prof_active">
+        memory allocation activity.  See the <a class="link" href="#opt.prof_active">
     "<code class="mallctl">opt.prof_active</code>"
   </a>
         option for on-the-fly activation/deactivation.  See the <a class="link" href="#opt.lg_prof_sample">
@@ -578,19 +593,19 @@ malloc_conf = "xmalloc:true";</pre><p>
         option for probabilistic sampling control.  See the <a class="link" href="#opt.prof_accum">
     "<code class="mallctl">opt.prof_accum</code>"
   </a>
-        option for control of cumulative sample reporting.  See the <a class="link" href="#opt.lg_prof_tcmax">
-    "<code class="mallctl">opt.lg_prof_tcmax</code>"
-  </a>
-        option for control of per thread backtrace caching.  See the <a class="link" href="#opt.lg_prof_interval">
+        option for control of cumulative sample reporting.  See the <a class="link" href="#opt.lg_prof_interval">
     "<code class="mallctl">opt.lg_prof_interval</code>"
   </a>
-        option for information on interval-triggered profile dumping, and the
-        <a class="link" href="#opt.prof_gdump">
+        option for information on interval-triggered profile dumping, the <a class="link" href="#opt.prof_gdump">
     "<code class="mallctl">opt.prof_gdump</code>"
   </a>
-        option for information on high-water-triggered profile dumping.
-        Profile output is compatible with the included <span class="command"><strong>pprof</strong></span>
-        Perl script, which originates from the <a class="ulink" href="http://code.google.com/p/google-perftools/" target="_top">google-perftools
+        option for information on high-water-triggered profile dumping, and the
+        <a class="link" href="#opt.prof_final">
+    "<code class="mallctl">opt.prof_final</code>"
+  </a>
+        option for final profile dumping.  Profile output is compatible with
+        the included <span class="command"><strong>pprof</strong></span> Perl script, which originates
+        from the <a class="ulink" href="http://code.google.com/p/gperftools/" target="_top">gperftools
         package</a>.</p></dd><dt><a name="opt.prof_prefix"></a><span class="term">
           
     "<code class="mallctl">opt.prof_prefix</code>"
@@ -602,15 +617,7 @@ malloc_conf = "xmalloc:true";</pre><p>
         set to the empty string, no automatic dumps will occur; this is
         primarily useful for disabling the automatic final heap dump (which
         also disables leak reporting, if enabled).  The default prefix is
-        <code class="filename">jeprof</code>.</p></dd><dt><a name="opt.lg_prof_bt_max"></a><span class="term">
-          
-    "<code class="mallctl">opt.lg_prof_bt_max</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-          [<code class="option">--enable-prof</code>]
-        </span></dt><dd><p>Maximum backtrace depth (log base 2) when profiling
-        memory allocation activity.  The default is 128 (2^7).</p></dd><dt><a name="opt.prof_active"></a><span class="term">
+        <code class="filename">jeprof</code>.</p></dd><dt><a name="opt.prof_active"></a><span class="term">
           
     "<code class="mallctl">opt.prof_active</code>"
   
@@ -636,8 +643,8 @@ malloc_conf = "xmalloc:true";</pre><p>
         </span></dt><dd><p>Average interval (log base 2) between allocation
         samples, as measured in bytes of allocation activity.  Increasing the
         sampling interval decreases profile fidelity, but also decreases the
-        computational overhead.  The default sample interval is 1 (2^0) (i.e.
-        all allocations are sampled).</p></dd><dt><a name="opt.prof_accum"></a><span class="term">
+        computational overhead.  The default sample interval is 512 KiB (2^19
+        B).</p></dd><dt><a name="opt.prof_accum"></a><span class="term">
           
     "<code class="mallctl">opt.prof_accum</code>"
   
@@ -648,28 +655,8 @@ malloc_conf = "xmalloc:true";</pre><p>
         dumps enabled/disabled.  If this option is enabled, every unique
         backtrace must be stored for the duration of execution.  Depending on
         the application, this can impose a large memory overhead, and the
-        cumulative counts are not always of interest.  See the
-        <a class="link" href="#opt.lg_prof_tcmax">
-    "<code class="mallctl">opt.lg_prof_tcmax</code>"
-  </a>
-        option for control of per thread backtrace caching, which has important
-        interactions.  This option is enabled by default.</p></dd><dt><a name="opt.lg_prof_tcmax"></a><span class="term">
-          
-    "<code class="mallctl">opt.lg_prof_tcmax</code>"
-  
-          (<span class="type">ssize_t</span>)
-          <code class="literal">r-</code>
-          [<code class="option">--enable-prof</code>]
-        </span></dt><dd><p>Maximum per thread backtrace cache (log base 2) used
-        for heap profiling.  A backtrace can only be discarded if the
-        <a class="link" href="#opt.prof_accum">
-    "<code class="mallctl">opt.prof_accum</code>"
-  </a>
-        option is disabled, and no thread caches currently refer to the
-        backtrace.  Therefore, a backtrace cache limit should be imposed if the
-        intention is to limit how much memory is used by backtraces.  By
-        default, no limit is imposed (encoded as -1).
-        </p></dd><dt><a name="opt.lg_prof_interval"></a><span class="term">
+        cumulative counts are not always of interest.  This option is disabled
+        by default.</p></dd><dt><a name="opt.lg_prof_interval"></a><span class="term">
           
     "<code class="mallctl">opt.lg_prof_interval</code>"
   
@@ -702,7 +689,21 @@ malloc_conf = "xmalloc:true";</pre><p>
         where <code class="literal">&lt;prefix&gt;</code> is controlled by the <a class="link" href="#opt.prof_prefix">
     "<code class="mallctl">opt.prof_prefix</code>"
   </a>
-        option.  This option is disabled by default.</p></dd><dt><a name="opt.prof_leak"></a><span class="term">
+        option.  This option is disabled by default.</p></dd><dt><a name="opt.prof_final"></a><span class="term">
+          
+    "<code class="mallctl">opt.prof_final</code>"
+  
+          (<span class="type">bool</span>)
+          <code class="literal">r-</code>
+          [<code class="option">--enable-prof</code>]
+        </span></dt><dd><p>Use an
+        <span class="citerefentry"><span class="refentrytitle">atexit</span>(3)</span> function to dump final memory
+        usage to a file named according to the pattern
+        <code class="filename">&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.f.heap</code>,
+        where <code class="literal">&lt;prefix&gt;</code> is controlled by the <a class="link" href="#opt.prof_prefix">
+    "<code class="mallctl">opt.prof_prefix</code>"
+  </a>
+        option.  This option is enabled by default.</p></dd><dt><a name="opt.prof_leak"></a><span class="term">
           
     "<code class="mallctl">opt.prof_leak</code>"
   
@@ -712,45 +713,11 @@ malloc_conf = "xmalloc:true";</pre><p>
         </span></dt><dd><p>Leak reporting enabled/disabled.  If enabled, use an
         <span class="citerefentry"><span class="refentrytitle">atexit</span>(3)</span> function to report memory leaks
         detected by allocation sampling.  See the
-        <a class="link" href="#opt.lg_prof_bt_max">
-    "<code class="mallctl">opt.lg_prof_bt_max</code>"
-  </a>
-        option for backtrace depth control.  See the
         <a class="link" href="#opt.prof">
     "<code class="mallctl">opt.prof</code>"
   </a> option for
         information on analyzing heap profile output.  This option is disabled
-        by default.</p></dd><dt><a name="opt.overcommit"></a><span class="term">
-          
-    "<code class="mallctl">opt.overcommit</code>"
-  
-          (<span class="type">bool</span>)
-          <code class="literal">r-</code>
-          [<code class="option">--enable-swap</code>]
-        </span></dt><dd><p>Over-commit enabled/disabled.  If enabled, over-commit
-        memory as a side effect of using anonymous
-        <span class="citerefentry"><span class="refentrytitle">mmap</span>(2)</span> or
-        <span class="citerefentry"><span class="refentrytitle">sbrk</span>(2)</span> for virtual memory allocation.
-        In order for overcommit to be disabled, the <a class="link" href="#swap.fds">
-    "<code class="mallctl">swap.fds</code>"
-  </a> mallctl must have
-        been successfully written to.  This option is enabled by
-        default.</p></dd><dt><span class="term">
-          
-    "<code class="mallctl">tcache.flush</code>"
-  
-          (<span class="type">void</span>)
-          <code class="literal">--</code>
-          [<code class="option">--enable-tcache</code>]
-        </span></dt><dd><p>Flush calling thread's tcache.  This interface releases
-        all cached objects and internal data structures associated with the
-        calling thread's thread-specific cache.  Ordinarily, this interface
-        need not be called, since automatic periodic incremental garbage
-        collection occurs, and the thread cache is automatically discarded when
-        a thread exits.  However, garbage collection is triggered by allocation
-        activity, so it is possible for a thread that stops
-        allocating/deallocating to retain its cache indefinitely, in which case
-        the developer may find manual flushing useful.</p></dd><dt><span class="term">
+        by default.</p></dd><dt><span class="term">
           
     "<code class="mallctl">thread.arena</code>"
   
@@ -810,7 +777,34 @@ malloc_conf = "xmalloc:true";</pre><p>
     "<code class="mallctl">thread.deallocated</code>"
   </a>
         mallctl.  This is useful for avoiding the overhead of repeated
-        <code class="function">mallctl*</code>(<em class="parameter"><code></code></em>) calls.</p></dd><dt><a name="arenas.narenas"></a><span class="term">
+        <code class="function">mallctl*</code>(<em class="parameter"><code></code></em>) calls.</p></dd><dt><span class="term">
+          
+    "<code class="mallctl">thread.tcache.enabled</code>"
+  
+          (<span class="type">bool</span>)
+          <code class="literal">rw</code>
+          [<code class="option">--enable-tcache</code>]
+        </span></dt><dd><p>Enable/disable calling thread's tcache.  The tcache is
+        implicitly flushed as a side effect of becoming
+        disabled (see 
+    "<code class="mallctl">thread.tcache.flush</code>"
+  ).
+        </p></dd><dt><span class="term">
+          
+    "<code class="mallctl">thread.tcache.flush</code>"
+  
+          (<span class="type">void</span>)
+          <code class="literal">--</code>
+          [<code class="option">--enable-tcache</code>]
+        </span></dt><dd><p>Flush calling thread's tcache.  This interface releases
+        all cached objects and internal data structures associated with the
+        calling thread's thread-specific cache.  Ordinarily, this interface
+        need not be called, since automatic periodic incremental garbage
+        collection occurs, and the thread cache is automatically discarded when
+        a thread exits.  However, garbage collection is triggered by allocation
+        activity, so it is possible for a thread that stops
+        allocating/deallocating to retain its cache indefinitely, in which case
+        the developer may find manual flushing useful.</p></dd><dt><a name="arenas.narenas"></a><span class="term">
           
     "<code class="mallctl">arenas.narenas</code>"
   
@@ -834,80 +828,12 @@ malloc_conf = "xmalloc:true";</pre><p>
           <code class="literal">r-</code>
         </span></dt><dd><p>Quantum size.</p></dd><dt><span class="term">
           
-    "<code class="mallctl">arenas.cacheline</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p>Assumed cacheline size.</p></dd><dt><span class="term">
-          
-    "<code class="mallctl">arenas.subpage</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p>Subpage size class interval.</p></dd><dt><span class="term">
-          
-    "<code class="mallctl">arenas.pagesize</code>"
+    "<code class="mallctl">arenas.page</code>"
   
           (<span class="type">size_t</span>)
           <code class="literal">r-</code>
         </span></dt><dd><p>Page size.</p></dd><dt><span class="term">
           
-    "<code class="mallctl">arenas.chunksize</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p>Chunk size.</p></dd><dt><span class="term">
-          
-    "<code class="mallctl">arenas.tspace_min</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p>Minimum tiny size class.  Tiny size classes are powers
-        of two.</p></dd><dt><span class="term">
-          
-    "<code class="mallctl">arenas.tspace_max</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p>Maximum tiny size class.  Tiny size classes are powers
-        of two.</p></dd><dt><span class="term">
-          
-    "<code class="mallctl">arenas.qspace_min</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p>Minimum quantum-spaced size class.</p></dd><dt><span class="term">
-          
-    "<code class="mallctl">arenas.qspace_max</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p>Maximum quantum-spaced size class.</p></dd><dt><span class="term">
-          
-    "<code class="mallctl">arenas.cspace_min</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p>Minimum cacheline-spaced size class.</p></dd><dt><span class="term">
-          
-    "<code class="mallctl">arenas.cspace_max</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p>Maximum cacheline-spaced size class.</p></dd><dt><span class="term">
-          
-    "<code class="mallctl">arenas.sspace_min</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p>Minimum subpage-spaced size class.</p></dd><dt><span class="term">
-          
-    "<code class="mallctl">arenas.sspace_max</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p>Maximum subpage-spaced size class.</p></dd><dt><span class="term">
-          
     "<code class="mallctl">arenas.tcache_max</code>"
   
           (<span class="type">size_t</span>)
@@ -915,38 +841,11 @@ malloc_conf = "xmalloc:true";</pre><p>
           [<code class="option">--enable-tcache</code>]
         </span></dt><dd><p>Maximum thread-cached size class.</p></dd><dt><span class="term">
           
-    "<code class="mallctl">arenas.ntbins</code>"
-  
-          (<span class="type">unsigned</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p>Number of tiny bin size classes.</p></dd><dt><span class="term">
-          
-    "<code class="mallctl">arenas.nqbins</code>"
-  
-          (<span class="type">unsigned</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p>Number of quantum-spaced bin size
-        classes.</p></dd><dt><span class="term">
-          
-    "<code class="mallctl">arenas.ncbins</code>"
-  
-          (<span class="type">unsigned</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p>Number of cacheline-spaced bin size
-        classes.</p></dd><dt><span class="term">
-          
-    "<code class="mallctl">arenas.nsbins</code>"
-  
-          (<span class="type">unsigned</span>)
-          <code class="literal">r-</code>
-        </span></dt><dd><p>Number of subpage-spaced bin size
-        classes.</p></dd><dt><span class="term">
-          
     "<code class="mallctl">arenas.nbins</code>"
   
           (<span class="type">unsigned</span>)
           <code class="literal">r-</code>
-        </span></dt><dd><p>Total number of bin size classes.</p></dd><dt><span class="term">
+        </span></dt><dd><p>Number of bin size classes.</p></dd><dt><span class="term">
           
     "<code class="mallctl">arenas.nhbins</code>"
   
@@ -1079,8 +978,7 @@ malloc_conf = "xmalloc:true";</pre><p>
         large as <a class="link" href="#stats.active">
     "<code class="mallctl">stats.active</code>"
   </a>.  This
-        does not include inactive chunks backed by swap files.  his does not
-        include inactive chunks embedded in the DSS.</p></dd><dt><span class="term">
+        does not include inactive chunks.</p></dd><dt><span class="term">
           
     "<code class="mallctl">stats.chunks.current</code>"
   
@@ -1088,8 +986,7 @@ malloc_conf = "xmalloc:true";</pre><p>
           <code class="literal">r-</code>
           [<code class="option">--enable-stats</code>]
         </span></dt><dd><p>Total number of chunks actively mapped on behalf of the
-        application.  This does not include inactive chunks backed by swap
-        files.  This does not include inactive chunks embedded in the DSS.
+        application.  This does not include inactive chunks.
         </p></dd><dt><span class="term">
           
     "<code class="mallctl">stats.chunks.total</code>"
@@ -1309,14 +1206,6 @@ malloc_conf = "xmalloc:true";</pre><p>
         </span></dt><dd><p>Cumulative number of times the current run from which
         to allocate changed.</p></dd><dt><span class="term">
           
-    "<code class="mallctl">stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.highruns</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-          [<code class="option">--enable-stats</code>]
-        </span></dt><dd><p>Maximum number of runs at any time thus far.
-        </p></dd><dt><span class="term">
-          
     "<code class="mallctl">stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.curruns</code>"
   
           (<span class="type">size_t</span>)
@@ -1348,69 +1237,13 @@ malloc_conf = "xmalloc:true";</pre><p>
         </span></dt><dd><p>Cumulative number of allocation requests for this size
         class.</p></dd><dt><span class="term">
           
-    "<code class="mallctl">stats.arenas.&lt;i&gt;.lruns.&lt;j&gt;.highruns</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-          [<code class="option">--enable-stats</code>]
-        </span></dt><dd><p>Maximum number of runs at any time thus far for this
-        size class.</p></dd><dt><span class="term">
-          
     "<code class="mallctl">stats.arenas.&lt;i&gt;.lruns.&lt;j&gt;.curruns</code>"
   
           (<span class="type">size_t</span>)
           <code class="literal">r-</code>
           [<code class="option">--enable-stats</code>]
         </span></dt><dd><p>Current number of runs for this size class.
-        </p></dd><dt><span class="term">
-          
-    "<code class="mallctl">swap.avail</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-          [<code class="option">--enable-stats --enable-swap</code>]
-        </span></dt><dd><p>Number of swap file bytes that are currently not
-        associated with any chunk (i.e. mapped, but otherwise completely
-        unmanaged).</p></dd><dt><a name="swap.prezeroed"></a><span class="term">
-          
-    "<code class="mallctl">swap.prezeroed</code>"
-  
-          (<span class="type">bool</span>)
-          <code class="literal">rw</code>
-          [<code class="option">--enable-swap</code>]
-        </span></dt><dd><p>If true, the allocator assumes that the swap file(s)
-        contain nothing but nil bytes.  If this assumption is violated,
-        allocator behavior is undefined.  This value becomes read-only after
-        <a class="link" href="#swap.fds">
-    "<code class="mallctl">swap.fds</code>"
-  </a> is
-        successfully written to.</p></dd><dt><span class="term">
-          
-    "<code class="mallctl">swap.nfds</code>"
-  
-          (<span class="type">size_t</span>)
-          <code class="literal">r-</code>
-          [<code class="option">--enable-swap</code>]
-        </span></dt><dd><p>Number of file descriptors in use for swap.
-        </p></dd><dt><a name="swap.fds"></a><span class="term">
-          
-    "<code class="mallctl">swap.fds</code>"
-  
-          (<span class="type">int *</span>)
-          <code class="literal">rw</code>
-          [<code class="option">--enable-swap</code>]
-        </span></dt><dd><p>When written to, the files associated with the
-        specified file descriptors are contiguously mapped via
-        <span class="citerefentry"><span class="refentrytitle">mmap</span>(2)</span>.  The resulting virtual memory
-        region is preferred over anonymous
-        <span class="citerefentry"><span class="refentrytitle">mmap</span>(2)</span> and
-        <span class="citerefentry"><span class="refentrytitle">sbrk</span>(2)</span> memory.  Note that if a file's
-        size is not a multiple of the page size, it is automatically truncated
-        to the nearest page size multiple.  See the
-        <a class="link" href="#swap.prezeroed">
-    "<code class="mallctl">swap.prezeroed</code>"
-  </a>
-        mallctl for specifying that the files are pre-zeroed.</p></dd></dl></div></div><div class="refsect1" title="DEBUGGING MALLOC PROBLEMS"><a name="debugging_malloc_problems"></a><h2>DEBUGGING MALLOC PROBLEMS</h2><p>When debugging, it is a good idea to configure/build jemalloc with
+        </p></dd></dl></div></div><div class="refsect1" title="DEBUGGING MALLOC PROBLEMS"><a name="debugging_malloc_problems"></a><h2>DEBUGGING MALLOC PROBLEMS</h2><p>When debugging, it is a good idea to configure/build jemalloc with
     the <code class="option">--enable-debug</code> and <code class="option">--enable-fill</code>
     options, and recompile the program with suitable options and symbols for
     debugger support.  When so configured, jemalloc incorporates a wide variety
@@ -1428,10 +1261,13 @@ malloc_conf = "xmalloc:true";</pre><p>
     the symptoms of such bugs.  Between these two options, it is usually
     possible to quickly detect, diagnose, and eliminate such bugs.</p><p>This implementation does not provide much detail about the problems
     it detects, because the performance impact for storing such information
-    would be prohibitive.  There are a number of allocator implementations
-    available on the Internet which focus on detecting and pinpointing problems
-    by trading performance for extra sanity checks and detailed
-    diagnostics.</p></div><div class="refsect1" title="DIAGNOSTIC MESSAGES"><a name="diagnostic_messages"></a><h2>DIAGNOSTIC MESSAGES</h2><p>If any of the memory allocation/deallocation functions detect an
+    would be prohibitive.  However, jemalloc does integrate with the most
+    excellent <a class="ulink" href="http://valgrind.org/" target="_top">Valgrind</a> tool if the
+    <code class="option">--enable-valgrind</code> configuration option is enabled and the
+    <a class="link" href="#opt.valgrind">
+    "<code class="mallctl">opt.valgrind</code>"
+  </a> option
+    is enabled.</p></div><div class="refsect1" title="DIAGNOSTIC MESSAGES"><a name="diagnostic_messages"></a><h2>DIAGNOSTIC MESSAGES</h2><p>If any of the memory allocation/deallocation functions detect an
     error or warning condition, a message will be printed to file descriptor
     <code class="constant">STDERR_FILENO</code>.  Errors will result in the process
     dumping core.  If the <a class="link" href="#opt.abort">
@@ -1447,7 +1283,7 @@ malloc_conf = "xmalloc:true";</pre><p>
     <code class="function">malloc_stats_print</code>(<em class="parameter"><code></code></em>), followed by a string
     pointer.  Please note that doing anything which tries to allocate memory in
     this function is likely to result in a crash or deadlock.</p><p>All messages are prefixed by
-    &#8220;<code class="computeroutput">&lt;jemalloc&gt;: </code>&#8221;.</p></div><div class="refsect1" title="RETURN VALUES"><a name="return_values"></a><h2>RETURN VALUES</h2><div class="refsect2" title="Standard API"><a name="id3029250"></a><h3>Standard API</h3><p>The <code class="function">malloc</code>(<em class="parameter"><code></code></em>) and
+    &#8220;<code class="computeroutput">&lt;jemalloc&gt;: </code>&#8221;.</p></div><div class="refsect1" title="RETURN VALUES"><a name="return_values"></a><h2>RETURN VALUES</h2><div class="refsect2" title="Standard API"><a name="id286955289"></a><h3>Standard API</h3><p>The <code class="function">malloc</code>(<em class="parameter"><code></code></em>) and
       <code class="function">calloc</code>(<em class="parameter"><code></code></em>) functions return a pointer to the
       allocated memory if successful; otherwise a <code class="constant">NULL</code>
       pointer is returned and <code class="varname">errno</code> is set to
@@ -1459,6 +1295,14 @@ malloc_conf = "xmalloc:true";</pre><p>
             not a power of 2 at least as large as
             <code class="code">sizeof(<span class="type">void *</span>)</code>.
             </p></dd><dt><span class="term"><span class="errorname">ENOMEM</span></span></dt><dd><p>Memory allocation error.</p></dd></dl></div><p>
+      </p><p>The <code class="function">aligned_alloc</code>(<em class="parameter"><code></code></em>) function returns
+      a pointer to the allocated memory if successful; otherwise a
+      <code class="constant">NULL</code> pointer is returned and
+      <code class="varname">errno</code> is set.  The
+      <code class="function">aligned_alloc</code>(<em class="parameter"><code></code></em>) function will fail if:
+        </p><div class="variablelist"><dl><dt><span class="term"><span class="errorname">EINVAL</span></span></dt><dd><p>The <em class="parameter"><code>alignment</code></em> parameter is
+            not a power of 2.
+            </p></dd><dt><span class="term"><span class="errorname">ENOMEM</span></span></dt><dd><p>Memory allocation error.</p></dd></dl></div><p>
       </p><p>The <code class="function">realloc</code>(<em class="parameter"><code></code></em>) function returns a
       pointer, possibly identical to <em class="parameter"><code>ptr</code></em>, to the
       allocated memory if successful; otherwise a <code class="constant">NULL</code>
@@ -1467,7 +1311,7 @@ malloc_conf = "xmalloc:true";</pre><p>
       allocation failure.  The <code class="function">realloc</code>(<em class="parameter"><code></code></em>)
       function always leaves the original buffer intact when an error occurs.
       </p><p>The <code class="function">free</code>(<em class="parameter"><code></code></em>) function returns no
-      value.</p></div><div class="refsect2" title="Non-standard API"><a name="id3029403"></a><h3>Non-standard API</h3><p>The <code class="function">malloc_usable_size</code>(<em class="parameter"><code></code></em>) function
+      value.</p></div><div class="refsect2" title="Non-standard API"><a name="id286955505"></a><h3>Non-standard API</h3><p>The <code class="function">malloc_usable_size</code>(<em class="parameter"><code></code></em>) function
       returns the usable size of the allocation pointed to by
       <em class="parameter"><code>ptr</code></em>.  </p><p>The <code class="function">mallctl</code>(<em class="parameter"><code></code></em>),
       <code class="function">mallctlnametomib</code>(<em class="parameter"><code></code></em>), and
@@ -1486,13 +1330,15 @@ malloc_conf = "xmalloc:true";</pre><p>
             occurred.</p></dd><dt><span class="term"><span class="errorname">EFAULT</span></span></dt><dd><p>An interface with side effects failed in some way
             not directly related to <code class="function">mallctl*</code>(<em class="parameter"><code></code></em>)
             read/write processing.</p></dd></dl></div><p>
-      </p></div><div class="refsect2" title="Experimental API"><a name="id3029581"></a><h3>Experimental API</h3><p>The <code class="function">allocm</code>(<em class="parameter"><code></code></em>),
+      </p></div><div class="refsect2" title="Experimental API"><a name="id286955658"></a><h3>Experimental API</h3><p>The <code class="function">allocm</code>(<em class="parameter"><code></code></em>),
       <code class="function">rallocm</code>(<em class="parameter"><code></code></em>),
-      <code class="function">sallocm</code>(<em class="parameter"><code></code></em>), and
-      <code class="function">dallocm</code>(<em class="parameter"><code></code></em>) functions return
+      <code class="function">sallocm</code>(<em class="parameter"><code></code></em>),
+      <code class="function">dallocm</code>(<em class="parameter"><code></code></em>), and
+      <code class="function">nallocm</code>(<em class="parameter"><code></code></em>) functions return
       <code class="constant">ALLOCM_SUCCESS</code> on success; otherwise they return an
-      error value.  The <code class="function">allocm</code>(<em class="parameter"><code></code></em>) and
-      <code class="function">rallocm</code>(<em class="parameter"><code></code></em>) functions will fail if:
+      error value.  The <code class="function">allocm</code>(<em class="parameter"><code></code></em>),
+      <code class="function">rallocm</code>(<em class="parameter"><code></code></em>), and
+      <code class="function">nallocm</code>(<em class="parameter"><code></code></em>) functions will fail if:
         </p><div class="variablelist"><dl><dt><span class="term"><span class="errorname">ALLOCM_ERR_OOM</span></span></dt><dd><p>Out of memory.  Insufficient contiguous memory was
             available to service the allocation request.  The
             <code class="function">allocm</code>(<em class="parameter"><code></code></em>) function additionally sets
@@ -1516,6 +1362,7 @@ malloc_conf = "xmalloc:true";</pre><p>
 malloc_conf = "lg_chunk:24";</pre></div><div class="refsect1" title="SEE ALSO"><a name="see_also"></a><h2>SEE ALSO</h2><p><span class="citerefentry"><span class="refentrytitle">madvise</span>(2)</span>,
     <span class="citerefentry"><span class="refentrytitle">mmap</span>(2)</span>,
     <span class="citerefentry"><span class="refentrytitle">sbrk</span>(2)</span>,
+    <span class="citerefentry"><span class="refentrytitle">utrace</span>(2)</span>,
     <span class="citerefentry"><span class="refentrytitle">alloca</span>(3)</span>,
     <span class="citerefentry"><span class="refentrytitle">atexit</span>(3)</span>,
     <span class="citerefentry"><span class="refentrytitle">getpagesize</span>(3)</span></p></div><div class="refsect1" title="STANDARDS"><a name="standards"></a><h2>STANDARDS</h2><p>The <code class="function">malloc</code>(<em class="parameter"><code></code></em>),
diff --git a/deps/jemalloc/doc/jemalloc.xml.in b/deps/jemalloc/doc/jemalloc.xml.in
index 7a32879a..877c500f 100644
--- a/deps/jemalloc/doc/jemalloc.xml.in
+++ b/deps/jemalloc/doc/jemalloc.xml.in
@@ -30,6 +30,7 @@
     <refname>malloc</refname>
     <refname>calloc</refname>
     <refname>posix_memalign</refname>
+    <refname>aligned_alloc</refname>
     <refname>realloc</refname>
     <refname>free</refname>
     <refname>malloc_usable_size</refname>
@@ -41,6 +42,7 @@
     <refname>rallocm</refname>
     <refname>sallocm</refname>
     <refname>dallocm</refname>
+    <refname>nallocm</refname>
     -->
     <refpurpose>general purpose memory allocation functions</refpurpose>
   </refnamediv>
@@ -72,6 +74,11 @@
           <paramdef>size_t <parameter>alignment</parameter></paramdef>
           <paramdef>size_t <parameter>size</parameter></paramdef>
         </funcprototype>
+        <funcprototype>
+          <funcdef>void *<function>aligned_alloc</function></funcdef>
+          <paramdef>size_t <parameter>alignment</parameter></paramdef>
+          <paramdef>size_t <parameter>size</parameter></paramdef>
+        </funcprototype>
         <funcprototype>
           <funcdef>void *<function>realloc</function></funcdef>
           <paramdef>void *<parameter>ptr</parameter></paramdef>
@@ -154,6 +161,12 @@
           <paramdef>void *<parameter>ptr</parameter></paramdef>
           <paramdef>int <parameter>flags</parameter></paramdef>
         </funcprototype>
+        <funcprototype>
+          <funcdef>int <function>nallocm</function></funcdef>
+          <paramdef>size_t *<parameter>rsize</parameter></paramdef>
+          <paramdef>size_t <parameter>size</parameter></paramdef>
+          <paramdef>int <parameter>flags</parameter></paramdef>
+        </funcprototype>
       </refsect2>
     </funcsynopsis>
   </refsynopsisdiv>
@@ -183,6 +196,14 @@
       <parameter>alignment</parameter> must be a power of 2 at least as large
       as <code language="C">sizeof(<type>void *</type>)</code>.</para>
 
+      <para>The <function>aligned_alloc<parameter/></function> function
+      allocates <parameter>size</parameter> bytes of memory such that the
+      allocation's base address is a multiple of
+      <parameter>alignment</parameter>.  The requested
+      <parameter>alignment</parameter> must be a power of 2.  Behavior is
+      undefined if <parameter>size</parameter> is not an integral multiple of
+      <parameter>alignment</parameter>.</para>
+
       <para>The <function>realloc<parameter/></function> function changes the
       size of the previously allocated memory referenced by
       <parameter>ptr</parameter> to <parameter>size</parameter> bytes.  The
@@ -297,12 +318,15 @@ for (i = 0; i < nbins; i++) {
     <refsect2>
       <title>Experimental API</title>
       <para>The experimental API is subject to change or removal without regard
-      for backward compatibility.</para>
+      for backward compatibility.  If <option>--disable-experimental</option>
+      is specified during configuration, the experimental API is
+      omitted.</para>
 
       <para>The <function>allocm<parameter/></function>,
       <function>rallocm<parameter/></function>,
-      <function>sallocm<parameter/></function>, and
-      <function>dallocm<parameter/></function> functions all have a
+      <function>sallocm<parameter/></function>,
+      <function>dallocm<parameter/></function>, and
+      <function>nallocm<parameter/></function> functions all have a
       <parameter>flags</parameter> argument that can be used to specify
       options.  The functions only check the options that are contextually
       relevant.  Use bitwise or (<code language="C">|</code>) operations to
@@ -351,7 +375,9 @@ for (i = 0; i < nbins; i++) {
       least <parameter>size</parameter> bytes of memory, sets
       <parameter>*ptr</parameter> to the base address of the allocation, and
       sets <parameter>*rsize</parameter> to the real size of the allocation if
-      <parameter>rsize</parameter> is not <constant>NULL</constant>.</para>
+      <parameter>rsize</parameter> is not <constant>NULL</constant>.  Behavior
+      is undefined if <parameter>size</parameter> is
+      <constant>0</constant>.</para>
 
       <para>The <function>rallocm<parameter/></function> function resizes the
       allocation at <parameter>*ptr</parameter> to be at least
@@ -364,7 +390,8 @@ for (i = 0; i < nbins; i++) {
       language="C"><parameter>size</parameter> +
       <parameter>extra</parameter>)</code> bytes, though inability to allocate
       the extra byte(s) will not by itself result in failure.  Behavior is
-      undefined if <code language="C">(<parameter>size</parameter> +
+      undefined if <parameter>size</parameter> is <constant>0</constant>, or if
+      <code language="C">(<parameter>size</parameter> +
       <parameter>extra</parameter> &gt;
       <constant>SIZE_T_MAX</constant>)</code>.</para>
 
@@ -374,6 +401,15 @@ for (i = 0; i < nbins; i++) {
       <para>The <function>dallocm<parameter/></function> function causes the
       memory referenced by <parameter>ptr</parameter> to be made available for
       future allocations.</para>
+
+      <para>The <function>nallocm<parameter/></function> function allocates no
+      memory, but it performs the same size computation as the
+      <function>allocm<parameter/></function> function, and if
+      <parameter>rsize</parameter> is not <constant>NULL</constant> it sets
+      <parameter>*rsize</parameter> to the real size of the allocation that
+      would result from the equivalent <function>allocm<parameter/></function>
+      function call.  Behavior is undefined if
+      <parameter>size</parameter> is <constant>0</constant>.</para>
     </refsect2>
   </refsect1>
   <refsect1 id="tuning">
@@ -408,9 +444,9 @@ for (i = 0; i < nbins; i++) {
     suboptimal for several reasons, including race conditions, increased
     fragmentation, and artificial limitations on maximum usable memory.  If
     <option>--enable-dss</option> is specified during configuration, this
-    allocator uses both <citerefentry><refentrytitle>sbrk</refentrytitle>
+    allocator uses both <citerefentry><refentrytitle>mmap</refentrytitle>
     <manvolnum>2</manvolnum></citerefentry> and
-    <citerefentry><refentrytitle>mmap</refentrytitle>
+    <citerefentry><refentrytitle>sbrk</refentrytitle>
     <manvolnum>2</manvolnum></citerefentry>, in that order of preference;
     otherwise only <citerefentry><refentrytitle>mmap</refentrytitle>
     <manvolnum>2</manvolnum></citerefentry> is used.</para>
@@ -455,24 +491,14 @@ for (i = 0; i < nbins; i++) {
     allocations in constant time.</para>
 
     <para>Small objects are managed in groups by page runs.  Each run maintains
-    a frontier and free list to track which regions are in use.  Unless
-    <option>--disable-tiny</option> is specified during configuration,
-    allocation requests that are no more than half the quantum (8 or 16,
-    depending on architecture) are rounded up to the nearest power of two that
-    is at least <code language="C">sizeof(<type>void *</type>)</code>.
-    Allocation requests that are more than half the quantum, but no more than
-    the minimum cacheline-multiple size class (see the <link
-    linkend="opt.lg_qspace_max"><mallctl>opt.lg_qspace_max</mallctl></link>
-    option) are rounded up to the nearest multiple of the quantum.  Allocation
-    requests that are more than the minimum cacheline-multiple size class, but
-    no more than the minimum subpage-multiple size class (see the <link
-    linkend="opt.lg_cspace_max"><mallctl>opt.lg_cspace_max</mallctl></link>
-    option) are rounded up to the nearest multiple of the cacheline size (64).
-    Allocation requests that are more than the minimum subpage-multiple size
-    class, but no more than the maximum subpage-multiple size class are rounded
-    up to the nearest multiple of the subpage size (256).  Allocation requests
-    that are more than the maximum subpage-multiple size class, but small
-    enough to fit in an arena-managed chunk (see the <link
+    a frontier and free list to track which regions are in use.  Allocation
+    requests that are no more than half the quantum (8 or 16, depending on
+    architecture) are rounded up to the nearest power of two that is at least
+    <code language="C">sizeof(<type>double</type>)</code>.  All other small
+    object size classes are multiples of the quantum, spaced such that internal
+    fragmentation is limited to approximately 25% for all but the smallest size
+    classes.  Allocation requests that are larger than the maximum small size
+    class, but small enough to fit in an arena-managed chunk (see the <link
     linkend="opt.lg_chunk"><mallctl>opt.lg_chunk</mallctl></link> option), are
     rounded up to the nearest run size.  Allocation requests that are too large
     to fit in an arena-managed chunk are rounded up to the nearest multiple of
@@ -490,41 +516,55 @@ for (i = 0; i < nbins; i++) {
 
     <table xml:id="size_classes" frame="all">
       <title>Size classes</title>
-      <tgroup cols="3" align="left" colsep="1" rowsep="1">
-      <colspec colname="c1"/>
-      <colspec colname="c2"/>
-      <colspec colname="c3"/>
+      <tgroup cols="3" colsep="1" rowsep="1">
+      <colspec colname="c1" align="left"/>
+      <colspec colname="c2" align="right"/>
+      <colspec colname="c3" align="left"/>
       <thead>
         <row>
           <entry>Category</entry>
-          <entry>Subcategory</entry>
+          <entry>Spacing</entry>
           <entry>Size</entry>
         </row>
       </thead>
       <tbody>
         <row>
-          <entry morerows="3">Small</entry>
-          <entry>Tiny</entry>
+          <entry morerows="6">Small</entry>
+          <entry>lg</entry>
           <entry>[8]</entry>
         </row>
         <row>
-          <entry>Quantum-spaced</entry>
+          <entry>16</entry>
           <entry>[16, 32, 48, ..., 128]</entry>
         </row>
         <row>
-          <entry>Cacheline-spaced</entry>
-          <entry>[192, 256, 320, ..., 512]</entry>
+          <entry>32</entry>
+          <entry>[160, 192, 224, 256]</entry>
         </row>
         <row>
-          <entry>Subpage-spaced</entry>
-          <entry>[768, 1024, 1280, ..., 3840]</entry>
+          <entry>64</entry>
+          <entry>[320, 384, 448, 512]</entry>
         </row>
         <row>
-          <entry namest="c1" nameend="c2">Large</entry>
+          <entry>128</entry>
+          <entry>[640, 768, 896, 1024]</entry>
+        </row>
+        <row>
+          <entry>256</entry>
+          <entry>[1280, 1536, 1792, 2048]</entry>
+        </row>
+        <row>
+          <entry>512</entry>
+          <entry>[2560, 3072, 3584]</entry>
+        </row>
+        <row>
+          <entry>Large</entry>
+          <entry>4 KiB</entry>
           <entry>[4 KiB, 8 KiB, 12 KiB, ..., 4072 KiB]</entry>
         </row>
         <row>
-          <entry namest="c1" nameend="c2">Huge</entry>
+          <entry>Huge</entry>
+          <entry>4 MiB</entry>
           <entry>[4 MiB, 8 MiB, 12 MiB, ...]</entry>
         </row>
       </tbody>
@@ -592,32 +632,42 @@ for (i = 0; i < nbins; i++) {
 
       <varlistentry>
         <term>
-          <mallctl>config.dynamic_page_shift</mallctl>
+          <mallctl>config.fill</mallctl>
           (<type>bool</type>)
           <literal>r-</literal>
         </term>
-        <listitem><para><option>--enable-dynamic-page-shift</option> was
-        specified during build configuration.</para></listitem>
+        <listitem><para><option>--enable-fill</option> was specified during
+        build configuration.</para></listitem>
       </varlistentry>
 
       <varlistentry>
         <term>
-          <mallctl>config.fill</mallctl>
+          <mallctl>config.lazy_lock</mallctl>
           (<type>bool</type>)
           <literal>r-</literal>
         </term>
-        <listitem><para><option>--enable-fill</option> was specified during
+        <listitem><para><option>--enable-lazy-lock</option> was specified
+        during build configuration.</para></listitem>
+      </varlistentry>
+
+      <varlistentry>
+        <term>
+          <mallctl>config.mremap</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+        </term>
+        <listitem><para><option>--enable-mremap</option> was specified during
         build configuration.</para></listitem>
       </varlistentry>
 
       <varlistentry>
         <term>
-          <mallctl>config.lazy_lock</mallctl>
+          <mallctl>config.munmap</mallctl>
           (<type>bool</type>)
           <literal>r-</literal>
         </term>
-        <listitem><para><option>--enable-lazy-lock</option> was specified
-        during build configuration.</para></listitem>
+        <listitem><para><option>--enable-munmap</option> was specified during
+        build configuration.</para></listitem>
       </varlistentry>
 
       <varlistentry>
@@ -662,51 +712,41 @@ for (i = 0; i < nbins; i++) {
 
       <varlistentry>
         <term>
-          <mallctl>config.swap</mallctl>
+          <mallctl>config.tcache</mallctl>
           (<type>bool</type>)
           <literal>r-</literal>
         </term>
-        <listitem><para><option>--enable-swap</option> was specified during
-        build configuration.</para></listitem>
+        <listitem><para><option>--disable-tcache</option> was not specified
+        during build configuration.</para></listitem>
       </varlistentry>
 
       <varlistentry>
         <term>
-          <mallctl>config.sysv</mallctl>
+          <mallctl>config.tls</mallctl>
           (<type>bool</type>)
           <literal>r-</literal>
         </term>
-        <listitem><para><option>--enable-sysv</option> was specified during
+        <listitem><para><option>--disable-tls</option> was not specified during
         build configuration.</para></listitem>
       </varlistentry>
 
       <varlistentry>
         <term>
-          <mallctl>config.tcache</mallctl>
-          (<type>bool</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para><option>--disable-tcache</option> was not specified
-        during build configuration.</para></listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term>
-          <mallctl>config.tiny</mallctl>
+          <mallctl>config.utrace</mallctl>
           (<type>bool</type>)
           <literal>r-</literal>
         </term>
-        <listitem><para><option>--disable-tiny</option> was not specified
-        during build configuration.</para></listitem>
+        <listitem><para><option>--enable-utrace</option> was specified during
+        build configuration.</para></listitem>
       </varlistentry>
 
       <varlistentry>
         <term>
-          <mallctl>config.tls</mallctl>
+          <mallctl>config.valgrind</mallctl>
           (<type>bool</type>)
           <literal>r-</literal>
         </term>
-        <listitem><para><option>--disable-tls</option> was not specified during
+        <listitem><para><option>--enable-valgrind</option> was specified during
         build configuration.</para></listitem>
       </varlistentry>
 
@@ -735,30 +775,6 @@ for (i = 0; i < nbins; i++) {
         </para></listitem>
       </varlistentry>
 
-      <varlistentry id="opt.lg_qspace_max">
-        <term>
-          <mallctl>opt.lg_qspace_max</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para>Size (log base 2) of the maximum size class that is a
-        multiple of the quantum (8 or 16 bytes, depending on architecture).
-        Above this size, cacheline spacing is used for size classes.  The
-        default value is 128 bytes (2^7).</para></listitem>
-      </varlistentry>
-
-      <varlistentry id="opt.lg_cspace_max">
-        <term>
-          <mallctl>opt.lg_cspace_max</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para>Size (log base 2) of the maximum size class that is a
-        multiple of the cacheline size (64).  Above this size, subpage spacing
-        (256 bytes) is used for size classes.  The default value is 512 bytes
-        (2^9).</para></listitem>
-      </varlistentry>
-
       <varlistentry id="opt.lg_chunk">
         <term>
           <mallctl>opt.lg_chunk</mallctl>
@@ -833,6 +849,45 @@ for (i = 0; i < nbins; i++) {
         configuration, in which case it is enabled by default.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="opt.quarantine">
+        <term>
+          <mallctl>opt.quarantine</mallctl>
+          (<type>size_t</type>)
+          <literal>r-</literal>
+          [<option>--enable-fill</option>]
+        </term>
+        <listitem><para>Per thread quarantine size in bytes.  If non-zero, each
+        thread maintains a FIFO object quarantine that stores up to the
+        specified number of bytes of memory.  The quarantined memory is not
+        freed until it is released from quarantine, though it is immediately
+        junk-filled if the <link
+        linkend="opt.junk"><mallctl>opt.junk</mallctl></link> option is
+        enabled.  This feature is of particular use in combination with <ulink
+        url="http://valgrind.org/">Valgrind</ulink>, which can detect attempts
+        to access quarantined objects.  This is intended for debugging and will
+        impact performance negatively.  The default quarantine size is
+        0.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.redzone">
+        <term>
+          <mallctl>opt.redzone</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+          [<option>--enable-fill</option>]
+        </term>
+        <listitem><para>Redzones enabled/disabled.  If enabled, small
+        allocations have redzones before and after them.  Furthermore, if the
+        <link linkend="opt.junk"><mallctl>opt.junk</mallctl></link> option is
+        enabled, the redzones are checked for corruption during deallocation.
+        However, the primary intended purpose of this feature is to be used in
+        combination with <ulink url="http://valgrind.org/">Valgrind</ulink>,
+        which needs redzones in order to do effective buffer overflow/underflow
+        detection.  This option is intended for debugging and will impact
+        performance negatively.  This option is disabled by
+        default.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="opt.zero">
         <term>
           <mallctl>opt.zero</mallctl>
@@ -850,20 +905,36 @@ for (i = 0; i < nbins; i++) {
         </para></listitem>
       </varlistentry>
 
-      <varlistentry id="opt.sysv">
+      <varlistentry id="opt.utrace">
         <term>
-          <mallctl>opt.sysv</mallctl>
+          <mallctl>opt.utrace</mallctl>
           (<type>bool</type>)
           <literal>r-</literal>
-          [<option>--enable-sysv</option>]
+          [<option>--enable-utrace</option>]
         </term>
-        <listitem><para>If enabled, attempting to allocate zero bytes will
-        return a <constant>NULL</constant> pointer instead of a valid pointer.
-        (The default behavior is to make a minimal allocation and return a
-        pointer to it.) This option is provided for System V compatibility.
-        This option is incompatible with the <link
-        linkend="opt.xmalloc"><mallctl>opt.xmalloc</mallctl></link> option.
-        This option is disabled by default.</para></listitem>
+        <listitem><para>Allocation tracing based on
+        <citerefentry><refentrytitle>utrace</refentrytitle>
+        <manvolnum>2</manvolnum></citerefentry> enabled/disabled.  This option
+        is disabled by default.</para></listitem>
+      </varlistentry>
+
+      <varlistentry id="opt.valgrind">
+        <term>
+          <mallctl>opt.valgrind</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+          [<option>--enable-valgrind</option>]
+        </term>
+        <listitem><para><ulink url="http://valgrind.org/">Valgrind</ulink>
+        support enabled/disabled.  If enabled, several other options are
+        automatically modified during options processing to work well with
+        Valgrind: <link linkend="opt.junk"><mallctl>opt.junk</mallctl></link>
+        and <link linkend="opt.zero"><mallctl>opt.zero</mallctl></link> are set
+        to false, <link
+        linkend="opt.quarantine"><mallctl>opt.quarantine</mallctl></link> is
+        set to 16 MiB, and <link
+        linkend="opt.redzone"><mallctl>opt.redzone</mallctl></link> is set to
+        true.  This option is disabled by default.</para></listitem>
       </varlistentry>
 
       <varlistentry id="opt.xmalloc">
@@ -899,29 +970,11 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         allocations to be satisfied without performing any thread
         synchronization, at the cost of increased memory use.  See the
         <link
-        linkend="opt.lg_tcache_gc_sweep"><mallctl>opt.lg_tcache_gc_sweep</mallctl></link>
-        and <link
         linkend="opt.lg_tcache_max"><mallctl>opt.lg_tcache_max</mallctl></link>
-        options for related tuning information.  This option is enabled by
+        option for related tuning information.  This option is enabled by
         default.</para></listitem>
       </varlistentry>
 
-      <varlistentry id="opt.lg_tcache_gc_sweep">
-        <term>
-          <mallctl>opt.lg_tcache_gc_sweep</mallctl>
-          (<type>ssize_t</type>)
-          <literal>r-</literal>
-          [<option>--enable-tcache</option>]
-        </term>
-        <listitem><para>Approximate interval (log base 2) between full
-        thread-specific cache garbage collection sweeps, counted in terms of
-        thread-specific cache allocation/deallocation events.  Garbage
-        collection is actually performed incrementally, one size class at a
-        time, in order to avoid large collection pauses.  The default sweep
-        interval is 8192 (2^13); setting this option to -1 will disable garbage
-        collection.</para></listitem>
-      </varlistentry>
-
       <varlistentry id="opt.lg_tcache_max">
         <term>
           <mallctl>opt.lg_tcache_max</mallctl>
@@ -943,31 +996,21 @@ malloc_conf = "xmalloc:true";]]></programlisting>
           [<option>--enable-prof</option>]
         </term>
         <listitem><para>Memory profiling enabled/disabled.  If enabled, profile
-        memory allocation activity, and use an
-        <citerefentry><refentrytitle>atexit</refentrytitle>
-        <manvolnum>3</manvolnum></citerefentry> function to dump final memory
-        usage to a file named according to the pattern
-        <filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.f.heap</filename>,
-        where <literal>&lt;prefix&gt;</literal> is controlled by the <link
-        linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
-        option.  See the <link
-        linkend="opt.lg_prof_bt_max"><mallctl>opt.lg_prof_bt_max</mallctl></link>
-        option for backtrace depth control.  See the <link
+        memory allocation activity.  See the <link
         linkend="opt.prof_active"><mallctl>opt.prof_active</mallctl></link>
         option for on-the-fly activation/deactivation.  See the <link
         linkend="opt.lg_prof_sample"><mallctl>opt.lg_prof_sample</mallctl></link>
         option for probabilistic sampling control.  See the <link
         linkend="opt.prof_accum"><mallctl>opt.prof_accum</mallctl></link>
         option for control of cumulative sample reporting.  See the <link
-        linkend="opt.lg_prof_tcmax"><mallctl>opt.lg_prof_tcmax</mallctl></link>
-        option for control of per thread backtrace caching.  See the <link
         linkend="opt.lg_prof_interval"><mallctl>opt.lg_prof_interval</mallctl></link>
-        option for information on interval-triggered profile dumping, and the
-        <link linkend="opt.prof_gdump"><mallctl>opt.prof_gdump</mallctl></link>
-        option for information on high-water-triggered profile dumping.
-        Profile output is compatible with the included <command>pprof</command>
-        Perl script, which originates from the <ulink
-        url="http://code.google.com/p/google-perftools/">google-perftools
+        option for information on interval-triggered profile dumping, the <link
+        linkend="opt.prof_gdump"><mallctl>opt.prof_gdump</mallctl></link>
+        option for information on high-water-triggered profile dumping, and the
+        <link linkend="opt.prof_final"><mallctl>opt.prof_final</mallctl></link>
+        option for final profile dumping.  Profile output is compatible with
+        the included <command>pprof</command> Perl script, which originates
+        from the <ulink url="http://code.google.com/p/gperftools/">gperftools
         package</ulink>.</para></listitem>
       </varlistentry>
 
@@ -985,17 +1028,6 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         <filename>jeprof</filename>.</para></listitem>
       </varlistentry>
 
-      <varlistentry id="opt.lg_prof_bt_max">
-        <term>
-          <mallctl>opt.lg_prof_bt_max</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-          [<option>--enable-prof</option>]
-        </term>
-        <listitem><para>Maximum backtrace depth (log base 2) when profiling
-        memory allocation activity.  The default is 128 (2^7).</para></listitem>
-      </varlistentry>
-
       <varlistentry id="opt.prof_active">
         <term>
           <mallctl>opt.prof_active</mallctl>
@@ -1023,8 +1055,8 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         <listitem><para>Average interval (log base 2) between allocation
         samples, as measured in bytes of allocation activity.  Increasing the
         sampling interval decreases profile fidelity, but also decreases the
-        computational overhead.  The default sample interval is 1 (2^0) (i.e.
-        all allocations are sampled).</para></listitem>
+        computational overhead.  The default sample interval is 512 KiB (2^19
+        B).</para></listitem>
       </varlistentry>
 
       <varlistentry id="opt.prof_accum">
@@ -1038,28 +1070,8 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         dumps enabled/disabled.  If this option is enabled, every unique
         backtrace must be stored for the duration of execution.  Depending on
         the application, this can impose a large memory overhead, and the
-        cumulative counts are not always of interest.  See the
-        <link
-        linkend="opt.lg_prof_tcmax"><mallctl>opt.lg_prof_tcmax</mallctl></link>
-        option for control of per thread backtrace caching, which has important
-        interactions.  This option is enabled by default.</para></listitem>
-      </varlistentry>
-
-      <varlistentry id="opt.lg_prof_tcmax">
-        <term>
-          <mallctl>opt.lg_prof_tcmax</mallctl>
-          (<type>ssize_t</type>)
-          <literal>r-</literal>
-          [<option>--enable-prof</option>]
-        </term>
-        <listitem><para>Maximum per thread backtrace cache (log base 2) used
-        for heap profiling.  A backtrace can only be discarded if the
-        <link linkend="opt.prof_accum"><mallctl>opt.prof_accum</mallctl></link>
-        option is disabled, and no thread caches currently refer to the
-        backtrace.  Therefore, a backtrace cache limit should be imposed if the
-        intention is to limit how much memory is used by backtraces.  By
-        default, no limit is imposed (encoded as -1).
-        </para></listitem>
+        cumulative counts are not always of interest.  This option is disabled
+        by default.</para></listitem>
       </varlistentry>
 
       <varlistentry id="opt.lg_prof_interval">
@@ -1099,6 +1111,23 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         option.  This option is disabled by default.</para></listitem>
       </varlistentry>
 
+      <varlistentry id="opt.prof_final">
+        <term>
+          <mallctl>opt.prof_final</mallctl>
+          (<type>bool</type>)
+          <literal>r-</literal>
+          [<option>--enable-prof</option>]
+        </term>
+        <listitem><para>Use an
+        <citerefentry><refentrytitle>atexit</refentrytitle>
+        <manvolnum>3</manvolnum></citerefentry> function to dump final memory
+        usage to a file named according to the pattern
+        <filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.f.heap</filename>,
+        where <literal>&lt;prefix&gt;</literal> is controlled by the <link
+        linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
+        option.  This option is enabled by default.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="opt.prof_leak">
         <term>
           <mallctl>opt.prof_leak</mallctl>
@@ -1110,51 +1139,11 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         <citerefentry><refentrytitle>atexit</refentrytitle>
         <manvolnum>3</manvolnum></citerefentry> function to report memory leaks
         detected by allocation sampling.  See the
-        <link
-        linkend="opt.lg_prof_bt_max"><mallctl>opt.lg_prof_bt_max</mallctl></link>
-        option for backtrace depth control.  See the
         <link linkend="opt.prof"><mallctl>opt.prof</mallctl></link> option for
         information on analyzing heap profile output.  This option is disabled
         by default.</para></listitem>
       </varlistentry>
 
-      <varlistentry id="opt.overcommit">
-        <term>
-          <mallctl>opt.overcommit</mallctl>
-          (<type>bool</type>)
-          <literal>r-</literal>
-          [<option>--enable-swap</option>]
-        </term>
-        <listitem><para>Over-commit enabled/disabled.  If enabled, over-commit
-        memory as a side effect of using anonymous
-        <citerefentry><refentrytitle>mmap</refentrytitle>
-        <manvolnum>2</manvolnum></citerefentry> or
-        <citerefentry><refentrytitle>sbrk</refentrytitle>
-        <manvolnum>2</manvolnum></citerefentry> for virtual memory allocation.
-        In order for overcommit to be disabled, the <link
-        linkend="swap.fds"><mallctl>swap.fds</mallctl></link> mallctl must have
-        been successfully written to.  This option is enabled by
-        default.</para></listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term>
-          <mallctl>tcache.flush</mallctl>
-          (<type>void</type>)
-          <literal>--</literal>
-          [<option>--enable-tcache</option>]
-        </term>
-        <listitem><para>Flush calling thread's tcache.  This interface releases
-        all cached objects and internal data structures associated with the
-        calling thread's thread-specific cache.  Ordinarily, this interface
-        need not be called, since automatic periodic incremental garbage
-        collection occurs, and the thread cache is automatically discarded when
-        a thread exits.  However, garbage collection is triggered by allocation
-        activity, so it is possible for a thread that stops
-        allocating/deallocating to retain its cache indefinitely, in which case
-        the developer may find manual flushing useful.</para></listitem>
-      </varlistentry>
-
       <varlistentry>
         <term>
           <mallctl>thread.arena</mallctl>
@@ -1226,6 +1215,38 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         <function>mallctl*<parameter/></function> calls.</para></listitem>
       </varlistentry>
 
+      <varlistentry>
+        <term>
+          <mallctl>thread.tcache.enabled</mallctl>
+          (<type>bool</type>)
+          <literal>rw</literal>
+          [<option>--enable-tcache</option>]
+        </term>
+        <listitem><para>Enable/disable calling thread's tcache.  The tcache is
+        implicitly flushed as a side effect of becoming
+        disabled (see <link
+        linkend="thread.tcache.flush"><mallctl>thread.tcache.flush</mallctl></link>).
+        </para></listitem>
+      </varlistentry>
+
+      <varlistentry id="thread.tcache.flush">
+        <term>
+          <mallctl>thread.tcache.flush</mallctl>
+          (<type>void</type>)
+          <literal>--</literal>
+          [<option>--enable-tcache</option>]
+        </term>
+        <listitem><para>Flush calling thread's tcache.  This interface releases
+        all cached objects and internal data structures associated with the
+        calling thread's thread-specific cache.  Ordinarily, this interface
+        need not be called, since automatic periodic incremental garbage
+        collection occurs, and the thread cache is automatically discarded when
+        a thread exits.  However, garbage collection is triggered by allocation
+        activity, so it is possible for a thread that stops
+        allocating/deallocating to retain its cache indefinitely, in which case
+        the developer may find manual flushing useful.</para></listitem>
+      </varlistentry>
+
       <varlistentry id="arenas.narenas">
         <term>
           <mallctl>arenas.narenas</mallctl>
@@ -1258,114 +1279,13 @@ malloc_conf = "xmalloc:true";]]></programlisting>
 
       <varlistentry>
         <term>
-          <mallctl>arenas.cacheline</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para>Assumed cacheline size.</para></listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term>
-          <mallctl>arenas.subpage</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para>Subpage size class interval.</para></listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term>
-          <mallctl>arenas.pagesize</mallctl>
+          <mallctl>arenas.page</mallctl>
           (<type>size_t</type>)
           <literal>r-</literal>
         </term>
         <listitem><para>Page size.</para></listitem>
       </varlistentry>
 
-      <varlistentry>
-        <term>
-          <mallctl>arenas.chunksize</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para>Chunk size.</para></listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term>
-          <mallctl>arenas.tspace_min</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para>Minimum tiny size class.  Tiny size classes are powers
-        of two.</para></listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term>
-          <mallctl>arenas.tspace_max</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para>Maximum tiny size class.  Tiny size classes are powers
-        of two.</para></listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term>
-          <mallctl>arenas.qspace_min</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para>Minimum quantum-spaced size class.</para></listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term>
-          <mallctl>arenas.qspace_max</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para>Maximum quantum-spaced size class.</para></listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term>
-          <mallctl>arenas.cspace_min</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para>Minimum cacheline-spaced size class.</para></listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term>
-          <mallctl>arenas.cspace_max</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para>Maximum cacheline-spaced size class.</para></listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term>
-          <mallctl>arenas.sspace_min</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para>Minimum subpage-spaced size class.</para></listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term>
-          <mallctl>arenas.sspace_max</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para>Maximum subpage-spaced size class.</para></listitem>
-      </varlistentry>
-
       <varlistentry>
         <term>
           <mallctl>arenas.tcache_max</mallctl>
@@ -1376,52 +1296,13 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         <listitem><para>Maximum thread-cached size class.</para></listitem>
       </varlistentry>
 
-      <varlistentry>
-        <term>
-          <mallctl>arenas.ntbins</mallctl>
-          (<type>unsigned</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para>Number of tiny bin size classes.</para></listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term>
-          <mallctl>arenas.nqbins</mallctl>
-          (<type>unsigned</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para>Number of quantum-spaced bin size
-        classes.</para></listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term>
-          <mallctl>arenas.ncbins</mallctl>
-          (<type>unsigned</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para>Number of cacheline-spaced bin size
-        classes.</para></listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term>
-          <mallctl>arenas.nsbins</mallctl>
-          (<type>unsigned</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para>Number of subpage-spaced bin size
-        classes.</para></listitem>
-      </varlistentry>
-
       <varlistentry>
         <term>
           <mallctl>arenas.nbins</mallctl>
           (<type>unsigned</type>)
           <literal>r-</literal>
         </term>
-        <listitem><para>Total number of bin size classes.</para></listitem>
+        <listitem><para>Number of bin size classes.</para></listitem>
       </varlistentry>
 
       <varlistentry>
@@ -1590,8 +1471,7 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         application.  This is a multiple of the chunk size, and is at least as
         large as <link
         linkend="stats.active"><mallctl>stats.active</mallctl></link>.  This
-        does not include inactive chunks backed by swap files.  his does not
-        include inactive chunks embedded in the DSS.</para></listitem>
+        does not include inactive chunks.</para></listitem>
       </varlistentry>
 
       <varlistentry>
@@ -1602,8 +1482,7 @@ malloc_conf = "xmalloc:true";]]></programlisting>
           [<option>--enable-stats</option>]
         </term>
         <listitem><para>Total number of chunks actively mapped on behalf of the
-        application.  This does not include inactive chunks backed by swap
-        files.  This does not include inactive chunks embedded in the DSS.
+        application.  This does not include inactive chunks.
         </para></listitem>
       </varlistentry>
 
@@ -1908,17 +1787,6 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         to allocate changed.</para></listitem>
       </varlistentry>
 
-      <varlistentry>
-        <term>
-          <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.highruns</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-          [<option>--enable-stats</option>]
-        </term>
-        <listitem><para>Maximum number of runs at any time thus far.
-        </para></listitem>
-      </varlistentry>
-
       <varlistentry>
         <term>
           <mallctl>stats.arenas.&lt;i&gt;.bins.&lt;j&gt;.curruns</mallctl>
@@ -1962,17 +1830,6 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         class.</para></listitem>
       </varlistentry>
 
-      <varlistentry>
-        <term>
-          <mallctl>stats.arenas.&lt;i&gt;.lruns.&lt;j&gt;.highruns</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-          [<option>--enable-stats</option>]
-        </term>
-        <listitem><para>Maximum number of runs at any time thus far for this
-        size class.</para></listitem>
-      </varlistentry>
-
       <varlistentry>
         <term>
           <mallctl>stats.arenas.&lt;i&gt;.lruns.&lt;j&gt;.curruns</mallctl>
@@ -1983,65 +1840,6 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         <listitem><para>Current number of runs for this size class.
         </para></listitem>
       </varlistentry>
-
-      <varlistentry>
-        <term>
-          <mallctl>swap.avail</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-          [<option>--enable-stats --enable-swap</option>]
-        </term>
-        <listitem><para>Number of swap file bytes that are currently not
-        associated with any chunk (i.e. mapped, but otherwise completely
-        unmanaged).</para></listitem>
-      </varlistentry>
-
-      <varlistentry id="swap.prezeroed">
-        <term>
-          <mallctl>swap.prezeroed</mallctl>
-          (<type>bool</type>)
-          <literal>rw</literal>
-          [<option>--enable-swap</option>]
-        </term>
-        <listitem><para>If true, the allocator assumes that the swap file(s)
-        contain nothing but nil bytes.  If this assumption is violated,
-        allocator behavior is undefined.  This value becomes read-only after
-        <link linkend="swap.fds"><mallctl>swap.fds</mallctl></link> is
-        successfully written to.</para></listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term>
-          <mallctl>swap.nfds</mallctl>
-          (<type>size_t</type>)
-          <literal>r-</literal>
-          [<option>--enable-swap</option>]
-        </term>
-        <listitem><para>Number of file descriptors in use for swap.
-        </para></listitem>
-      </varlistentry>
-
-      <varlistentry id="swap.fds">
-        <term>
-          <mallctl>swap.fds</mallctl>
-          (<type>int *</type>)
-          <literal>rw</literal>
-          [<option>--enable-swap</option>]
-        </term>
-        <listitem><para>When written to, the files associated with the
-        specified file descriptors are contiguously mapped via
-        <citerefentry><refentrytitle>mmap</refentrytitle>
-        <manvolnum>2</manvolnum></citerefentry>.  The resulting virtual memory
-        region is preferred over anonymous
-        <citerefentry><refentrytitle>mmap</refentrytitle>
-        <manvolnum>2</manvolnum></citerefentry> and
-        <citerefentry><refentrytitle>sbrk</refentrytitle>
-        <manvolnum>2</manvolnum></citerefentry> memory.  Note that if a file's
-        size is not a multiple of the page size, it is automatically truncated
-        to the nearest page size multiple.  See the
-        <link linkend="swap.prezeroed"><mallctl>swap.prezeroed</mallctl></link>
-        mallctl for specifying that the files are pre-zeroed.</para></listitem>
-      </varlistentry>
     </variablelist>
   </refsect1>
   <refsect1 id="debugging_malloc_problems">
@@ -2065,10 +1863,11 @@ malloc_conf = "xmalloc:true";]]></programlisting>
 
     <para>This implementation does not provide much detail about the problems
     it detects, because the performance impact for storing such information
-    would be prohibitive.  There are a number of allocator implementations
-    available on the Internet which focus on detecting and pinpointing problems
-    by trading performance for extra sanity checks and detailed
-    diagnostics.</para>
+    would be prohibitive.  However, jemalloc does integrate with the most
+    excellent <ulink url="http://valgrind.org/">Valgrind</ulink> tool if the
+    <option>--enable-valgrind</option> configuration option is enabled and the
+    <link linkend="opt.valgrind"><mallctl>opt.valgrind</mallctl></link> option
+    is enabled.</para>
   </refsect1>
   <refsect1 id="diagnostic_messages">
     <title>DIAGNOSTIC MESSAGES</title>
@@ -2124,6 +1923,27 @@ malloc_conf = "xmalloc:true";]]></programlisting>
         </variablelist>
       </para>
 
+      <para>The <function>aligned_alloc<parameter/></function> function returns
+      a pointer to the allocated memory if successful; otherwise a
+      <constant>NULL</constant> pointer is returned and
+      <varname>errno</varname> is set.  The
+      <function>aligned_alloc<parameter/></function> function will fail if:
+        <variablelist>
+          <varlistentry>
+            <term><errorname>EINVAL</errorname></term>
+
+            <listitem><para>The <parameter>alignment</parameter> parameter is
+            not a power of 2.
+            </para></listitem>
+          </varlistentry>
+          <varlistentry>
+            <term><errorname>ENOMEM</errorname></term>
+
+            <listitem><para>Memory allocation error.</para></listitem>
+          </varlistentry>
+        </variablelist>
+      </para>
+
       <para>The <function>realloc<parameter/></function> function returns a
       pointer, possibly identical to <parameter>ptr</parameter>, to the
       allocated memory if successful; otherwise a <constant>NULL</constant>
@@ -2196,11 +2016,13 @@ malloc_conf = "xmalloc:true";]]></programlisting>
       <title>Experimental API</title>
       <para>The <function>allocm<parameter/></function>,
       <function>rallocm<parameter/></function>,
-      <function>sallocm<parameter/></function>, and
-      <function>dallocm<parameter/></function> functions return
+      <function>sallocm<parameter/></function>,
+      <function>dallocm<parameter/></function>, and
+      <function>nallocm<parameter/></function> functions return
       <constant>ALLOCM_SUCCESS</constant> on success; otherwise they return an
-      error value.  The <function>allocm<parameter/></function> and
-      <function>rallocm<parameter/></function> functions will fail if:
+      error value.  The <function>allocm<parameter/></function>,
+      <function>rallocm<parameter/></function>, and
+      <function>nallocm<parameter/></function> functions will fail if:
         <variablelist>
           <varlistentry>
             <term><errorname>ALLOCM_ERR_OOM</errorname></term>
@@ -2259,6 +2081,8 @@ malloc_conf = "lg_chunk:24";]]></programlisting></para>
     <manvolnum>2</manvolnum></citerefentry>,
     <citerefentry><refentrytitle>sbrk</refentrytitle>
     <manvolnum>2</manvolnum></citerefentry>,
+    <citerefentry><refentrytitle>utrace</refentrytitle>
+    <manvolnum>2</manvolnum></citerefentry>,
     <citerefentry><refentrytitle>alloca</refentrytitle>
     <manvolnum>3</manvolnum></citerefentry>,
     <citerefentry><refentrytitle>atexit</refentrytitle>
diff --git a/deps/jemalloc/include/jemalloc/internal/arena.h b/deps/jemalloc/include/jemalloc/internal/arena.h
index b80c118d..0b0f640a 100644
--- a/deps/jemalloc/include/jemalloc/internal/arena.h
+++ b/deps/jemalloc/include/jemalloc/internal/arena.h
@@ -1,41 +1,6 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
-/*
- * Subpages are an artificially designated partitioning of pages.  Their only
- * purpose is to support subpage-spaced size classes.
- *
- * There must be at least 4 subpages per page, due to the way size classes are
- * handled.
- */
-#define	LG_SUBPAGE		8
-#define	SUBPAGE			((size_t)(1U << LG_SUBPAGE))
-#define	SUBPAGE_MASK		(SUBPAGE - 1)
-
-/* Return the smallest subpage multiple that is >= s. */
-#define	SUBPAGE_CEILING(s)						\
-	(((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
-
-#ifdef JEMALLOC_TINY
-   /* Smallest size class to support. */
-#  define LG_TINY_MIN		LG_SIZEOF_PTR
-#  define TINY_MIN		(1U << LG_TINY_MIN)
-#endif
-
-/*
- * Maximum size class that is a multiple of the quantum, but not (necessarily)
- * a power of 2.  Above this size, allocations are rounded up to the nearest
- * power of 2.
- */
-#define	LG_QSPACE_MAX_DEFAULT	7
-
-/*
- * Maximum size class that is a multiple of the cacheline, but not (necessarily)
- * a power of 2.  Above this size, allocations are rounded up to the nearest
- * power of 2.
- */
-#define	LG_CSPACE_MAX_DEFAULT	9
-
 /*
  * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
  * as small as possible such that this setting is still honored, without
@@ -51,7 +16,7 @@
  * constraint is relaxed (ignored) for runs that are so small that the
  * per-region overhead is greater than:
  *
- *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP))
+ *   (RUN_MAX_OVRHD / (reg_interval << (3+RUN_BFP)))
  */
 #define	RUN_BFP			12
 /*                                    \/   Implicit binary fixed point. */
@@ -62,6 +27,12 @@
 #define	LG_RUN_MAXREGS		11
 #define	RUN_MAXREGS		(1U << LG_RUN_MAXREGS)
 
+/*
+ * Minimum redzone size.  Redzones may be larger than this if necessary to
+ * preserve region alignment.
+ */
+#define	REDZONE_MINSIZE		16
+
 /*
  * The minimum ratio of active:dirty pages per arena is computed as:
  *
@@ -85,6 +56,15 @@ typedef struct arena_s arena_t;
 
 /* Each element of the chunk map corresponds to one page within the chunk. */
 struct arena_chunk_map_s {
+#ifndef JEMALLOC_PROF
+	/*
+	 * Overlay prof_ctx in order to allow it to be referenced by dead code.
+	 * Such antics aren't warranted for per arena data structures, but
+	 * chunk map overhead accounts for a percentage of memory, rather than
+	 * being just a fixed cost.
+	 */
+	union {
+#endif
 	union {
 		/*
 		 * Linkage for run trees.  There are two disjoint uses:
@@ -103,22 +83,23 @@ struct arena_chunk_map_s {
 		ql_elm(arena_chunk_map_t)	ql_link;
 	}				u;
 
-#ifdef JEMALLOC_PROF
 	/* Profile counters, used for large object runs. */
 	prof_ctx_t			*prof_ctx;
+#ifndef JEMALLOC_PROF
+	}; /* union { ... }; */
 #endif
 
 	/*
 	 * Run address (or size) and various flags are stored together.  The bit
 	 * layout looks like (assuming 32-bit system):
 	 *
-	 *   ???????? ???????? ????---- ----dula
+	 *   ???????? ???????? ????nnnn nnnndula
 	 *
 	 * ? : Unallocated: Run address for first/last pages, unset for internal
 	 *                  pages.
 	 *     Small: Run page offset.
 	 *     Large: Run size for first page, unset for trailing pages.
-	 * - : Unused.
+	 * n : binind for small size class, BININD_INVALID for large size class.
 	 * d : dirty?
 	 * u : unzeroed?
 	 * l : large?
@@ -128,7 +109,8 @@ struct arena_chunk_map_s {
 	 *
 	 * p : run page offset
 	 * s : run size
-	 * c : (binind+1) for size class (used only if prof_promote is true)
+	 * n : binind for size class; large objects set these to BININD_INVALID
+	 *     except for promoted allocations (see prof_promote)
 	 * x : don't care
 	 * - : 0
 	 * + : 1
@@ -136,37 +118,38 @@ struct arena_chunk_map_s {
 	 * [dula] : bit unset
 	 *
 	 *   Unallocated (clean):
-	 *     ssssssss ssssssss ssss---- ----du-a
-	 *     xxxxxxxx xxxxxxxx xxxx---- -----Uxx
-	 *     ssssssss ssssssss ssss---- ----dU-a
+	 *     ssssssss ssssssss ssss++++ ++++du-a
+	 *     xxxxxxxx xxxxxxxx xxxxxxxx xxxx-Uxx
+	 *     ssssssss ssssssss ssss++++ ++++dU-a
 	 *
 	 *   Unallocated (dirty):
-	 *     ssssssss ssssssss ssss---- ----D--a
-	 *     xxxxxxxx xxxxxxxx xxxx---- ----xxxx
-	 *     ssssssss ssssssss ssss---- ----D--a
+	 *     ssssssss ssssssss ssss++++ ++++D--a
+	 *     xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
+	 *     ssssssss ssssssss ssss++++ ++++D--a
 	 *
 	 *   Small:
-	 *     pppppppp pppppppp pppp---- ----d--A
-	 *     pppppppp pppppppp pppp---- -------A
-	 *     pppppppp pppppppp pppp---- ----d--A
+	 *     pppppppp pppppppp ppppnnnn nnnnd--A
+	 *     pppppppp pppppppp ppppnnnn nnnn---A
+	 *     pppppppp pppppppp ppppnnnn nnnnd--A
 	 *
 	 *   Large:
-	 *     ssssssss ssssssss ssss---- ----D-LA
-	 *     xxxxxxxx xxxxxxxx xxxx---- ----xxxx
-	 *     -------- -------- -------- ----D-LA
+	 *     ssssssss ssssssss ssss++++ ++++D-LA
+	 *     xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
+	 *     -------- -------- ----++++ ++++D-LA
 	 *
-	 *   Large (sampled, size <= PAGE_SIZE):
-	 *     ssssssss ssssssss sssscccc ccccD-LA
+	 *   Large (sampled, size <= PAGE):
+	 *     ssssssss ssssssss ssssnnnn nnnnD-LA
 	 *
-	 *   Large (not sampled, size == PAGE_SIZE):
-	 *     ssssssss ssssssss ssss---- ----D-LA
+	 *   Large (not sampled, size == PAGE):
+	 *     ssssssss ssssssss ssss++++ ++++D-LA
 	 */
 	size_t				bits;
-#ifdef JEMALLOC_PROF
-#define	CHUNK_MAP_CLASS_SHIFT	4
-#define	CHUNK_MAP_CLASS_MASK	((size_t)0xff0U)
-#endif
-#define	CHUNK_MAP_FLAGS_MASK	((size_t)0xfU)
+#define	CHUNK_MAP_BININD_SHIFT	4
+#define	BININD_INVALID		((size_t)0xffU)
+/*     CHUNK_MAP_BININD_MASK == (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) */
+#define	CHUNK_MAP_BININD_MASK	((size_t)0xff0U)
+#define	CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK
+#define	CHUNK_MAP_FLAGS_MASK	((size_t)0xcU)
 #define	CHUNK_MAP_DIRTY		((size_t)0x8U)
 #define	CHUNK_MAP_UNZEROED	((size_t)0x4U)
 #define	CHUNK_MAP_LARGE		((size_t)0x2U)
@@ -205,11 +188,6 @@ struct arena_chunk_s {
 typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
 
 struct arena_run_s {
-#ifdef JEMALLOC_DEBUG
-	uint32_t	magic;
-#  define ARENA_RUN_MAGIC 0x384adf93
-#endif
-
 	/* Bin this run is associated with. */
 	arena_bin_t	*bin;
 
@@ -224,11 +202,50 @@ struct arena_run_s {
  * Read-only information associated with each element of arena_t's bins array
  * is stored separately, partly to reduce memory usage (only one copy, rather
  * than one per arena), but mainly to avoid false cacheline sharing.
+ *
+ * Each run has the following layout:
+ *
+ *               /--------------------\
+ *               | arena_run_t header |
+ *               | ...                |
+ * bitmap_offset | bitmap             |
+ *               | ...                |
+ *   ctx0_offset | ctx map            |
+ *               | ...                |
+ *               |--------------------|
+ *               | redzone            |
+ *   reg0_offset | region 0           |
+ *               | redzone            |
+ *               |--------------------| \
+ *               | redzone            | |
+ *               | region 1           |  > reg_interval
+ *               | redzone            | /
+ *               |--------------------|
+ *               | ...                |
+ *               | ...                |
+ *               | ...                |
+ *               |--------------------|
+ *               | redzone            |
+ *               | region nregs-1     |
+ *               | redzone            |
+ *               |--------------------|
+ *               | alignment pad?     |
+ *               \--------------------/
+ *
+ * reg_interval has at least the same minimum alignment as reg_size; this
+ * preserves the alignment constraint that sa2u() depends on.  Alignment pad is
+ * either 0 or redzone_size; it is present only if needed to align reg0_offset.
  */
 struct arena_bin_info_s {
 	/* Size of regions in a run for this bin's size class. */
 	size_t		reg_size;
 
+	/* Redzone size. */
+	size_t		redzone_size;
+
+	/* Interval between regions (reg_size + (redzone_size << 1)). */
+	size_t		reg_interval;
+
 	/* Total size of a run for this bin's size class. */
 	size_t		run_size;
 
@@ -247,13 +264,11 @@ struct arena_bin_info_s {
 	 */
 	bitmap_info_t	bitmap_info;
 
-#ifdef JEMALLOC_PROF
 	/*
 	 * Offset of first (prof_ctx_t *) in a run header for this bin's size
-	 * class, or 0 if (opt_prof == false).
+	 * class, or 0 if (config_prof == false || opt_prof == false).
 	 */
 	uint32_t	ctx0_offset;
-#endif
 
 	/* Offset of first region in a run for this bin's size class. */
 	uint32_t	reg0_offset;
@@ -283,18 +298,11 @@ struct arena_bin_s {
 	 */
 	arena_run_tree_t runs;
 
-#ifdef JEMALLOC_STATS
 	/* Bin statistics. */
 	malloc_bin_stats_t stats;
-#endif
 };
 
 struct arena_s {
-#ifdef JEMALLOC_DEBUG
-	uint32_t		magic;
-#  define ARENA_MAGIC 0x947d3d24
-#endif
-
 	/* This arena's index within the arenas array. */
 	unsigned		ind;
 
@@ -314,20 +322,14 @@ struct arena_s {
 	 */
 	malloc_mutex_t		lock;
 
-#ifdef JEMALLOC_STATS
 	arena_stats_t		stats;
-#  ifdef JEMALLOC_TCACHE
 	/*
 	 * List of tcaches for extant threads associated with this arena.
 	 * Stats from these are merged incrementally, and at exit.
 	 */
 	ql_head(tcache_t)	tcache_ql;
-#  endif
-#endif
 
-#ifdef JEMALLOC_PROF
 	uint64_t		prof_accumbytes;
-#endif
 
 	/* List of dirty-page-containing chunks this arena manages. */
 	ql_head(arena_chunk_t)	chunks_dirty;
@@ -378,140 +380,334 @@ struct arena_s {
 	arena_avail_tree_t	runs_avail_clean;
 	arena_avail_tree_t	runs_avail_dirty;
 
-	/*
-	 * bins is used to store trees of free regions of the following sizes,
-	 * assuming a 64-bit system with 16-byte quantum, 4 KiB page size, and
-	 * default MALLOC_CONF.
-	 *
-	 *   bins[i] |   size |
-	 *   --------+--------+
-	 *        0  |      8 |
-	 *   --------+--------+
-	 *        1  |     16 |
-	 *        2  |     32 |
-	 *        3  |     48 |
-	 *           :        :
-	 *        6  |     96 |
-	 *        7  |    112 |
-	 *        8  |    128 |
-	 *   --------+--------+
-	 *        9  |    192 |
-	 *       10  |    256 |
-	 *       11  |    320 |
-	 *       12  |    384 |
-	 *       13  |    448 |
-	 *       14  |    512 |
-	 *   --------+--------+
-	 *       15  |    768 |
-	 *       16  |   1024 |
-	 *       17  |   1280 |
-	 *           :        :
-	 *       25  |   3328 |
-	 *       26  |   3584 |
-	 *       27  |   3840 |
-	 *   --------+--------+
-	 */
-	arena_bin_t		bins[1]; /* Dynamically sized. */
+	/* bins is used to store trees of free regions. */
+	arena_bin_t		bins[NBINS];
 };
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-extern size_t	opt_lg_qspace_max;
-extern size_t	opt_lg_cspace_max;
 extern ssize_t	opt_lg_dirty_mult;
 /*
  * small_size2bin is a compact lookup table that rounds request sizes up to
  * size classes.  In order to reduce cache footprint, the table is compressed,
  * and all accesses are via the SMALL_SIZE2BIN macro.
  */
-extern uint8_t const	*small_size2bin;
+extern uint8_t const	small_size2bin[];
 #define	SMALL_SIZE2BIN(s)	(small_size2bin[(s-1) >> LG_TINY_MIN])
 
-extern arena_bin_info_t	*arena_bin_info;
-
-/* Various bin-related settings. */
-#ifdef JEMALLOC_TINY		/* Number of (2^n)-spaced tiny bins. */
-#  define		ntbins	((unsigned)(LG_QUANTUM - LG_TINY_MIN))
-#else
-#  define		ntbins	0
-#endif
-extern unsigned		nqbins; /* Number of quantum-spaced bins. */
-extern unsigned		ncbins; /* Number of cacheline-spaced bins. */
-extern unsigned		nsbins; /* Number of subpage-spaced bins. */
-extern unsigned		nbins;
-#ifdef JEMALLOC_TINY
-#  define		tspace_max	((size_t)(QUANTUM >> 1))
-#endif
-#define			qspace_min	QUANTUM
-extern size_t		qspace_max;
-extern size_t		cspace_min;
-extern size_t		cspace_max;
-extern size_t		sspace_min;
-extern size_t		sspace_max;
-#define			small_maxclass	sspace_max
+extern arena_bin_info_t	arena_bin_info[NBINS];
 
+/* Number of large size classes. */
 #define			nlclasses (chunk_npages - map_bias)
 
 void	arena_purge_all(arena_t *arena);
-#ifdef JEMALLOC_PROF
 void	arena_prof_accum(arena_t *arena, uint64_t accumbytes);
-#endif
-#ifdef JEMALLOC_TCACHE
 void	arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
-    size_t binind
-#  ifdef JEMALLOC_PROF
-    , uint64_t prof_accumbytes
-#  endif
-    );
-#endif
+    size_t binind, uint64_t prof_accumbytes);
+void	arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info,
+    bool zero);
+void	arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info);
 void	*arena_malloc_small(arena_t *arena, size_t size, bool zero);
 void	*arena_malloc_large(arena_t *arena, size_t size, bool zero);
-void	*arena_malloc(size_t size, bool zero);
-void	*arena_palloc(arena_t *arena, size_t size, size_t alloc_size,
-    size_t alignment, bool zero);
-size_t	arena_salloc(const void *ptr);
-#ifdef JEMALLOC_PROF
+void	*arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero);
 void	arena_prof_promoted(const void *ptr, size_t size);
-size_t	arena_salloc_demote(const void *ptr);
-#endif
-void	arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+void	arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr,
     arena_chunk_map_t *mapelm);
+void	arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+    size_t pageind, arena_chunk_map_t *mapelm);
+void	arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+    size_t pageind);
+void	arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk,
+    void *ptr);
 void	arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr);
-#ifdef JEMALLOC_STATS
 void	arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
     arena_stats_t *astats, malloc_bin_stats_t *bstats,
     malloc_large_stats_t *lstats);
-#endif
 void	*arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size,
     size_t extra, bool zero);
 void	*arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
-    size_t alignment, bool zero);
+    size_t alignment, bool zero, bool try_tcache);
 bool	arena_new(arena_t *arena, unsigned ind);
-bool	arena_boot(void);
+void	arena_boot(void);
+void	arena_prefork(arena_t *arena);
+void	arena_postfork_parent(arena_t *arena);
+void	arena_postfork_child(arena_t *arena);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
+arena_chunk_map_t	*arena_mapp_get(arena_chunk_t *chunk, size_t pageind);
+size_t	*arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_get(arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_unallocated_size_get(arena_chunk_t *chunk,
+    size_t pageind);
+size_t	arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind);
+size_t	arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind);
+void	arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind,
+    size_t size, size_t flags);
+void	arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
+    size_t size);
+void	arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind,
+    size_t size, size_t flags);
+void	arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
+    size_t binind);
+void	arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind,
+    size_t runind, size_t binind, size_t flags);
+void	arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind,
+    size_t unzeroed);
+size_t	arena_ptr_small_binind_get(const void *ptr, size_t mapbits);
 size_t	arena_bin_index(arena_t *arena, arena_bin_t *bin);
 unsigned	arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
     const void *ptr);
-#  ifdef JEMALLOC_PROF
 prof_ctx_t	*arena_prof_ctx_get(const void *ptr);
 void	arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
-#  endif
-void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
+void	*arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache);
+size_t	arena_salloc(const void *ptr, bool demote);
+void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+    bool try_tcache);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
+#  ifdef JEMALLOC_ARENA_INLINE_A
+JEMALLOC_INLINE arena_chunk_map_t *
+arena_mapp_get(arena_chunk_t *chunk, size_t pageind)
+{
+
+	assert(pageind >= map_bias);
+	assert(pageind < chunk_npages);
+
+	return (&chunk->map[pageind-map_bias]);
+}
+
+JEMALLOC_INLINE size_t *
+arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind)
+{
+
+	return (&arena_mapp_get(chunk, pageind)->bits);
+}
+
+JEMALLOC_INLINE size_t
+arena_mapbits_get(arena_chunk_t *chunk, size_t pageind)
+{
+
+	return (*arena_mapbitsp_get(chunk, pageind));
+}
+
+JEMALLOC_INLINE size_t
+arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind)
+{
+	size_t mapbits;
+
+	mapbits = arena_mapbits_get(chunk, pageind);
+	assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
+	return (mapbits & ~PAGE_MASK);
+}
+
+JEMALLOC_INLINE size_t
+arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind)
+{
+	size_t mapbits;
+
+	mapbits = arena_mapbits_get(chunk, pageind);
+	assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
+	    (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED));
+	return (mapbits & ~PAGE_MASK);
+}
+
+JEMALLOC_INLINE size_t
+arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind)
+{
+	size_t mapbits;
+
+	mapbits = arena_mapbits_get(chunk, pageind);
+	assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) ==
+	    CHUNK_MAP_ALLOCATED);
+	return (mapbits >> LG_PAGE);
+}
+
+JEMALLOC_INLINE size_t
+arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind)
+{
+	size_t mapbits;
+	size_t binind;
+
+	mapbits = arena_mapbits_get(chunk, pageind);
+	binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
+	assert(binind < NBINS || binind == BININD_INVALID);
+	return (binind);
+}
+
+JEMALLOC_INLINE size_t
+arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind)
+{
+	size_t mapbits;
+
+	mapbits = arena_mapbits_get(chunk, pageind);
+	return (mapbits & CHUNK_MAP_DIRTY);
+}
+
+JEMALLOC_INLINE size_t
+arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind)
+{
+	size_t mapbits;
+
+	mapbits = arena_mapbits_get(chunk, pageind);
+	return (mapbits & CHUNK_MAP_UNZEROED);
+}
+
+JEMALLOC_INLINE size_t
+arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind)
+{
+	size_t mapbits;
+
+	mapbits = arena_mapbits_get(chunk, pageind);
+	return (mapbits & CHUNK_MAP_LARGE);
+}
+
+JEMALLOC_INLINE size_t
+arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind)
+{
+	size_t mapbits;
+
+	mapbits = arena_mapbits_get(chunk, pageind);
+	return (mapbits & CHUNK_MAP_ALLOCATED);
+}
+
+JEMALLOC_INLINE void
+arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size,
+    size_t flags)
+{
+	size_t *mapbitsp;
+
+	mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	assert((size & PAGE_MASK) == 0);
+	assert((flags & ~CHUNK_MAP_FLAGS_MASK) == 0);
+	assert((flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == flags);
+	*mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags;
+}
+
+JEMALLOC_INLINE void
+arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind,
+    size_t size)
+{
+	size_t *mapbitsp;
+
+	mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	assert((size & PAGE_MASK) == 0);
+	assert((*mapbitsp & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0);
+	*mapbitsp = size | (*mapbitsp & PAGE_MASK);
+}
+
+JEMALLOC_INLINE void
+arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size,
+    size_t flags)
+{
+	size_t *mapbitsp;
+	size_t unzeroed;
+
+	mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	assert((size & PAGE_MASK) == 0);
+	assert((flags & CHUNK_MAP_DIRTY) == flags);
+	unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */
+	*mapbitsp = size | CHUNK_MAP_BININD_INVALID | flags | unzeroed |
+	    CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+}
+
+JEMALLOC_INLINE void
+arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind,
+    size_t binind)
+{
+	size_t *mapbitsp;
+
+	assert(binind <= BININD_INVALID);
+	mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	assert(arena_mapbits_large_size_get(chunk, pageind) == PAGE);
+	*mapbitsp = (*mapbitsp & ~CHUNK_MAP_BININD_MASK) | (binind <<
+	    CHUNK_MAP_BININD_SHIFT);
+}
+
+JEMALLOC_INLINE void
+arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind,
+    size_t binind, size_t flags)
+{
+	size_t *mapbitsp;
+	size_t unzeroed;
+
+	assert(binind < BININD_INVALID);
+	mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	assert(pageind - runind >= map_bias);
+	assert((flags & CHUNK_MAP_DIRTY) == flags);
+	unzeroed = *mapbitsp & CHUNK_MAP_UNZEROED; /* Preserve unzeroed. */
+	*mapbitsp = (runind << LG_PAGE) | (binind << CHUNK_MAP_BININD_SHIFT) |
+	    flags | unzeroed | CHUNK_MAP_ALLOCATED;
+}
+
+JEMALLOC_INLINE void
+arena_mapbits_unzeroed_set(arena_chunk_t *chunk, size_t pageind,
+    size_t unzeroed)
+{
+	size_t *mapbitsp;
+
+	mapbitsp = arena_mapbitsp_get(chunk, pageind);
+	*mapbitsp = (*mapbitsp & ~CHUNK_MAP_UNZEROED) | unzeroed;
+}
+
+JEMALLOC_INLINE size_t
+arena_ptr_small_binind_get(const void *ptr, size_t mapbits)
+{
+	size_t binind;
+
+	binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT;
+
+	if (config_debug) {
+		arena_chunk_t *chunk;
+		arena_t *arena;
+		size_t pageind;
+		size_t actual_mapbits;
+		arena_run_t *run;
+		arena_bin_t *bin;
+		size_t actual_binind;
+		arena_bin_info_t *bin_info;
+
+		assert(binind != BININD_INVALID);
+		assert(binind < NBINS);
+		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+		arena = chunk->arena;
+		pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+		actual_mapbits = arena_mapbits_get(chunk, pageind);
+		assert(mapbits == actual_mapbits);
+		assert(arena_mapbits_large_get(chunk, pageind) == 0);
+		assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
+		run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
+		    (actual_mapbits >> LG_PAGE)) << LG_PAGE));
+		bin = run->bin;
+		actual_binind = bin - arena->bins;
+		assert(binind == actual_binind);
+		bin_info = &arena_bin_info[actual_binind];
+		assert(((uintptr_t)ptr - ((uintptr_t)run +
+		    (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval
+		    == 0);
+	}
+
+	return (binind);
+}
+#  endif /* JEMALLOC_ARENA_INLINE_A */
+
+#  ifdef JEMALLOC_ARENA_INLINE_B
 JEMALLOC_INLINE size_t
 arena_bin_index(arena_t *arena, arena_bin_t *bin)
 {
 	size_t binind = bin - arena->bins;
-	assert(binind < nbins);
+	assert(binind < NBINS);
 	return (binind);
 }
 
@@ -519,9 +715,8 @@ JEMALLOC_INLINE unsigned
 arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
 {
 	unsigned shift, diff, regind;
-	size_t size;
+	size_t interval;
 
-	dassert(run->magic == ARENA_RUN_MAGIC);
 	/*
 	 * Freeing a pointer lower than region zero can cause assertion
 	 * failure.
@@ -537,12 +732,12 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
 	    bin_info->reg0_offset);
 
 	/* Rescale (factor powers of 2 out of the numerator and denominator). */
-	size = bin_info->reg_size;
-	shift = ffs(size) - 1;
+	interval = bin_info->reg_interval;
+	shift = ffs(interval) - 1;
 	diff >>= shift;
-	size >>= shift;
+	interval >>= shift;
 
-	if (size == 1) {
+	if (interval == 1) {
 		/* The divisor was a power of 2. */
 		regind = diff;
 	} else {
@@ -554,7 +749,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
 		 *
 		 * becomes
 		 *
-		 *   (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
+		 *   (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT
 		 *
 		 * We can omit the first three elements, because we never
 		 * divide by 0, and 1 and 2 are both powers of two, which are
@@ -562,7 +757,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
 		 */
 #define	SIZE_INV_SHIFT	((sizeof(unsigned) << 3) - LG_RUN_MAXREGS)
 #define	SIZE_INV(s)	(((1U << SIZE_INV_SHIFT) / (s)) + 1)
-		static const unsigned size_invs[] = {
+		static const unsigned interval_invs[] = {
 		    SIZE_INV(3),
 		    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
 		    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
@@ -573,20 +768,21 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
 		    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
 		};
 
-		if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
-			regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
-		else
-			regind = diff / size;
+		if (interval <= ((sizeof(interval_invs) / sizeof(unsigned)) +
+		    2)) {
+			regind = (diff * interval_invs[interval - 3]) >>
+			    SIZE_INV_SHIFT;
+		} else
+			regind = diff / interval;
 #undef SIZE_INV
 #undef SIZE_INV_SHIFT
 	}
-	assert(diff == regind * size);
+	assert(diff == regind * interval);
 	assert(regind < bin_info->nregs);
 
 	return (regind);
 }
 
-#ifdef JEMALLOC_PROF
 JEMALLOC_INLINE prof_ctx_t *
 arena_prof_ctx_get(const void *ptr)
 {
@@ -594,32 +790,33 @@ arena_prof_ctx_get(const void *ptr)
 	arena_chunk_t *chunk;
 	size_t pageind, mapbits;
 
+	cassert(config_prof);
 	assert(ptr != NULL);
 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
-	mapbits = chunk->map[pageind-map_bias].bits;
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+	mapbits = arena_mapbits_get(chunk, pageind);
 	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
 	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
 		if (prof_promote)
 			ret = (prof_ctx_t *)(uintptr_t)1U;
 		else {
 			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
-			    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
-			    PAGE_SHIFT));
-			size_t binind = arena_bin_index(chunk->arena, run->bin);
+			    (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
+			    LG_PAGE));
+			size_t binind = arena_ptr_small_binind_get(ptr,
+			    mapbits);
 			arena_bin_info_t *bin_info = &arena_bin_info[binind];
 			unsigned regind;
 
-			dassert(run->magic == ARENA_RUN_MAGIC);
 			regind = arena_run_regind(run, bin_info, ptr);
 			ret = *(prof_ctx_t **)((uintptr_t)run +
 			    bin_info->ctx0_offset + (regind *
 			    sizeof(prof_ctx_t *)));
 		}
 	} else
-		ret = chunk->map[pageind-map_bias].prof_ctx;
+		ret = arena_mapp_get(chunk, pageind)->prof_ctx;
 
 	return (ret);
 }
@@ -630,25 +827,24 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 	arena_chunk_t *chunk;
 	size_t pageind, mapbits;
 
+	cassert(config_prof);
 	assert(ptr != NULL);
 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
-	mapbits = chunk->map[pageind-map_bias].bits;
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+	mapbits = arena_mapbits_get(chunk, pageind);
 	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
 	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
 		if (prof_promote == false) {
 			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
-			    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
-			    PAGE_SHIFT));
-			arena_bin_t *bin = run->bin;
+			    (uintptr_t)((pageind - (mapbits >> LG_PAGE)) <<
+			    LG_PAGE));
 			size_t binind;
 			arena_bin_info_t *bin_info;
 			unsigned regind;
 
-			dassert(run->magic == ARENA_RUN_MAGIC);
-			binind = arena_bin_index(chunk->arena, bin);
+			binind = arena_ptr_small_binind_get(ptr, mapbits);
 			bin_info = &arena_bin_info[binind];
 			regind = arena_run_regind(run, bin_info, ptr);
 
@@ -657,86 +853,122 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 		} else
 			assert((uintptr_t)ctx == (uintptr_t)1U);
 	} else
-		chunk->map[pageind-map_bias].prof_ctx = ctx;
+		arena_mapp_get(chunk, pageind)->prof_ctx = ctx;
+}
+
+JEMALLOC_INLINE void *
+arena_malloc(arena_t *arena, size_t size, bool zero, bool try_tcache)
+{
+	tcache_t *tcache;
+	/* size must be nonzero and at most arena_maxclass (asserted). */
+	assert(size != 0);
+	assert(size <= arena_maxclass);
+	/* Prefer the tcache when allowed; otherwise allocate from an arena. */
+	if (size <= SMALL_MAXCLASS) {
+		if (try_tcache && (tcache = tcache_get(true)) != NULL)
+			return (tcache_alloc_small(tcache, size, zero));
+		else {
+			return (arena_malloc_small(choose_arena(arena), size,
+			    zero));
+		}
+	} else {
+		/*
+		 * Initialize tcache after checking size in order to avoid
+		 * infinite recursion during tcache initialization.
+		 */
+		if (try_tcache && size <= tcache_maxclass && (tcache =
+		    tcache_get(true)) != NULL)
+			return (tcache_alloc_large(tcache, size, zero));
+		else {
+			return (arena_malloc_large(choose_arena(arena), size,
+			    zero));
+		}
+	}
+}
+
+/* Return the size of the allocation pointed to by ptr. */
+JEMALLOC_INLINE size_t
+arena_salloc(const void *ptr, bool demote)
+{
+	size_t ret;
+	arena_chunk_t *chunk;
+	size_t pageind, binind;
+	/* demote only matters when config_prof && prof_promote (see below). */
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+	assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
+	binind = arena_mapbits_binind_get(chunk, pageind);
+	if (binind == BININD_INVALID || (config_prof && demote == false &&
+	    prof_promote && arena_mapbits_large_get(chunk, pageind) != 0)) {
+		/*
+		 * Large allocation.  In the common case (demote == true), and
+		 * as this is an inline function, most callers will only end up
+		 * looking at binind to determine that ptr is a small
+		 * allocation.
+		 */
+		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+		ret = arena_mapbits_large_size_get(chunk, pageind);
+		assert(ret != 0);
+		assert(pageind + (ret>>LG_PAGE) <= chunk_npages);
+		assert(ret == PAGE || arena_mapbits_large_size_get(chunk,
+		    pageind+(ret>>LG_PAGE)-1) == 0);
+		assert(binind == arena_mapbits_binind_get(chunk,
+		    pageind+(ret>>LG_PAGE)-1));
+		assert(arena_mapbits_dirty_get(chunk, pageind) ==
+		    arena_mapbits_dirty_get(chunk, pageind+(ret>>LG_PAGE)-1));
+	} else {
+		/*
+		 * Small allocation (possibly promoted to a large object due to
+		 * prof_promote).
+		 */
+		assert(arena_mapbits_large_get(chunk, pageind) != 0 ||
+		    arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
+		    pageind)) == binind);
+		ret = arena_bin_info[binind].reg_size;
+	}
+
+	return (ret);
 }
-#endif
 
 JEMALLOC_INLINE void
-arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
+arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool try_tcache)
 {
-	size_t pageind;
-	arena_chunk_map_t *mapelm;
+	size_t pageind, mapbits;
+	tcache_t *tcache;
 
 	assert(arena != NULL);
-	dassert(arena->magic == ARENA_MAGIC);
 	assert(chunk->arena == arena);
 	assert(ptr != NULL);
 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
 
-	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
-	mapelm = &chunk->map[pageind-map_bias];
-	assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
-	if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+	mapbits = arena_mapbits_get(chunk, pageind);
+	assert(arena_mapbits_allocated_get(chunk, pageind) != 0);
+	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
 		/* Small allocation. */
-#ifdef JEMALLOC_TCACHE
-		tcache_t *tcache;
+		if (try_tcache && (tcache = tcache_get(false)) != NULL) {
+			size_t binind;
 
-		if ((tcache = tcache_get()) != NULL)
-			tcache_dalloc_small(tcache, ptr);
-		else {
-#endif
-			arena_run_t *run;
-			arena_bin_t *bin;
-
-			run = (arena_run_t *)((uintptr_t)chunk +
-			    (uintptr_t)((pageind - (mapelm->bits >>
-			    PAGE_SHIFT)) << PAGE_SHIFT));
-			dassert(run->magic == ARENA_RUN_MAGIC);
-			bin = run->bin;
-#ifdef JEMALLOC_DEBUG
-			{
-				size_t binind = arena_bin_index(arena, bin);
-				arena_bin_info_t *bin_info =
-				    &arena_bin_info[binind];
-				assert(((uintptr_t)ptr - ((uintptr_t)run +
-				    (uintptr_t)bin_info->reg0_offset)) %
-				    bin_info->reg_size == 0);
-			}
-#endif
-			malloc_mutex_lock(&bin->lock);
-			arena_dalloc_bin(arena, chunk, ptr, mapelm);
-			malloc_mutex_unlock(&bin->lock);
-#ifdef JEMALLOC_TCACHE
-		}
-#endif
+			binind = arena_ptr_small_binind_get(ptr, mapbits);
+			tcache_dalloc_small(tcache, ptr, binind);
+		} else
+			arena_dalloc_small(arena, chunk, ptr, pageind);
 	} else {
-#ifdef JEMALLOC_TCACHE
-		size_t size = mapelm->bits & ~PAGE_MASK;
+		size_t size = arena_mapbits_large_size_get(chunk, pageind);
 
 		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
-		if (size <= tcache_maxclass) {
-			tcache_t *tcache;
-
-			if ((tcache = tcache_get()) != NULL)
-				tcache_dalloc_large(tcache, ptr, size);
-			else {
-				malloc_mutex_lock(&arena->lock);
-				arena_dalloc_large(arena, chunk, ptr);
-				malloc_mutex_unlock(&arena->lock);
-			}
-		} else {
-			malloc_mutex_lock(&arena->lock);
+
+		if (try_tcache && size <= tcache_maxclass && (tcache =
+		    tcache_get(false)) != NULL) {
+			tcache_dalloc_large(tcache, ptr, size);
+		} else
 			arena_dalloc_large(arena, chunk, ptr);
-			malloc_mutex_unlock(&arena->lock);
-		}
-#else
-		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
-		malloc_mutex_lock(&arena->lock);
-		arena_dalloc_large(arena, chunk, ptr);
-		malloc_mutex_unlock(&arena->lock);
-#endif
 	}
 }
+#  endif /* JEMALLOC_ARENA_INLINE_B */
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
diff --git a/deps/jemalloc/include/jemalloc/internal/atomic.h b/deps/jemalloc/include/jemalloc/internal/atomic.h
index 9a298623..11a7b47f 100644
--- a/deps/jemalloc/include/jemalloc/internal/atomic.h
+++ b/deps/jemalloc/include/jemalloc/internal/atomic.h
@@ -11,22 +11,8 @@
 
 #define	atomic_read_uint64(p)	atomic_add_uint64(p, 0)
 #define	atomic_read_uint32(p)	atomic_add_uint32(p, 0)
-
-#if (LG_SIZEOF_PTR == 3)
-#  define atomic_read_z(p)						\
-    (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)0)
-#  define atomic_add_z(p, x)						\
-    (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x)
-#  define atomic_sub_z(p, x)						\
-    (size_t)atomic_sub_uint64((uint64_t *)p, (uint64_t)x)
-#elif (LG_SIZEOF_PTR == 2)
-#  define atomic_read_z(p)						\
-    (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)0)
-#  define atomic_add_z(p, x)						\
-    (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x)
-#  define atomic_sub_z(p, x)						\
-    (size_t)atomic_sub_uint32((uint32_t *)p, (uint32_t)x)
-#endif
+#define	atomic_read_z(p)	atomic_add_z(p, 0)
+#define	atomic_read_u(p)	atomic_add_u(p, 0)
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
@@ -37,12 +23,17 @@ uint64_t	atomic_add_uint64(uint64_t *p, uint64_t x);
 uint64_t	atomic_sub_uint64(uint64_t *p, uint64_t x);
 uint32_t	atomic_add_uint32(uint32_t *p, uint32_t x);
 uint32_t	atomic_sub_uint32(uint32_t *p, uint32_t x);
+size_t	atomic_add_z(size_t *p, size_t x);
+size_t	atomic_sub_z(size_t *p, size_t x);
+unsigned	atomic_add_u(unsigned *p, unsigned x);
+unsigned	atomic_sub_u(unsigned *p, unsigned x);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
 /******************************************************************************/
 /* 64-bit operations. */
-#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
+#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
+#  ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
 JEMALLOC_INLINE uint64_t
 atomic_add_uint64(uint64_t *p, uint64_t x)
 {
@@ -56,6 +47,20 @@ atomic_sub_uint64(uint64_t *p, uint64_t x)
 
 	return (__sync_sub_and_fetch(p, x));
 }
+#elif (defined(_MSC_VER))
+JEMALLOC_INLINE uint64_t
+atomic_add_uint64(uint64_t *p, uint64_t x)
+{
+	/* InterlockedExchangeAdd64() returns the old value; add x for new. */
+	return (InterlockedExchangeAdd64(p, x) + x);
+}
+
+JEMALLOC_INLINE uint64_t
+atomic_sub_uint64(uint64_t *p, uint64_t x)
+{
+	/* The intrinsic returns the old value; subtract x to return the new. */
+	return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x);
+}
 #elif (defined(JEMALLOC_OSATOMIC))
 JEMALLOC_INLINE uint64_t
 atomic_add_uint64(uint64_t *p, uint64_t x)
@@ -70,7 +75,7 @@ atomic_sub_uint64(uint64_t *p, uint64_t x)
 
 	return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p));
 }
-#elif (defined(__amd64_) || defined(__x86_64__))
+#  elif (defined(__amd64__) || defined(__x86_64__))
 JEMALLOC_INLINE uint64_t
 atomic_add_uint64(uint64_t *p, uint64_t x)
 {
@@ -97,8 +102,43 @@ atomic_sub_uint64(uint64_t *p, uint64_t x)
 
 	return (x);
 }
-#else
-#  if (LG_SIZEOF_PTR == 3)
+#  elif (defined(JEMALLOC_ATOMIC9))
+JEMALLOC_INLINE uint64_t
+atomic_add_uint64(uint64_t *p, uint64_t x)
+{
+
+	/*
+	 * atomic_fetchadd_64() doesn't exist, but we only ever use this
+	 * function on LP64 systems, so atomic_fetchadd_long() will do.
+	 */
+	assert(sizeof(uint64_t) == sizeof(unsigned long));
+	/* The fetch-add yields the old value; add x to return the new one. */
+	return (atomic_fetchadd_long(p, (unsigned long)x) + x);
+}
+
+JEMALLOC_INLINE uint64_t
+atomic_sub_uint64(uint64_t *p, uint64_t x)
+{
+
+	assert(sizeof(uint64_t) == sizeof(unsigned long));
+	/* Add -x; the fetched old value minus x is the new value. */
+	return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x);
+}
+#  elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
+JEMALLOC_INLINE uint64_t
+atomic_add_uint64(uint64_t *p, uint64_t x)
+{
+	/* The builtin returns the value of *p after the addition. */
+	return (__sync_add_and_fetch(p, x));
+}
+
+JEMALLOC_INLINE uint64_t
+atomic_sub_uint64(uint64_t *p, uint64_t x)
+{
+	/* The builtin returns the value of *p after the subtraction. */
+	return (__sync_sub_and_fetch(p, x));
+}
+#  else
 #    error "Missing implementation for 64-bit atomic operations"
 #  endif
 #endif
@@ -119,6 +159,20 @@ atomic_sub_uint32(uint32_t *p, uint32_t x)
 
 	return (__sync_sub_and_fetch(p, x));
 }
+#elif (defined(_MSC_VER))
+JEMALLOC_INLINE uint32_t
+atomic_add_uint32(uint32_t *p, uint32_t x)
+{
+	/* InterlockedExchangeAdd() returns the old value; add x for new. */
+	return (InterlockedExchangeAdd(p, x) + x);
+}
+
+JEMALLOC_INLINE uint32_t
+atomic_sub_uint32(uint32_t *p, uint32_t x)
+{
+	/* The intrinsic returns the old value; subtract x to return the new. */
+	return (InterlockedExchangeAdd(p, -((int32_t)x)) - x);
+}
 #elif (defined(JEMALLOC_OSATOMIC))
 JEMALLOC_INLINE uint32_t
 atomic_add_uint32(uint32_t *p, uint32_t x)
@@ -133,7 +187,7 @@ atomic_sub_uint32(uint32_t *p, uint32_t x)
 
 	return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p));
 }
-#elif (defined(__i386__) || defined(__amd64_) || defined(__x86_64__))
+#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
 JEMALLOC_INLINE uint32_t
 atomic_add_uint32(uint32_t *p, uint32_t x)
 {
@@ -160,9 +214,90 @@ atomic_sub_uint32(uint32_t *p, uint32_t x)
 
 	return (x);
 }
+#elif (defined(JEMALLOC_ATOMIC9))
+JEMALLOC_INLINE uint32_t
+atomic_add_uint32(uint32_t *p, uint32_t x)
+{
+	/* The fetch-add yields the old value; add x to return the new one. */
+	return (atomic_fetchadd_32(p, x) + x);
+}
+
+JEMALLOC_INLINE uint32_t
+atomic_sub_uint32(uint32_t *p, uint32_t x)
+{
+	/* Add -x; the fetched old value minus x is the new value. */
+	return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x);
+}
+#elif (defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
+JEMALLOC_INLINE uint32_t
+atomic_add_uint32(uint32_t *p, uint32_t x)
+{
+	/* The builtin returns the value of *p after the addition. */
+	return (__sync_add_and_fetch(p, x));
+}
+
+JEMALLOC_INLINE uint32_t
+atomic_sub_uint32(uint32_t *p, uint32_t x)
+{
+	/* The builtin returns the value of *p after the subtraction. */
+	return (__sync_sub_and_fetch(p, x));
+}
 #else
 #  error "Missing implementation for 32-bit atomic operations"
 #endif
+
+/******************************************************************************/
+/* size_t operations. */
+JEMALLOC_INLINE size_t
+atomic_add_z(size_t *p, size_t x)
+{
+	/* size_t is treated as pointer-width: dispatch on LG_SIZEOF_PTR. */
+#if (LG_SIZEOF_PTR == 3)
+	return ((size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
+#elif (LG_SIZEOF_PTR == 2)
+	return ((size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x));
+#endif
+}
+
+JEMALLOC_INLINE size_t
+atomic_sub_z(size_t *p, size_t x)
+{
+	/* Subtract by atomically adding the two's-complement negation of x. */
+#if (LG_SIZEOF_PTR == 3)
+	return ((size_t)atomic_add_uint64((uint64_t *)p,
+	    (uint64_t)-((int64_t)x)));
+#elif (LG_SIZEOF_PTR == 2)
+	return ((size_t)atomic_add_uint32((uint32_t *)p,
+	    (uint32_t)-((int32_t)x)));
+#endif
+}
+
+/******************************************************************************/
+/* unsigned operations. */
+JEMALLOC_INLINE unsigned
+atomic_add_u(unsigned *p, unsigned x)
+{
+	/* Dispatch to the fixed-width op selected by LG_SIZEOF_INT. */
+#if (LG_SIZEOF_INT == 3)
+	return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x));
+#elif (LG_SIZEOF_INT == 2)
+	return ((unsigned)atomic_add_uint32((uint32_t *)p, (uint32_t)x));
+#endif
+}
+
+JEMALLOC_INLINE unsigned
+atomic_sub_u(unsigned *p, unsigned x)
+{
+	/* Subtract by atomically adding the two's-complement negation of x. */
+#if (LG_SIZEOF_INT == 3)
+	return ((unsigned)atomic_add_uint64((uint64_t *)p,
+	    (uint64_t)-((int64_t)x)));
+#elif (LG_SIZEOF_INT == 2)
+	return ((unsigned)atomic_add_uint32((uint32_t *)p,
+	    (uint32_t)-((int32_t)x)));
+#endif
+}
+/******************************************************************************/
 #endif
 
 #endif /* JEMALLOC_H_INLINES */
diff --git a/deps/jemalloc/include/jemalloc/internal/base.h b/deps/jemalloc/include/jemalloc/internal/base.h
index e353f309..9cf75ffb 100644
--- a/deps/jemalloc/include/jemalloc/internal/base.h
+++ b/deps/jemalloc/include/jemalloc/internal/base.h
@@ -9,12 +9,14 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-extern malloc_mutex_t	base_mtx;
-
 void	*base_alloc(size_t size);
+void	*base_calloc(size_t number, size_t size);
 extent_node_t *base_node_alloc(void);
 void	base_node_dealloc(extent_node_t *node);
 bool	base_boot(void);
+void	base_prefork(void);
+void	base_postfork_parent(void);
+void	base_postfork_child(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/chunk.h b/deps/jemalloc/include/jemalloc/internal/chunk.h
index 54b6a3ec..8fb1fe6d 100644
--- a/deps/jemalloc/include/jemalloc/internal/chunk.h
+++ b/deps/jemalloc/include/jemalloc/internal/chunk.h
@@ -28,20 +28,13 @@
 #ifdef JEMALLOC_H_EXTERNS
 
 extern size_t		opt_lg_chunk;
-#ifdef JEMALLOC_SWAP
-extern bool		opt_overcommit;
-#endif
 
-#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
 /* Protects stats_chunks; currently not used for any other purpose. */
 extern malloc_mutex_t	chunks_mtx;
 /* Chunk statistics. */
 extern chunk_stats_t	stats_chunks;
-#endif
 
-#ifdef JEMALLOC_IVSALLOC
 extern rtree_t		*chunks_rtree;
-#endif
 
 extern size_t		chunksize;
 extern size_t		chunksize_mask; /* (chunksize - 1). */
@@ -49,7 +42,7 @@ extern size_t		chunk_npages;
 extern size_t		map_bias; /* Number of arena chunk header pages. */
 extern size_t		arena_maxclass; /* Max size class for arenas. */
 
-void	*chunk_alloc(size_t size, bool base, bool *zero);
+void	*chunk_alloc(size_t size, size_t alignment, bool base, bool *zero);
 void	chunk_dealloc(void *chunk, size_t size, bool unmap);
 bool	chunk_boot(void);
 
@@ -60,6 +53,5 @@ bool	chunk_boot(void);
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
 
-#include "jemalloc/internal/chunk_swap.h"
 #include "jemalloc/internal/chunk_dss.h"
 #include "jemalloc/internal/chunk_mmap.h"
diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h
index 6f005222..6e2643b2 100644
--- a/deps/jemalloc/include/jemalloc/internal/chunk_dss.h
+++ b/deps/jemalloc/include/jemalloc/internal/chunk_dss.h
@@ -1,4 +1,3 @@
-#ifdef JEMALLOC_DSS
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
@@ -10,16 +9,12 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-/*
- * Protects sbrk() calls.  This avoids malloc races among threads, though it
- * does not protect against races with threads that call sbrk() directly.
- */
-extern malloc_mutex_t	dss_mtx;
-
-void	*chunk_alloc_dss(size_t size, bool *zero);
+void	*chunk_alloc_dss(size_t size, size_t alignment, bool *zero);
 bool	chunk_in_dss(void *chunk);
-bool	chunk_dealloc_dss(void *chunk, size_t size);
 bool	chunk_dss_boot(void);
+void	chunk_dss_prefork(void);
+void	chunk_dss_postfork_parent(void);
+void	chunk_dss_postfork_child(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
@@ -27,4 +22,3 @@ bool	chunk_dss_boot(void);
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
-#endif /* JEMALLOC_DSS */
diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h
index 07b50a4d..b29f39e9 100644
--- a/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h
+++ b/deps/jemalloc/include/jemalloc/internal/chunk_mmap.h
@@ -9,11 +9,10 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-void	*chunk_alloc_mmap(size_t size);
-void	*chunk_alloc_mmap_noreserve(size_t size);
-void	chunk_dealloc_mmap(void *chunk, size_t size);
+void	pages_purge(void *addr, size_t length);
 
-bool	chunk_mmap_boot(void);
+void	*chunk_alloc_mmap(size_t size, size_t alignment, bool *zero);
+bool	chunk_dealloc_mmap(void *chunk, size_t size);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/chunk_swap.h b/deps/jemalloc/include/jemalloc/internal/chunk_swap.h
deleted file mode 100644
index 9faa739f..00000000
--- a/deps/jemalloc/include/jemalloc/internal/chunk_swap.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifdef JEMALLOC_SWAP
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-extern malloc_mutex_t	swap_mtx;
-extern bool		swap_enabled;
-extern bool		swap_prezeroed;
-extern size_t		swap_nfds;
-extern int		*swap_fds;
-#ifdef JEMALLOC_STATS
-extern size_t		swap_avail;
-#endif
-
-void	*chunk_alloc_swap(size_t size, bool *zero);
-bool	chunk_in_swap(void *chunk);
-bool	chunk_dealloc_swap(void *chunk, size_t size);
-bool	chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed);
-bool	chunk_swap_boot(void);
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
-#endif /* JEMALLOC_SWAP */
diff --git a/deps/jemalloc/include/jemalloc/internal/ckh.h b/deps/jemalloc/include/jemalloc/internal/ckh.h
index 3e4ad4c8..05d1fc03 100644
--- a/deps/jemalloc/include/jemalloc/internal/ckh.h
+++ b/deps/jemalloc/include/jemalloc/internal/ckh.h
@@ -30,11 +30,6 @@ struct ckhc_s {
 };
 
 struct ckh_s {
-#ifdef JEMALLOC_DEBUG
-#define	CKH_MAGIC	0x3af2489d
-	uint32_t	magic;
-#endif
-
 #ifdef CKH_COUNT
 	/* Counters used to get an idea of performance. */
 	uint64_t	ngrows;
@@ -47,7 +42,7 @@ struct ckh_s {
 	/* Used for pseudo-random number generation. */
 #define	CKH_A		1103515241
 #define	CKH_C		12347
-	uint32_t	prn_state;
+	uint32_t	prng_state;
 
 	/* Total number of items. */
 	size_t		count;
diff --git a/deps/jemalloc/include/jemalloc/internal/ctl.h b/deps/jemalloc/include/jemalloc/internal/ctl.h
index f1f5eb70..adf3827f 100644
--- a/deps/jemalloc/include/jemalloc/internal/ctl.h
+++ b/deps/jemalloc/include/jemalloc/internal/ctl.h
@@ -2,6 +2,8 @@
 #ifdef JEMALLOC_H_TYPES
 
 typedef struct ctl_node_s ctl_node_t;
+typedef struct ctl_named_node_s ctl_named_node_t;
+typedef struct ctl_indexed_node_s ctl_indexed_node_t;
 typedef struct ctl_arena_stats_s ctl_arena_stats_t;
 typedef struct ctl_stats_s ctl_stats_t;
 
@@ -11,20 +13,21 @@ typedef struct ctl_stats_s ctl_stats_t;
 
 struct ctl_node_s {
 	bool			named;
-	union {
-		struct {
-			const char	*name;
-			/* If (nchildren == 0), this is a terminal node. */
-			unsigned	nchildren;
-			const	ctl_node_t *children;
-		} named;
-		struct {
-			const ctl_node_t *(*index)(const size_t *, size_t,
-			    size_t);
-		} indexed;
-	} u;
-	int	(*ctl)(const size_t *, size_t, void *, size_t *, void *,
-	    size_t);
+};
+
+struct ctl_named_node_s {
+	struct ctl_node_s	node;
+	const char		*name;
+	/* If (nchildren == 0), this is a terminal node. */
+	unsigned		nchildren;
+	const			ctl_node_t *children;
+	int			(*ctl)(const size_t *, size_t, void *, size_t *,
+	    void *, size_t);
+};
+
+struct ctl_indexed_node_s {
+	struct ctl_node_s	node;
+	const ctl_named_node_t	*(*index)(const size_t *, size_t, size_t);
 };
 
 struct ctl_arena_stats_s {
@@ -32,7 +35,6 @@ struct ctl_arena_stats_s {
 	unsigned		nthreads;
 	size_t			pactive;
 	size_t			pdirty;
-#ifdef JEMALLOC_STATS
 	arena_stats_t		astats;
 
 	/* Aggregate stats for small size classes, based on bin stats. */
@@ -41,13 +43,11 @@ struct ctl_arena_stats_s {
 	uint64_t		ndalloc_small;
 	uint64_t		nrequests_small;
 
-	malloc_bin_stats_t	*bstats;	/* nbins elements. */
+	malloc_bin_stats_t	bstats[NBINS];
 	malloc_large_stats_t	*lstats;	/* nlclasses elements. */
-#endif
 };
 
 struct ctl_stats_s {
-#ifdef JEMALLOC_STATS
 	size_t			allocated;
 	size_t			active;
 	size_t			mapped;
@@ -61,11 +61,7 @@ struct ctl_stats_s {
 		uint64_t	nmalloc;	/* huge_nmalloc */
 		uint64_t	ndalloc;	/* huge_ndalloc */
 	} huge;
-#endif
 	ctl_arena_stats_t	*arenas;	/* (narenas + 1) elements. */
-#ifdef JEMALLOC_SWAP
-	size_t			swap_avail;
-#endif
 };
 
 #endif /* JEMALLOC_H_STRUCTS */
@@ -81,27 +77,25 @@ int	ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 bool	ctl_boot(void);
 
 #define	xmallctl(name, oldp, oldlenp, newp, newlen) do {		\
-	if (JEMALLOC_P(mallctl)(name, oldp, oldlenp, newp, newlen)	\
+	if (je_mallctl(name, oldp, oldlenp, newp, newlen)		\
 	    != 0) {							\
-		malloc_write("<jemalloc>: Failure in xmallctl(\"");	\
-		malloc_write(name);					\
-		malloc_write("\", ...)\n");				\
+		malloc_printf(						\
+		    "<jemalloc>: Failure in xmallctl(\"%s\", ...)\n",	\
+		    name);						\
 		abort();						\
 	}								\
 } while (0)
 
 #define	xmallctlnametomib(name, mibp, miblenp) do {			\
-	if (JEMALLOC_P(mallctlnametomib)(name, mibp, miblenp) != 0) {	\
-		malloc_write(						\
-		    "<jemalloc>: Failure in xmallctlnametomib(\"");	\
-		malloc_write(name);					\
-		malloc_write("\", ...)\n");				\
+	if (je_mallctlnametomib(name, mibp, miblenp) != 0) {		\
+		malloc_printf("<jemalloc>: Failure in "			\
+		    "xmallctlnametomib(\"%s\", ...)\n", name);		\
 		abort();						\
 	}								\
 } while (0)
 
 #define	xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do {	\
-	if (JEMALLOC_P(mallctlbymib)(mib, miblen, oldp, oldlenp, newp,	\
+	if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp,		\
 	    newlen) != 0) {						\
 		malloc_write(						\
 		    "<jemalloc>: Failure in xmallctlbymib()\n");	\
diff --git a/deps/jemalloc/include/jemalloc/internal/extent.h b/deps/jemalloc/include/jemalloc/internal/extent.h
index 6fe9702b..36af8be8 100644
--- a/deps/jemalloc/include/jemalloc/internal/extent.h
+++ b/deps/jemalloc/include/jemalloc/internal/extent.h
@@ -9,18 +9,14 @@ typedef struct extent_node_s extent_node_t;
 
 /* Tree of extents. */
 struct extent_node_s {
-#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS))
 	/* Linkage for the size/address-ordered tree. */
 	rb_node(extent_node_t)	link_szad;
-#endif
 
 	/* Linkage for the address-ordered tree. */
 	rb_node(extent_node_t)	link_ad;
 
-#ifdef JEMALLOC_PROF
 	/* Profile counters, used for huge objects. */
 	prof_ctx_t		*prof_ctx;
-#endif
 
 	/* Pointer to the extent that this tree node is responsible for. */
 	void			*addr;
@@ -34,9 +30,7 @@ typedef rb_tree(extent_node_t) extent_tree_t;
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS))
 rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t)
-#endif
 
 rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t)
 
diff --git a/deps/jemalloc/include/jemalloc/internal/hash.h b/deps/jemalloc/include/jemalloc/internal/hash.h
index 8a46ce30..2f501f5d 100644
--- a/deps/jemalloc/include/jemalloc/internal/hash.h
+++ b/deps/jemalloc/include/jemalloc/internal/hash.h
@@ -26,7 +26,7 @@ uint64_t	hash(const void *key, size_t len, uint64_t seed);
 JEMALLOC_INLINE uint64_t
 hash(const void *key, size_t len, uint64_t seed)
 {
-	const uint64_t m = 0xc6a4a7935bd1e995LLU;
+	const uint64_t m = UINT64_C(0xc6a4a7935bd1e995);
 	const int r = 47;
 	uint64_t h = seed ^ (len * m);
 	const uint64_t *data = (const uint64_t *)key;
@@ -48,14 +48,14 @@ hash(const void *key, size_t len, uint64_t seed)
 
 	data2 = (const unsigned char *)data;
 	switch(len & 7) {
-		case 7: h ^= ((uint64_t)(data2[6])) << 48;
-		case 6: h ^= ((uint64_t)(data2[5])) << 40;
-		case 5: h ^= ((uint64_t)(data2[4])) << 32;
-		case 4: h ^= ((uint64_t)(data2[3])) << 24;
-		case 3: h ^= ((uint64_t)(data2[2])) << 16;
-		case 2: h ^= ((uint64_t)(data2[1])) << 8;
-		case 1: h ^= ((uint64_t)(data2[0]));
-			h *= m;
+	case 7: h ^= ((uint64_t)(data2[6])) << 48;
+	case 6: h ^= ((uint64_t)(data2[5])) << 40;
+	case 5: h ^= ((uint64_t)(data2[4])) << 32;
+	case 4: h ^= ((uint64_t)(data2[3])) << 24;
+	case 3: h ^= ((uint64_t)(data2[2])) << 16;
+	case 2: h ^= ((uint64_t)(data2[1])) << 8;
+	case 1: h ^= ((uint64_t)(data2[0]));
+		h *= m;
 	}
 
 	h ^= h >> r;
diff --git a/deps/jemalloc/include/jemalloc/internal/huge.h b/deps/jemalloc/include/jemalloc/internal/huge.h
index 66544cf8..e8513c93 100644
--- a/deps/jemalloc/include/jemalloc/internal/huge.h
+++ b/deps/jemalloc/include/jemalloc/internal/huge.h
@@ -9,12 +9,10 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
 
-#ifdef JEMALLOC_STATS
 /* Huge allocation statistics. */
 extern uint64_t		huge_nmalloc;
 extern uint64_t		huge_ndalloc;
 extern size_t		huge_allocated;
-#endif
 
 /* Protects chunk-related data structures. */
 extern malloc_mutex_t	huge_mtx;
@@ -27,11 +25,12 @@ void	*huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
     size_t alignment, bool zero);
 void	huge_dalloc(void *ptr, bool unmap);
 size_t	huge_salloc(const void *ptr);
-#ifdef JEMALLOC_PROF
 prof_ctx_t	*huge_prof_ctx_get(const void *ptr);
 void	huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
-#endif
 bool	huge_boot(void);
+void	huge_prefork(void);
+void	huge_postfork_parent(void);
+void	huge_postfork_child(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
index a44f0978..268cd146 100644
--- a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
@@ -1,17 +1,33 @@
-#include <sys/mman.h>
-#include <sys/param.h>
-#include <sys/time.h>
+#ifndef JEMALLOC_INTERNAL_H
+#define JEMALLOC_INTERNAL_H
+#include <math.h>
+#ifdef _WIN32
+#  include <windows.h>
+#  define ENOENT ERROR_PATH_NOT_FOUND
+#  define EINVAL ERROR_BAD_ARGUMENTS
+#  define EAGAIN ERROR_OUTOFMEMORY
+#  define EPERM  ERROR_WRITE_FAULT
+#  define EFAULT ERROR_INVALID_ADDRESS
+#  define ENOMEM ERROR_NOT_ENOUGH_MEMORY
+#  undef ERANGE
+#  define ERANGE ERROR_INVALID_DATA
+#else
+#  include <sys/param.h>
+#  include <sys/mman.h>
+#  include <sys/syscall.h>
+#  if !defined(SYS_write) && defined(__NR_write)
+#    define SYS_write __NR_write
+#  endif
+#  include <sys/uio.h>
+#  include <pthread.h>
+#  include <errno.h>
+#endif
 #include <sys/types.h>
-#include <sys/sysctl.h>
-#include <sys/uio.h>
 
-#include <errno.h>
 #include <limits.h>
 #ifndef SIZE_T_MAX
 #  define SIZE_T_MAX	SIZE_MAX
 #endif
-#include <pthread.h>
-#include <sched.h>
 #include <stdarg.h>
 #include <stdbool.h>
 #include <stdio.h>
@@ -25,16 +41,156 @@
 #include <string.h>
 #include <strings.h>
 #include <ctype.h>
-#include <unistd.h>
+#ifdef _MSC_VER
+#  include <io.h>
+typedef intptr_t ssize_t;
+#  define PATH_MAX 1024
+#  define STDERR_FILENO 2
+#  define __func__ __FUNCTION__
+/* Disable warnings about deprecated system functions */
+#  pragma warning(disable: 4996)
+#else
+#  include <unistd.h>
+#endif
 #include <fcntl.h>
-#include <pthread.h>
-#include <math.h>
 
-#define	JEMALLOC_MANGLE
+#define	JEMALLOC_NO_DEMANGLE
 #include "../jemalloc@install_suffix@.h"
 
+#ifdef JEMALLOC_UTRACE
+#include <sys/ktrace.h>
+#endif
+
+#ifdef JEMALLOC_VALGRIND
+#include <valgrind/valgrind.h>
+#include <valgrind/memcheck.h>
+#endif
+
 #include "jemalloc/internal/private_namespace.h"
 
+#ifdef JEMALLOC_CC_SILENCE
+#define	UNUSED JEMALLOC_ATTR(unused)
+#else
+#define	UNUSED
+#endif
+
+static const bool config_debug =
+#ifdef JEMALLOC_DEBUG
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_dss =
+#ifdef JEMALLOC_DSS
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_fill =
+#ifdef JEMALLOC_FILL
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_lazy_lock =
+#ifdef JEMALLOC_LAZY_LOCK
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_prof =
+#ifdef JEMALLOC_PROF
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_prof_libgcc =
+#ifdef JEMALLOC_PROF_LIBGCC
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_prof_libunwind =
+#ifdef JEMALLOC_PROF_LIBUNWIND
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_mremap =
+#ifdef JEMALLOC_MREMAP
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_munmap =
+#ifdef JEMALLOC_MUNMAP
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_stats =
+#ifdef JEMALLOC_STATS
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_tcache =
+#ifdef JEMALLOC_TCACHE
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_tls =
+#ifdef JEMALLOC_TLS
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_utrace =
+#ifdef JEMALLOC_UTRACE
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_valgrind =
+#ifdef JEMALLOC_VALGRIND
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_xmalloc =
+#ifdef JEMALLOC_XMALLOC
+    true
+#else
+    false
+#endif
+    ;
+static const bool config_ivsalloc =
+#ifdef JEMALLOC_IVSALLOC
+    true
+#else
+    false
+#endif
+    ;
+
+#ifdef JEMALLOC_ATOMIC9
+#include <machine/atomic.h>
+#endif
+
 #if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN))
 #include <libkern/OSAtomic.h>
 #endif
@@ -46,48 +202,11 @@
 #include <malloc/malloc.h>
 #endif
 
-#ifdef JEMALLOC_LAZY_LOCK
-#include <dlfcn.h>
-#endif
-
 #define	RB_COMPACT
 #include "jemalloc/internal/rb.h"
 #include "jemalloc/internal/qr.h"
 #include "jemalloc/internal/ql.h"
 
-extern void	(*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
-
-/*
- * Define a custom assert() in order to reduce the chances of deadlock during
- * assertion failure.
- */
-#ifndef assert
-#  ifdef JEMALLOC_DEBUG
-#    define assert(e) do {						\
-	if (!(e)) {							\
-		char line_buf[UMAX2S_BUFSIZE];				\
-		malloc_write("<jemalloc>: ");				\
-		malloc_write(__FILE__);					\
-		malloc_write(":");					\
-		malloc_write(u2s(__LINE__, 10, line_buf));		\
-		malloc_write(": Failed assertion: ");			\
-		malloc_write("\"");					\
-		malloc_write(#e);					\
-		malloc_write("\"\n");					\
-		abort();						\
-	}								\
-} while (0)
-#  else
-#    define assert(e)
-#  endif
-#endif
-
-#ifdef JEMALLOC_DEBUG
-#  define dassert(e) assert(e)
-#else
-#  define dassert(e)
-#endif
-
 /*
  * jemalloc can conceptually be broken into components (arena, tcache, etc.),
  * but there are circular dependencies that cannot be broken without
@@ -119,38 +238,56 @@ extern void	(*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
 #else
 #  define JEMALLOC_ENABLE_INLINE
 #  define JEMALLOC_INLINE static inline
+#  ifdef _MSC_VER
+#    define inline _inline
+#  endif
 #endif
 
-/* Size of stack-allocated buffer passed to buferror(). */
-#define	BUFERROR_BUF		64
+/* Smallest size class to support. */
+#define	LG_TINY_MIN		3
+#define	TINY_MIN		(1U << LG_TINY_MIN)
 
-/* Minimum alignment of allocations is 2^LG_QUANTUM bytes. */
-#ifdef __i386__
-#  define LG_QUANTUM		4
-#endif
-#ifdef __ia64__
-#  define LG_QUANTUM		4
-#endif
-#ifdef __alpha__
-#  define LG_QUANTUM		4
-#endif
-#ifdef __sparc64__
-#  define LG_QUANTUM		4
-#endif
-#if (defined(__amd64__) || defined(__x86_64__))
-#  define LG_QUANTUM		4
-#endif
-#ifdef __arm__
-#  define LG_QUANTUM		3
-#endif
-#ifdef __mips__
-#  define LG_QUANTUM		3
-#endif
-#ifdef __powerpc__
-#  define LG_QUANTUM		4
-#endif
-#ifdef __s390x__
-#  define LG_QUANTUM		4
+/*
+ * Minimum alignment of allocations is 2^LG_QUANTUM bytes (ignoring tiny size
+ * classes).
+ */
+#ifndef LG_QUANTUM
+#  if (defined(__i386__) || defined(_M_IX86))
+#    define LG_QUANTUM		4
+#  endif
+#  ifdef __ia64__
+#    define LG_QUANTUM		4
+#  endif
+#  ifdef __alpha__
+#    define LG_QUANTUM		4
+#  endif
+#  ifdef __sparc64__
+#    define LG_QUANTUM		4
+#  endif
+#  if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64))
+#    define LG_QUANTUM		4
+#  endif
+#  ifdef __arm__
+#    define LG_QUANTUM		3
+#  endif
+#  ifdef __mips__
+#    define LG_QUANTUM		3
+#  endif
+#  ifdef __powerpc__
+#    define LG_QUANTUM		4
+#  endif
+#  ifdef __s390x__
+#    define LG_QUANTUM		4
+#  endif
+#  ifdef __SH4__
+#    define LG_QUANTUM		4
+#  endif
+#  ifdef __tile__
+#    define LG_QUANTUM		4
+#  endif
+#  ifndef LG_QUANTUM
+#    error "No LG_QUANTUM definition for architecture; specify via CPPFLAGS"
+#  endif
 #endif
 
 #define	QUANTUM			((size_t)(1U << LG_QUANTUM))
@@ -164,67 +301,149 @@ extern void	(*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
 #define	LONG_MASK		(LONG - 1)
 
 /* Return the smallest long multiple that is >= a. */
-#define	LONG_CEILING(a)						\
+#define	LONG_CEILING(a)							\
 	(((a) + LONG_MASK) & ~LONG_MASK)
 
 #define	SIZEOF_PTR		(1U << LG_SIZEOF_PTR)
 #define	PTR_MASK		(SIZEOF_PTR - 1)
 
 /* Return the smallest (void *) multiple that is >= a. */
-#define	PTR_CEILING(a)						\
+#define	PTR_CEILING(a)							\
 	(((a) + PTR_MASK) & ~PTR_MASK)
 
 /*
  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing.
  * In addition, this controls the spacing of cacheline-spaced size classes.
+ *
+ * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can
+ * only handle raw constants.
  */
 #define	LG_CACHELINE		6
-#define	CACHELINE		((size_t)(1U << LG_CACHELINE))
+#define	CACHELINE		64
 #define	CACHELINE_MASK		(CACHELINE - 1)
 
 /* Return the smallest cacheline multiple that is >= s. */
 #define	CACHELINE_CEILING(s)						\
 	(((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
 
-/*
- * Page size.  STATIC_PAGE_SHIFT is determined by the configure script.  If
- * DYNAMIC_PAGE_SHIFT is enabled, only use the STATIC_PAGE_* macros where
- * compile-time values are required for the purposes of defining data
- * structures.
- */
-#define	STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT))
-#define	STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1))
-
-#ifdef PAGE_SHIFT
-#  undef PAGE_SHIFT
-#endif
-#ifdef PAGE_SIZE
-#  undef PAGE_SIZE
-#endif
+/* Page size.  STATIC_PAGE_SHIFT is determined by the configure script. */
 #ifdef PAGE_MASK
 #  undef PAGE_MASK
 #endif
-
-#ifdef DYNAMIC_PAGE_SHIFT
-#  define PAGE_SHIFT	lg_pagesize
-#  define PAGE_SIZE	pagesize
-#  define PAGE_MASK	pagesize_mask
-#else
-#  define PAGE_SHIFT	STATIC_PAGE_SHIFT
-#  define PAGE_SIZE	STATIC_PAGE_SIZE
-#  define PAGE_MASK	STATIC_PAGE_MASK
-#endif
+#define	LG_PAGE		STATIC_PAGE_SHIFT
+#define	PAGE		((size_t)(1U << STATIC_PAGE_SHIFT))
+#define	PAGE_MASK	((size_t)(PAGE - 1))
 
 /* Return the smallest pagesize multiple that is >= s. */
 #define	PAGE_CEILING(s)							\
 	(((s) + PAGE_MASK) & ~PAGE_MASK)
 
+/* Return the nearest aligned address at or below a. */
+#define	ALIGNMENT_ADDR2BASE(a, alignment)				\
+	((void *)((uintptr_t)(a) & (-(alignment))))
+
+/* Return the offset between a and the nearest aligned address at or below a. */
+#define	ALIGNMENT_ADDR2OFFSET(a, alignment)				\
+	((size_t)((uintptr_t)(a) & (alignment - 1)))
+
+/* Return the smallest alignment multiple that is >= s. */
+#define	ALIGNMENT_CEILING(s, alignment)					\
+	(((s) + (alignment - 1)) & (-(alignment)))
+
+/* Declare a variable length array */
+#if __STDC_VERSION__ < 199901L
+#  ifdef _MSC_VER
+#    include <malloc.h>
+#    define alloca _alloca
+#  else
+#    include <alloca.h>
+#  endif
+#  define VARIABLE_ARRAY(type, name, count) \
+	type *name = alloca(sizeof(type) * count)
+#else
+#  define VARIABLE_ARRAY(type, name, count) type name[count]
+#endif
+
+#ifdef JEMALLOC_VALGRIND
+/*
+ * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions
+ * so that when Valgrind reports errors, there are no extra stack frames
+ * in the backtraces.
+ *
+ * The size that is reported to valgrind must be consistent through a chain of
+ * malloc..realloc..realloc calls.  Request size isn't recorded anywhere in
+ * jemalloc, so it is critical that all callers of these macros provide usize
+ * rather than request size.  As a result, buffer overflow detection is
+ * technically weakened for the standard API, though it is generally accepted
+ * practice to consider any extra bytes reported by malloc_usable_size() as
+ * usable space.
+ */
+#define	JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {		\
+	if (config_valgrind && opt_valgrind && cond)			\
+		VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero);	\
+} while (0)
+#define	JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize,	\
+    old_rzsize, zero)  do {						\
+	if (config_valgrind && opt_valgrind) {				\
+		size_t rzsize = p2rz(ptr);				\
+									\
+		if (ptr == old_ptr) {					\
+			VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize,	\
+			    usize, rzsize);				\
+			if (zero && old_usize < usize) {		\
+				VALGRIND_MAKE_MEM_DEFINED(		\
+				    (void *)((uintptr_t)ptr +		\
+				    old_usize), usize - old_usize);	\
+			}						\
+		} else {						\
+			if (old_ptr != NULL) {				\
+				VALGRIND_FREELIKE_BLOCK(old_ptr,	\
+				    old_rzsize);			\
+			}						\
+			if (ptr != NULL) {				\
+				size_t copy_size = (old_usize < usize)	\
+				    ?  old_usize : usize;		\
+				size_t tail_size = usize - copy_size;	\
+				VALGRIND_MALLOCLIKE_BLOCK(ptr, usize,	\
+				    rzsize, false);			\
+				if (copy_size > 0) {			\
+					VALGRIND_MAKE_MEM_DEFINED(ptr,	\
+					    copy_size);			\
+				}					\
+				if (zero && tail_size > 0) {		\
+					VALGRIND_MAKE_MEM_DEFINED(	\
+					    (void *)((uintptr_t)ptr +	\
+					    copy_size), tail_size);	\
+				}					\
+			}						\
+		}							\
+	}								\
+} while (0)
+#define	JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {			\
+	if (config_valgrind && opt_valgrind)				\
+		VALGRIND_FREELIKE_BLOCK(ptr, rzsize);			\
+} while (0)
+#else
+#define	VALGRIND_MALLOCLIKE_BLOCK(addr, sizeB, rzB, is_zeroed)
+#define	VALGRIND_RESIZEINPLACE_BLOCK(addr, oldSizeB, newSizeB, rzB)
+#define	VALGRIND_FREELIKE_BLOCK(addr, rzB)
+#define	VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr, _qzz_len)
+#define	VALGRIND_MAKE_MEM_DEFINED(_qzz_addr, _qzz_len)
+#define	JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero)
+#define	JEMALLOC_VALGRIND_REALLOC(ptr, usize, old_ptr, old_usize,	\
+    old_rzsize, zero)
+#define	JEMALLOC_VALGRIND_FREE(ptr, rzsize)
+#endif
+
+#include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
-#include "jemalloc/internal/prn.h"
+#include "jemalloc/internal/prng.h"
 #include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/size_classes.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
 #include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/tsd.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/extent.h"
 #include "jemalloc/internal/arena.h"
@@ -235,21 +454,22 @@ extern void	(*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
 #include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/tcache.h"
 #include "jemalloc/internal/hash.h"
-#ifdef JEMALLOC_ZONE
-#include "jemalloc/internal/zone.h"
-#endif
+#include "jemalloc/internal/quarantine.h"
 #include "jemalloc/internal/prof.h"
 
 #undef JEMALLOC_H_TYPES
 /******************************************************************************/
 #define JEMALLOC_H_STRUCTS
 
+#include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
-#include "jemalloc/internal/prn.h"
+#include "jemalloc/internal/prng.h"
 #include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/size_classes.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
 #include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/tsd.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/bitmap.h"
 #include "jemalloc/internal/extent.h"
@@ -260,66 +480,37 @@ extern void	(*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s);
 #include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/tcache.h"
 #include "jemalloc/internal/hash.h"
-#ifdef JEMALLOC_ZONE
-#include "jemalloc/internal/zone.h"
-#endif
+#include "jemalloc/internal/quarantine.h"
 #include "jemalloc/internal/prof.h"
 
-#ifdef JEMALLOC_STATS
 typedef struct {
 	uint64_t	allocated;
 	uint64_t	deallocated;
 } thread_allocated_t;
-#endif
+/*
+ * The JEMALLOC_CONCAT() wrapper is necessary to pass {0, 0} via a cpp macro
+ * argument.
+ */
+#define	THREAD_ALLOCATED_INITIALIZER	JEMALLOC_CONCAT({0, 0})
 
 #undef JEMALLOC_H_STRUCTS
 /******************************************************************************/
 #define JEMALLOC_H_EXTERNS
 
 extern bool	opt_abort;
-#ifdef JEMALLOC_FILL
 extern bool	opt_junk;
-#endif
-#ifdef JEMALLOC_SYSV
-extern bool	opt_sysv;
-#endif
-#ifdef JEMALLOC_XMALLOC
+extern size_t	opt_quarantine;
+extern bool	opt_redzone;
+extern bool	opt_utrace;
+extern bool	opt_valgrind;
 extern bool	opt_xmalloc;
-#endif
-#ifdef JEMALLOC_FILL
 extern bool	opt_zero;
-#endif
 extern size_t	opt_narenas;
 
-#ifdef DYNAMIC_PAGE_SHIFT
-extern size_t		pagesize;
-extern size_t		pagesize_mask;
-extern size_t		lg_pagesize;
-#endif
-
 /* Number of CPUs. */
 extern unsigned		ncpus;
 
 extern malloc_mutex_t	arenas_lock; /* Protects arenas initialization. */
-extern pthread_key_t	arenas_tsd;
-#ifndef NO_TLS
-/*
- * Map of pthread_self() --> arenas[???], used for selecting an arena to use
- * for allocations.
- */
-extern __thread arena_t	*arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
-#  define ARENA_GET()	arenas_tls
-#  define ARENA_SET(v)	do {						\
-	arenas_tls = (v);						\
-	pthread_setspecific(arenas_tsd, (void *)(v));			\
-} while (0)
-#else
-#  define ARENA_GET()	((arena_t *)pthread_getspecific(arenas_tsd))
-#  define ARENA_SET(v)	do {						\
-	pthread_setspecific(arenas_tsd, (void *)(v));			\
-} while (0)
-#endif
-
 /*
  * Arenas that are used to service external requests.  Not all elements of the
  * arenas array are necessarily used; arenas are created lazily as needed.
@@ -327,45 +518,22 @@ extern __thread arena_t	*arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
 extern arena_t		**arenas;
 extern unsigned		narenas;
 
-#ifdef JEMALLOC_STATS
-#  ifndef NO_TLS
-extern __thread thread_allocated_t	thread_allocated_tls;
-#    define ALLOCATED_GET() (thread_allocated_tls.allocated)
-#    define ALLOCATEDP_GET() (&thread_allocated_tls.allocated)
-#    define DEALLOCATED_GET() (thread_allocated_tls.deallocated)
-#    define DEALLOCATEDP_GET() (&thread_allocated_tls.deallocated)
-#    define ALLOCATED_ADD(a, d) do {					\
-	thread_allocated_tls.allocated += a;				\
-	thread_allocated_tls.deallocated += d;				\
-} while (0)
-#  else
-extern pthread_key_t	thread_allocated_tsd;
-thread_allocated_t	*thread_allocated_get_hard(void);
-
-#    define ALLOCATED_GET() (thread_allocated_get()->allocated)
-#    define ALLOCATEDP_GET() (&thread_allocated_get()->allocated)
-#    define DEALLOCATED_GET() (thread_allocated_get()->deallocated)
-#    define DEALLOCATEDP_GET() (&thread_allocated_get()->deallocated)
-#    define ALLOCATED_ADD(a, d) do {					\
-	thread_allocated_t *thread_allocated = thread_allocated_get();	\
-	thread_allocated->allocated += (a);				\
-	thread_allocated->deallocated += (d);				\
-} while (0)
-#  endif
-#endif
-
 arena_t	*arenas_extend(unsigned ind);
+void	arenas_cleanup(void *arg);
 arena_t	*choose_arena_hard(void);
-int	buferror(int errnum, char *buf, size_t buflen);
 void	jemalloc_prefork(void);
-void	jemalloc_postfork(void);
+void	jemalloc_postfork_parent(void);
+void	jemalloc_postfork_child(void);
 
+#include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
-#include "jemalloc/internal/prn.h"
+#include "jemalloc/internal/prng.h"
 #include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/size_classes.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
 #include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/tsd.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/bitmap.h"
 #include "jemalloc/internal/extent.h"
@@ -376,21 +544,22 @@ void	jemalloc_postfork(void);
 #include "jemalloc/internal/rtree.h"
 #include "jemalloc/internal/tcache.h"
 #include "jemalloc/internal/hash.h"
-#ifdef JEMALLOC_ZONE
-#include "jemalloc/internal/zone.h"
-#endif
+#include "jemalloc/internal/quarantine.h"
 #include "jemalloc/internal/prof.h"
 
 #undef JEMALLOC_H_EXTERNS
 /******************************************************************************/
 #define JEMALLOC_H_INLINES
 
+#include "jemalloc/internal/util.h"
 #include "jemalloc/internal/atomic.h"
-#include "jemalloc/internal/prn.h"
+#include "jemalloc/internal/prng.h"
 #include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/size_classes.h"
 #include "jemalloc/internal/stats.h"
 #include "jemalloc/internal/ctl.h"
 #include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/tsd.h"
 #include "jemalloc/internal/mb.h"
 #include "jemalloc/internal/extent.h"
 #include "jemalloc/internal/base.h"
@@ -398,34 +567,20 @@ void	jemalloc_postfork(void);
 #include "jemalloc/internal/huge.h"
 
 #ifndef JEMALLOC_ENABLE_INLINE
-size_t	pow2_ceil(size_t x);
+malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *)
+
 size_t	s2u(size_t size);
-size_t	sa2u(size_t size, size_t alignment, size_t *run_size_p);
-void	malloc_write(const char *s);
-arena_t	*choose_arena(void);
-#  if (defined(JEMALLOC_STATS) && defined(NO_TLS))
-thread_allocated_t	*thread_allocated_get(void);
-#  endif
+size_t	sa2u(size_t size, size_t alignment);
+arena_t	*choose_arena(arena_t *arena);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
-/* Compute the smallest power of 2 that is >= x. */
-JEMALLOC_INLINE size_t
-pow2_ceil(size_t x)
-{
-
-	x--;
-	x |= x >> 1;
-	x |= x >> 2;
-	x |= x >> 4;
-	x |= x >> 8;
-	x |= x >> 16;
-#if (LG_SIZEOF_PTR == 3)
-	x |= x >> 32;
-#endif
-	x++;
-	return (x);
-}
+/*
+ * Map of pthread_self() --> arenas[???], used for selecting an arena to use
+ * for allocations.
+ */
+malloc_tsd_externs(arenas, arena_t *)
+malloc_tsd_funcs(JEMALLOC_INLINE, arenas, arena_t *, NULL, arenas_cleanup)
 
 /*
  * Compute usable size that would result from allocating an object with the
@@ -435,7 +590,7 @@ JEMALLOC_INLINE size_t
 s2u(size_t size)
 {
 
-	if (size <= small_maxclass)
+	if (size <= SMALL_MAXCLASS)
 		return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size);
 	if (size <= arena_maxclass)
 		return (PAGE_CEILING(size));
@@ -447,10 +602,12 @@ s2u(size_t size)
  * specified size and alignment.
  */
 JEMALLOC_INLINE size_t
-sa2u(size_t size, size_t alignment, size_t *run_size_p)
+sa2u(size_t size, size_t alignment)
 {
 	size_t usize;
 
+	assert(alignment != 0 && ((alignment - 1) & alignment) == 0);
+
 	/*
 	 * Round size up to the nearest multiple of alignment.
 	 *
@@ -464,12 +621,8 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p)
 	 *     96 |  1100000 |  32
 	 *    144 | 10100000 |  32
 	 *    192 | 11000000 |  64
-	 *
-	 * Depending on runtime settings, it is possible that arena_malloc()
-	 * will further round up to a power of two, but that never causes
-	 * correctness issues.
 	 */
-	usize = (size + (alignment - 1)) & (-alignment);
+	usize = ALIGNMENT_CEILING(size, alignment);
 	/*
 	 * (usize < size) protects against the combination of maximal
 	 * alignment and size greater than maximal alignment.
@@ -479,8 +632,8 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p)
 		return (0);
 	}
 
-	if (usize <= arena_maxclass && alignment <= PAGE_SIZE) {
-		if (usize <= small_maxclass)
+	if (usize <= arena_maxclass && alignment <= PAGE) {
+		if (usize <= SMALL_MAXCLASS)
 			return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size);
 		return (PAGE_CEILING(usize));
 	} else {
@@ -494,7 +647,7 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p)
 		usize = PAGE_CEILING(size);
 		/*
 		 * (usize < size) protects against very large sizes within
-		 * PAGE_SIZE of SIZE_T_MAX.
+		 * PAGE of SIZE_T_MAX.
 		 *
 		 * (usize + alignment < usize) protects against the
 		 * combination of maximal alignment and usize large enough
@@ -512,93 +665,63 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p)
 		/*
 		 * Calculate the size of the over-size run that arena_palloc()
 		 * would need to allocate in order to guarantee the alignment.
+		 * If the run wouldn't fit within a chunk, round up to a huge
+		 * allocation size.
 		 */
-		if (usize >= alignment)
-			run_size = usize + alignment - PAGE_SIZE;
-		else {
-			/*
-			 * It is possible that (alignment << 1) will cause
-			 * overflow, but it doesn't matter because we also
-			 * subtract PAGE_SIZE, which in the case of overflow
-			 * leaves us with a very large run_size.  That causes
-			 * the first conditional below to fail, which means
-			 * that the bogus run_size value never gets used for
-			 * anything important.
-			 */
-			run_size = (alignment << 1) - PAGE_SIZE;
-		}
-		if (run_size_p != NULL)
-			*run_size_p = run_size;
-
+		run_size = usize + alignment - PAGE;
 		if (run_size <= arena_maxclass)
 			return (PAGE_CEILING(usize));
 		return (CHUNK_CEILING(usize));
 	}
 }
 
-/*
- * Wrapper around malloc_message() that avoids the need for
- * JEMALLOC_P(malloc_message)(...) throughout the code.
- */
-JEMALLOC_INLINE void
-malloc_write(const char *s)
-{
-
-	JEMALLOC_P(malloc_message)(NULL, s);
-}
-
-/*
- * Choose an arena based on a per-thread value (fast-path code, calls slow-path
- * code if necessary).
- */
+/* Choose an arena based on a per-thread value. */
 JEMALLOC_INLINE arena_t *
-choose_arena(void)
+choose_arena(arena_t *arena)
 {
 	arena_t *ret;
 
-	ret = ARENA_GET();
-	if (ret == NULL) {
+	if (arena != NULL)
+		return (arena);
+
+	if ((ret = *arenas_tsd_get()) == NULL) {
 		ret = choose_arena_hard();
 		assert(ret != NULL);
 	}
 
 	return (ret);
 }
-
-#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
-JEMALLOC_INLINE thread_allocated_t *
-thread_allocated_get(void)
-{
-	thread_allocated_t *thread_allocated = (thread_allocated_t *)
-	    pthread_getspecific(thread_allocated_tsd);
-
-	if (thread_allocated == NULL)
-		return (thread_allocated_get_hard());
-	return (thread_allocated);
-}
-#endif
 #endif
 
 #include "jemalloc/internal/bitmap.h"
 #include "jemalloc/internal/rtree.h"
+/*
+ * Include arena.h twice in order to resolve circular dependencies with
+ * tcache.h.
+ */
+#define	JEMALLOC_ARENA_INLINE_A
+#include "jemalloc/internal/arena.h"
+#undef JEMALLOC_ARENA_INLINE_A
 #include "jemalloc/internal/tcache.h"
+#define	JEMALLOC_ARENA_INLINE_B
 #include "jemalloc/internal/arena.h"
+#undef JEMALLOC_ARENA_INLINE_B
 #include "jemalloc/internal/hash.h"
-#ifdef JEMALLOC_ZONE
-#include "jemalloc/internal/zone.h"
-#endif
+#include "jemalloc/internal/quarantine.h"
 
 #ifndef JEMALLOC_ENABLE_INLINE
 void	*imalloc(size_t size);
 void	*icalloc(size_t size);
 void	*ipalloc(size_t usize, size_t alignment, bool zero);
-size_t	isalloc(const void *ptr);
-#  ifdef JEMALLOC_IVSALLOC
-size_t	ivsalloc(const void *ptr);
-#  endif
+size_t	isalloc(const void *ptr, bool demote);
+size_t	ivsalloc(const void *ptr, bool demote);
+size_t	u2rz(size_t usize);
+size_t	p2rz(const void *ptr);
 void	idalloc(void *ptr);
+void	iqalloc(void *ptr);
 void	*iralloc(void *ptr, size_t size, size_t extra, size_t alignment,
     bool zero, bool no_move);
+malloc_tsd_protos(JEMALLOC_ATTR(unused), thread_allocated, thread_allocated_t)
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_))
@@ -609,7 +732,7 @@ imalloc(size_t size)
 	assert(size != 0);
 
 	if (size <= arena_maxclass)
-		return (arena_malloc(size, false));
+		return (arena_malloc(NULL, size, false, true));
 	else
 		return (huge_malloc(size, false));
 }
@@ -619,7 +742,7 @@ icalloc(size_t size)
 {
 
 	if (size <= arena_maxclass)
-		return (arena_malloc(size, true));
+		return (arena_malloc(NULL, size, true, true));
 	else
 		return (huge_malloc(size, true));
 }
@@ -630,75 +753,80 @@ ipalloc(size_t usize, size_t alignment, bool zero)
 	void *ret;
 
 	assert(usize != 0);
-	assert(usize == sa2u(usize, alignment, NULL));
+	assert(usize == sa2u(usize, alignment));
 
-	if (usize <= arena_maxclass && alignment <= PAGE_SIZE)
-		ret = arena_malloc(usize, zero);
+	if (usize <= arena_maxclass && alignment <= PAGE)
+		ret = arena_malloc(NULL, usize, zero, true);
 	else {
-		size_t run_size
-#ifdef JEMALLOC_CC_SILENCE
-		    = 0
-#endif
-		    ;
-
-		/*
-		 * Ideally we would only ever call sa2u() once per aligned
-		 * allocation request, and the caller of this function has
-		 * already done so once.  However, it's rather burdensome to
-		 * require every caller to pass in run_size, especially given
-		 * that it's only relevant to large allocations.  Therefore,
-		 * just call it again here in order to get run_size.
-		 */
-		sa2u(usize, alignment, &run_size);
-		if (run_size <= arena_maxclass) {
-			ret = arena_palloc(choose_arena(), usize, run_size,
-			    alignment, zero);
+		if (usize <= arena_maxclass) {
+			ret = arena_palloc(choose_arena(NULL), usize, alignment,
+			    zero);
 		} else if (alignment <= chunksize)
 			ret = huge_malloc(usize, zero);
 		else
 			ret = huge_palloc(usize, alignment, zero);
 	}
 
-	assert(((uintptr_t)ret & (alignment - 1)) == 0);
+	assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret);
 	return (ret);
 }
 
+/*
+ * Typical usage:
+ *   void *ptr = [...]
+ *   size_t sz = isalloc(ptr, config_prof);
+ */
 JEMALLOC_INLINE size_t
-isalloc(const void *ptr)
+isalloc(const void *ptr, bool demote)
 {
 	size_t ret;
 	arena_chunk_t *chunk;
 
 	assert(ptr != NULL);
+	/* Demotion only makes sense if config_prof is true. */
+	assert(config_prof || demote == false);
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-	if (chunk != ptr) {
-		/* Region. */
-		dassert(chunk->arena->magic == ARENA_MAGIC);
-
-#ifdef JEMALLOC_PROF
-		ret = arena_salloc_demote(ptr);
-#else
-		ret = arena_salloc(ptr);
-#endif
-	} else
+	if (chunk != ptr)
+		ret = arena_salloc(ptr, demote);
+	else
 		ret = huge_salloc(ptr);
 
 	return (ret);
 }
 
-#ifdef JEMALLOC_IVSALLOC
 JEMALLOC_INLINE size_t
-ivsalloc(const void *ptr)
+ivsalloc(const void *ptr, bool demote)
 {
 
 	/* Return 0 if ptr is not within a chunk managed by jemalloc. */
 	if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL)
 		return (0);
 
-	return (isalloc(ptr));
+	return (isalloc(ptr, demote));
+}
+
+JEMALLOC_INLINE size_t
+u2rz(size_t usize)
+{
+	size_t ret;
+
+	if (usize <= SMALL_MAXCLASS) {
+		size_t binind = SMALL_SIZE2BIN(usize);
+		ret = arena_bin_info[binind].redzone_size;
+	} else
+		ret = 0;
+
+	return (ret);
+}
+
+JEMALLOC_INLINE size_t
+p2rz(const void *ptr)
+{
+	size_t usize = isalloc(ptr, false);
+
+	return (u2rz(usize));
 }
-#endif
 
 JEMALLOC_INLINE void
 idalloc(void *ptr)
@@ -709,11 +837,21 @@ idalloc(void *ptr)
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (chunk != ptr)
-		arena_dalloc(chunk->arena, chunk, ptr);
+		arena_dalloc(chunk->arena, chunk, ptr, true);
 	else
 		huge_dalloc(ptr, true);
 }
 
+JEMALLOC_INLINE void
+iqalloc(void *ptr)
+{
+
+	if (config_fill && opt_quarantine)
+		quarantine(ptr);
+	else
+		idalloc(ptr);
+}
+
 JEMALLOC_INLINE void *
 iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
     bool no_move)
@@ -724,19 +862,19 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
 	assert(ptr != NULL);
 	assert(size != 0);
 
-	oldsize = isalloc(ptr);
+	oldsize = isalloc(ptr, config_prof);
 
 	if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1))
 	    != 0) {
 		size_t usize, copysize;
 
 		/*
-		 * Existing object alignment is inadquate; allocate new space
+		 * Existing object alignment is inadequate; allocate new space
 		 * and copy.
 		 */
 		if (no_move)
 			return (NULL);
-		usize = sa2u(size + extra, alignment, NULL);
+		usize = sa2u(size + extra, alignment);
 		if (usize == 0)
 			return (NULL);
 		ret = ipalloc(usize, alignment, zero);
@@ -744,7 +882,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
 			if (extra == 0)
 				return (NULL);
 			/* Try again, without extra this time. */
-			usize = sa2u(size, alignment, NULL);
+			usize = sa2u(size, alignment);
 			if (usize == 0)
 				return (NULL);
 			ret = ipalloc(usize, alignment, zero);
@@ -758,7 +896,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
 		 */
 		copysize = (size < oldsize) ? size : oldsize;
 		memcpy(ret, ptr, copysize);
-		idalloc(ptr);
+		iqalloc(ptr);
 		return (ret);
 	}
 
@@ -773,16 +911,21 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero,
 	} else {
 		if (size + extra <= arena_maxclass) {
 			return (arena_ralloc(ptr, oldsize, size, extra,
-			    alignment, zero));
+			    alignment, zero, true));
 		} else {
 			return (huge_ralloc(ptr, oldsize, size, extra,
 			    alignment, zero));
 		}
 	}
 }
+
+malloc_tsd_externs(thread_allocated, thread_allocated_t)
+malloc_tsd_funcs(JEMALLOC_INLINE, thread_allocated, thread_allocated_t,
+    THREAD_ALLOCATED_INITIALIZER, malloc_tsd_no_cleanup)
 #endif
 
 #include "jemalloc/internal/prof.h"
 
 #undef JEMALLOC_H_INLINES
 /******************************************************************************/
+#endif /* JEMALLOC_INTERNAL_H */
diff --git a/deps/jemalloc/include/jemalloc/internal/mb.h b/deps/jemalloc/include/jemalloc/internal/mb.h
index dc9f2a54..3cfa7872 100644
--- a/deps/jemalloc/include/jemalloc/internal/mb.h
+++ b/deps/jemalloc/include/jemalloc/internal/mb.h
@@ -54,7 +54,7 @@ mb_write(void)
 	    );
 #endif
 }
-#elif (defined(__amd64_) || defined(__x86_64__))
+#elif (defined(__amd64__) || defined(__x86_64__))
 JEMALLOC_INLINE void
 mb_write(void)
 {
@@ -87,6 +87,13 @@ mb_write(void)
 	    : "memory" /* Clobbers. */
 	    );
 }
+#elif defined(__tile__)
+JEMALLOC_INLINE void
+mb_write(void)
+{
+
+	__sync_synchronize();
+}
 #else
 /*
  * This is much slower than a simple memory barrier, but the semantics of mutex
diff --git a/deps/jemalloc/include/jemalloc/internal/mutex.h b/deps/jemalloc/include/jemalloc/internal/mutex.h
index 62947ced..de44e143 100644
--- a/deps/jemalloc/include/jemalloc/internal/mutex.h
+++ b/deps/jemalloc/include/jemalloc/internal/mutex.h
@@ -1,22 +1,42 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
-#ifdef JEMALLOC_OSSPIN
-typedef OSSpinLock malloc_mutex_t;
-#else
-typedef pthread_mutex_t malloc_mutex_t;
-#endif
+typedef struct malloc_mutex_s malloc_mutex_t;
 
-#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
-#  define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
+#ifdef _WIN32
+#  define MALLOC_MUTEX_INITIALIZER
+#elif (defined(JEMALLOC_OSSPIN))
+#  define MALLOC_MUTEX_INITIALIZER {0}
+#elif (defined(JEMALLOC_MUTEX_INIT_CB))
+#  define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL}
 #else
-#  define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+#  if (defined(PTHREAD_MUTEX_ADAPTIVE_NP) &&				\
+       defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP))
+#    define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP
+#    define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP}
+#  else
+#    define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT
+#    define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER}
+#  endif
 #endif
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
+struct malloc_mutex_s {
+#ifdef _WIN32
+	CRITICAL_SECTION	lock;
+#elif (defined(JEMALLOC_OSSPIN))
+	OSSpinLock		lock;
+#elif (defined(JEMALLOC_MUTEX_INIT_CB))
+	pthread_mutex_t		lock;
+	malloc_mutex_t		*postponed_next;
+#else
+	pthread_mutex_t		lock;
+#endif
+};
+
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_EXTERNS
@@ -24,11 +44,15 @@ typedef pthread_mutex_t malloc_mutex_t;
 #ifdef JEMALLOC_LAZY_LOCK
 extern bool isthreaded;
 #else
+#  undef isthreaded /* Undo private_namespace.h definition. */
 #  define isthreaded true
 #endif
 
 bool	malloc_mutex_init(malloc_mutex_t *mutex);
-void	malloc_mutex_destroy(malloc_mutex_t *mutex);
+void	malloc_mutex_prefork(malloc_mutex_t *mutex);
+void	malloc_mutex_postfork_parent(malloc_mutex_t *mutex);
+void	malloc_mutex_postfork_child(malloc_mutex_t *mutex);
+bool	mutex_boot(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
@@ -36,7 +60,6 @@ void	malloc_mutex_destroy(malloc_mutex_t *mutex);
 
 #ifndef JEMALLOC_ENABLE_INLINE
 void	malloc_mutex_lock(malloc_mutex_t *mutex);
-bool	malloc_mutex_trylock(malloc_mutex_t *mutex);
 void	malloc_mutex_unlock(malloc_mutex_t *mutex);
 #endif
 
@@ -46,37 +69,27 @@ malloc_mutex_lock(malloc_mutex_t *mutex)
 {
 
 	if (isthreaded) {
-#ifdef JEMALLOC_OSSPIN
-		OSSpinLockLock(mutex);
+#ifdef _WIN32
+		EnterCriticalSection(&mutex->lock);
+#elif (defined(JEMALLOC_OSSPIN))
+		OSSpinLockLock(&mutex->lock);
 #else
-		pthread_mutex_lock(mutex);
+		pthread_mutex_lock(&mutex->lock);
 #endif
 	}
 }
 
-JEMALLOC_INLINE bool
-malloc_mutex_trylock(malloc_mutex_t *mutex)
-{
-
-	if (isthreaded) {
-#ifdef JEMALLOC_OSSPIN
-		return (OSSpinLockTry(mutex) == false);
-#else
-		return (pthread_mutex_trylock(mutex) != 0);
-#endif
-	} else
-		return (false);
-}
-
 JEMALLOC_INLINE void
 malloc_mutex_unlock(malloc_mutex_t *mutex)
 {
 
 	if (isthreaded) {
-#ifdef JEMALLOC_OSSPIN
-		OSSpinLockUnlock(mutex);
+#ifdef _WIN32
+		LeaveCriticalSection(&mutex->lock);
+#elif (defined(JEMALLOC_OSSPIN))
+		OSSpinLockUnlock(&mutex->lock);
 #else
-		pthread_mutex_unlock(mutex);
+		pthread_mutex_unlock(&mutex->lock);
 #endif
 	}
 }
diff --git a/deps/jemalloc/include/jemalloc/internal/private_namespace.h b/deps/jemalloc/include/jemalloc/internal/private_namespace.h
index d4f5f96d..b8166470 100644
--- a/deps/jemalloc/include/jemalloc/internal/private_namespace.h
+++ b/deps/jemalloc/include/jemalloc/internal/private_namespace.h
@@ -1,36 +1,84 @@
+#define	a0calloc JEMALLOC_N(a0calloc)
+#define	a0free JEMALLOC_N(a0free)
+#define	a0malloc JEMALLOC_N(a0malloc)
+#define	arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small)
 #define	arena_bin_index JEMALLOC_N(arena_bin_index)
+#define	arena_bin_info JEMALLOC_N(arena_bin_info)
 #define	arena_boot JEMALLOC_N(arena_boot)
 #define	arena_dalloc JEMALLOC_N(arena_dalloc)
 #define	arena_dalloc_bin JEMALLOC_N(arena_dalloc_bin)
+#define	arena_dalloc_bin_locked JEMALLOC_N(arena_dalloc_bin_locked)
+#define	arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small)
 #define	arena_dalloc_large JEMALLOC_N(arena_dalloc_large)
+#define	arena_dalloc_large_locked JEMALLOC_N(arena_dalloc_large_locked)
+#define	arena_dalloc_small JEMALLOC_N(arena_dalloc_small)
 #define	arena_malloc JEMALLOC_N(arena_malloc)
 #define	arena_malloc_large JEMALLOC_N(arena_malloc_large)
 #define	arena_malloc_small JEMALLOC_N(arena_malloc_small)
+#define	arena_mapbits_allocated_get JEMALLOC_N(arena_mapbits_allocated_get)
+#define	arena_mapbits_binind_get JEMALLOC_N(arena_mapbits_binind_get)
+#define	arena_mapbits_dirty_get JEMALLOC_N(arena_mapbits_dirty_get)
+#define	arena_mapbits_get JEMALLOC_N(arena_mapbits_get)
+#define	arena_mapbits_large_binind_set JEMALLOC_N(arena_mapbits_large_binind_set)
+#define	arena_mapbits_large_get JEMALLOC_N(arena_mapbits_large_get)
+#define	arena_mapbits_large_set JEMALLOC_N(arena_mapbits_large_set)
+#define	arena_mapbits_large_size_get JEMALLOC_N(arena_mapbits_large_size_get)
+#define	arena_mapbits_small_runind_get JEMALLOC_N(arena_mapbits_small_runind_get)
+#define	arena_mapbits_small_set JEMALLOC_N(arena_mapbits_small_set)
+#define	arena_mapbits_unallocated_set JEMALLOC_N(arena_mapbits_unallocated_set)
+#define	arena_mapbits_unallocated_size_get JEMALLOC_N(arena_mapbits_unallocated_size_get)
+#define	arena_mapbits_unallocated_size_set JEMALLOC_N(arena_mapbits_unallocated_size_set)
+#define	arena_mapbits_unzeroed_get JEMALLOC_N(arena_mapbits_unzeroed_get)
+#define	arena_mapbits_unzeroed_set JEMALLOC_N(arena_mapbits_unzeroed_set)
+#define	arena_mapbitsp_get JEMALLOC_N(arena_mapbitsp_get)
+#define	arena_mapp_get JEMALLOC_N(arena_mapp_get)
+#define	arena_maxclass JEMALLOC_N(arena_maxclass)
 #define	arena_new JEMALLOC_N(arena_new)
 #define	arena_palloc JEMALLOC_N(arena_palloc)
+#define	arena_postfork_child JEMALLOC_N(arena_postfork_child)
+#define	arena_postfork_parent JEMALLOC_N(arena_postfork_parent)
+#define	arena_prefork JEMALLOC_N(arena_prefork)
 #define	arena_prof_accum JEMALLOC_N(arena_prof_accum)
 #define	arena_prof_ctx_get JEMALLOC_N(arena_prof_ctx_get)
 #define	arena_prof_ctx_set JEMALLOC_N(arena_prof_ctx_set)
 #define	arena_prof_promoted JEMALLOC_N(arena_prof_promoted)
+#define	arena_ptr_small_binind_get JEMALLOC_N(arena_ptr_small_binind_get)
 #define	arena_purge_all JEMALLOC_N(arena_purge_all)
 #define	arena_ralloc JEMALLOC_N(arena_ralloc)
 #define	arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move)
 #define	arena_run_regind JEMALLOC_N(arena_run_regind)
 #define	arena_salloc JEMALLOC_N(arena_salloc)
-#define	arena_salloc_demote JEMALLOC_N(arena_salloc_demote)
 #define	arena_stats_merge JEMALLOC_N(arena_stats_merge)
 #define	arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small)
+#define	arenas JEMALLOC_N(arenas)
 #define	arenas_bin_i_index JEMALLOC_N(arenas_bin_i_index)
+#define	arenas_booted JEMALLOC_N(arenas_booted)
+#define	arenas_cleanup JEMALLOC_N(arenas_cleanup)
 #define	arenas_extend JEMALLOC_N(arenas_extend)
+#define	arenas_initialized JEMALLOC_N(arenas_initialized)
+#define	arenas_lock JEMALLOC_N(arenas_lock)
 #define	arenas_lrun_i_index JEMALLOC_N(arenas_lrun_i_index)
+#define	arenas_tls JEMALLOC_N(arenas_tls)
+#define	arenas_tsd_boot JEMALLOC_N(arenas_tsd_boot)
+#define	arenas_tsd_cleanup_wrapper JEMALLOC_N(arenas_tsd_cleanup_wrapper)
+#define	arenas_tsd_get JEMALLOC_N(arenas_tsd_get)
+#define	arenas_tsd_set JEMALLOC_N(arenas_tsd_set)
+#define	atomic_add_u JEMALLOC_N(atomic_add_u)
 #define	atomic_add_uint32 JEMALLOC_N(atomic_add_uint32)
 #define	atomic_add_uint64 JEMALLOC_N(atomic_add_uint64)
+#define	atomic_add_z JEMALLOC_N(atomic_add_z)
+#define	atomic_sub_u JEMALLOC_N(atomic_sub_u)
 #define	atomic_sub_uint32 JEMALLOC_N(atomic_sub_uint32)
 #define	atomic_sub_uint64 JEMALLOC_N(atomic_sub_uint64)
+#define	atomic_sub_z JEMALLOC_N(atomic_sub_z)
 #define	base_alloc JEMALLOC_N(base_alloc)
 #define	base_boot JEMALLOC_N(base_boot)
+#define	base_calloc JEMALLOC_N(base_calloc)
 #define	base_node_alloc JEMALLOC_N(base_node_alloc)
 #define	base_node_dealloc JEMALLOC_N(base_node_dealloc)
+#define	base_postfork_child JEMALLOC_N(base_postfork_child)
+#define	base_postfork_parent JEMALLOC_N(base_postfork_parent)
+#define	base_prefork JEMALLOC_N(base_prefork)
 #define	bitmap_full JEMALLOC_N(bitmap_full)
 #define	bitmap_get JEMALLOC_N(bitmap_get)
 #define	bitmap_info_init JEMALLOC_N(bitmap_info_init)
@@ -47,19 +95,19 @@
 #define	chunk_alloc JEMALLOC_N(chunk_alloc)
 #define	chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss)
 #define	chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap)
-#define	chunk_alloc_mmap_noreserve JEMALLOC_N(chunk_alloc_mmap_noreserve)
-#define	chunk_alloc_swap JEMALLOC_N(chunk_alloc_swap)
 #define	chunk_boot JEMALLOC_N(chunk_boot)
 #define	chunk_dealloc JEMALLOC_N(chunk_dealloc)
-#define	chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss)
 #define	chunk_dealloc_mmap JEMALLOC_N(chunk_dealloc_mmap)
-#define	chunk_dealloc_swap JEMALLOC_N(chunk_dealloc_swap)
 #define	chunk_dss_boot JEMALLOC_N(chunk_dss_boot)
+#define	chunk_dss_postfork_child JEMALLOC_N(chunk_dss_postfork_child)
+#define	chunk_dss_postfork_parent JEMALLOC_N(chunk_dss_postfork_parent)
+#define	chunk_dss_prefork JEMALLOC_N(chunk_dss_prefork)
 #define	chunk_in_dss JEMALLOC_N(chunk_in_dss)
-#define	chunk_in_swap JEMALLOC_N(chunk_in_swap)
-#define	chunk_mmap_boot JEMALLOC_N(chunk_mmap_boot)
-#define	chunk_swap_boot JEMALLOC_N(chunk_swap_boot)
-#define	chunk_swap_enable JEMALLOC_N(chunk_swap_enable)
+#define	chunk_npages JEMALLOC_N(chunk_npages)
+#define	chunks_mtx JEMALLOC_N(chunks_mtx)
+#define	chunks_rtree JEMALLOC_N(chunks_rtree)
+#define	chunksize JEMALLOC_N(chunksize)
+#define	chunksize_mask JEMALLOC_N(chunksize_mask)
 #define	ckh_bucket_search JEMALLOC_N(ckh_bucket_search)
 #define	ckh_count JEMALLOC_N(ckh_count)
 #define	ckh_delete JEMALLOC_N(ckh_delete)
@@ -77,7 +125,6 @@
 #define	ckh_string_keycomp JEMALLOC_N(ckh_string_keycomp)
 #define	ckh_try_bucket_insert JEMALLOC_N(ckh_try_bucket_insert)
 #define	ckh_try_insert JEMALLOC_N(ckh_try_insert)
-#define	create_zone JEMALLOC_N(create_zone)
 #define	ctl_boot JEMALLOC_N(ctl_boot)
 #define	ctl_bymib JEMALLOC_N(ctl_bymib)
 #define	ctl_byname JEMALLOC_N(ctl_byname)
@@ -115,10 +162,17 @@
 #define	extent_tree_szad_reverse_iter_start JEMALLOC_N(extent_tree_szad_reverse_iter_start)
 #define	extent_tree_szad_search JEMALLOC_N(extent_tree_szad_search)
 #define	hash JEMALLOC_N(hash)
+#define	huge_allocated JEMALLOC_N(huge_allocated)
 #define	huge_boot JEMALLOC_N(huge_boot)
 #define	huge_dalloc JEMALLOC_N(huge_dalloc)
 #define	huge_malloc JEMALLOC_N(huge_malloc)
+#define	huge_mtx JEMALLOC_N(huge_mtx)
+#define	huge_ndalloc JEMALLOC_N(huge_ndalloc)
+#define	huge_nmalloc JEMALLOC_N(huge_nmalloc)
 #define	huge_palloc JEMALLOC_N(huge_palloc)
+#define	huge_postfork_child JEMALLOC_N(huge_postfork_child)
+#define	huge_postfork_parent JEMALLOC_N(huge_postfork_parent)
+#define	huge_prefork JEMALLOC_N(huge_prefork)
 #define	huge_prof_ctx_get JEMALLOC_N(huge_prof_ctx_get)
 #define	huge_prof_ctx_set JEMALLOC_N(huge_prof_ctx_set)
 #define	huge_ralloc JEMALLOC_N(huge_ralloc)
@@ -129,21 +183,63 @@
 #define	idalloc JEMALLOC_N(idalloc)
 #define	imalloc JEMALLOC_N(imalloc)
 #define	ipalloc JEMALLOC_N(ipalloc)
+#define	iqalloc JEMALLOC_N(iqalloc)
 #define	iralloc JEMALLOC_N(iralloc)
 #define	isalloc JEMALLOC_N(isalloc)
+#define	isthreaded JEMALLOC_N(isthreaded)
 #define	ivsalloc JEMALLOC_N(ivsalloc)
-#define	jemalloc_darwin_init JEMALLOC_N(jemalloc_darwin_init)
-#define	jemalloc_postfork JEMALLOC_N(jemalloc_postfork)
+#define	jemalloc_postfork_child JEMALLOC_N(jemalloc_postfork_child)
+#define	jemalloc_postfork_parent JEMALLOC_N(jemalloc_postfork_parent)
 #define	jemalloc_prefork JEMALLOC_N(jemalloc_prefork)
 #define	malloc_cprintf JEMALLOC_N(malloc_cprintf)
-#define	malloc_mutex_destroy JEMALLOC_N(malloc_mutex_destroy)
 #define	malloc_mutex_init JEMALLOC_N(malloc_mutex_init)
 #define	malloc_mutex_lock JEMALLOC_N(malloc_mutex_lock)
-#define	malloc_mutex_trylock JEMALLOC_N(malloc_mutex_trylock)
+#define	malloc_mutex_postfork_child JEMALLOC_N(malloc_mutex_postfork_child)
+#define	malloc_mutex_postfork_parent JEMALLOC_N(malloc_mutex_postfork_parent)
+#define	malloc_mutex_prefork JEMALLOC_N(malloc_mutex_prefork)
 #define	malloc_mutex_unlock JEMALLOC_N(malloc_mutex_unlock)
 #define	malloc_printf JEMALLOC_N(malloc_printf)
+#define	malloc_snprintf JEMALLOC_N(malloc_snprintf)
+#define	malloc_strtoumax JEMALLOC_N(malloc_strtoumax)
+#define	malloc_tsd_boot JEMALLOC_N(malloc_tsd_boot)
+#define	malloc_tsd_cleanup_register JEMALLOC_N(malloc_tsd_cleanup_register)
+#define	malloc_tsd_dalloc JEMALLOC_N(malloc_tsd_dalloc)
+#define	malloc_tsd_malloc JEMALLOC_N(malloc_tsd_malloc)
+#define	malloc_tsd_no_cleanup JEMALLOC_N(malloc_tsd_no_cleanup)
+#define	malloc_vcprintf JEMALLOC_N(malloc_vcprintf)
+#define	malloc_vsnprintf JEMALLOC_N(malloc_vsnprintf)
 #define	malloc_write JEMALLOC_N(malloc_write)
+#define	map_bias JEMALLOC_N(map_bias)
 #define	mb_write JEMALLOC_N(mb_write)
+#define	mutex_boot JEMALLOC_N(mutex_boot)
+#define	narenas JEMALLOC_N(narenas)
+#define	ncpus JEMALLOC_N(ncpus)
+#define	nhbins JEMALLOC_N(nhbins)
+#define	opt_abort JEMALLOC_N(opt_abort)
+#define	opt_junk JEMALLOC_N(opt_junk)
+#define	opt_lg_chunk JEMALLOC_N(opt_lg_chunk)
+#define	opt_lg_dirty_mult JEMALLOC_N(opt_lg_dirty_mult)
+#define	opt_lg_prof_interval JEMALLOC_N(opt_lg_prof_interval)
+#define	opt_lg_prof_sample JEMALLOC_N(opt_lg_prof_sample)
+#define	opt_lg_tcache_max JEMALLOC_N(opt_lg_tcache_max)
+#define	opt_narenas JEMALLOC_N(opt_narenas)
+#define	opt_prof JEMALLOC_N(opt_prof)
+#define	opt_prof_accum JEMALLOC_N(opt_prof_accum)
+#define	opt_prof_active JEMALLOC_N(opt_prof_active)
+#define	opt_prof_final JEMALLOC_N(opt_prof_final)
+#define	opt_prof_gdump JEMALLOC_N(opt_prof_gdump)
+#define	opt_prof_leak JEMALLOC_N(opt_prof_leak)
+#define	opt_prof_prefix JEMALLOC_N(opt_prof_prefix)
+#define	opt_quarantine JEMALLOC_N(opt_quarantine)
+#define	opt_redzone JEMALLOC_N(opt_redzone)
+#define	opt_stats_print JEMALLOC_N(opt_stats_print)
+#define	opt_tcache JEMALLOC_N(opt_tcache)
+#define	opt_utrace JEMALLOC_N(opt_utrace)
+#define	opt_valgrind JEMALLOC_N(opt_valgrind)
+#define	opt_xmalloc JEMALLOC_N(opt_xmalloc)
+#define	opt_zero JEMALLOC_N(opt_zero)
+#define	p2rz JEMALLOC_N(p2rz)
+#define	pages_purge JEMALLOC_N(pages_purge)
 #define	pow2_ceil JEMALLOC_N(pow2_ceil)
 #define	prof_backtrace JEMALLOC_N(prof_backtrace)
 #define	prof_boot0 JEMALLOC_N(prof_boot0)
@@ -154,14 +250,31 @@
 #define	prof_free JEMALLOC_N(prof_free)
 #define	prof_gdump JEMALLOC_N(prof_gdump)
 #define	prof_idump JEMALLOC_N(prof_idump)
+#define	prof_interval JEMALLOC_N(prof_interval)
 #define	prof_lookup JEMALLOC_N(prof_lookup)
 #define	prof_malloc JEMALLOC_N(prof_malloc)
 #define	prof_mdump JEMALLOC_N(prof_mdump)
+#define	prof_promote JEMALLOC_N(prof_promote)
 #define	prof_realloc JEMALLOC_N(prof_realloc)
 #define	prof_sample_accum_update JEMALLOC_N(prof_sample_accum_update)
 #define	prof_sample_threshold_update JEMALLOC_N(prof_sample_threshold_update)
+#define	prof_tdata_booted JEMALLOC_N(prof_tdata_booted)
+#define	prof_tdata_cleanup JEMALLOC_N(prof_tdata_cleanup)
+#define	prof_tdata_get JEMALLOC_N(prof_tdata_get)
 #define	prof_tdata_init JEMALLOC_N(prof_tdata_init)
-#define	pthread_create JEMALLOC_N(pthread_create)
+#define	prof_tdata_initialized JEMALLOC_N(prof_tdata_initialized)
+#define	prof_tdata_tls JEMALLOC_N(prof_tdata_tls)
+#define	prof_tdata_tsd_boot JEMALLOC_N(prof_tdata_tsd_boot)
+#define	prof_tdata_tsd_cleanup_wrapper JEMALLOC_N(prof_tdata_tsd_cleanup_wrapper)
+#define	prof_tdata_tsd_get JEMALLOC_N(prof_tdata_tsd_get)
+#define	prof_tdata_tsd_set JEMALLOC_N(prof_tdata_tsd_set)
+#define	quarantine JEMALLOC_N(quarantine)
+#define	quarantine_boot JEMALLOC_N(quarantine_boot)
+#define	quarantine_tsd_boot JEMALLOC_N(quarantine_tsd_boot)
+#define	quarantine_tsd_cleanup_wrapper JEMALLOC_N(quarantine_tsd_cleanup_wrapper)
+#define	quarantine_tsd_get JEMALLOC_N(quarantine_tsd_get)
+#define	quarantine_tsd_set JEMALLOC_N(quarantine_tsd_set)
+#define	register_zone JEMALLOC_N(register_zone)
 #define	rtree_get JEMALLOC_N(rtree_get)
 #define	rtree_get_locked JEMALLOC_N(rtree_get_locked)
 #define	rtree_new JEMALLOC_N(rtree_new)
@@ -171,25 +284,56 @@
 #define	stats_arenas_i_bins_j_index JEMALLOC_N(stats_arenas_i_bins_j_index)
 #define	stats_arenas_i_index JEMALLOC_N(stats_arenas_i_index)
 #define	stats_arenas_i_lruns_j_index JEMALLOC_N(stats_arenas_i_lruns_j_index)
+#define	stats_cactive JEMALLOC_N(stats_cactive)
 #define	stats_cactive_add JEMALLOC_N(stats_cactive_add)
 #define	stats_cactive_get JEMALLOC_N(stats_cactive_get)
 #define	stats_cactive_sub JEMALLOC_N(stats_cactive_sub)
+#define	stats_chunks JEMALLOC_N(stats_chunks)
 #define	stats_print JEMALLOC_N(stats_print)
-#define	szone2ozone JEMALLOC_N(szone2ozone)
 #define	tcache_alloc_easy JEMALLOC_N(tcache_alloc_easy)
 #define	tcache_alloc_large JEMALLOC_N(tcache_alloc_large)
 #define	tcache_alloc_small JEMALLOC_N(tcache_alloc_small)
 #define	tcache_alloc_small_hard JEMALLOC_N(tcache_alloc_small_hard)
+#define	tcache_arena_associate JEMALLOC_N(tcache_arena_associate)
+#define	tcache_arena_dissociate JEMALLOC_N(tcache_arena_dissociate)
 #define	tcache_bin_flush_large JEMALLOC_N(tcache_bin_flush_large)
 #define	tcache_bin_flush_small JEMALLOC_N(tcache_bin_flush_small)
-#define	tcache_boot JEMALLOC_N(tcache_boot)
+#define	tcache_bin_info JEMALLOC_N(tcache_bin_info)
+#define	tcache_boot0 JEMALLOC_N(tcache_boot0)
+#define	tcache_boot1 JEMALLOC_N(tcache_boot1)
+#define	tcache_booted JEMALLOC_N(tcache_booted)
 #define	tcache_create JEMALLOC_N(tcache_create)
 #define	tcache_dalloc_large JEMALLOC_N(tcache_dalloc_large)
 #define	tcache_dalloc_small JEMALLOC_N(tcache_dalloc_small)
 #define	tcache_destroy JEMALLOC_N(tcache_destroy)
+#define	tcache_enabled_booted JEMALLOC_N(tcache_enabled_booted)
+#define	tcache_enabled_get JEMALLOC_N(tcache_enabled_get)
+#define	tcache_enabled_initialized JEMALLOC_N(tcache_enabled_initialized)
+#define	tcache_enabled_set JEMALLOC_N(tcache_enabled_set)
+#define	tcache_enabled_tls JEMALLOC_N(tcache_enabled_tls)
+#define	tcache_enabled_tsd_boot JEMALLOC_N(tcache_enabled_tsd_boot)
+#define	tcache_enabled_tsd_cleanup_wrapper JEMALLOC_N(tcache_enabled_tsd_cleanup_wrapper)
+#define	tcache_enabled_tsd_get JEMALLOC_N(tcache_enabled_tsd_get)
+#define	tcache_enabled_tsd_set JEMALLOC_N(tcache_enabled_tsd_set)
 #define	tcache_event JEMALLOC_N(tcache_event)
+#define	tcache_event_hard JEMALLOC_N(tcache_event_hard)
+#define	tcache_flush JEMALLOC_N(tcache_flush)
 #define	tcache_get JEMALLOC_N(tcache_get)
+#define	tcache_initialized JEMALLOC_N(tcache_initialized)
+#define	tcache_maxclass JEMALLOC_N(tcache_maxclass)
+#define	tcache_salloc JEMALLOC_N(tcache_salloc)
 #define	tcache_stats_merge JEMALLOC_N(tcache_stats_merge)
-#define	thread_allocated_get JEMALLOC_N(thread_allocated_get)
-#define	thread_allocated_get_hard JEMALLOC_N(thread_allocated_get_hard)
-#define	u2s JEMALLOC_N(u2s)
+#define	tcache_thread_cleanup JEMALLOC_N(tcache_thread_cleanup)
+#define	tcache_tls JEMALLOC_N(tcache_tls)
+#define	tcache_tsd_boot JEMALLOC_N(tcache_tsd_boot)
+#define	tcache_tsd_cleanup_wrapper JEMALLOC_N(tcache_tsd_cleanup_wrapper)
+#define	tcache_tsd_get JEMALLOC_N(tcache_tsd_get)
+#define	tcache_tsd_set JEMALLOC_N(tcache_tsd_set)
+#define	thread_allocated_booted JEMALLOC_N(thread_allocated_booted)
+#define	thread_allocated_initialized JEMALLOC_N(thread_allocated_initialized)
+#define	thread_allocated_tls JEMALLOC_N(thread_allocated_tls)
+#define	thread_allocated_tsd_boot JEMALLOC_N(thread_allocated_tsd_boot)
+#define	thread_allocated_tsd_cleanup_wrapper JEMALLOC_N(thread_allocated_tsd_cleanup_wrapper)
+#define	thread_allocated_tsd_get JEMALLOC_N(thread_allocated_tsd_get)
+#define	thread_allocated_tsd_set JEMALLOC_N(thread_allocated_tsd_set)
+#define	u2rz JEMALLOC_N(u2rz)
diff --git a/deps/jemalloc/include/jemalloc/internal/prn.h b/deps/jemalloc/include/jemalloc/internal/prn.h
deleted file mode 100644
index 0709d708..00000000
--- a/deps/jemalloc/include/jemalloc/internal/prn.h
+++ /dev/null
@@ -1,60 +0,0 @@
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-/*
- * Simple linear congruential pseudo-random number generator:
- *
- *   prn(y) = (a*x + c) % m
- *
- * where the following constants ensure maximal period:
- *
- *   a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4.
- *   c == Odd number (relatively prime to 2^n).
- *   m == 2^32
- *
- * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
- *
- * This choice of m has the disadvantage that the quality of the bits is
- * proportional to bit position.  For example. the lowest bit has a cycle of 2,
- * the next has a cycle of 4, etc.  For this reason, we prefer to use the upper
- * bits.
- *
- * Macro parameters:
- *   uint32_t r          : Result.
- *   unsigned lg_range   : (0..32], number of least significant bits to return.
- *   uint32_t state      : Seed value.
- *   const uint32_t a, c : See above discussion.
- */
-#define prn32(r, lg_range, state, a, c) do {				\
-	assert(lg_range > 0);						\
-	assert(lg_range <= 32);						\
-									\
-	r = (state * (a)) + (c);					\
-	state = r;							\
-	r >>= (32 - lg_range);						\
-} while (false)
-
-/* Same as prn32(), but 64 bits of pseudo-randomness, using uint64_t. */
-#define prn64(r, lg_range, state, a, c) do {				\
-	assert(lg_range > 0);						\
-	assert(lg_range <= 64);						\
-									\
-	r = (state * (a)) + (c);					\
-	state = r;							\
-	r >>= (64 - lg_range);						\
-} while (false)
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/prng.h b/deps/jemalloc/include/jemalloc/internal/prng.h
new file mode 100644
index 00000000..83a5462b
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/prng.h
@@ -0,0 +1,60 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/*
+ * Simple linear congruential pseudo-random number generator:
+ *
+ *   prng(y) = (a*x + c) % m
+ *
+ * where the following constants ensure maximal period:
+ *
+ *   a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4.
+ *   c == Odd number (relatively prime to 2^n).
+ *   m == 2^32
+ *
+ * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
+ *
+ * This choice of m has the disadvantage that the quality of the bits is
+ * proportional to bit position.  For example, the lowest bit has a cycle of 2,
+ * the next has a cycle of 4, etc.  For this reason, we prefer to use the upper
+ * bits.
+ *
+ * Macro parameters:
+ *   uint32_t r          : Result.
+ *   unsigned lg_range   : (0..32], number of upper state bits to return in r.
+ *   uint32_t state      : Seed value.
+ *   const uint32_t a, c : See above discussion.
+ */
+#define prng32(r, lg_range, state, a, c) do {				\
+	assert(lg_range > 0);						\
+	assert(lg_range <= 32);						\
+									\
+	r = (state * (a)) + (c);					\
+	state = r;							\
+	r >>= (32 - lg_range);						\
+} while (false)
+
+/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */
+#define prng64(r, lg_range, state, a, c) do {				\
+	assert(lg_range > 0);						\
+	assert(lg_range <= 64);						\
+									\
+	r = (state * (a)) + (c);					\
+	state = r;							\
+	r >>= (64 - lg_range);						\
+} while (false)
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/prof.h b/deps/jemalloc/include/jemalloc/internal/prof.h
index e9064ba6..c3e3f9e4 100644
--- a/deps/jemalloc/include/jemalloc/internal/prof.h
+++ b/deps/jemalloc/include/jemalloc/internal/prof.h
@@ -1,4 +1,3 @@
-#ifdef JEMALLOC_PROF
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
@@ -10,28 +9,41 @@ typedef struct prof_tdata_s prof_tdata_t;
 
 /* Option defaults. */
 #define	PROF_PREFIX_DEFAULT		"jeprof"
-#define	LG_PROF_BT_MAX_DEFAULT		7
-#define	LG_PROF_SAMPLE_DEFAULT		0
+#define	LG_PROF_SAMPLE_DEFAULT		19
 #define	LG_PROF_INTERVAL_DEFAULT	-1
-#define	LG_PROF_TCMAX_DEFAULT		-1
 
 /*
- * Hard limit on stack backtrace depth.  Note that the version of
- * prof_backtrace() that is based on __builtin_return_address() necessarily has
- * a hard-coded number of backtrace frame handlers.
+ * Hard limit on stack backtrace depth.  The version of prof_backtrace() that
+ * is based on __builtin_return_address() necessarily has a hard-coded number
+ * of backtrace frame handlers, and should be kept in sync with this setting.
  */
-#if (defined(JEMALLOC_PROF_LIBGCC) || defined(JEMALLOC_PROF_LIBUNWIND))
-#  define LG_PROF_BT_MAX	((ZU(1) << (LG_SIZEOF_PTR+3)) - 1)
-#else
-#  define LG_PROF_BT_MAX	7 /* >= LG_PROF_BT_MAX_DEFAULT */
-#endif
-#define	PROF_BT_MAX		(1U << LG_PROF_BT_MAX)
+#define	PROF_BT_MAX			128
+
+/* Maximum number of backtraces to store in each per thread LRU cache. */
+#define	PROF_TCMAX			1024
 
 /* Initial hash table size. */
-#define	PROF_CKH_MINITEMS	64
+#define	PROF_CKH_MINITEMS		64
 
 /* Size of memory buffer to use when writing dump files. */
-#define	PROF_DUMP_BUF_SIZE	65536
+#define	PROF_DUMP_BUFSIZE		65536
+
+/* Size of stack-allocated buffer used by prof_printf(). */
+#define	PROF_PRINTF_BUFSIZE		128
+
+/*
+ * Number of mutexes shared among all ctx's.  No space is allocated for these
+ * unless profiling is enabled, so it's okay to over-provision.
+ */
+#define	PROF_NCTX_LOCKS			1024
+
+/*
+ * prof_tdata pointers close to NULL are used to encode state information that
+ * is used for cleaning up during thread shutdown.
+ */
+#define	PROF_TDATA_STATE_REINCARNATED	((prof_tdata_t *)(uintptr_t)1)
+#define	PROF_TDATA_STATE_PURGATORY	((prof_tdata_t *)(uintptr_t)2)
+#define	PROF_TDATA_STATE_MAX		PROF_TDATA_STATE_PURGATORY
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
@@ -109,8 +121,18 @@ struct prof_ctx_s {
 	/* Associated backtrace. */
 	prof_bt_t		*bt;
 
-	/* Protects cnt_merged and cnts_ql. */
-	malloc_mutex_t		lock;
+	/* Protects nlimbo, cnt_merged, and cnts_ql. */
+	malloc_mutex_t		*lock;
+
+	/*
+	 * Number of threads that currently cause this ctx to be in a state of
+	 * limbo due to one of:
+	 *   - Initializing per thread counters associated with this ctx.
+	 *   - Preparing to destroy this ctx.
+	 * nlimbo must be 1 (single destroyer) in order to safely destroy the
+	 * ctx.
+	 */
+	unsigned		nlimbo;
 
 	/* Temporary storage for summation during dump. */
 	prof_cnt_t		cnt_summed;
@@ -145,9 +167,14 @@ struct prof_tdata_s {
 	void			**vec;
 
 	/* Sampling state. */
-	uint64_t		prn_state;
+	uint64_t		prng_state;
 	uint64_t		threshold;
 	uint64_t		accum;
+
+	/* State used to avoid dumping while operating on prof internals. */
+	bool			enq;
+	bool			enq_idump;
+	bool			enq_gdump;
 };
 
 #endif /* JEMALLOC_H_STRUCTS */
@@ -162,13 +189,12 @@ extern bool	opt_prof;
  * to notice state changes.
  */
 extern bool	opt_prof_active;
-extern size_t	opt_lg_prof_bt_max;   /* Maximum backtrace depth. */
 extern size_t	opt_lg_prof_sample;   /* Mean bytes between samples. */
 extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
 extern bool	opt_prof_gdump;       /* High-water memory dumping. */
+extern bool	opt_prof_final;       /* Final profile dumping. */
 extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
 extern bool	opt_prof_accum;       /* Report cumulative bytes. */
-extern ssize_t	opt_lg_prof_tcmax;    /* lg(max per thread bactrace cache) */
 extern char	opt_prof_prefix[PATH_MAX + 1];
 
 /*
@@ -186,39 +212,14 @@ extern uint64_t	prof_interval;
  */
 extern bool	prof_promote;
 
-/* (1U << opt_lg_prof_bt_max). */
-extern unsigned	prof_bt_max;
-
-/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
-#ifndef NO_TLS
-extern __thread prof_tdata_t	*prof_tdata_tls
-    JEMALLOC_ATTR(tls_model("initial-exec"));
-#  define PROF_TCACHE_GET()	prof_tdata_tls
-#  define PROF_TCACHE_SET(v)	do {					\
-	prof_tdata_tls = (v);						\
-	pthread_setspecific(prof_tdata_tsd, (void *)(v));		\
-} while (0)
-#else
-#  define PROF_TCACHE_GET()						\
-	((prof_tdata_t *)pthread_getspecific(prof_tdata_tsd))
-#  define PROF_TCACHE_SET(v)	do {					\
-	pthread_setspecific(prof_tdata_tsd, (void *)(v));		\
-} while (0)
-#endif
-/*
- * Same contents as b2cnt_tls, but initialized such that the TSD destructor is
- * called when a thread exits, so that prof_tdata_tls contents can be merged,
- * unlinked, and deallocated.
- */
-extern pthread_key_t	prof_tdata_tsd;
-
 void	bt_init(prof_bt_t *bt, void **vec);
-void	prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max);
+void	prof_backtrace(prof_bt_t *bt, unsigned nignore);
 prof_thr_cnt_t	*prof_lookup(prof_bt_t *bt);
 void	prof_idump(void);
 bool	prof_mdump(const char *filename);
 void	prof_gdump(void);
 prof_tdata_t	*prof_tdata_init(void);
+void	prof_tdata_cleanup(void *arg);
 void	prof_boot0(void);
 void	prof_boot1(void);
 bool	prof_boot2(void);
@@ -233,13 +234,13 @@ bool	prof_boot2(void);
 									\
 	assert(size == s2u(size));					\
 									\
-	prof_tdata = PROF_TCACHE_GET();					\
-	if (prof_tdata == NULL) {					\
-		prof_tdata = prof_tdata_init();				\
-		if (prof_tdata == NULL) {				\
+	prof_tdata = prof_tdata_get();					\
+	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {	\
+		if (prof_tdata != NULL)					\
+			ret = (prof_thr_cnt_t *)(uintptr_t)1U;		\
+		else							\
 			ret = NULL;					\
-			break;						\
-		}							\
+		break;							\
 	}								\
 									\
 	if (opt_prof_active == false) {					\
@@ -249,13 +250,13 @@ bool	prof_boot2(void);
 		/* Don't bother with sampling logic, since sampling   */\
 		/* interval is 1.                                     */\
 		bt_init(&bt, prof_tdata->vec);				\
-		prof_backtrace(&bt, nignore, prof_bt_max);		\
+		prof_backtrace(&bt, nignore);				\
 		ret = prof_lookup(&bt);					\
 	} else {							\
 		if (prof_tdata->threshold == 0) {			\
 			/* Initialize.  Seed the prng differently for */\
 			/* each thread.                               */\
-			prof_tdata->prn_state =				\
+			prof_tdata->prng_state =			\
 			    (uint64_t)(uintptr_t)&size;			\
 			prof_sample_threshold_update(prof_tdata);	\
 		}							\
@@ -272,7 +273,7 @@ bool	prof_boot2(void);
 		if (size >= prof_tdata->threshold -			\
 		    prof_tdata->accum) {				\
 			bt_init(&bt, prof_tdata->vec);			\
-			prof_backtrace(&bt, nignore, prof_bt_max);	\
+			prof_backtrace(&bt, nignore);			\
 			ret = prof_lookup(&bt);				\
 		} else							\
 			ret = (prof_thr_cnt_t *)(uintptr_t)1U;		\
@@ -280,6 +281,9 @@ bool	prof_boot2(void);
 } while (0)
 
 #ifndef JEMALLOC_ENABLE_INLINE
+malloc_tsd_protos(JEMALLOC_ATTR(unused), prof_tdata, prof_tdata_t *)
+
+prof_tdata_t	*prof_tdata_get(void);
 void	prof_sample_threshold_update(prof_tdata_t *prof_tdata);
 prof_ctx_t	*prof_ctx_get(const void *ptr);
 void	prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
@@ -291,12 +295,35 @@ void	prof_free(const void *ptr, size_t size);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_))
+/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */
+malloc_tsd_externs(prof_tdata, prof_tdata_t *)
+malloc_tsd_funcs(JEMALLOC_INLINE, prof_tdata, prof_tdata_t *, NULL,
+    prof_tdata_cleanup)
+
+JEMALLOC_INLINE prof_tdata_t *
+prof_tdata_get(void) /* This thread's prof_tdata; inits a NULL slot. */
+{
+	prof_tdata_t *prof_tdata;
+
+	cassert(config_prof);
+
+	prof_tdata = *prof_tdata_tsd_get(); /* Thread-specific slot. */
+	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) {
+		if (prof_tdata == NULL) /* Non-NULL sentinels pass through. */
+			prof_tdata = prof_tdata_init(); /* May be NULL. */
+	}
+
+	return (prof_tdata);
+}
+
 JEMALLOC_INLINE void
 prof_sample_threshold_update(prof_tdata_t *prof_tdata)
 {
 	uint64_t r;
 	double u;
 
+	cassert(config_prof);
+
 	/*
 	 * Compute sample threshold as a geometrically distributed random
 	 * variable with mean (2^opt_lg_prof_sample).
@@ -315,8 +342,8 @@ prof_sample_threshold_update(prof_tdata_t *prof_tdata)
 	 *   pp 500
 	 *   (http://cg.scs.carleton.ca/~luc/rnbookindex.html)
 	 */
-	prn64(r, 53, prof_tdata->prn_state,
-	    (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU);
+	prng64(r, 53, prof_tdata->prng_state,
+	    UINT64_C(6364136223846793005), UINT64_C(1442695040888963407));
 	u = (double)r * (1.0/9007199254740992.0L);
 	prof_tdata->threshold = (uint64_t)(log(u) /
 	    log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
@@ -329,13 +356,12 @@ prof_ctx_get(const void *ptr)
 	prof_ctx_t *ret;
 	arena_chunk_t *chunk;
 
+	cassert(config_prof);
 	assert(ptr != NULL);
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (chunk != ptr) {
 		/* Region. */
-		dassert(chunk->arena->magic == ARENA_MAGIC);
-
 		ret = arena_prof_ctx_get(ptr);
 	} else
 		ret = huge_prof_ctx_get(ptr);
@@ -348,13 +374,12 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 {
 	arena_chunk_t *chunk;
 
+	cassert(config_prof);
 	assert(ptr != NULL);
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 	if (chunk != ptr) {
 		/* Region. */
-		dassert(chunk->arena->magic == ARENA_MAGIC);
-
 		arena_prof_ctx_set(ptr, ctx);
 	} else
 		huge_prof_ctx_set(ptr, ctx);
@@ -365,11 +390,13 @@ prof_sample_accum_update(size_t size)
 {
 	prof_tdata_t *prof_tdata;
 
+	cassert(config_prof);
 	/* Sampling logic is unnecessary if the interval is 1. */
 	assert(opt_lg_prof_sample != 0);
 
-	prof_tdata = PROF_TCACHE_GET();
-	assert(prof_tdata != NULL);
+	prof_tdata = *prof_tdata_tsd_get();
+	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+		return (true);
 
 	/* Take care to avoid integer overflow. */
 	if (size >= prof_tdata->threshold - prof_tdata->accum) {
@@ -391,8 +418,9 @@ JEMALLOC_INLINE void
 prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
 {
 
+	cassert(config_prof);
 	assert(ptr != NULL);
-	assert(size == isalloc(ptr));
+	assert(size == isalloc(ptr, true));
 
 	if (opt_lg_prof_sample != 0) {
 		if (prof_sample_accum_update(size)) {
@@ -437,10 +465,11 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
 {
 	prof_thr_cnt_t *told_cnt;
 
+	cassert(config_prof);
 	assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
 
 	if (ptr != NULL) {
-		assert(size == isalloc(ptr));
+		assert(size == isalloc(ptr, true));
 		if (opt_lg_prof_sample != 0) {
 			if (prof_sample_accum_update(size)) {
 				/*
@@ -463,10 +492,10 @@ prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
 			 * It's too late to propagate OOM for this realloc(),
 			 * so operate directly on old_cnt->ctx->cnt_merged.
 			 */
-			malloc_mutex_lock(&old_ctx->lock);
+			malloc_mutex_lock(old_ctx->lock);
 			old_ctx->cnt_merged.curobjs--;
 			old_ctx->cnt_merged.curbytes -= old_size;
-			malloc_mutex_unlock(&old_ctx->lock);
+			malloc_mutex_unlock(old_ctx->lock);
 			told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U;
 		}
 	} else
@@ -510,9 +539,12 @@ prof_free(const void *ptr, size_t size)
 {
 	prof_ctx_t *ctx = prof_ctx_get(ptr);
 
+	cassert(config_prof);
+
 	if ((uintptr_t)ctx > (uintptr_t)1) {
-		assert(size == isalloc(ptr));
-		prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt);
+		prof_thr_cnt_t *tcnt;
+		assert(size == isalloc(ptr, true));
+		tcnt = prof_lookup(ctx->bt);
 
 		if (tcnt != NULL) {
 			tcnt->epoch++;
@@ -533,10 +565,10 @@ prof_free(const void *ptr, size_t size)
 			 * OOM during free() cannot be propagated, so operate
 			 * directly on cnt->ctx->cnt_merged.
 			 */
-			malloc_mutex_lock(&ctx->lock);
+			malloc_mutex_lock(ctx->lock);
 			ctx->cnt_merged.curobjs--;
 			ctx->cnt_merged.curbytes -= size;
-			malloc_mutex_unlock(&ctx->lock);
+			malloc_mutex_unlock(ctx->lock);
 		}
 	}
 }
@@ -544,4 +576,3 @@ prof_free(const void *ptr, size_t size)
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
-#endif /* JEMALLOC_PROF */
diff --git a/deps/jemalloc/include/jemalloc/internal/quarantine.h b/deps/jemalloc/include/jemalloc/internal/quarantine.h
new file mode 100644
index 00000000..38f3d696
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/quarantine.h
@@ -0,0 +1,24 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/* Default per thread quarantine size if valgrind is enabled. */
+#define	JEMALLOC_VALGRIND_QUARANTINE_DEFAULT	(ZU(1) << 24)
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+void	quarantine(void *ptr);
+bool	quarantine_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+
diff --git a/deps/jemalloc/include/jemalloc/internal/rb.h b/deps/jemalloc/include/jemalloc/internal/rb.h
index ee9b009d..7b675f09 100644
--- a/deps/jemalloc/include/jemalloc/internal/rb.h
+++ b/deps/jemalloc/include/jemalloc/internal/rb.h
@@ -223,88 +223,88 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start,		\
  * The following API is generated:
  *
  *   static void
- *   ex_new(ex_t *extree);
+ *   ex_new(ex_t *tree);
  *       Description: Initialize a red-black tree structure.
  *       Args:
- *         extree: Pointer to an uninitialized red-black tree object.
+ *         tree: Pointer to an uninitialized red-black tree object.
  *
  *   static ex_node_t *
- *   ex_first(ex_t *extree);
+ *   ex_first(ex_t *tree);
  *   static ex_node_t *
- *   ex_last(ex_t *extree);
- *       Description: Get the first/last node in extree.
+ *   ex_last(ex_t *tree);
+ *       Description: Get the first/last node in tree.
  *       Args:
- *         extree: Pointer to an initialized red-black tree object.
- *       Ret: First/last node in extree, or NULL if extree is empty.
+ *         tree: Pointer to an initialized red-black tree object.
+ *       Ret: First/last node in tree, or NULL if tree is empty.
  *
  *   static ex_node_t *
- *   ex_next(ex_t *extree, ex_node_t *node);
+ *   ex_next(ex_t *tree, ex_node_t *node);
  *   static ex_node_t *
- *   ex_prev(ex_t *extree, ex_node_t *node);
+ *   ex_prev(ex_t *tree, ex_node_t *node);
  *       Description: Get node's successor/predecessor.
  *       Args:
- *         extree: Pointer to an initialized red-black tree object.
- *         node : A node in extree.
- *       Ret: node's successor/predecessor in extree, or NULL if node is
+ *         tree: Pointer to an initialized red-black tree object.
+ *         node: A node in tree.
+ *       Ret: node's successor/predecessor in tree, or NULL if node is
  *            last/first.
  *
  *   static ex_node_t *
- *   ex_search(ex_t *extree, ex_node_t *key);
+ *   ex_search(ex_t *tree, ex_node_t *key);
  *       Description: Search for node that matches key.
  *       Args:
- *         extree: Pointer to an initialized red-black tree object.
- *         key  : Search key.
- *       Ret: Node in extree that matches key, or NULL if no match.
+ *         tree: Pointer to an initialized red-black tree object.
+ *         key : Search key.
+ *       Ret: Node in tree that matches key, or NULL if no match.
  *
  *   static ex_node_t *
- *   ex_nsearch(ex_t *extree, ex_node_t *key);
+ *   ex_nsearch(ex_t *tree, ex_node_t *key);
  *   static ex_node_t *
- *   ex_psearch(ex_t *extree, ex_node_t *key);
+ *   ex_psearch(ex_t *tree, ex_node_t *key);
  *       Description: Search for node that matches key.  If no match is found,
  *                    return what would be key's successor/predecessor, were
- *                    key in extree.
+ *                    key in tree.
  *       Args:
- *         extree: Pointer to an initialized red-black tree object.
- *         key   : Search key.
- *       Ret: Node in extree that matches key, or if no match, hypothetical
- *            node's successor/predecessor (NULL if no successor/predecessor).
+ *         tree: Pointer to an initialized red-black tree object.
+ *         key : Search key.
+ *       Ret: Node in tree that matches key, or if no match, hypothetical node's
+ *            successor/predecessor (NULL if no successor/predecessor).
  *
  *   static void
- *   ex_insert(ex_t *extree, ex_node_t *node);
- *       Description: Insert node into extree.
+ *   ex_insert(ex_t *tree, ex_node_t *node);
+ *       Description: Insert node into tree.
  *       Args:
- *         extree: Pointer to an initialized red-black tree object.
- *         node  : Node to be inserted into extree.
+ *         tree: Pointer to an initialized red-black tree object.
+ *         node: Node to be inserted into tree.
  *
  *   static void
- *   ex_remove(ex_t *extree, ex_node_t *node);
- *       Description: Remove node from extree.
+ *   ex_remove(ex_t *tree, ex_node_t *node);
+ *       Description: Remove node from tree.
  *       Args:
- *         extree: Pointer to an initialized red-black tree object.
- *         node  : Node in extree to be removed.
+ *         tree: Pointer to an initialized red-black tree object.
+ *         node: Node in tree to be removed.
  *
  *   static ex_node_t *
- *   ex_iter(ex_t *extree, ex_node_t *start, ex_node_t *(*cb)(ex_t *,
+ *   ex_iter(ex_t *tree, ex_node_t *start, ex_node_t *(*cb)(ex_t *,
  *     ex_node_t *, void *), void *arg);
  *   static ex_node_t *
- *   ex_reverse_iter(ex_t *extree, ex_node_t *start, ex_node *(*cb)(ex_t *,
+ *   ex_reverse_iter(ex_t *tree, ex_node_t *start, ex_node_t *(*cb)(ex_t *,
  *     ex_node_t *, void *), void *arg);
- *       Description: Iterate forward/backward over extree, starting at node.
- *                    If extree is modified, iteration must be immediately
+ *       Description: Iterate forward/backward over tree, starting at start.  If
+ *                    tree is modified, iteration must be immediately
  *                    terminated by the callback function that causes the
  *                    modification.
  *       Args:
- *         extree: Pointer to an initialized red-black tree object.
- *         start : Node at which to start iteration, or NULL to start at
- *                 first/last node.
- *         cb    : Callback function, which is called for each node during
- *                 iteration.  Under normal circumstances the callback function
- *                 should return NULL, which causes iteration to continue.  If a
- *                 callback function returns non-NULL, iteration is immediately
- *                 terminated and the non-NULL return value is returned by the
- *                 iterator.  This is useful for re-starting iteration after
- *                 modifying extree.
- *         arg   : Opaque pointer passed to cb().
+ *         tree : Pointer to an initialized red-black tree object.
+ *         start: Node at which to start iteration, or NULL to start at
+ *                first/last node.
+ *         cb   : Callback function, which is called for each node during
+ *                iteration.  Under normal circumstances the callback function
+ *                should return NULL, which causes iteration to continue.  If a
+ *                callback function returns non-NULL, iteration is immediately
+ *                terminated and the non-NULL return value is returned by the
+ *                iterator.  This is useful for re-starting iteration after
+ *                modifying tree.
+ *         arg  : Opaque pointer passed to cb().
  *       Ret: NULL if iteration completed, or the non-NULL callback return value
  *            that caused termination of the iteration.
  */
diff --git a/deps/jemalloc/include/jemalloc/internal/size_classes.sh b/deps/jemalloc/include/jemalloc/internal/size_classes.sh
new file mode 100755
index 00000000..29c80c1f
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/size_classes.sh
@@ -0,0 +1,122 @@
+#!/bin/sh
+
+# The following limits are chosen such that they cover all supported platforms.
+
+# Range of quanta.
+lg_qmin=3
+lg_qmax=4
+
+# The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)].
+lg_tmin=3
+
+# Range of page sizes.
+lg_pmin=12
+lg_pmax=16
+
+pow2() {  # Sets pow2_result to 2**$1 (1 if $1 <= 0); clobbers global e.
+  e=$1
+  pow2_result=1
+  while [ ${e} -gt 0 ] ; do
+    pow2_result=$((${pow2_result} + ${pow2_result}))  # Double once per step.
+    e=$((${e} - 1))
+  done
+}
+
+cat <<EOF
+/* This file was automatically generated by size_classes.sh. */
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+EOF
+
+lg_q=${lg_qmin}
+while [ ${lg_q} -le ${lg_qmax} ] ; do
+  lg_t=${lg_tmin}
+  while [ ${lg_t} -le ${lg_q} ] ; do
+    lg_p=${lg_pmin}
+    while [ ${lg_p} -le ${lg_pmax} ] ; do
+      echo "#if (LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && LG_PAGE == ${lg_p})"
+      echo "#define	SIZE_CLASSES_DEFINED"
+      pow2 ${lg_q}; q=${pow2_result}
+      pow2 ${lg_t}; t=${pow2_result}
+      pow2 ${lg_p}; p=${pow2_result}
+      bin=0
+      psz=0
+      sz=${t}
+      delta=$((${sz} - ${psz}))
+      echo "/*  SIZE_CLASS(bin,	delta,	sz) */"
+      echo "#define	SIZE_CLASSES							\\"
+
+      # Tiny size classes.
+      while [ ${sz} -lt ${q} ] ; do
+        echo "    SIZE_CLASS(${bin},	${delta},	${sz})					\\"
+        bin=$((${bin} + 1))
+        psz=${sz}
+        sz=$((${sz} + ${sz}))
+        delta=$((${sz} - ${psz}))
+      done
+      # Quantum-multiple size classes.  For each doubling of sz, as many as 4
+      # size classes exist.  Their spacing is the greater of:
+      # - q
+      # - sz/4, where sz is a power of 2
+      while [ ${sz} -lt ${p} ] ; do
+        if [ ${sz} -ge $((${q} * 4)) ] ; then
+          i=$((${sz} / 4))
+        else
+          i=${q}
+        fi
+        next_2pow=$((${sz} * 2))
+        while [ ${sz} -lt $next_2pow ] ; do
+          echo "    SIZE_CLASS(${bin},	${delta},	${sz})					\\"
+          bin=$((${bin} + 1))
+          psz=${sz}
+          sz=$((${sz} + ${i}))
+          delta=$((${sz} - ${psz}))
+        done
+      done
+      echo
+      echo "#define	NBINS		${bin}"
+      echo "#define	SMALL_MAXCLASS	${psz}"
+      echo "#endif"
+      echo
+      lg_p=$((${lg_p} + 1))
+    done
+    lg_t=$((${lg_t} + 1))
+  done
+  lg_q=$((${lg_q} + 1))
+done
+
+cat <<EOF
+#ifndef SIZE_CLASSES_DEFINED
+#  error "No size class definitions match configuration"
+#endif
+#undef SIZE_CLASSES_DEFINED
+/*
+ * The small_size2bin lookup table uses uint8_t to encode each bin index, so we
+ * cannot support more than 256 small size classes.  Further constrain NBINS to
+ * 255 to support prof_promote, since all small size classes, plus a "not
+ * small" size class must be stored in 8 bits of arena_chunk_map_t's bits
+ * field.
+ */
+#if (NBINS > 255)
+#  error "Too many small size classes"
+#endif
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
+EOF
diff --git a/deps/jemalloc/include/jemalloc/internal/stats.h b/deps/jemalloc/include/jemalloc/internal/stats.h
index 2a9b31d9..27f68e36 100644
--- a/deps/jemalloc/include/jemalloc/internal/stats.h
+++ b/deps/jemalloc/include/jemalloc/internal/stats.h
@@ -1,25 +1,16 @@
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
-#define	UMAX2S_BUFSIZE	65
-
-#ifdef JEMALLOC_STATS
 typedef struct tcache_bin_stats_s tcache_bin_stats_t;
 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
 typedef struct malloc_large_stats_s malloc_large_stats_t;
 typedef struct arena_stats_s arena_stats_t;
-#endif
-#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
 typedef struct chunk_stats_s chunk_stats_t;
-#endif
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
-#ifdef JEMALLOC_STATS
-
-#ifdef JEMALLOC_TCACHE
 struct tcache_bin_stats_s {
 	/*
 	 * Number of allocation requests that corresponded to the size of this
@@ -27,7 +18,6 @@ struct tcache_bin_stats_s {
 	 */
 	uint64_t	nrequests;
 };
-#endif
 
 struct malloc_bin_stats_s {
 	/*
@@ -52,13 +42,11 @@ struct malloc_bin_stats_s {
 	 */
 	uint64_t	nrequests;
 
-#ifdef JEMALLOC_TCACHE
 	/* Number of tcache fills from this bin. */
 	uint64_t	nfills;
 
 	/* Number of tcache flushes to this bin. */
 	uint64_t	nflushes;
-#endif
 
 	/* Total number of runs created for this bin's size class. */
 	uint64_t	nruns;
@@ -69,9 +57,6 @@ struct malloc_bin_stats_s {
 	 */
 	uint64_t	reruns;
 
-	/* High-water mark for this bin. */
-	size_t		highruns;
-
 	/* Current number of runs in this bin. */
 	size_t		curruns;
 };
@@ -93,9 +78,6 @@ struct malloc_large_stats_s {
 	 */
 	uint64_t	nrequests;
 
-	/* High-water mark for this size class. */
-	size_t		highruns;
-
 	/* Current number of runs of this size class. */
 	size_t		curruns;
 };
@@ -127,14 +109,10 @@ struct arena_stats_s {
 	 */
 	malloc_large_stats_t	*lstats;
 };
-#endif /* JEMALLOC_STATS */
 
-#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
 struct chunk_stats_s {
-#  ifdef JEMALLOC_STATS
 	/* Number of chunks that were allocated. */
 	uint64_t	nchunks;
-#  endif
 
 	/* High-water mark for number of chunks allocated. */
 	size_t		highchunks;
@@ -146,7 +124,6 @@ struct chunk_stats_s {
 	 */
 	size_t		curchunks;
 };
-#endif /* JEMALLOC_STATS */
 
 #endif /* JEMALLOC_H_STRUCTS */
 /******************************************************************************/
@@ -154,24 +131,14 @@ struct chunk_stats_s {
 
 extern bool	opt_stats_print;
 
-#ifdef JEMALLOC_STATS
 extern size_t	stats_cactive;
-#endif
 
-char	*u2s(uint64_t x, unsigned base, char *s);
-#ifdef JEMALLOC_STATS
-void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque,
-    const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4));
-void	malloc_printf(const char *format, ...)
-    JEMALLOC_ATTR(format(printf, 1, 2));
-#endif
 void	stats_print(void (*write)(void *, const char *), void *cbopaque,
     const char *opts);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
-#ifdef JEMALLOC_STATS
 
 #ifndef JEMALLOC_ENABLE_INLINE
 size_t	stats_cactive_get(void);
@@ -202,6 +169,5 @@ stats_cactive_sub(size_t size)
 }
 #endif
 
-#endif /* JEMALLOC_STATS */
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/tcache.h b/deps/jemalloc/include/jemalloc/internal/tcache.h
index da3c68c5..38d735c8 100644
--- a/deps/jemalloc/include/jemalloc/internal/tcache.h
+++ b/deps/jemalloc/include/jemalloc/internal/tcache.h
@@ -1,4 +1,3 @@
-#ifdef JEMALLOC_TCACHE
 /******************************************************************************/
 #ifdef JEMALLOC_H_TYPES
 
@@ -6,6 +5,16 @@ typedef struct tcache_bin_info_s tcache_bin_info_t;
 typedef struct tcache_bin_s tcache_bin_t;
 typedef struct tcache_s tcache_t;
 
+/*
+ * tcache pointers close to NULL are used to encode state information that is
+ * used for two purposes: preventing thread caching on a per thread basis and
+ * cleaning up during thread shutdown.
+ */
+#define	TCACHE_STATE_DISABLED		((tcache_t *)(uintptr_t)1)
+#define	TCACHE_STATE_REINCARNATED	((tcache_t *)(uintptr_t)2)
+#define	TCACHE_STATE_PURGATORY		((tcache_t *)(uintptr_t)3)
+#define	TCACHE_STATE_MAX		TCACHE_STATE_PURGATORY
+
 /*
  * Absolute maximum number of cache slots for each small bin in the thread
  * cache.  This is an additional constraint beyond that imposed as: twice the
@@ -22,17 +31,26 @@ typedef struct tcache_s tcache_t;
 #define	LG_TCACHE_MAXCLASS_DEFAULT	15
 
 /*
- * (1U << opt_lg_tcache_gc_sweep) is the approximate number of allocation
- * events between full GC sweeps (-1: disabled).  Integer rounding may cause
- * the actual number to be slightly higher, since GC is performed
- * incrementally.
+ * TCACHE_GC_SWEEP is the approximate number of allocation events between
+ * full GC sweeps.  Integer rounding may cause the actual number to be
+ * slightly higher, since GC is performed incrementally.
  */
-#define	LG_TCACHE_GC_SWEEP_DEFAULT	13
+#define	TCACHE_GC_SWEEP			8192
+
+/* Number of tcache allocation/deallocation events between incremental GCs. */
+#define	TCACHE_GC_INCR							\
+    ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1))
 
 #endif /* JEMALLOC_H_TYPES */
 /******************************************************************************/
 #ifdef JEMALLOC_H_STRUCTS
 
+typedef enum { /* Per-thread tcache enable flag. */
+	tcache_enabled_false   = 0, /* Enable cast to/from bool. */
+	tcache_enabled_true    = 1,
+	tcache_enabled_default = 2  /* Unset; resolved from opt_tcache. */
+} tcache_enabled_t;
+
 /*
  * Read-only information associated with each element of tcache_t's tbins array
  * is stored separately, mainly to reduce memory usage.
@@ -42,9 +60,7 @@ struct tcache_bin_info_s {
 };
 
 struct tcache_bin_s {
-#  ifdef JEMALLOC_STATS
 	tcache_bin_stats_t tstats;
-#  endif
 	int		low_water;	/* Min # cached since last GC. */
 	unsigned	lg_fill_div;	/* Fill (ncached_max >> lg_fill_div). */
 	unsigned	ncached;	/* # of cached objects. */
@@ -52,12 +68,8 @@ struct tcache_bin_s {
 };
 
 struct tcache_s {
-#  ifdef JEMALLOC_STATS
 	ql_elm(tcache_t) link;		/* Used for aggregating stats. */
-#  endif
-#  ifdef JEMALLOC_PROF
 	uint64_t	prof_accumbytes;/* Cleared after arena_prof_accum() */
-#  endif
 	arena_t		*arena;		/* This thread's arena. */
 	unsigned	ev_cnt;		/* Event count since incremental GC. */
 	unsigned	next_gc_bin;	/* Next bin to GC. */
@@ -76,29 +88,11 @@ struct tcache_s {
 
 extern bool	opt_tcache;
 extern ssize_t	opt_lg_tcache_max;
-extern ssize_t	opt_lg_tcache_gc_sweep;
 
 extern tcache_bin_info_t	*tcache_bin_info;
 
-/* Map of thread-specific caches. */
-#ifndef NO_TLS
-extern __thread tcache_t	*tcache_tls
-    JEMALLOC_ATTR(tls_model("initial-exec"));
-#  define TCACHE_GET()	tcache_tls
-#  define TCACHE_SET(v)	do {						\
-	tcache_tls = (tcache_t *)(v);					\
-	pthread_setspecific(tcache_tsd, (void *)(v));			\
-} while (0)
-#else
-#  define TCACHE_GET()	((tcache_t *)pthread_getspecific(tcache_tsd))
-#  define TCACHE_SET(v)	do {						\
-	pthread_setspecific(tcache_tsd, (void *)(v));			\
-} while (0)
-#endif
-extern pthread_key_t		tcache_tsd;
-
 /*
- * Number of tcache bins.  There are nbins small-object bins, plus 0 or more
+ * Number of tcache bins.  There are NBINS small-object bins, plus 0 or more
  * large-object bins.
  */
 extern size_t			nhbins;
@@ -106,68 +100,159 @@ extern size_t			nhbins;
 /* Maximum cached size class. */
 extern size_t			tcache_maxclass;
 
-/* Number of tcache allocation/deallocation events between incremental GCs. */
-extern unsigned			tcache_gc_incr;
-
-void	tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
-#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
-    , tcache_t *tcache
-#endif
-    );
-void	tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
-#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
-    , tcache_t *tcache
-#endif
-    );
-tcache_t *tcache_create(arena_t *arena);
+size_t	tcache_salloc(const void *ptr);
+void	tcache_event_hard(tcache_t *tcache);
 void	*tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin,
     size_t binind);
+void	tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
+    tcache_t *tcache);
+void	tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
+    tcache_t *tcache);
+void	tcache_arena_associate(tcache_t *tcache, arena_t *arena);
+void	tcache_arena_dissociate(tcache_t *tcache);
+tcache_t *tcache_create(arena_t *arena);
 void	tcache_destroy(tcache_t *tcache);
-#ifdef JEMALLOC_STATS
+void	tcache_thread_cleanup(void *arg);
 void	tcache_stats_merge(tcache_t *tcache, arena_t *arena);
-#endif
-bool	tcache_boot(void);
+bool	tcache_boot0(void);
+bool	tcache_boot1(void);
 
 #endif /* JEMALLOC_H_EXTERNS */
 /******************************************************************************/
 #ifdef JEMALLOC_H_INLINES
 
 #ifndef JEMALLOC_ENABLE_INLINE
+malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache, tcache_t *)
+malloc_tsd_protos(JEMALLOC_ATTR(unused), tcache_enabled, tcache_enabled_t)
+
 void	tcache_event(tcache_t *tcache);
-tcache_t *tcache_get(void);
+void	tcache_flush(void);
+bool	tcache_enabled_get(void);
+tcache_t *tcache_get(bool create);
+void	tcache_enabled_set(bool enabled);
 void	*tcache_alloc_easy(tcache_bin_t *tbin);
 void	*tcache_alloc_small(tcache_t *tcache, size_t size, bool zero);
 void	*tcache_alloc_large(tcache_t *tcache, size_t size, bool zero);
-void	tcache_dalloc_small(tcache_t *tcache, void *ptr);
+void	tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind);
 void	tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size);
 #endif
 
 #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_))
+/* Map of thread-specific caches. */
+malloc_tsd_externs(tcache, tcache_t *)
+malloc_tsd_funcs(JEMALLOC_INLINE, tcache, tcache_t *, NULL,
+    tcache_thread_cleanup)
+/* Per thread flag that allows thread caches to be disabled. */
+malloc_tsd_externs(tcache_enabled, tcache_enabled_t)
+malloc_tsd_funcs(JEMALLOC_INLINE, tcache_enabled, tcache_enabled_t,
+    tcache_enabled_default, malloc_tsd_no_cleanup)
+
+JEMALLOC_INLINE void
+tcache_flush(void) /* Destroy this thread's tcache, if one exists. */
+{
+	tcache_t *tcache;
+
+	cassert(config_tcache);
+
+	tcache = *tcache_tsd_get();
+	if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX)
+		return; /* NULL or state sentinel: nothing to destroy. */
+	tcache_destroy(tcache);
+	tcache = NULL; /* Reset slot; tcache_get(true) may recreate. */
+	tcache_tsd_set(&tcache);
+}
+
+JEMALLOC_INLINE bool
+tcache_enabled_get(void) /* Is tcache enabled for this thread? */
+{
+	tcache_enabled_t tcache_enabled;
+
+	cassert(config_tcache);
+
+	tcache_enabled = *tcache_enabled_tsd_get();
+	if (tcache_enabled == tcache_enabled_default) { /* Unresolved. */
+		tcache_enabled = (tcache_enabled_t)opt_tcache;
+		tcache_enabled_tsd_set(&tcache_enabled); /* Memoize. */
+	}
+
+	return ((bool)tcache_enabled);
+}
+
+JEMALLOC_INLINE void
+tcache_enabled_set(bool enabled) /* Set this thread's tcache flag. */
+{
+	tcache_enabled_t tcache_enabled;
+	tcache_t *tcache;
+
+	cassert(config_tcache);
+
+	tcache_enabled = (tcache_enabled_t)enabled;
+	tcache_enabled_tsd_set(&tcache_enabled); /* Record the flag. */
+	tcache = *tcache_tsd_get();
+	if (enabled) {
+		if (tcache == TCACHE_STATE_DISABLED) {
+			tcache = NULL; /* Let tcache_get() create one. */
+			tcache_tsd_set(&tcache);
+		}
+	} else /* disabled */ {
+		if ((uintptr_t)tcache > (uintptr_t)TCACHE_STATE_MAX) {
+			tcache_destroy(tcache); /* Live cache: tear it down. */
+			tcache = NULL;
+		}
+		if (tcache == NULL) { /* Other sentinels are left alone. */
+			tcache = TCACHE_STATE_DISABLED;
+			tcache_tsd_set(&tcache);
+		}
+	}
+}
+
 JEMALLOC_INLINE tcache_t *
-tcache_get(void)
+tcache_get(bool create)
 {
 	tcache_t *tcache;
 
-	if ((isthreaded & opt_tcache) == false)
+	if (config_tcache == false)
+		return (NULL);
+	if (config_lazy_lock && isthreaded == false)
 		return (NULL);
 
-	tcache = TCACHE_GET();
-	if ((uintptr_t)tcache <= (uintptr_t)2) {
+	tcache = *tcache_tsd_get();
+	if ((uintptr_t)tcache <= (uintptr_t)TCACHE_STATE_MAX) {
+		if (tcache == TCACHE_STATE_DISABLED)
+			return (NULL);
 		if (tcache == NULL) {
-			tcache = tcache_create(choose_arena());
-			if (tcache == NULL)
-				return (NULL);
-		} else {
-			if (tcache == (void *)(uintptr_t)1) {
+			if (create == false) {
 				/*
-				 * Make a note that an allocator function was
-				 * called after the tcache_thread_cleanup() was
-				 * called.
+				 * Creating a tcache here would cause
+				 * allocation as a side effect of free().
+				 * Ordinarily that would be okay since
+				 * tcache_create() failure is a soft failure
+				 * that doesn't propagate.  However, if TLS
+				 * data are freed via free() as in glibc,
+				 * subtle corruption could result from setting
+				 * a TLS variable after its backing memory is
+				 * freed.
 				 */
-				TCACHE_SET((uintptr_t)2);
+				return (NULL);
+			}
+			if (tcache_enabled_get() == false) {
+				tcache_enabled_set(false); /* Memoize. */
+				return (NULL);
 			}
+			return (tcache_create(choose_arena(NULL)));
+		}
+		if (tcache == TCACHE_STATE_PURGATORY) {
+			/*
+			 * Make a note that an allocator function was called
+			 * after tcache_thread_cleanup() was called.
+			 */
+			tcache = TCACHE_STATE_REINCARNATED;
+			tcache_tsd_set(&tcache);
 			return (NULL);
 		}
+		if (tcache == TCACHE_STATE_REINCARNATED)
+			return (NULL);
+		not_reached();
 	}
 
 	return (tcache);
@@ -177,60 +262,13 @@ JEMALLOC_INLINE void
 tcache_event(tcache_t *tcache)
 {
 
-	if (tcache_gc_incr == 0)
+	if (TCACHE_GC_INCR == 0)
 		return;
 
 	tcache->ev_cnt++;
-	assert(tcache->ev_cnt <= tcache_gc_incr);
-	if (tcache->ev_cnt == tcache_gc_incr) {
-		size_t binind = tcache->next_gc_bin;
-		tcache_bin_t *tbin = &tcache->tbins[binind];
-		tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];
-
-		if (tbin->low_water > 0) {
-			/*
-			 * Flush (ceiling) 3/4 of the objects below the low
-			 * water mark.
-			 */
-			if (binind < nbins) {
-				tcache_bin_flush_small(tbin, binind,
-				    tbin->ncached - tbin->low_water +
-				    (tbin->low_water >> 2)
-#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
-				    , tcache
-#endif
-				    );
-			} else {
-				tcache_bin_flush_large(tbin, binind,
-				    tbin->ncached - tbin->low_water +
-				    (tbin->low_water >> 2)
-#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
-				    , tcache
-#endif
-				    );
-			}
-			/*
-			 * Reduce fill count by 2X.  Limit lg_fill_div such that
-			 * the fill count is always at least 1.
-			 */
-			if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1))
-			    >= 1)
-				tbin->lg_fill_div++;
-		} else if (tbin->low_water < 0) {
-			/*
-			 * Increase fill count by 2X.  Make sure lg_fill_div
-			 * stays greater than 0.
-			 */
-			if (tbin->lg_fill_div > 1)
-				tbin->lg_fill_div--;
-		}
-		tbin->low_water = tbin->ncached;
-
-		tcache->next_gc_bin++;
-		if (tcache->next_gc_bin == nhbins)
-			tcache->next_gc_bin = 0;
-		tcache->ev_cnt = 0;
-	}
+	assert(tcache->ev_cnt <= TCACHE_GC_INCR);
+	if (tcache->ev_cnt == TCACHE_GC_INCR)
+		tcache_event_hard(tcache);
 }
 
 JEMALLOC_INLINE void *
@@ -257,7 +295,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
 	tcache_bin_t *tbin;
 
 	binind = SMALL_SIZE2BIN(size);
-	assert(binind < nbins);
+	assert(binind < NBINS);
 	tbin = &tcache->tbins[binind];
 	ret = tcache_alloc_easy(tbin);
 	if (ret == NULL) {
@@ -265,24 +303,29 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
 		if (ret == NULL)
 			return (NULL);
 	}
-	assert(arena_salloc(ret) == arena_bin_info[binind].reg_size);
+	assert(tcache_salloc(ret) == arena_bin_info[binind].reg_size);
 
 	if (zero == false) {
-#ifdef JEMALLOC_FILL
-		if (opt_junk)
-			memset(ret, 0xa5, size);
-		else if (opt_zero)
-			memset(ret, 0, size);
-#endif
-	} else
+		if (config_fill) {
+			if (opt_junk) {
+				arena_alloc_junk_small(ret,
+				    &arena_bin_info[binind], false);
+			} else if (opt_zero)
+				memset(ret, 0, size);
+		}
+	} else {
+		if (config_fill && opt_junk) {
+			arena_alloc_junk_small(ret, &arena_bin_info[binind],
+			    true);
+		}
+		VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 		memset(ret, 0, size);
+	}
 
-#ifdef JEMALLOC_STATS
-	tbin->tstats.nrequests++;
-#endif
-#ifdef JEMALLOC_PROF
-	tcache->prof_accumbytes += arena_bin_info[binind].reg_size;
-#endif
+	if (config_stats)
+		tbin->tstats.nrequests++;
+	if (config_prof)
+		tcache->prof_accumbytes += arena_bin_info[binind].reg_size;
 	tcache_event(tcache);
 	return (ret);
 }
@@ -296,7 +339,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
 
 	size = PAGE_CEILING(size);
 	assert(size <= tcache_maxclass);
-	binind = nbins + (size >> PAGE_SHIFT) - 1;
+	binind = NBINS + (size >> LG_PAGE) - 1;
 	assert(binind < nhbins);
 	tbin = &tcache->tbins[binind];
 	ret = tcache_alloc_easy(tbin);
@@ -309,28 +352,30 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
 		if (ret == NULL)
 			return (NULL);
 	} else {
-#ifdef JEMALLOC_PROF
-		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
-		size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
-		    PAGE_SHIFT);
-		chunk->map[pageind-map_bias].bits &= ~CHUNK_MAP_CLASS_MASK;
-#endif
+		if (config_prof && prof_promote && size == PAGE) {
+			arena_chunk_t *chunk =
+			    (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
+			size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >>
+			    LG_PAGE);
+			arena_mapbits_large_binind_set(chunk, pageind,
+			    BININD_INVALID);
+		}
 		if (zero == false) {
-#ifdef JEMALLOC_FILL
-			if (opt_junk)
-				memset(ret, 0xa5, size);
-			else if (opt_zero)
-				memset(ret, 0, size);
-#endif
-		} else
+			if (config_fill) {
+				if (opt_junk)
+					memset(ret, 0xa5, size);
+				else if (opt_zero)
+					memset(ret, 0, size);
+			}
+		} else {
+			VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 			memset(ret, 0, size);
+		}
 
-#ifdef JEMALLOC_STATS
-		tbin->tstats.nrequests++;
-#endif
-#ifdef JEMALLOC_PROF
-		tcache->prof_accumbytes += size;
-#endif
+		if (config_stats)
+			tbin->tstats.nrequests++;
+		if (config_prof)
+			tcache->prof_accumbytes += size;
 	}
 
 	tcache_event(tcache);
@@ -338,45 +383,21 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
 }
 
 JEMALLOC_INLINE void
-tcache_dalloc_small(tcache_t *tcache, void *ptr)
+tcache_dalloc_small(tcache_t *tcache, void *ptr, size_t binind)
 {
-	arena_t *arena;
-	arena_chunk_t *chunk;
-	arena_run_t *run;
-	arena_bin_t *bin;
 	tcache_bin_t *tbin;
 	tcache_bin_info_t *tbin_info;
-	size_t pageind, binind;
-	arena_chunk_map_t *mapelm;
-
-	assert(arena_salloc(ptr) <= small_maxclass);
-
-	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-	arena = chunk->arena;
-	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
-	mapelm = &chunk->map[pageind-map_bias];
-	run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
-	    (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
-	dassert(run->magic == ARENA_RUN_MAGIC);
-	bin = run->bin;
-	binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
-	    sizeof(arena_bin_t);
-	assert(binind < nbins);
-
-#ifdef JEMALLOC_FILL
-	if (opt_junk)
-		memset(ptr, 0x5a, arena_bin_info[binind].reg_size);
-#endif
+
+	assert(tcache_salloc(ptr) <= SMALL_MAXCLASS);
+
+	if (config_fill && opt_junk)
+		arena_dalloc_junk_small(ptr, &arena_bin_info[binind]);
 
 	tbin = &tcache->tbins[binind];
 	tbin_info = &tcache_bin_info[binind];
 	if (tbin->ncached == tbin_info->ncached_max) {
 		tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >>
-		    1)
-#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
-		    , tcache
-#endif
-		    );
+		    1), tcache);
 	}
 	assert(tbin->ncached < tbin_info->ncached_max);
 	tbin->avail[tbin->ncached] = ptr;
@@ -388,35 +409,24 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr)
 JEMALLOC_INLINE void
 tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
 {
-	arena_t *arena;
-	arena_chunk_t *chunk;
-	size_t pageind, binind;
+	size_t binind;
 	tcache_bin_t *tbin;
 	tcache_bin_info_t *tbin_info;
 
 	assert((size & PAGE_MASK) == 0);
-	assert(arena_salloc(ptr) > small_maxclass);
-	assert(arena_salloc(ptr) <= tcache_maxclass);
+	assert(tcache_salloc(ptr) > SMALL_MAXCLASS);
+	assert(tcache_salloc(ptr) <= tcache_maxclass);
 
-	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-	arena = chunk->arena;
-	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
-	binind = nbins + (size >> PAGE_SHIFT) - 1;
+	binind = NBINS + (size >> LG_PAGE) - 1;
 
-#ifdef JEMALLOC_FILL
-	if (opt_junk)
+	if (config_fill && opt_junk)
 		memset(ptr, 0x5a, size);
-#endif
 
 	tbin = &tcache->tbins[binind];
 	tbin_info = &tcache_bin_info[binind];
 	if (tbin->ncached == tbin_info->ncached_max) {
 		tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >>
-		    1)
-#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
-		    , tcache
-#endif
-		    );
+		    1), tcache);
 	}
 	assert(tbin->ncached < tbin_info->ncached_max);
 	tbin->avail[tbin->ncached] = ptr;
@@ -428,4 +438,3 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
 
 #endif /* JEMALLOC_H_INLINES */
 /******************************************************************************/
-#endif /* JEMALLOC_TCACHE */
diff --git a/deps/jemalloc/include/jemalloc/internal/tsd.h b/deps/jemalloc/include/jemalloc/internal/tsd.h
new file mode 100644
index 00000000..0037cf35
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/tsd.h
@@ -0,0 +1,397 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/* Maximum number of malloc_tsd users with cleanup functions. */
+#define	MALLOC_TSD_CLEANUPS_MAX	8
+
+typedef bool (*malloc_tsd_cleanup_t)(void);
+
+/*
+ * TLS/TSD-agnostic macro-based implementation of thread-specific data.  There
+ * are four macros that support (at least) three use cases: file-private,
+ * library-private, and library-private inlined.  Following is an example
+ * library-private tsd variable:
+ *
+ * In example.h:
+ *   typedef struct {
+ *           int x;
+ *           int y;
+ *   } example_t;
+ *   #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0})
+ *   malloc_tsd_protos(, example, example_t *)
+ *   malloc_tsd_externs(example, example_t *)
+ * In example.c:
+ *   malloc_tsd_data(, example, example_t *, EX_INITIALIZER)
+ *   malloc_tsd_funcs(, example, example_t *, EX_INITIALIZER,
+ *       example_tsd_cleanup)
+ *
+ * The result is a set of generated functions, e.g.:
+ *
+ *   bool example_tsd_boot(void) {...}
+ *   example_t **example_tsd_get() {...}
+ *   void example_tsd_set(example_t **val) {...}
+ *
+ * Note that all of the functions deal in terms of (a_type *) rather than
+ * (a_type) so that it is possible to support non-pointer types (unlike
+ * pthreads TSD).  example_tsd_cleanup() is passed an (a_type *) pointer that is
+ * cast to (void *).  This means that the cleanup function needs to cast *and*
+ * dereference the function argument, e.g.:
+ *
+ *   void
+ *   example_tsd_cleanup(void *arg)
+ *   {
+ *           example_t *example = *(example_t **)arg;
+ *
+ *           [...]
+ *           if ([want the cleanup function to be called again]) {
+ *                   example_tsd_set(&example);
+ *           }
+ *   }
+ *
+ * If example_tsd_set() is called within example_tsd_cleanup(), the cleanup
+ * function will be called again.  This is similar to how pthreads TSD
+ * destruction works, except that pthreads only calls the cleanup function
+ * again if the value was set to non-NULL.
+ */
+
+/* malloc_tsd_protos(): prototypes for a_name##_tsd_{boot,get,set}(). */
+#define	malloc_tsd_protos(a_attr, a_name, a_type)			\
+a_attr bool								\
+a_name##_tsd_boot(void);						\
+a_attr a_type *								\
+a_name##_tsd_get(void);							\
+a_attr void								\
+a_name##_tsd_set(a_type *val);
+
+/* malloc_tsd_externs(): extern declarations for malloc_tsd_data() storage. */
+#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
+#define	malloc_tsd_externs(a_name, a_type)				\
+extern __thread a_type	a_name##_tls;					\
+extern __thread bool	a_name##_initialized;				\
+extern bool		a_name##_booted;
+#elif (defined(JEMALLOC_TLS))
+#define	malloc_tsd_externs(a_name, a_type)				\
+extern __thread a_type	a_name##_tls;					\
+extern pthread_key_t	a_name##_tsd;					\
+extern bool		a_name##_booted;
+#elif (defined(_WIN32))
+#define malloc_tsd_externs(a_name, a_type)				\
+extern DWORD		a_name##_tsd;					\
+extern bool		a_name##_booted;
+#else
+#define	malloc_tsd_externs(a_name, a_type)				\
+extern pthread_key_t	a_name##_tsd;					\
+extern bool		a_name##_booted;
+#endif
+
+/* malloc_tsd_data(): storage definitions (TLS variable/key, booted flag). */
+#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
+#define	malloc_tsd_data(a_attr, a_name, a_type, a_initializer)		\
+a_attr __thread a_type JEMALLOC_TLS_MODEL				\
+    a_name##_tls = a_initializer;					\
+a_attr __thread bool JEMALLOC_TLS_MODEL					\
+    a_name##_initialized = false;					\
+a_attr bool		a_name##_booted = false;
+#elif (defined(JEMALLOC_TLS))
+#define	malloc_tsd_data(a_attr, a_name, a_type, a_initializer)		\
+a_attr __thread a_type JEMALLOC_TLS_MODEL				\
+    a_name##_tls = a_initializer;					\
+a_attr pthread_key_t	a_name##_tsd;					\
+a_attr bool		a_name##_booted = false;
+#elif (defined(_WIN32))
+#define	malloc_tsd_data(a_attr, a_name, a_type, a_initializer)		\
+a_attr DWORD		a_name##_tsd;					\
+a_attr bool		a_name##_booted = false;
+#else
+#define	malloc_tsd_data(a_attr, a_name, a_type, a_initializer)		\
+a_attr pthread_key_t	a_name##_tsd;					\
+a_attr bool		a_name##_booted = false;
+#endif
+
+/* malloc_tsd_funcs(): definitions of a_name##_tsd_{boot,get,set}(). */
+#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
+#define	malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer,		\
+    a_cleanup)								\
+/* Initialization/cleanup. */						\
+a_attr bool								\
+a_name##_tsd_cleanup_wrapper(void)					\
+{									\
+									\
+	if (a_name##_initialized) {					\
+		a_name##_initialized = false;				\
+		a_cleanup(&a_name##_tls);				\
+	}								\
+	return (a_name##_initialized);					\
+}									\
+a_attr bool								\
+a_name##_tsd_boot(void)							\
+{									\
+									\
+	if (a_cleanup != malloc_tsd_no_cleanup) {			\
+		malloc_tsd_cleanup_register(				\
+		    &a_name##_tsd_cleanup_wrapper);			\
+	}								\
+	a_name##_booted = true;						\
+	return (false);							\
+}									\
+/* Get/set. */								\
+a_attr a_type *								\
+a_name##_tsd_get(void)							\
+{									\
+									\
+	assert(a_name##_booted);					\
+	return (&a_name##_tls);						\
+}									\
+a_attr void								\
+a_name##_tsd_set(a_type *val)						\
+{									\
+									\
+	assert(a_name##_booted);					\
+	a_name##_tls = (*val);						\
+	if (a_cleanup != malloc_tsd_no_cleanup)				\
+		a_name##_initialized = true;				\
+}
+#elif (defined(JEMALLOC_TLS))
+#define	malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer,		\
+    a_cleanup)								\
+/* Initialization/cleanup. */						\
+a_attr bool								\
+a_name##_tsd_boot(void)							\
+{									\
+									\
+	if (a_cleanup != malloc_tsd_no_cleanup) {			\
+		if (pthread_key_create(&a_name##_tsd, a_cleanup) != 0)	\
+			return (true);					\
+	}								\
+	a_name##_booted = true;						\
+	return (false);							\
+}									\
+/* Get/set. */								\
+a_attr a_type *								\
+a_name##_tsd_get(void)							\
+{									\
+									\
+	assert(a_name##_booted);					\
+	return (&a_name##_tls);						\
+}									\
+a_attr void								\
+a_name##_tsd_set(a_type *val)						\
+{									\
+									\
+	assert(a_name##_booted);					\
+	a_name##_tls = (*val);						\
+	if (a_cleanup != malloc_tsd_no_cleanup) {			\
+		if (pthread_setspecific(a_name##_tsd,			\
+		    (void *)(&a_name##_tls))) {				\
+			malloc_write("<jemalloc>: Error"		\
+			    " setting TSD for "#a_name"\n");		\
+			if (opt_abort)					\
+				abort();				\
+		}							\
+	}								\
+}
+#elif (defined(_WIN32))
+#define	malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer,		\
+    a_cleanup)								\
+/* Data structure. */							\
+typedef struct {							\
+	bool	initialized;						\
+	a_type	val;							\
+} a_name##_tsd_wrapper_t;						\
+/* Initialization/cleanup. */						\
+a_attr bool								\
+a_name##_tsd_cleanup_wrapper(void)					\
+{									\
+	a_name##_tsd_wrapper_t *wrapper;				\
+									\
+	wrapper = (a_name##_tsd_wrapper_t *) TlsGetValue(a_name##_tsd);	\
+	if (wrapper == NULL)						\
+		return (false);						\
+	if (a_cleanup != malloc_tsd_no_cleanup &&			\
+	    wrapper->initialized) {					\
+		a_type val = wrapper->val;				\
+		a_type tsd_static_data = a_initializer;			\
+		wrapper->initialized = false;				\
+		wrapper->val = tsd_static_data;				\
+		a_cleanup(&val);					\
+		if (wrapper->initialized) {				\
+			/* Trigger another cleanup round. */		\
+			return (true);					\
+		}							\
+	}								\
+	malloc_tsd_dalloc(wrapper);					\
+	return (false);							\
+}									\
+a_attr bool								\
+a_name##_tsd_boot(void)							\
+{									\
+									\
+	a_name##_tsd = TlsAlloc();					\
+	if (a_name##_tsd == TLS_OUT_OF_INDEXES)				\
+		return (true);						\
+	if (a_cleanup != malloc_tsd_no_cleanup) {			\
+		malloc_tsd_cleanup_register(				\
+		    &a_name##_tsd_cleanup_wrapper);			\
+	}								\
+	a_name##_booted = true;						\
+	return (false);							\
+}									\
+/* Get/set. */								\
+a_attr a_name##_tsd_wrapper_t *						\
+a_name##_tsd_get_wrapper(void)						\
+{									\
+	a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *)	\
+	    TlsGetValue(a_name##_tsd);					\
+									\
+	if (wrapper == NULL) {						\
+		wrapper = (a_name##_tsd_wrapper_t *)			\
+		    malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t));	\
+		if (wrapper == NULL) {					\
+			malloc_write("<jemalloc>: Error allocating"	\
+			    " TSD for "#a_name"\n");			\
+			abort();					\
+		} else {						\
+			static a_type tsd_static_data = a_initializer;	\
+			wrapper->initialized = false;			\
+			wrapper->val = tsd_static_data;			\
+		}							\
+		if (!TlsSetValue(a_name##_tsd, (void *)wrapper)) {	\
+			malloc_write("<jemalloc>: Error setting"	\
+			    " TSD for "#a_name"\n");			\
+			abort();					\
+		}							\
+	}								\
+	return (wrapper);						\
+}									\
+a_attr a_type *								\
+a_name##_tsd_get(void)							\
+{									\
+	a_name##_tsd_wrapper_t *wrapper;				\
+									\
+	assert(a_name##_booted);					\
+	wrapper = a_name##_tsd_get_wrapper();				\
+	return (&wrapper->val);						\
+}									\
+a_attr void								\
+a_name##_tsd_set(a_type *val)						\
+{									\
+	a_name##_tsd_wrapper_t *wrapper;				\
+									\
+	assert(a_name##_booted);					\
+	wrapper = a_name##_tsd_get_wrapper();				\
+	wrapper->val = *(val);						\
+	if (a_cleanup != malloc_tsd_no_cleanup)				\
+		wrapper->initialized = true;				\
+}
+#else
+#define	malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer,		\
+    a_cleanup)								\
+/* Data structure. */							\
+typedef struct {							\
+	bool	initialized;						\
+	a_type	val;							\
+} a_name##_tsd_wrapper_t;						\
+/* Initialization/cleanup. */						\
+a_attr void								\
+a_name##_tsd_cleanup_wrapper(void *arg)					\
+{									\
+	a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *)arg;\
+									\
+	if (a_cleanup != malloc_tsd_no_cleanup &&			\
+	    wrapper->initialized) {					\
+		wrapper->initialized = false;				\
+		a_cleanup(&wrapper->val);				\
+		if (wrapper->initialized) {				\
+			/* Trigger another cleanup round. */		\
+			if (pthread_setspecific(a_name##_tsd,		\
+			    (void *)wrapper)) {				\
+				malloc_write("<jemalloc>: Error"	\
+				    " setting TSD for "#a_name"\n");	\
+				if (opt_abort)				\
+					abort();			\
+			}						\
+			return;						\
+		}							\
+	}								\
+	malloc_tsd_dalloc(wrapper);					\
+}									\
+a_attr bool								\
+a_name##_tsd_boot(void)							\
+{									\
+									\
+	if (pthread_key_create(&a_name##_tsd,				\
+	    a_name##_tsd_cleanup_wrapper) != 0)				\
+		return (true);						\
+	a_name##_booted = true;						\
+	return (false);							\
+}									\
+/* Get/set. */								\
+a_attr a_name##_tsd_wrapper_t *						\
+a_name##_tsd_get_wrapper(void)						\
+{									\
+	a_name##_tsd_wrapper_t *wrapper = (a_name##_tsd_wrapper_t *)	\
+	    pthread_getspecific(a_name##_tsd);				\
+									\
+	if (wrapper == NULL) {						\
+		wrapper = (a_name##_tsd_wrapper_t *)			\
+		    malloc_tsd_malloc(sizeof(a_name##_tsd_wrapper_t));	\
+		if (wrapper == NULL) {					\
+			malloc_write("<jemalloc>: Error allocating"	\
+			    " TSD for "#a_name"\n");			\
+			abort();					\
+		} else {						\
+			static a_type tsd_static_data = a_initializer;	\
+			wrapper->initialized = false;			\
+			wrapper->val = tsd_static_data;			\
+		}							\
+		if (pthread_setspecific(a_name##_tsd,			\
+		    (void *)wrapper)) {					\
+			malloc_write("<jemalloc>: Error setting"	\
+			    " TSD for "#a_name"\n");			\
+			abort();					\
+		}							\
+	}								\
+	return (wrapper);						\
+}									\
+a_attr a_type *								\
+a_name##_tsd_get(void)							\
+{									\
+	a_name##_tsd_wrapper_t *wrapper;				\
+									\
+	assert(a_name##_booted);					\
+	wrapper = a_name##_tsd_get_wrapper();				\
+	return (&wrapper->val);						\
+}									\
+a_attr void								\
+a_name##_tsd_set(a_type *val)						\
+{									\
+	a_name##_tsd_wrapper_t *wrapper;				\
+									\
+	assert(a_name##_booted);					\
+	wrapper = a_name##_tsd_get_wrapper();				\
+	wrapper->val = *(val);						\
+	if (a_cleanup != malloc_tsd_no_cleanup)				\
+		wrapper->initialized = true;				\
+}
+#endif
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+void	*malloc_tsd_malloc(size_t size);
+void	malloc_tsd_dalloc(void *wrapper);
+void	malloc_tsd_no_cleanup(void *);
+void	malloc_tsd_cleanup_register(bool (*f)(void));
+void	malloc_tsd_boot(void);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/util.h b/deps/jemalloc/include/jemalloc/internal/util.h
new file mode 100644
index 00000000..84796936
--- /dev/null
+++ b/deps/jemalloc/include/jemalloc/internal/util.h
@@ -0,0 +1,160 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/* Size of stack-allocated buffer passed to buferror(). */
+#define	BUFERROR_BUF		64
+
+/*
+ * Size of stack-allocated buffer used by malloc_{,v,vc}printf().  This must be
+ * large enough for all possible uses within jemalloc.
+ */
+#define	MALLOC_PRINTF_BUFSIZE	4096
+
+/*
+ * Wrap a cpp argument that contains commas such that it isn't broken up into
+ * multiple arguments.
+ */
+#define JEMALLOC_CONCAT(...) __VA_ARGS__
+
+/*
+ * Silence compiler warnings due to uninitialized values.  This is used
+ * wherever the compiler fails to recognize that the variable is never used
+ * uninitialized.
+ */
+#ifdef JEMALLOC_CC_SILENCE
+#  define JEMALLOC_CC_SILENCE_INIT(v) = v
+#else
+#  define JEMALLOC_CC_SILENCE_INIT(v)
+#endif
+
+/*
+ * Define a custom assert() in order to reduce the chances of deadlock during
+ * assertion failure.
+ */
+#ifndef assert
+#define	assert(e) do {							\
+	if (config_debug && !(e)) {					\
+		malloc_printf(						\
+		    "<jemalloc>: %s:%d: Failed assertion: \"%s\"\n",	\
+		    __FILE__, __LINE__, #e);				\
+		abort();						\
+	}								\
+} while (0)
+#endif
+
+/* Use to assert a particular configuration, e.g., cassert(config_debug). */
+#define	cassert(c) do {							\
+	if ((c) == false)						\
+		assert(false);						\
+} while (0)
+
+#ifndef not_reached
+#define	not_reached() do {						\
+	if (config_debug) {						\
+		malloc_printf(						\
+		    "<jemalloc>: %s:%d: Unreachable code reached\n",	\
+		    __FILE__, __LINE__);				\
+		abort();						\
+	}								\
+} while (0)
+#endif
+
+#ifndef not_implemented
+#define	not_implemented() do {						\
+	if (config_debug) {						\
+		malloc_printf("<jemalloc>: %s:%d: Not implemented\n",	\
+		    __FILE__, __LINE__);				\
+		abort();						\
+	}								\
+} while (0)
+#endif
+
+#define	assert_not_implemented(e) do {					\
+	if (config_debug && !(e))					\
+		not_implemented();					\
+} while (0)
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/
+#ifdef JEMALLOC_H_STRUCTS
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+int	buferror(char *buf, size_t buflen);
+uintmax_t	malloc_strtoumax(const char *nptr, char **endptr, int base);
+void	malloc_write(const char *s);
+
+/*
+ * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating
+ * point math.
+ */
+int	malloc_vsnprintf(char *str, size_t size, const char *format,
+    va_list ap);
+int	malloc_snprintf(char *str, size_t size, const char *format, ...)
+    JEMALLOC_ATTR(format(printf, 3, 4));
+void	malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *format, va_list ap);
+void	malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4));
+void	malloc_printf(const char *format, ...)
+    JEMALLOC_ATTR(format(printf, 1, 2));
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#ifndef JEMALLOC_ENABLE_INLINE
+size_t	pow2_ceil(size_t x);
+void	malloc_write(const char *s);
+void	set_errno(int errnum);
+int	get_errno(void);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_))
+/* Compute the smallest power of 2 that is >= x (0 if x is 0 or too large). */
+JEMALLOC_INLINE size_t
+pow2_ceil(size_t x)
+{
+
+	x--;
+	x |= x >> 1;
+	x |= x >> 2;
+	x |= x >> 4;
+	x |= x >> 8;
+	x |= x >> 16;
+#if (LG_SIZEOF_PTR == 3)
+	x |= x >> 32;
+#endif
+	x++;
+	return (x);
+}
+
+/* Set the last error code: SetLastError() on Windows, errno elsewhere. */
+JEMALLOC_INLINE void
+set_errno(int errnum)
+{
+
+#ifdef _WIN32
+	SetLastError(errnum);
+#else
+	errno = errnum;
+#endif
+}
+
+/* Get the last error code: GetLastError() on Windows, errno elsewhere. */
+JEMALLOC_INLINE int
+get_errno(void)
+{
+
+#ifdef _WIN32
+	return (GetLastError());
+#else
+	return (errno);
+#endif
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/internal/zone.h b/deps/jemalloc/include/jemalloc/internal/zone.h
deleted file mode 100644
index 859b529d..00000000
--- a/deps/jemalloc/include/jemalloc/internal/zone.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef JEMALLOC_ZONE
-#  error "This source file is for zones on Darwin (OS X)."
-#endif
-/******************************************************************************/
-#ifdef JEMALLOC_H_TYPES
-
-#endif /* JEMALLOC_H_TYPES */
-/******************************************************************************/
-#ifdef JEMALLOC_H_STRUCTS
-
-#endif /* JEMALLOC_H_STRUCTS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_EXTERNS
-
-malloc_zone_t *create_zone(void);
-void	szone2ozone(malloc_zone_t *zone);
-
-#endif /* JEMALLOC_H_EXTERNS */
-/******************************************************************************/
-#ifdef JEMALLOC_H_INLINES
-
-#endif /* JEMALLOC_H_INLINES */
-/******************************************************************************/
diff --git a/deps/jemalloc/include/jemalloc/jemalloc.h.in b/deps/jemalloc/include/jemalloc/jemalloc.h.in
index 580a5ec5..ad069485 100644
--- a/deps/jemalloc/include/jemalloc/jemalloc.h.in
+++ b/deps/jemalloc/include/jemalloc/jemalloc.h.in
@@ -15,10 +15,8 @@ extern "C" {
 #define	JEMALLOC_VERSION_GID "@jemalloc_version_gid@"
 
 #include "jemalloc_defs@install_suffix@.h"
-#ifndef JEMALLOC_P
-#  define JEMALLOC_P(s) s
-#endif
 
+#ifdef JEMALLOC_EXPERIMENTAL
 #define	ALLOCM_LG_ALIGN(la)	(la)
 #if LG_SIZEOF_PTR == 2
 #define	ALLOCM_ALIGN(a)	(ffs(a)-1)
@@ -31,34 +29,124 @@ extern "C" {
 #define	ALLOCM_SUCCESS		0
 #define	ALLOCM_ERR_OOM		1
 #define	ALLOCM_ERR_NOT_MOVED	2
+#endif
 
-extern const char	*JEMALLOC_P(malloc_conf);
-extern void		(*JEMALLOC_P(malloc_message))(void *, const char *);
+/*
+ * The je_ prefix on the following public symbol declarations is an artifact of
+ * namespace management, and should be omitted in application code unless
+ * JEMALLOC_NO_DEMANGLE is defined (see below).
+ */
+extern JEMALLOC_EXPORT const char	*je_malloc_conf;
+extern JEMALLOC_EXPORT void		(*je_malloc_message)(void *cbopaque,
+    const char *s);
 
-void	*JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc);
-void	*JEMALLOC_P(calloc)(size_t num, size_t size) JEMALLOC_ATTR(malloc);
-int	JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
-    JEMALLOC_ATTR(nonnull(1));
-void	*JEMALLOC_P(realloc)(void *ptr, size_t size);
-void	JEMALLOC_P(free)(void *ptr);
+JEMALLOC_EXPORT void	*je_malloc(size_t size) JEMALLOC_ATTR(malloc);
+JEMALLOC_EXPORT void	*je_calloc(size_t num, size_t size)
+    JEMALLOC_ATTR(malloc);
+JEMALLOC_EXPORT int	je_posix_memalign(void **memptr, size_t alignment,
+    size_t size) JEMALLOC_ATTR(nonnull(1));
+JEMALLOC_EXPORT void	*je_aligned_alloc(size_t alignment, size_t size)
+    JEMALLOC_ATTR(malloc);
+JEMALLOC_EXPORT void	*je_realloc(void *ptr, size_t size);
+JEMALLOC_EXPORT void	je_free(void *ptr);
 
-size_t	JEMALLOC_P(malloc_usable_size)(const void *ptr);
-void	JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *),
-    void *cbopaque, const char *opts);
-int	JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen);
-int	JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp,
-    size_t *miblenp);
-int	JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp,
+#ifdef JEMALLOC_OVERRIDE_MEMALIGN
+JEMALLOC_EXPORT void *	je_memalign(size_t alignment, size_t size)
+    JEMALLOC_ATTR(malloc);
+#endif
+
+#ifdef JEMALLOC_OVERRIDE_VALLOC
+JEMALLOC_EXPORT void *	je_valloc(size_t size) JEMALLOC_ATTR(malloc);
+#endif
+
+JEMALLOC_EXPORT size_t	je_malloc_usable_size(const void *ptr);
+JEMALLOC_EXPORT void	je_malloc_stats_print(void (*write_cb)(void *,
+    const char *), void *cbopaque, const char *opts);
+JEMALLOC_EXPORT int	je_mallctl(const char *name, void *oldp,
     size_t *oldlenp, void *newp, size_t newlen);
+JEMALLOC_EXPORT int	je_mallctlnametomib(const char *name, size_t *mibp,
+    size_t *miblenp);
+JEMALLOC_EXPORT int	je_mallctlbymib(const size_t *mib, size_t miblen,
+    void *oldp, size_t *oldlenp, void *newp, size_t newlen);
 
-int	JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
-    JEMALLOC_ATTR(nonnull(1));
-int	JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size,
+#ifdef JEMALLOC_EXPERIMENTAL
+JEMALLOC_EXPORT int	je_allocm(void **ptr, size_t *rsize, size_t size,
+    int flags) JEMALLOC_ATTR(nonnull(1));
+JEMALLOC_EXPORT int	je_rallocm(void **ptr, size_t *rsize, size_t size,
     size_t extra, int flags) JEMALLOC_ATTR(nonnull(1));
-int	JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags)
+JEMALLOC_EXPORT int	je_sallocm(const void *ptr, size_t *rsize, int flags)
     JEMALLOC_ATTR(nonnull(1));
-int	JEMALLOC_P(dallocm)(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1));
+JEMALLOC_EXPORT int	je_dallocm(void *ptr, int flags)
+    JEMALLOC_ATTR(nonnull(1));
+JEMALLOC_EXPORT int	je_nallocm(size_t *rsize, size_t size, int flags);
+#endif
+
+/*
+ * By default application code must explicitly refer to mangled symbol names,
+ * so that it is possible to use jemalloc in conjunction with another allocator
+ * in the same application.  Define JEMALLOC_MANGLE in order to cause automatic
+ * name mangling that matches the API prefixing that happened as a result of
+ * --with-mangling and/or --with-jemalloc-prefix configuration settings.
+ */
+#ifdef JEMALLOC_MANGLE
+#ifndef JEMALLOC_NO_DEMANGLE
+#define	JEMALLOC_NO_DEMANGLE
+#endif
+#define	malloc_conf je_malloc_conf
+#define	malloc_message je_malloc_message
+#define	malloc je_malloc
+#define	calloc je_calloc
+#define	posix_memalign je_posix_memalign
+#define	aligned_alloc je_aligned_alloc
+#define	realloc je_realloc
+#define	free je_free
+#define	malloc_usable_size je_malloc_usable_size
+#define	malloc_stats_print je_malloc_stats_print
+#define	mallctl je_mallctl
+#define	mallctlnametomib je_mallctlnametomib
+#define	mallctlbymib je_mallctlbymib
+#define	memalign je_memalign
+#define	valloc je_valloc
+#ifdef JEMALLOC_EXPERIMENTAL
+#define	allocm je_allocm
+#define	rallocm je_rallocm
+#define	sallocm je_sallocm
+#define	dallocm je_dallocm
+#define	nallocm je_nallocm
+#endif
+#endif
+
+/*
+ * The je_* macros can be used as stable alternative names for the public
+ * jemalloc API if JEMALLOC_NO_DEMANGLE is defined.  This is primarily meant
+ * for use in jemalloc itself, but it can be used by application code to
+ * provide isolation from the name mangling specified via --with-mangling
+ * and/or --with-jemalloc-prefix.
+ */
+#ifndef JEMALLOC_NO_DEMANGLE
+#undef je_malloc_conf
+#undef je_malloc_message
+#undef je_malloc
+#undef je_calloc
+#undef je_posix_memalign
+#undef je_aligned_alloc
+#undef je_realloc
+#undef je_free
+#undef je_malloc_usable_size
+#undef je_malloc_stats_print
+#undef je_mallctl
+#undef je_mallctlnametomib
+#undef je_mallctlbymib
+#undef je_memalign
+#undef je_valloc
+#ifdef JEMALLOC_EXPERIMENTAL
+#undef je_allocm
+#undef je_rallocm
+#undef je_sallocm
+#undef je_dallocm
+#undef je_nallocm
+#endif
+#endif
 
 #ifdef __cplusplus
 };
diff --git a/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in b/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in
index 9ac7e1c2..c469142a 100644
--- a/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in
+++ b/deps/jemalloc/include/jemalloc/jemalloc_defs.h.in
@@ -1,22 +1,36 @@
-#ifndef JEMALLOC_DEFS_H_
-#define	JEMALLOC_DEFS_H_
-
 /*
- * If JEMALLOC_PREFIX is defined, it will cause all public APIs to be prefixed.
- * This makes it possible, with some care, to use multiple allocators
- * simultaneously.
- *
- * In many cases it is more convenient to manually prefix allocator function
- * calls than to let macros do it automatically, particularly when using
- * multiple allocators simultaneously.  Define JEMALLOC_MANGLE before
- * #include'ing jemalloc.h in order to cause name mangling that corresponds to
- * the API prefixing.
+ * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
+ * public APIs to be prefixed.  This makes it possible, with some care, to use
+ * multiple allocators simultaneously.
  */
 #undef JEMALLOC_PREFIX
 #undef JEMALLOC_CPREFIX
-#if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE))
-#undef JEMALLOC_P
-#endif
+
+/*
+ * Name mangling for public symbols is controlled by --with-mangling and
+ * --with-jemalloc-prefix.  With default settings the je_ prefix is stripped by
+ * these macro definitions.
+ */
+#undef je_malloc_conf
+#undef je_malloc_message
+#undef je_malloc
+#undef je_calloc
+#undef je_posix_memalign
+#undef je_aligned_alloc
+#undef je_realloc
+#undef je_free
+#undef je_malloc_usable_size
+#undef je_malloc_stats_print
+#undef je_mallctl
+#undef je_mallctlnametomib
+#undef je_mallctlbymib
+#undef je_memalign
+#undef je_valloc
+#undef je_allocm
+#undef je_rallocm
+#undef je_sallocm
+#undef je_dallocm
+#undef je_nallocm
 
 /*
  * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
@@ -33,26 +47,92 @@
  */
 #undef CPU_SPINWAIT
 
+/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */
+#undef JEMALLOC_ATOMIC9
+
 /*
  * Defined if OSAtomic*() functions are available, as provided by Darwin, and
  * documented in the atomic(3) manual page.
  */
 #undef JEMALLOC_OSATOMIC
 
+/*
+ * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and
+ * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite
+ * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the
+ * functions are defined in libgcc instead of being inlines)
+ */
+#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4
+
+/*
+ * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and
+ * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite
+ * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the
+ * functions are defined in libgcc instead of being inlines)
+ */
+#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8
+
 /*
  * Defined if OSSpin*() functions are available, as provided by Darwin, and
  * documented in the spinlock(3) manual page.
  */
 #undef JEMALLOC_OSSPIN
 
+/*
+ * Defined if _malloc_thread_cleanup() exists.  At least in the case of
+ * FreeBSD, pthread_key_create() allocates, which if used during malloc
+ * bootstrapping will cause recursion into the pthreads library.  Therefore, if
+ * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
+ * malloc_tsd.
+ */
+#undef JEMALLOC_MALLOC_THREAD_CLEANUP
+
+/*
+ * Defined if threaded initialization is known to be safe on this platform.
+ * Among other things, it must be possible to initialize a mutex without
+ * triggering allocation in order for threaded allocation to be safe.
+ */
+#undef JEMALLOC_THREADED_INIT
+
+/*
+ * Defined if the pthreads implementation defines
+ * _pthread_mutex_init_calloc_cb(), in which case the function is used in order
+ * to avoid recursive allocation during mutex initialization.
+ */
+#undef JEMALLOC_MUTEX_INIT_CB
+
 /* Defined if __attribute__((...)) syntax is supported. */
 #undef JEMALLOC_HAVE_ATTR
 #ifdef JEMALLOC_HAVE_ATTR
 #  define JEMALLOC_ATTR(s) __attribute__((s))
+#  define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default"))
+#  define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s))
+#  define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s))
+#  define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline)
+#elif defined(_MSC_VER) /* MSVC: express the attributes via __declspec. */
+#  define JEMALLOC_ATTR(s)
+#  ifdef DLLEXPORT
+#    define JEMALLOC_EXPORT __declspec(dllexport)
+#  else
+#    define JEMALLOC_EXPORT __declspec(dllimport)
+#  endif
+#  define JEMALLOC_ALIGNED(s) __declspec(align(s))
+#  define JEMALLOC_SECTION(s) __declspec(allocate(s))
+#  define JEMALLOC_NOINLINE __declspec(noinline)
 #else
 #  define JEMALLOC_ATTR(s)
+#  define JEMALLOC_EXPORT
+#  define JEMALLOC_ALIGNED(s)
+#  define JEMALLOC_SECTION(s)
+#  define JEMALLOC_NOINLINE
 #endif
 
+/* Defined if sbrk() is supported. */
+#undef JEMALLOC_HAVE_SBRK
+
+/* Non-empty if the tls_model attribute is supported. */
+#undef JEMALLOC_TLS_MODEL
+
 /* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */
 #undef JEMALLOC_CC_SILENCE
 
@@ -77,12 +157,6 @@
 /* Use gcc intrinsics for profile backtracing if defined. */
 #undef JEMALLOC_PROF_GCC
 
-/*
- * JEMALLOC_TINY enables support for tiny objects, which are smaller than one
- * quantum.
- */
-#undef JEMALLOC_TINY
-
 /*
  * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects.
  * This makes it possible to allocate/deallocate objects without any locking
@@ -96,29 +170,43 @@
  */
 #undef JEMALLOC_DSS
 
-/* JEMALLOC_SWAP enables mmap()ed swap file support. */
-#undef JEMALLOC_SWAP
-
-/* Support memory filling (junk/zero). */
+/* Support memory filling (junk/zero/quarantine/redzone). */
 #undef JEMALLOC_FILL
 
+/* Support the experimental API. */
+#undef JEMALLOC_EXPERIMENTAL
+
+/* Support utrace(2)-based tracing. */
+#undef JEMALLOC_UTRACE
+
+/* Support Valgrind. */
+#undef JEMALLOC_VALGRIND
+
 /* Support optional abort() on OOM. */
 #undef JEMALLOC_XMALLOC
 
-/* Support SYSV semantics. */
-#undef JEMALLOC_SYSV
-
 /* Support lazy locking (avoid locking unless a second thread is launched). */
 #undef JEMALLOC_LAZY_LOCK
 
-/* Determine page size at run time if defined. */
-#undef DYNAMIC_PAGE_SHIFT
-
 /* One page is 2^STATIC_PAGE_SHIFT bytes. */
 #undef STATIC_PAGE_SHIFT
 
+/*
+ * If defined, use munmap() to unmap freed chunks, rather than storing them for
+ * later reuse.  This is disabled by default on Linux because common sequences
+ * of mmap()/munmap() calls will cause virtual memory map holes.
+ */
+#undef JEMALLOC_MUNMAP
+
+/*
+ * If defined, use mremap(...MREMAP_FIXED...) for huge realloc().  This is
+ * disabled by default because it is Linux-specific and it will cause virtual
+ * memory map holes, much like munmap(2) does.
+ */
+#undef JEMALLOC_MREMAP
+
 /* TLS is used to map arenas and magazine caches to threads. */
-#undef NO_TLS
+#undef JEMALLOC_TLS
 
 /*
  * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside
@@ -139,9 +227,6 @@
 #undef JEMALLOC_ZONE
 #undef JEMALLOC_ZONE_VERSION
 
-/* If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). */
-#undef JEMALLOC_MREMAP_FIXED
-
 /*
  * Methods for purging unused pages differ between operating systems.
  *
@@ -164,4 +249,5 @@
 /* sizeof(long) == 2^LG_SIZEOF_LONG. */
 #undef LG_SIZEOF_LONG
 
-#endif /* JEMALLOC_DEFS_H_ */
+/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
+#undef LG_SIZEOF_INTMAX_T
diff --git a/deps/jemalloc/include/msvc_compat/inttypes.h b/deps/jemalloc/include/msvc_compat/inttypes.h
new file mode 100644
index 00000000..a4e6b75c
--- /dev/null
+++ b/deps/jemalloc/include/msvc_compat/inttypes.h
@@ -0,0 +1,313 @@
+// ISO C9x  compliant inttypes.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
+// 
+//  Copyright (c) 2006 Alexander Chemeris
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 
+//   1. Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimer.
+// 
+//   2. Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+// 
+//   3. The name of the author may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_INTTYPES_H_ // [
+#define _MSC_INTTYPES_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include "stdint.h"
+
+// 7.8 Format conversion of integer types
+
+typedef struct {
+   intmax_t quot;
+   intmax_t rem;
+} imaxdiv_t;
+
+// 7.8.1 Macros for format specifiers
+
+#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [   See footnote 185 at page 198
+
+#ifdef _WIN64
+#  define __PRI64_PREFIX        "l"
+#  define __PRIPTR_PREFIX       "l"
+#else
+#  define __PRI64_PREFIX        "ll"
+#  define __PRIPTR_PREFIX
+#endif
+
+// The fprintf macros for signed integers are:
+#define PRId8       "d"
+#define PRIi8       "i"
+#define PRIdLEAST8  "d"
+#define PRIiLEAST8  "i"
+#define PRIdFAST8   "d"
+#define PRIiFAST8   "i"
+
+#define PRId16       "hd"
+#define PRIi16       "hi"
+#define PRIdLEAST16  "hd"
+#define PRIiLEAST16  "hi"
+#define PRIdFAST16   "hd"
+#define PRIiFAST16   "hi"
+
+#define PRId32       "d"
+#define PRIi32       "i"
+#define PRIdLEAST32  "d"
+#define PRIiLEAST32  "i"
+#define PRIdFAST32   "d"
+#define PRIiFAST32   "i"
+
+#define PRId64       __PRI64_PREFIX "d"
+#define PRIi64       __PRI64_PREFIX "i"
+#define PRIdLEAST64  __PRI64_PREFIX "d"
+#define PRIiLEAST64  __PRI64_PREFIX "i"
+#define PRIdFAST64   __PRI64_PREFIX "d"
+#define PRIiFAST64   __PRI64_PREFIX "i"
+
+#define PRIdMAX     __PRI64_PREFIX "d"
+#define PRIiMAX     __PRI64_PREFIX "i"
+
+#define PRIdPTR     __PRIPTR_PREFIX "d"
+#define PRIiPTR     __PRIPTR_PREFIX "i"
+
+// The fprintf macros for unsigned integers are:
+#define PRIo8       "o"
+#define PRIu8       "u"
+#define PRIx8       "x"
+#define PRIX8       "X"
+#define PRIoLEAST8  "o"
+#define PRIuLEAST8  "u"
+#define PRIxLEAST8  "x"
+#define PRIXLEAST8  "X"
+#define PRIoFAST8   "o"
+#define PRIuFAST8   "u"
+#define PRIxFAST8   "x"
+#define PRIXFAST8   "X"
+
+#define PRIo16       "ho"
+#define PRIu16       "hu"
+#define PRIx16       "hx"
+#define PRIX16       "hX"
+#define PRIoLEAST16  "ho"
+#define PRIuLEAST16  "hu"
+#define PRIxLEAST16  "hx"
+#define PRIXLEAST16  "hX"
+#define PRIoFAST16   "ho"
+#define PRIuFAST16   "hu"
+#define PRIxFAST16   "hx"
+#define PRIXFAST16   "hX"
+
+#define PRIo32       "o"
+#define PRIu32       "u"
+#define PRIx32       "x"
+#define PRIX32       "X"
+#define PRIoLEAST32  "o"
+#define PRIuLEAST32  "u"
+#define PRIxLEAST32  "x"
+#define PRIXLEAST32  "X"
+#define PRIoFAST32   "o"
+#define PRIuFAST32   "u"
+#define PRIxFAST32   "x"
+#define PRIXFAST32   "X"
+
+#define PRIo64       __PRI64_PREFIX "o"
+#define PRIu64       __PRI64_PREFIX "u"
+#define PRIx64       __PRI64_PREFIX "x"
+#define PRIX64       __PRI64_PREFIX "X"
+#define PRIoLEAST64  __PRI64_PREFIX "o"
+#define PRIuLEAST64  __PRI64_PREFIX "u"
+#define PRIxLEAST64  __PRI64_PREFIX "x"
+#define PRIXLEAST64  __PRI64_PREFIX "X"
+#define PRIoFAST64   __PRI64_PREFIX "o"
+#define PRIuFAST64   __PRI64_PREFIX "u"
+#define PRIxFAST64   __PRI64_PREFIX "x"
+#define PRIXFAST64   __PRI64_PREFIX "X"
+
+#define PRIoMAX     __PRI64_PREFIX "o"
+#define PRIuMAX     __PRI64_PREFIX "u"
+#define PRIxMAX     __PRI64_PREFIX "x"
+#define PRIXMAX     __PRI64_PREFIX "X"
+
+#define PRIoPTR     __PRIPTR_PREFIX "o"
+#define PRIuPTR     __PRIPTR_PREFIX "u"
+#define PRIxPTR     __PRIPTR_PREFIX "x"
+#define PRIXPTR     __PRIPTR_PREFIX "X"
+
+// The fscanf macros for signed integers are:
+#define SCNd8       "d"
+#define SCNi8       "i"
+#define SCNdLEAST8  "d"
+#define SCNiLEAST8  "i"
+#define SCNdFAST8   "d"
+#define SCNiFAST8   "i"
+
+#define SCNd16       "hd"
+#define SCNi16       "hi"
+#define SCNdLEAST16  "hd"
+#define SCNiLEAST16  "hi"
+#define SCNdFAST16   "hd"
+#define SCNiFAST16   "hi"
+
+#define SCNd32       "ld"
+#define SCNi32       "li"
+#define SCNdLEAST32  "ld"
+#define SCNiLEAST32  "li"
+#define SCNdFAST32   "ld"
+#define SCNiFAST32   "li"
+
+#define SCNd64       "I64d"
+#define SCNi64       "I64i"
+#define SCNdLEAST64  "I64d"
+#define SCNiLEAST64  "I64i"
+#define SCNdFAST64   "I64d"
+#define SCNiFAST64   "I64i"
+
+#define SCNdMAX     "I64d"
+#define SCNiMAX     "I64i"
+
+#ifdef _WIN64 // [
+#  define SCNdPTR     "I64d"
+#  define SCNiPTR     "I64i"
+#else  // _WIN64 ][
+#  define SCNdPTR     "ld"
+#  define SCNiPTR     "li"
+#endif  // _WIN64 ]
+
+// The fscanf macros for unsigned integers are:
+#define SCNo8       "o"
+#define SCNu8       "u"
+#define SCNx8       "x"
+#define SCNX8       "X"
+#define SCNoLEAST8  "o"
+#define SCNuLEAST8  "u"
+#define SCNxLEAST8  "x"
+#define SCNXLEAST8  "X"
+#define SCNoFAST8   "o"
+#define SCNuFAST8   "u"
+#define SCNxFAST8   "x"
+#define SCNXFAST8   "X"
+
+#define SCNo16       "ho"
+#define SCNu16       "hu"
+#define SCNx16       "hx"
+#define SCNX16       "hX"
+#define SCNoLEAST16  "ho"
+#define SCNuLEAST16  "hu"
+#define SCNxLEAST16  "hx"
+#define SCNXLEAST16  "hX"
+#define SCNoFAST16   "ho"
+#define SCNuFAST16   "hu"
+#define SCNxFAST16   "hx"
+#define SCNXFAST16   "hX"
+
+#define SCNo32       "lo"
+#define SCNu32       "lu"
+#define SCNx32       "lx"
+#define SCNX32       "lX"
+#define SCNoLEAST32  "lo"
+#define SCNuLEAST32  "lu"
+#define SCNxLEAST32  "lx"
+#define SCNXLEAST32  "lX"
+#define SCNoFAST32   "lo"
+#define SCNuFAST32   "lu"
+#define SCNxFAST32   "lx"
+#define SCNXFAST32   "lX"
+
+#define SCNo64       "I64o"
+#define SCNu64       "I64u"
+#define SCNx64       "I64x"
+#define SCNX64       "I64X"
+#define SCNoLEAST64  "I64o"
+#define SCNuLEAST64  "I64u"
+#define SCNxLEAST64  "I64x"
+#define SCNXLEAST64  "I64X"
+#define SCNoFAST64   "I64o"
+#define SCNuFAST64   "I64u"
+#define SCNxFAST64   "I64x"
+#define SCNXFAST64   "I64X"
+
+#define SCNoMAX     "I64o"
+#define SCNuMAX     "I64u"
+#define SCNxMAX     "I64x"
+#define SCNXMAX     "I64X"
+
+#ifdef _WIN64 // [
+#  define SCNoPTR     "I64o"
+#  define SCNuPTR     "I64u"
+#  define SCNxPTR     "I64x"
+#  define SCNXPTR     "I64X"
+#else  // _WIN64 ][
+#  define SCNoPTR     "lo"
+#  define SCNuPTR     "lu"
+#  define SCNxPTR     "lx"
+#  define SCNXPTR     "lX"
+#endif  // _WIN64 ]
+
+#endif // __STDC_FORMAT_MACROS ]
+
+// 7.8.2 Functions for greatest-width integer types
+
+// 7.8.2.1 The imaxabs function
+#define imaxabs _abs64
+
+// 7.8.2.2 The imaxdiv function
+
+// This is a modified version of the div() function from Microsoft's div.c,
+// found in %MSVC.NET%\crt\src\div.c
+#ifdef STATIC_IMAXDIV // [
+static
+#else // STATIC_IMAXDIV ][
+_inline
+#endif // STATIC_IMAXDIV ]
+imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
+{
+   imaxdiv_t result;
+
+   result.quot = numer / denom;
+   result.rem = numer % denom;
+
+   if (numer < 0 && result.rem > 0) {
+      // did division wrong; must fix up
+      ++result.quot;
+      result.rem -= denom;
+   }
+
+   return result;
+}
+
+// 7.8.2.3 The strtoimax and strtoumax functions
+#define strtoimax _strtoi64
+#define strtoumax _strtoui64
+
+// 7.8.2.4 The wcstoimax and wcstoumax functions
+#define wcstoimax _wcstoi64
+#define wcstoumax _wcstoui64
+
+
+#endif // _MSC_INTTYPES_H_ ]
diff --git a/deps/jemalloc/include/msvc_compat/stdbool.h b/deps/jemalloc/include/msvc_compat/stdbool.h
new file mode 100644
index 00000000..da9ee8b8
--- /dev/null
+++ b/deps/jemalloc/include/msvc_compat/stdbool.h
@@ -0,0 +1,16 @@
+#ifndef stdbool_h
+#define stdbool_h
+
+#include <wtypes.h>
+
+/* MSVC doesn't define _Bool or bool in C, but does have BOOL */
+/* Note this doesn't pass autoconf's test because (bool) 0.5 != true */
+typedef BOOL _Bool;
+
+#define bool _Bool
+#define true 1
+#define false 0
+
+#define __bool_true_false_are_defined 1
+
+#endif /* stdbool_h */
diff --git a/deps/jemalloc/include/msvc_compat/stdint.h b/deps/jemalloc/include/msvc_compat/stdint.h
new file mode 100644
index 00000000..d02608a5
--- /dev/null
+++ b/deps/jemalloc/include/msvc_compat/stdint.h
@@ -0,0 +1,247 @@
+// ISO C9x  compliant stdint.h for Microsoft Visual Studio
+// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 
+// 
+//  Copyright (c) 2006-2008 Alexander Chemeris
+// 
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+// 
+//   1. Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimer.
+// 
+//   2. Redistributions in binary form must reproduce the above copyright
+//      notice, this list of conditions and the following disclaimer in the
+//      documentation and/or other materials provided with the distribution.
+// 
+//   3. The name of the author may be used to endorse or promote products
+//      derived from this software without specific prior written permission.
+// 
+// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+// 
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef _MSC_VER // [
+#error "Use this header only with Microsoft Visual C++ compilers!"
+#endif // _MSC_VER ]
+
+#ifndef _MSC_STDINT_H_ // [
+#define _MSC_STDINT_H_
+
+#if _MSC_VER > 1000
+#pragma once
+#endif
+
+#include <limits.h>
+
+// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
+// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
+// or compiler give many errors like this:
+//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
+#ifdef __cplusplus
+extern "C" {
+#endif
+#  include <wchar.h>
+#ifdef __cplusplus
+}
+#endif
+
+// Define _W64 macros to mark types changing their size, like intptr_t.
+#ifndef _W64
+#  if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300
+#     define _W64 __w64
+#  else
+#     define _W64
+#  endif
+#endif
+
+
+// 7.18.1 Integer types
+
+// 7.18.1.1 Exact-width integer types
+
+// Visual Studio 6 and Embedded Visual C++ 4 don't
+// realize that, e.g., char has the same size as __int8,
+// so we give up on __intX for them.
+#if (_MSC_VER < 1300)
+   typedef signed char       int8_t;
+   typedef signed short      int16_t;
+   typedef signed int        int32_t;
+   typedef unsigned char     uint8_t;
+   typedef unsigned short    uint16_t;
+   typedef unsigned int      uint32_t;
+#else
+   typedef signed __int8     int8_t;
+   typedef signed __int16    int16_t;
+   typedef signed __int32    int32_t;
+   typedef unsigned __int8   uint8_t;
+   typedef unsigned __int16  uint16_t;
+   typedef unsigned __int32  uint32_t;
+#endif
+typedef signed __int64       int64_t;
+typedef unsigned __int64     uint64_t;
+
+
+// 7.18.1.2 Minimum-width integer types
+typedef int8_t    int_least8_t;
+typedef int16_t   int_least16_t;
+typedef int32_t   int_least32_t;
+typedef int64_t   int_least64_t;
+typedef uint8_t   uint_least8_t;
+typedef uint16_t  uint_least16_t;
+typedef uint32_t  uint_least32_t;
+typedef uint64_t  uint_least64_t;
+
+// 7.18.1.3 Fastest minimum-width integer types
+typedef int8_t    int_fast8_t;
+typedef int16_t   int_fast16_t;
+typedef int32_t   int_fast32_t;
+typedef int64_t   int_fast64_t;
+typedef uint8_t   uint_fast8_t;
+typedef uint16_t  uint_fast16_t;
+typedef uint32_t  uint_fast32_t;
+typedef uint64_t  uint_fast64_t;
+
+// 7.18.1.4 Integer types capable of holding object pointers
+#ifdef _WIN64 // [
+   typedef signed __int64    intptr_t;
+   typedef unsigned __int64  uintptr_t;
+#else // _WIN64 ][
+   typedef _W64 signed int   intptr_t;
+   typedef _W64 unsigned int uintptr_t;
+#endif // _WIN64 ]
+
+// 7.18.1.5 Greatest-width integer types
+typedef int64_t   intmax_t;
+typedef uint64_t  uintmax_t;
+
+
+// 7.18.2 Limits of specified-width integer types
+
+#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 220 at page 257 and footnote 221 at page 259
+
+// 7.18.2.1 Limits of exact-width integer types
+#define INT8_MIN     ((int8_t)_I8_MIN)
+#define INT8_MAX     _I8_MAX
+#define INT16_MIN    ((int16_t)_I16_MIN)
+#define INT16_MAX    _I16_MAX
+#define INT32_MIN    ((int32_t)_I32_MIN)
+#define INT32_MAX    _I32_MAX
+#define INT64_MIN    ((int64_t)_I64_MIN)
+#define INT64_MAX    _I64_MAX
+#define UINT8_MAX    _UI8_MAX
+#define UINT16_MAX   _UI16_MAX
+#define UINT32_MAX   _UI32_MAX
+#define UINT64_MAX   _UI64_MAX
+
+// 7.18.2.2 Limits of minimum-width integer types
+#define INT_LEAST8_MIN    INT8_MIN
+#define INT_LEAST8_MAX    INT8_MAX
+#define INT_LEAST16_MIN   INT16_MIN
+#define INT_LEAST16_MAX   INT16_MAX
+#define INT_LEAST32_MIN   INT32_MIN
+#define INT_LEAST32_MAX   INT32_MAX
+#define INT_LEAST64_MIN   INT64_MIN
+#define INT_LEAST64_MAX   INT64_MAX
+#define UINT_LEAST8_MAX   UINT8_MAX
+#define UINT_LEAST16_MAX  UINT16_MAX
+#define UINT_LEAST32_MAX  UINT32_MAX
+#define UINT_LEAST64_MAX  UINT64_MAX
+
+// 7.18.2.3 Limits of fastest minimum-width integer types
+#define INT_FAST8_MIN    INT8_MIN
+#define INT_FAST8_MAX    INT8_MAX
+#define INT_FAST16_MIN   INT16_MIN
+#define INT_FAST16_MAX   INT16_MAX
+#define INT_FAST32_MIN   INT32_MIN
+#define INT_FAST32_MAX   INT32_MAX
+#define INT_FAST64_MIN   INT64_MIN
+#define INT_FAST64_MAX   INT64_MAX
+#define UINT_FAST8_MAX   UINT8_MAX
+#define UINT_FAST16_MAX  UINT16_MAX
+#define UINT_FAST32_MAX  UINT32_MAX
+#define UINT_FAST64_MAX  UINT64_MAX
+
+// 7.18.2.4 Limits of integer types capable of holding object pointers
+#ifdef _WIN64 // [
+#  define INTPTR_MIN   INT64_MIN
+#  define INTPTR_MAX   INT64_MAX
+#  define UINTPTR_MAX  UINT64_MAX
+#else // _WIN64 ][
+#  define INTPTR_MIN   INT32_MIN
+#  define INTPTR_MAX   INT32_MAX
+#  define UINTPTR_MAX  UINT32_MAX
+#endif // _WIN64 ]
+
+// 7.18.2.5 Limits of greatest-width integer types
+#define INTMAX_MIN   INT64_MIN
+#define INTMAX_MAX   INT64_MAX
+#define UINTMAX_MAX  UINT64_MAX
+
+// 7.18.3 Limits of other integer types
+
+#ifdef _WIN64 // [
+#  define PTRDIFF_MIN  _I64_MIN
+#  define PTRDIFF_MAX  _I64_MAX
+#else  // _WIN64 ][
+#  define PTRDIFF_MIN  _I32_MIN
+#  define PTRDIFF_MAX  _I32_MAX
+#endif  // _WIN64 ]
+
+#define SIG_ATOMIC_MIN  INT_MIN
+#define SIG_ATOMIC_MAX  INT_MAX
+
+#ifndef SIZE_MAX // [
+#  ifdef _WIN64 // [
+#     define SIZE_MAX  _UI64_MAX
+#  else // _WIN64 ][
+#     define SIZE_MAX  _UI32_MAX
+#  endif // _WIN64 ]
+#endif // SIZE_MAX ]
+
+// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
+#ifndef WCHAR_MIN // [
+#  define WCHAR_MIN  0
+#endif  // WCHAR_MIN ]
+#ifndef WCHAR_MAX // [
+#  define WCHAR_MAX  _UI16_MAX
+#endif  // WCHAR_MAX ]
+
+#define WINT_MIN  0
+#define WINT_MAX  _UI16_MAX
+
+#endif // __STDC_LIMIT_MACROS ]
+
+
+// 7.18.4 Macros for integer constants
+
+#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See footnote 224 at page 260
+
+// 7.18.4.1 Macros for minimum-width integer constants
+
+#define INT8_C(val)  val##i8
+#define INT16_C(val) val##i16
+#define INT32_C(val) val##i32
+#define INT64_C(val) val##i64
+
+#define UINT8_C(val)  val##ui8
+#define UINT16_C(val) val##ui16
+#define UINT32_C(val) val##ui32
+#define UINT64_C(val) val##ui64
+
+// 7.18.4.2 Macros for greatest-width integer constants
+#define INTMAX_C   INT64_C
+#define UINTMAX_C  UINT64_C
+
+#endif // __STDC_CONSTANT_MACROS ]
+
+
+#endif // _MSC_STDINT_H_ ]
diff --git a/deps/jemalloc/include/msvc_compat/strings.h b/deps/jemalloc/include/msvc_compat/strings.h
new file mode 100644
index 00000000..c84975b6
--- /dev/null
+++ b/deps/jemalloc/include/msvc_compat/strings.h
@@ -0,0 +1,23 @@
+#ifndef strings_h
+#define strings_h
+
+/* MSVC doesn't define ffs/ffsl. This dummy strings.h header provides both,
+ * implemented with the _BitScanForward intrinsic. */
+#include <intrin.h>
+#pragma intrinsic(_BitScanForward)
+static __forceinline int ffsl(long x)
+{
+	unsigned long i;
+
+	if (_BitScanForward(&i, x))
+		return (i + 1);
+	return (0);
+}
+
+static __forceinline int ffs(int x)
+{
+
+	return (ffsl(x));
+}
+
+#endif
diff --git a/deps/jemalloc/src/arena.c b/deps/jemalloc/src/arena.c
index d166ca1e..2a6150f3 100644
--- a/deps/jemalloc/src/arena.c
+++ b/deps/jemalloc/src/arena.c
@@ -4,175 +4,60 @@
 /******************************************************************************/
 /* Data. */
 
-size_t	opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT;
-size_t	opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT;
 ssize_t		opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
-uint8_t const	*small_size2bin;
-arena_bin_info_t	*arena_bin_info;
-
-/* Various bin-related settings. */
-unsigned	nqbins;
-unsigned	ncbins;
-unsigned	nsbins;
-unsigned	nbins;
-size_t		qspace_max;
-size_t		cspace_min;
-size_t		cspace_max;
-size_t		sspace_min;
-size_t		sspace_max;
-
-size_t		lg_mspace;
-size_t		mspace_mask;
+arena_bin_info_t	arena_bin_info[NBINS];
 
-/*
- * const_small_size2bin is a static constant lookup table that in the common
- * case can be used as-is for small_size2bin.
- */
-#if (LG_TINY_MIN == 2)
-#define	S2B_4(i)	i,
-#define	S2B_8(i)	S2B_4(i) S2B_4(i)
-#elif (LG_TINY_MIN == 3)
+JEMALLOC_ALIGNED(CACHELINE)
+const uint8_t	small_size2bin[] = {
 #define	S2B_8(i)	i,
-#else
-#  error "Unsupported LG_TINY_MIN"
-#endif
 #define	S2B_16(i)	S2B_8(i) S2B_8(i)
 #define	S2B_32(i)	S2B_16(i) S2B_16(i)
 #define	S2B_64(i)	S2B_32(i) S2B_32(i)
 #define	S2B_128(i)	S2B_64(i) S2B_64(i)
 #define	S2B_256(i)	S2B_128(i) S2B_128(i)
-/*
- * The number of elements in const_small_size2bin is dependent on the
- * definition for SUBPAGE.
- */
-static JEMALLOC_ATTR(aligned(CACHELINE))
-    const uint8_t	const_small_size2bin[] = {
-#if (LG_QUANTUM == 4)
-/* 16-byte quantum **********************/
-#  ifdef JEMALLOC_TINY
-#    if (LG_TINY_MIN == 2)
-       S2B_4(0)			/*    4 */
-       S2B_4(1)			/*    8 */
-       S2B_8(2)			/*   16 */
-#      define S2B_QMIN 2
-#    elif (LG_TINY_MIN == 3)
-       S2B_8(0)			/*    8 */
-       S2B_8(1)			/*   16 */
-#      define S2B_QMIN 1
-#    else
-#      error "Unsupported LG_TINY_MIN"
-#    endif
-#  else
-	S2B_16(0)		/*   16 */
-#    define S2B_QMIN 0
-#  endif
-	S2B_16(S2B_QMIN + 1)	/*   32 */
-	S2B_16(S2B_QMIN + 2)	/*   48 */
-	S2B_16(S2B_QMIN + 3)	/*   64 */
-	S2B_16(S2B_QMIN + 4)	/*   80 */
-	S2B_16(S2B_QMIN + 5)	/*   96 */
-	S2B_16(S2B_QMIN + 6)	/*  112 */
-	S2B_16(S2B_QMIN + 7)	/*  128 */
-#  define S2B_CMIN (S2B_QMIN + 8)
-#else
-/* 8-byte quantum ***********************/
-#  ifdef JEMALLOC_TINY
-#    if (LG_TINY_MIN == 2)
-       S2B_4(0)			/*    4 */
-       S2B_4(1)			/*    8 */
-#      define S2B_QMIN 1
-#    else
-#      error "Unsupported LG_TINY_MIN"
-#    endif
-#  else
-	S2B_8(0)		/*    8 */
-#    define S2B_QMIN 0
-#  endif
-	S2B_8(S2B_QMIN + 1)	/*   16 */
-	S2B_8(S2B_QMIN + 2)	/*   24 */
-	S2B_8(S2B_QMIN + 3)	/*   32 */
-	S2B_8(S2B_QMIN + 4)	/*   40 */
-	S2B_8(S2B_QMIN + 5)	/*   48 */
-	S2B_8(S2B_QMIN + 6)	/*   56 */
-	S2B_8(S2B_QMIN + 7)	/*   64 */
-	S2B_8(S2B_QMIN + 8)	/*   72 */
-	S2B_8(S2B_QMIN + 9)	/*   80 */
-	S2B_8(S2B_QMIN + 10)	/*   88 */
-	S2B_8(S2B_QMIN + 11)	/*   96 */
-	S2B_8(S2B_QMIN + 12)	/*  104 */
-	S2B_8(S2B_QMIN + 13)	/*  112 */
-	S2B_8(S2B_QMIN + 14)	/*  120 */
-	S2B_8(S2B_QMIN + 15)	/*  128 */
-#  define S2B_CMIN (S2B_QMIN + 16)
-#endif
-/****************************************/
-	S2B_64(S2B_CMIN + 0)	/*  192 */
-	S2B_64(S2B_CMIN + 1)	/*  256 */
-	S2B_64(S2B_CMIN + 2)	/*  320 */
-	S2B_64(S2B_CMIN + 3)	/*  384 */
-	S2B_64(S2B_CMIN + 4)	/*  448 */
-	S2B_64(S2B_CMIN + 5)	/*  512 */
-#  define S2B_SMIN (S2B_CMIN + 6)
-	S2B_256(S2B_SMIN + 0)	/*  768 */
-	S2B_256(S2B_SMIN + 1)	/* 1024 */
-	S2B_256(S2B_SMIN + 2)	/* 1280 */
-	S2B_256(S2B_SMIN + 3)	/* 1536 */
-	S2B_256(S2B_SMIN + 4)	/* 1792 */
-	S2B_256(S2B_SMIN + 5)	/* 2048 */
-	S2B_256(S2B_SMIN + 6)	/* 2304 */
-	S2B_256(S2B_SMIN + 7)	/* 2560 */
-	S2B_256(S2B_SMIN + 8)	/* 2816 */
-	S2B_256(S2B_SMIN + 9)	/* 3072 */
-	S2B_256(S2B_SMIN + 10)	/* 3328 */
-	S2B_256(S2B_SMIN + 11)	/* 3584 */
-	S2B_256(S2B_SMIN + 12)	/* 3840 */
-#if (STATIC_PAGE_SHIFT == 13)
-	S2B_256(S2B_SMIN + 13)	/* 4096 */
-	S2B_256(S2B_SMIN + 14)	/* 4352 */
-	S2B_256(S2B_SMIN + 15)	/* 4608 */
-	S2B_256(S2B_SMIN + 16)	/* 4864 */
-	S2B_256(S2B_SMIN + 17)	/* 5120 */
-	S2B_256(S2B_SMIN + 18)	/* 5376 */
-	S2B_256(S2B_SMIN + 19)	/* 5632 */
-	S2B_256(S2B_SMIN + 20)	/* 5888 */
-	S2B_256(S2B_SMIN + 21)	/* 6144 */
-	S2B_256(S2B_SMIN + 22)	/* 6400 */
-	S2B_256(S2B_SMIN + 23)	/* 6656 */
-	S2B_256(S2B_SMIN + 24)	/* 6912 */
-	S2B_256(S2B_SMIN + 25)	/* 7168 */
-	S2B_256(S2B_SMIN + 26)	/* 7424 */
-	S2B_256(S2B_SMIN + 27)	/* 7680 */
-	S2B_256(S2B_SMIN + 28)	/* 7936 */
-#endif
-};
-#undef S2B_1
-#undef S2B_2
-#undef S2B_4
+#define	S2B_512(i)	S2B_256(i) S2B_256(i)
+#define	S2B_1024(i)	S2B_512(i) S2B_512(i)
+#define	S2B_2048(i)	S2B_1024(i) S2B_1024(i)
+#define	S2B_4096(i)	S2B_2048(i) S2B_2048(i)
+#define	S2B_8192(i)	S2B_4096(i) S2B_4096(i)
+#define	SIZE_CLASS(bin, delta, size)					\
+	S2B_##delta(bin)
+	SIZE_CLASSES
 #undef S2B_8
 #undef S2B_16
 #undef S2B_32
 #undef S2B_64
 #undef S2B_128
 #undef S2B_256
-#undef S2B_QMIN
-#undef S2B_CMIN
-#undef S2B_SMIN
+#undef S2B_512
+#undef S2B_1024
+#undef S2B_2048
+#undef S2B_4096
+#undef S2B_8192
+#undef SIZE_CLASS
+};
 
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
 
 static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
-    bool large, bool zero);
+    bool large, size_t binind, bool zero);
 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
 static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
+static arena_run_t	*arena_run_alloc_helper(arena_t *arena, size_t size,
+    bool large, size_t binind, bool zero);
 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
-    bool zero);
+    size_t binind, bool zero);
 static void	arena_purge(arena_t *arena, bool all);
 static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
 static void	arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
     arena_run_t *run, size_t oldsize, size_t newsize);
 static void	arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
     arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
+static arena_run_t	*arena_bin_runs_first(arena_bin_t *bin);
+static void	arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run);
+static void	arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run);
+static arena_run_t *arena_bin_nonfull_run_tryget(arena_bin_t *bin);
 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
 static void	*arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
 static void	arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
@@ -187,14 +72,9 @@ static bool	arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
     void *ptr, size_t oldsize, size_t size, size_t extra, bool zero);
 static bool	arena_ralloc_large(void *ptr, size_t oldsize, size_t size,
     size_t extra, bool zero);
-static bool	small_size2bin_init(void);
-#ifdef JEMALLOC_DEBUG
-static void	small_size2bin_validate(void);
-#endif
-static bool	small_size2bin_init_hard(void);
 static size_t	bin_info_run_size_calc(arena_bin_info_t *bin_info,
     size_t min_run_size);
-static bool	bin_info_init(void);
+static void	bin_info_init(void);
 
 /******************************************************************************/
 
@@ -211,8 +91,8 @@ arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
 }
 
 /* Generate red-black tree functions. */
-rb_gen(static JEMALLOC_ATTR(unused), arena_run_tree_, arena_run_tree_t,
-    arena_chunk_map_t, u.rb_link, arena_run_comp)
+rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t,
+    u.rb_link, arena_run_comp)
 
 static inline int
 arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
@@ -246,8 +126,8 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
 }
 
 /* Generate red-black tree functions. */
-rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t,
-    arena_chunk_map_t, u.rb_link, arena_avail_comp)
+rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, arena_chunk_map_t,
+    u.rb_link, arena_avail_comp)
 
 static inline void *
 arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info)
@@ -257,13 +137,12 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info)
 	bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
 	    (uintptr_t)bin_info->bitmap_offset);
 
-	dassert(run->magic == ARENA_RUN_MAGIC);
 	assert(run->nfree > 0);
 	assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false);
 
 	regind = bitmap_sfu(bitmap, &bin_info->bitmap_info);
 	ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset +
-	    (uintptr_t)(bin_info->reg_size * regind));
+	    (uintptr_t)(bin_info->reg_interval * regind));
 	run->nfree--;
 	if (regind == run->nextind)
 		run->nextind++;
@@ -275,7 +154,9 @@ static inline void
 arena_run_reg_dalloc(arena_run_t *run, void *ptr)
 {
 	arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
-	size_t binind = arena_bin_index(chunk->arena, run->bin);
+	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+	size_t mapbits = arena_mapbits_get(chunk, pageind);
+	size_t binind = arena_ptr_small_binind_get(ptr, mapbits);
 	arena_bin_info_t *bin_info = &arena_bin_info[binind];
 	unsigned regind = arena_run_regind(run, bin_info, ptr);
 	bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
@@ -284,8 +165,8 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr)
 	assert(run->nfree < bin_info->nregs);
 	/* Freeing an interior pointer can cause assertion failure. */
 	assert(((uintptr_t)ptr - ((uintptr_t)run +
-	    (uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_size
-	    == 0);
+	    (uintptr_t)bin_info->reg0_offset)) %
+	    (uintptr_t)bin_info->reg_interval == 0);
 	assert((uintptr_t)ptr >= (uintptr_t)run +
 	    (uintptr_t)bin_info->reg0_offset);
 	/* Freeing an unallocated pointer can cause assertion failure. */
@@ -295,75 +176,76 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr)
 	run->nfree++;
 }
 
-#ifdef JEMALLOC_DEBUG
 static inline void
 arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind)
 {
 	size_t i;
-	size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << PAGE_SHIFT));
+	UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << LG_PAGE));
 
-	for (i = 0; i < PAGE_SIZE / sizeof(size_t); i++)
+	for (i = 0; i < PAGE / sizeof(size_t); i++)
 		assert(p[i] == 0);
 }
-#endif
 
 static void
 arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
-    bool zero)
+    size_t binind, bool zero)
 {
 	arena_chunk_t *chunk;
-	size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i;
+	size_t run_ind, total_pages, need_pages, rem_pages, i;
 	size_t flag_dirty;
 	arena_avail_tree_t *runs_avail;
-#ifdef JEMALLOC_STATS
-	size_t cactive_diff;
-#endif
+
+	assert((large && binind == BININD_INVALID) || (large == false && binind
+	    != BININD_INVALID));
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
-	old_ndirty = chunk->ndirty;
-	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
-	    >> PAGE_SHIFT);
-	flag_dirty = chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY;
+	run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
+	flag_dirty = arena_mapbits_dirty_get(chunk, run_ind);
 	runs_avail = (flag_dirty != 0) ? &arena->runs_avail_dirty :
 	    &arena->runs_avail_clean;
-	total_pages = (chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) >>
-	    PAGE_SHIFT;
-	assert((chunk->map[run_ind+total_pages-1-map_bias].bits &
-	    CHUNK_MAP_DIRTY) == flag_dirty);
-	need_pages = (size >> PAGE_SHIFT);
+	total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >>
+	    LG_PAGE;
+	assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) ==
+	    flag_dirty);
+	need_pages = (size >> LG_PAGE);
 	assert(need_pages > 0);
 	assert(need_pages <= total_pages);
 	rem_pages = total_pages - need_pages;
 
-	arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]);
-#ifdef JEMALLOC_STATS
-	/* Update stats_cactive if nactive is crossing a chunk multiple. */
-	cactive_diff = CHUNK_CEILING((arena->nactive + need_pages) <<
-	    PAGE_SHIFT) - CHUNK_CEILING(arena->nactive << PAGE_SHIFT);
-	if (cactive_diff != 0)
-		stats_cactive_add(cactive_diff);
-#endif
+	arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, run_ind));
+	if (config_stats) {
+		/*
+		 * Update stats_cactive if nactive is crossing a chunk
+		 * multiple.
+		 */
+		size_t cactive_diff = CHUNK_CEILING((arena->nactive +
+		    need_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive <<
+		    LG_PAGE);
+		if (cactive_diff != 0)
+			stats_cactive_add(cactive_diff);
+	}
 	arena->nactive += need_pages;
 
 	/* Keep track of trailing unused pages for later use. */
 	if (rem_pages > 0) {
 		if (flag_dirty != 0) {
-			chunk->map[run_ind+need_pages-map_bias].bits =
-			    (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY;
-			chunk->map[run_ind+total_pages-1-map_bias].bits =
-			    (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY;
+			arena_mapbits_unallocated_set(chunk, run_ind+need_pages,
+			    (rem_pages << LG_PAGE), CHUNK_MAP_DIRTY);
+			arena_mapbits_unallocated_set(chunk,
+			    run_ind+total_pages-1, (rem_pages << LG_PAGE),
+			    CHUNK_MAP_DIRTY);
 		} else {
-			chunk->map[run_ind+need_pages-map_bias].bits =
-			    (rem_pages << PAGE_SHIFT) |
-			    (chunk->map[run_ind+need_pages-map_bias].bits &
-			    CHUNK_MAP_UNZEROED);
-			chunk->map[run_ind+total_pages-1-map_bias].bits =
-			    (rem_pages << PAGE_SHIFT) |
-			    (chunk->map[run_ind+total_pages-1-map_bias].bits &
-			    CHUNK_MAP_UNZEROED);
+			arena_mapbits_unallocated_set(chunk, run_ind+need_pages,
+			    (rem_pages << LG_PAGE),
+			    arena_mapbits_unzeroed_get(chunk,
+			    run_ind+need_pages));
+			arena_mapbits_unallocated_set(chunk,
+			    run_ind+total_pages-1, (rem_pages << LG_PAGE),
+			    arena_mapbits_unzeroed_get(chunk,
+			    run_ind+total_pages-1));
 		}
-		arena_avail_tree_insert(runs_avail,
-		    &chunk->map[run_ind+need_pages-map_bias]);
+		arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk,
+		    run_ind+need_pages));
 	}
 
 	/* Update dirty page accounting. */
@@ -384,28 +266,34 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
 				 * zeroed (i.e. never before touched).
 				 */
 				for (i = 0; i < need_pages; i++) {
-					if ((chunk->map[run_ind+i-map_bias].bits
-					    & CHUNK_MAP_UNZEROED) != 0) {
+					if (arena_mapbits_unzeroed_get(chunk,
+					    run_ind+i) != 0) {
+						VALGRIND_MAKE_MEM_UNDEFINED(
+						    (void *)((uintptr_t)
+						    chunk + ((run_ind+i) <<
+						    LG_PAGE)), PAGE);
 						memset((void *)((uintptr_t)
 						    chunk + ((run_ind+i) <<
-						    PAGE_SHIFT)), 0,
-						    PAGE_SIZE);
-					}
-#ifdef JEMALLOC_DEBUG
-					else {
+						    LG_PAGE)), 0, PAGE);
+					} else if (config_debug) {
+						VALGRIND_MAKE_MEM_DEFINED(
+						    (void *)((uintptr_t)
+						    chunk + ((run_ind+i) <<
+						    LG_PAGE)), PAGE);
 						arena_chunk_validate_zeroed(
 						    chunk, run_ind+i);
 					}
-#endif
 				}
 			} else {
 				/*
 				 * The run is dirty, so all pages must be
 				 * zeroed.
 				 */
+				VALGRIND_MAKE_MEM_UNDEFINED((void
+				    *)((uintptr_t)chunk + (run_ind <<
+				    LG_PAGE)), (need_pages << LG_PAGE));
 				memset((void *)((uintptr_t)chunk + (run_ind <<
-				    PAGE_SHIFT)), 0, (need_pages <<
-				    PAGE_SHIFT));
+				    LG_PAGE)), 0, (need_pages << LG_PAGE));
 			}
 		}
 
@@ -413,10 +301,9 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
 		 * Set the last element first, in case the run only contains one
 		 * page (i.e. both statements set the same element).
 		 */
-		chunk->map[run_ind+need_pages-1-map_bias].bits =
-		    CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | flag_dirty;
-		chunk->map[run_ind-map_bias].bits = size | flag_dirty |
-		    CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+		arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0,
+		    flag_dirty);
+		arena_mapbits_large_set(chunk, run_ind, size, flag_dirty);
 	} else {
 		assert(zero == false);
 		/*
@@ -424,43 +311,29 @@ arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
 		 * small run, so that arena_dalloc_bin_run() has the ability to
 		 * conditionally trim clean pages.
 		 */
-		chunk->map[run_ind-map_bias].bits =
-		    (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) |
-		    CHUNK_MAP_ALLOCATED | flag_dirty;
-#ifdef JEMALLOC_DEBUG
+		arena_mapbits_small_set(chunk, run_ind, 0, binind, flag_dirty);
 		/*
 		 * The first page will always be dirtied during small run
 		 * initialization, so a validation failure here would not
 		 * actually cause an observable failure.
 		 */
-		if (flag_dirty == 0 &&
-		    (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED)
-		    == 0)
+		if (config_debug && flag_dirty == 0 &&
+		    arena_mapbits_unzeroed_get(chunk, run_ind) == 0)
 			arena_chunk_validate_zeroed(chunk, run_ind);
-#endif
 		for (i = 1; i < need_pages - 1; i++) {
-			chunk->map[run_ind+i-map_bias].bits = (i << PAGE_SHIFT)
-			    | (chunk->map[run_ind+i-map_bias].bits &
-			    CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED;
-#ifdef JEMALLOC_DEBUG
-			if (flag_dirty == 0 &&
-			    (chunk->map[run_ind+i-map_bias].bits &
-			    CHUNK_MAP_UNZEROED) == 0)
+			arena_mapbits_small_set(chunk, run_ind+i, i, binind, 0);
+			if (config_debug && flag_dirty == 0 &&
+			    arena_mapbits_unzeroed_get(chunk, run_ind+i) == 0)
 				arena_chunk_validate_zeroed(chunk, run_ind+i);
-#endif
 		}
-		chunk->map[run_ind+need_pages-1-map_bias].bits = ((need_pages
-		    - 1) << PAGE_SHIFT) |
-		    (chunk->map[run_ind+need_pages-1-map_bias].bits &
-		    CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty;
-#ifdef JEMALLOC_DEBUG
-		if (flag_dirty == 0 &&
-		    (chunk->map[run_ind+need_pages-1-map_bias].bits &
-		    CHUNK_MAP_UNZEROED) == 0) {
+		arena_mapbits_small_set(chunk, run_ind+need_pages-1,
+		    need_pages-1, binind, flag_dirty);
+		if (config_debug && flag_dirty == 0 &&
+		    arena_mapbits_unzeroed_get(chunk, run_ind+need_pages-1) ==
+		    0) {
 			arena_chunk_validate_zeroed(chunk,
 			    run_ind+need_pages-1);
 		}
-#endif
 	}
 }
 
@@ -476,31 +349,35 @@ arena_chunk_alloc(arena_t *arena)
 		chunk = arena->spare;
 		arena->spare = NULL;
 
+		assert(arena_mapbits_allocated_get(chunk, map_bias) == 0);
+		assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0);
+		assert(arena_mapbits_unallocated_size_get(chunk, map_bias) ==
+		    arena_maxclass);
+		assert(arena_mapbits_unallocated_size_get(chunk,
+		    chunk_npages-1) == arena_maxclass);
+		assert(arena_mapbits_dirty_get(chunk, map_bias) ==
+		    arena_mapbits_dirty_get(chunk, chunk_npages-1));
+
 		/* Insert the run into the appropriate runs_avail_* tree. */
-		if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0)
+		if (arena_mapbits_dirty_get(chunk, map_bias) == 0)
 			runs_avail = &arena->runs_avail_clean;
 		else
 			runs_avail = &arena->runs_avail_dirty;
-		assert((chunk->map[0].bits & ~PAGE_MASK) == arena_maxclass);
-		assert((chunk->map[chunk_npages-1-map_bias].bits & ~PAGE_MASK)
-		    == arena_maxclass);
-		assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) ==
-		    (chunk->map[chunk_npages-1-map_bias].bits &
-		    CHUNK_MAP_DIRTY));
-		arena_avail_tree_insert(runs_avail, &chunk->map[0]);
+		arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk,
+		    map_bias));
 	} else {
 		bool zero;
 		size_t unzeroed;
 
 		zero = false;
 		malloc_mutex_unlock(&arena->lock);
-		chunk = (arena_chunk_t *)chunk_alloc(chunksize, false, &zero);
+		chunk = (arena_chunk_t *)chunk_alloc(chunksize, chunksize,
+		    false, &zero);
 		malloc_mutex_lock(&arena->lock);
 		if (chunk == NULL)
 			return (NULL);
-#ifdef JEMALLOC_STATS
-		arena->stats.mapped += chunksize;
-#endif
+		if (config_stats)
+			arena->stats.mapped += chunksize;
 
 		chunk->arena = arena;
 		ql_elm_new(chunk, link_dirty);
@@ -518,27 +395,27 @@ arena_chunk_alloc(arena_t *arena)
 		 * chunk.
 		 */
 		unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED;
-		chunk->map[0].bits = arena_maxclass | unzeroed;
+		arena_mapbits_unallocated_set(chunk, map_bias, arena_maxclass,
+		    unzeroed);
 		/*
 		 * There is no need to initialize the internal page map entries
 		 * unless the chunk is not zeroed.
 		 */
 		if (zero == false) {
 			for (i = map_bias+1; i < chunk_npages-1; i++)
-				chunk->map[i-map_bias].bits = unzeroed;
-		}
-#ifdef JEMALLOC_DEBUG
-		else {
-			for (i = map_bias+1; i < chunk_npages-1; i++)
-				assert(chunk->map[i-map_bias].bits == unzeroed);
+				arena_mapbits_unzeroed_set(chunk, i, unzeroed);
+		} else if (config_debug) {
+			for (i = map_bias+1; i < chunk_npages-1; i++) {
+				assert(arena_mapbits_unzeroed_get(chunk, i) ==
+				    unzeroed);
+			}
 		}
-#endif
-		chunk->map[chunk_npages-1-map_bias].bits = arena_maxclass |
-		    unzeroed;
+		arena_mapbits_unallocated_set(chunk, chunk_npages-1,
+		    arena_maxclass, unzeroed);
 
 		/* Insert the run into the runs_avail_clean tree. */
 		arena_avail_tree_insert(&arena->runs_avail_clean,
-		    &chunk->map[0]);
+		    arena_mapp_get(chunk, map_bias));
 	}
 
 	return (chunk);
@@ -549,15 +426,24 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
 {
 	arena_avail_tree_t *runs_avail;
 
+	assert(arena_mapbits_allocated_get(chunk, map_bias) == 0);
+	assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0);
+	assert(arena_mapbits_unallocated_size_get(chunk, map_bias) ==
+	    arena_maxclass);
+	assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) ==
+	    arena_maxclass);
+	assert(arena_mapbits_dirty_get(chunk, map_bias) ==
+	    arena_mapbits_dirty_get(chunk, chunk_npages-1));
+
 	/*
 	 * Remove run from the appropriate runs_avail_* tree, so that the arena
 	 * does not use it.
 	 */
-	if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0)
+	if (arena_mapbits_dirty_get(chunk, map_bias) == 0)
 		runs_avail = &arena->runs_avail_clean;
 	else
 		runs_avail = &arena->runs_avail_dirty;
-	arena_avail_tree_remove(runs_avail, &chunk->map[0]);
+	arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk, map_bias));
 
 	if (arena->spare != NULL) {
 		arena_chunk_t *spare = arena->spare;
@@ -571,24 +457,19 @@ arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
 		malloc_mutex_unlock(&arena->lock);
 		chunk_dealloc((void *)spare, chunksize, true);
 		malloc_mutex_lock(&arena->lock);
-#ifdef JEMALLOC_STATS
-		arena->stats.mapped -= chunksize;
-#endif
+		if (config_stats)
+			arena->stats.mapped -= chunksize;
 	} else
 		arena->spare = chunk;
 }
 
 static arena_run_t *
-arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
+arena_run_alloc_helper(arena_t *arena, size_t size, bool large, size_t binind,
+    bool zero)
 {
-	arena_chunk_t *chunk;
 	arena_run_t *run;
 	arena_chunk_map_t *mapelm, key;
 
-	assert(size <= arena_maxclass);
-	assert((size & PAGE_MASK) == 0);
-
-	/* Search the arena's chunks for the lowest best fit. */
 	key.bits = size | CHUNK_MAP_KEY;
 	mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key);
 	if (mapelm != NULL) {
@@ -598,8 +479,8 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
 		    + map_bias;
 
 		run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
-		    PAGE_SHIFT));
-		arena_run_split(arena, run, size, large, zero);
+		    LG_PAGE));
+		arena_run_split(arena, run, size, large, binind, zero);
 		return (run);
 	}
 	mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key);
@@ -610,19 +491,38 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
 		    + map_bias;
 
 		run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
-		    PAGE_SHIFT));
-		arena_run_split(arena, run, size, large, zero);
+		    LG_PAGE));
+		arena_run_split(arena, run, size, large, binind, zero);
 		return (run);
 	}
 
+	return (NULL);
+}
+
+static arena_run_t *
+arena_run_alloc(arena_t *arena, size_t size, bool large, size_t binind,
+    bool zero)
+{
+	arena_chunk_t *chunk;
+	arena_run_t *run;
+
+	assert(size <= arena_maxclass);
+	assert((size & PAGE_MASK) == 0);
+	assert((large && binind == BININD_INVALID) || (large == false && binind
+	    != BININD_INVALID));
+
+	/* Search the arena's chunks for the lowest best fit. */
+	run = arena_run_alloc_helper(arena, size, large, binind, zero);
+	if (run != NULL)
+		return (run);
+
 	/*
 	 * No usable runs.  Create a new chunk from which to allocate the run.
 	 */
 	chunk = arena_chunk_alloc(arena);
 	if (chunk != NULL) {
-		run = (arena_run_t *)((uintptr_t)chunk + (map_bias <<
-		    PAGE_SHIFT));
-		arena_run_split(arena, run, size, large, zero);
+		run = (arena_run_t *)((uintptr_t)chunk + (map_bias << LG_PAGE));
+		arena_run_split(arena, run, size, large, binind, zero);
 		return (run);
 	}
 
@@ -631,32 +531,7 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
 	 * sufficient memory available while this one dropped arena->lock in
 	 * arena_chunk_alloc(), so search one more time.
 	 */
-	mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key);
-	if (mapelm != NULL) {
-		arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
-		size_t pageind = (((uintptr_t)mapelm -
-		    (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
-		    + map_bias;
-
-		run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
-		    PAGE_SHIFT));
-		arena_run_split(arena, run, size, large, zero);
-		return (run);
-	}
-	mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key);
-	if (mapelm != NULL) {
-		arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
-		size_t pageind = (((uintptr_t)mapelm -
-		    (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t))
-		    + map_bias;
-
-		run = (arena_run_t *)((uintptr_t)run_chunk + (pageind <<
-		    PAGE_SHIFT));
-		arena_run_split(arena, run, size, large, zero);
-		return (run);
-	}
-
-	return (NULL);
+	return (arena_run_alloc_helper(arena, size, large, binind, zero));
 }
 
 static inline void
@@ -677,12 +552,8 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
 	ql_head(arena_chunk_map_t) mapelms;
 	arena_chunk_map_t *mapelm;
 	size_t pageind, flag_unzeroed;
-#ifdef JEMALLOC_DEBUG
 	size_t ndirty;
-#endif
-#ifdef JEMALLOC_STATS
 	size_t nmadvise;
-#endif
 
 	ql_new(&mapelms);
 
@@ -692,13 +563,11 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
     * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous
     * mappings, but not for file-backed mappings.
     */
-#  ifdef JEMALLOC_SWAP
-	    swap_enabled ? CHUNK_MAP_UNZEROED :
-#  endif
-	    0;
+	    0
 #else
-	    CHUNK_MAP_UNZEROED;
+	    CHUNK_MAP_UNZEROED
 #endif
+	    ;
 
 	/*
 	 * If chunk is the spare, temporarily re-allocate it, 1) so that its
@@ -716,56 +585,61 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
 	 * run.
 	 */
 	if (chunk == arena->spare) {
-		assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) != 0);
+		assert(arena_mapbits_dirty_get(chunk, map_bias) != 0);
+		assert(arena_mapbits_dirty_get(chunk, chunk_npages-1) != 0);
+
 		arena_chunk_alloc(arena);
 	}
 
 	/* Temporarily allocate all free dirty runs within chunk. */
 	for (pageind = map_bias; pageind < chunk_npages;) {
-		mapelm = &chunk->map[pageind-map_bias];
-		if ((mapelm->bits & CHUNK_MAP_ALLOCATED) == 0) {
+		mapelm = arena_mapp_get(chunk, pageind);
+		if (arena_mapbits_allocated_get(chunk, pageind) == 0) {
 			size_t npages;
 
-			npages = mapelm->bits >> PAGE_SHIFT;
+			npages = arena_mapbits_unallocated_size_get(chunk,
+			    pageind) >> LG_PAGE;
 			assert(pageind + npages <= chunk_npages);
-			if (mapelm->bits & CHUNK_MAP_DIRTY) {
+			assert(arena_mapbits_dirty_get(chunk, pageind) ==
+			    arena_mapbits_dirty_get(chunk, pageind+npages-1));
+			if (arena_mapbits_dirty_get(chunk, pageind) != 0) {
 				size_t i;
-#ifdef JEMALLOC_STATS
-				size_t cactive_diff;
-#endif
 
 				arena_avail_tree_remove(
 				    &arena->runs_avail_dirty, mapelm);
 
-				mapelm->bits = (npages << PAGE_SHIFT) |
-				    flag_unzeroed | CHUNK_MAP_LARGE |
-				    CHUNK_MAP_ALLOCATED;
+				arena_mapbits_unzeroed_set(chunk, pageind,
+				    flag_unzeroed);
+				arena_mapbits_large_set(chunk, pageind,
+				    (npages << LG_PAGE), 0);
 				/*
 				 * Update internal elements in the page map, so
 				 * that CHUNK_MAP_UNZEROED is properly set.
 				 */
 				for (i = 1; i < npages - 1; i++) {
-					chunk->map[pageind+i-map_bias].bits =
-					    flag_unzeroed;
+					arena_mapbits_unzeroed_set(chunk,
+					    pageind+i, flag_unzeroed);
 				}
 				if (npages > 1) {
-					chunk->map[
-					    pageind+npages-1-map_bias].bits =
-					    flag_unzeroed | CHUNK_MAP_LARGE |
-					    CHUNK_MAP_ALLOCATED;
+					arena_mapbits_unzeroed_set(chunk,
+					    pageind+npages-1, flag_unzeroed);
+					arena_mapbits_large_set(chunk,
+					    pageind+npages-1, 0, 0);
 				}
 
-#ifdef JEMALLOC_STATS
-				/*
-				 * Update stats_cactive if nactive is crossing a
-				 * chunk multiple.
-				 */
-				cactive_diff = CHUNK_CEILING((arena->nactive +
-				    npages) << PAGE_SHIFT) -
-				    CHUNK_CEILING(arena->nactive << PAGE_SHIFT);
-				if (cactive_diff != 0)
-					stats_cactive_add(cactive_diff);
-#endif
+				if (config_stats) {
+					/*
+					 * Update stats_cactive if nactive is
+					 * crossing a chunk multiple.
+					 */
+					size_t cactive_diff =
+					    CHUNK_CEILING((arena->nactive +
+					    npages) << LG_PAGE) -
+					    CHUNK_CEILING(arena->nactive <<
+					    LG_PAGE);
+					if (cactive_diff != 0)
+						stats_cactive_add(cactive_diff);
+				}
 				arena->nactive += npages;
 				/* Append to list for later processing. */
 				ql_elm_new(mapelm, u.ql_link);
@@ -775,71 +649,57 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
 			pageind += npages;
 		} else {
 			/* Skip allocated run. */
-			if (mapelm->bits & CHUNK_MAP_LARGE)
-				pageind += mapelm->bits >> PAGE_SHIFT;
+			if (arena_mapbits_large_get(chunk, pageind))
+				pageind += arena_mapbits_large_size_get(chunk,
+				    pageind) >> LG_PAGE;
 			else {
+				size_t binind;
+				arena_bin_info_t *bin_info;
 				arena_run_t *run = (arena_run_t *)((uintptr_t)
-				    chunk + (uintptr_t)(pageind << PAGE_SHIFT));
-
-				assert((mapelm->bits >> PAGE_SHIFT) == 0);
-				dassert(run->magic == ARENA_RUN_MAGIC);
-				size_t binind = arena_bin_index(arena,
-				    run->bin);
-				arena_bin_info_t *bin_info =
-				    &arena_bin_info[binind];
-				pageind += bin_info->run_size >> PAGE_SHIFT;
+				    chunk + (uintptr_t)(pageind << LG_PAGE));
+
+				assert(arena_mapbits_small_runind_get(chunk,
+				    pageind) == 0);
+				binind = arena_bin_index(arena, run->bin);
+				bin_info = &arena_bin_info[binind];
+				pageind += bin_info->run_size >> LG_PAGE;
 			}
 		}
 	}
 	assert(pageind == chunk_npages);
 
-#ifdef JEMALLOC_DEBUG
-	ndirty = chunk->ndirty;
-#endif
-#ifdef JEMALLOC_STATS
-	arena->stats.purged += chunk->ndirty;
-#endif
+	if (config_debug)
+		ndirty = chunk->ndirty;
+	if (config_stats)
+		arena->stats.purged += chunk->ndirty;
 	arena->ndirty -= chunk->ndirty;
 	chunk->ndirty = 0;
 	ql_remove(&arena->chunks_dirty, chunk, link_dirty);
 	chunk->dirtied = false;
 
 	malloc_mutex_unlock(&arena->lock);
-#ifdef JEMALLOC_STATS
-	nmadvise = 0;
-#endif
+	if (config_stats)
+		nmadvise = 0;
 	ql_foreach(mapelm, &mapelms, u.ql_link) {
 		size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
 		    sizeof(arena_chunk_map_t)) + map_bias;
-		size_t npages = mapelm->bits >> PAGE_SHIFT;
+		size_t npages = arena_mapbits_large_size_get(chunk, pageind) >>
+		    LG_PAGE;
 
 		assert(pageind + npages <= chunk_npages);
-#ifdef JEMALLOC_DEBUG
 		assert(ndirty >= npages);
-		ndirty -= npages;
-#endif
+		if (config_debug)
+			ndirty -= npages;
 
-#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
-		madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)),
-		    (npages << PAGE_SHIFT), MADV_DONTNEED);
-#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
-		madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)),
-		    (npages << PAGE_SHIFT), MADV_FREE);
-#else
-#  error "No method defined for purging unused dirty pages."
-#endif
-
-#ifdef JEMALLOC_STATS
-		nmadvise++;
-#endif
+		pages_purge((void *)((uintptr_t)chunk + (pageind << LG_PAGE)),
+		    (npages << LG_PAGE));
+		if (config_stats)
+			nmadvise++;
 	}
-#ifdef JEMALLOC_DEBUG
 	assert(ndirty == 0);
-#endif
 	malloc_mutex_lock(&arena->lock);
-#ifdef JEMALLOC_STATS
-	arena->stats.nmadvise += nmadvise;
-#endif
+	if (config_stats)
+		arena->stats.nmadvise += nmadvise;
 
 	/* Deallocate runs. */
 	for (mapelm = ql_first(&mapelms); mapelm != NULL;
@@ -847,7 +707,7 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
 		size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
 		    sizeof(arena_chunk_map_t)) + map_bias;
 		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
-		    (uintptr_t)(pageind << PAGE_SHIFT));
+		    (uintptr_t)(pageind << LG_PAGE));
 
 		ql_remove(&mapelms, mapelm, u.ql_link);
 		arena_run_dalloc(arena, run, false);
@@ -859,23 +719,22 @@ arena_purge(arena_t *arena, bool all)
 {
 	arena_chunk_t *chunk;
 	size_t npurgatory;
-#ifdef JEMALLOC_DEBUG
-	size_t ndirty = 0;
+	if (config_debug) {
+		size_t ndirty = 0;
 
-	ql_foreach(chunk, &arena->chunks_dirty, link_dirty) {
-	    assert(chunk->dirtied);
-	    ndirty += chunk->ndirty;
+		ql_foreach(chunk, &arena->chunks_dirty, link_dirty) {
+		    assert(chunk->dirtied);
+		    ndirty += chunk->ndirty;
+		}
+		assert(ndirty == arena->ndirty);
 	}
-	assert(ndirty == arena->ndirty);
-#endif
 	assert(arena->ndirty > arena->npurgatory || all);
 	assert(arena->ndirty - arena->npurgatory > chunk_npages || all);
 	assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty -
 	    arena->npurgatory) || all);
 
-#ifdef JEMALLOC_STATS
-	arena->stats.npurge++;
-#endif
+	if (config_stats)
+		arena->stats.npurge++;
 
 	/*
 	 * Compute the minimum number of pages that this thread should try to
@@ -957,44 +816,41 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
 	arena_chunk_t *chunk;
 	size_t size, run_ind, run_pages, flag_dirty;
 	arena_avail_tree_t *runs_avail;
-#ifdef JEMALLOC_STATS
-	size_t cactive_diff;
-#endif
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
-	run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk)
-	    >> PAGE_SHIFT);
+	run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
 	assert(run_ind >= map_bias);
 	assert(run_ind < chunk_npages);
-	if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_LARGE) != 0) {
-		size = chunk->map[run_ind-map_bias].bits & ~PAGE_MASK;
-		assert(size == PAGE_SIZE ||
-		    (chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits &
-		    ~PAGE_MASK) == 0);
-		assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits &
-		    CHUNK_MAP_LARGE) != 0);
-		assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits &
-		    CHUNK_MAP_ALLOCATED) != 0);
+	if (arena_mapbits_large_get(chunk, run_ind) != 0) {
+		size = arena_mapbits_large_size_get(chunk, run_ind);
+		assert(size == PAGE ||
+		    arena_mapbits_large_size_get(chunk,
+		    run_ind+(size>>LG_PAGE)-1) == 0);
 	} else {
 		size_t binind = arena_bin_index(arena, run->bin);
 		arena_bin_info_t *bin_info = &arena_bin_info[binind];
 		size = bin_info->run_size;
 	}
-	run_pages = (size >> PAGE_SHIFT);
-#ifdef JEMALLOC_STATS
-	/* Update stats_cactive if nactive is crossing a chunk multiple. */
-	cactive_diff = CHUNK_CEILING(arena->nactive << PAGE_SHIFT) -
-	    CHUNK_CEILING((arena->nactive - run_pages) << PAGE_SHIFT);
-	if (cactive_diff != 0)
-		stats_cactive_sub(cactive_diff);
-#endif
+	run_pages = (size >> LG_PAGE);
+	if (config_stats) {
+		/*
+		 * Update stats_cactive if nactive is crossing a chunk
+		 * multiple.
+		 */
+		size_t cactive_diff = CHUNK_CEILING(arena->nactive << LG_PAGE) -
+		    CHUNK_CEILING((arena->nactive - run_pages) << LG_PAGE);
+		if (cactive_diff != 0)
+			stats_cactive_sub(cactive_diff);
+	}
 	arena->nactive -= run_pages;
 
 	/*
 	 * The run is dirty if the caller claims to have dirtied it, as well as
 	 * if it was already dirty before being allocated.
 	 */
-	if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) != 0)
+	assert(arena_mapbits_dirty_get(chunk, run_ind) ==
+	    arena_mapbits_dirty_get(chunk, run_ind+run_pages-1));
+	if (arena_mapbits_dirty_get(chunk, run_ind) != 0)
 		dirty = true;
 	flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0;
 	runs_avail = dirty ? &arena->runs_avail_dirty :
@@ -1002,59 +858,53 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
 
 	/* Mark pages as unallocated in the chunk map. */
 	if (dirty) {
-		chunk->map[run_ind-map_bias].bits = size | CHUNK_MAP_DIRTY;
-		chunk->map[run_ind+run_pages-1-map_bias].bits = size |
-		    CHUNK_MAP_DIRTY;
+		arena_mapbits_unallocated_set(chunk, run_ind, size,
+		    CHUNK_MAP_DIRTY);
+		arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size,
+		    CHUNK_MAP_DIRTY);
 
 		chunk->ndirty += run_pages;
 		arena->ndirty += run_pages;
 	} else {
-		chunk->map[run_ind-map_bias].bits = size |
-		    (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED);
-		chunk->map[run_ind+run_pages-1-map_bias].bits = size |
-		    (chunk->map[run_ind+run_pages-1-map_bias].bits &
-		    CHUNK_MAP_UNZEROED);
+		arena_mapbits_unallocated_set(chunk, run_ind, size,
+		    arena_mapbits_unzeroed_get(chunk, run_ind));
+		arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size,
+		    arena_mapbits_unzeroed_get(chunk, run_ind+run_pages-1));
 	}
 
 	/* Try to coalesce forward. */
 	if (run_ind + run_pages < chunk_npages &&
-	    (chunk->map[run_ind+run_pages-map_bias].bits & CHUNK_MAP_ALLOCATED)
-	    == 0 && (chunk->map[run_ind+run_pages-map_bias].bits &
-	    CHUNK_MAP_DIRTY) == flag_dirty) {
-		size_t nrun_size = chunk->map[run_ind+run_pages-map_bias].bits &
-		    ~PAGE_MASK;
-		size_t nrun_pages = nrun_size >> PAGE_SHIFT;
+	    arena_mapbits_allocated_get(chunk, run_ind+run_pages) == 0 &&
+	    arena_mapbits_dirty_get(chunk, run_ind+run_pages) == flag_dirty) {
+		size_t nrun_size = arena_mapbits_unallocated_size_get(chunk,
+		    run_ind+run_pages);
+		size_t nrun_pages = nrun_size >> LG_PAGE;
 
 		/*
 		 * Remove successor from runs_avail; the coalesced run is
 		 * inserted later.
 		 */
-		assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits
-		    & ~PAGE_MASK) == nrun_size);
-		assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits
-		    & CHUNK_MAP_ALLOCATED) == 0);
-		assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits
-		    & CHUNK_MAP_DIRTY) == flag_dirty);
+		assert(arena_mapbits_unallocated_size_get(chunk,
+		    run_ind+run_pages+nrun_pages-1) == nrun_size);
+		assert(arena_mapbits_dirty_get(chunk,
+		    run_ind+run_pages+nrun_pages-1) == flag_dirty);
 		arena_avail_tree_remove(runs_avail,
-		    &chunk->map[run_ind+run_pages-map_bias]);
+		    arena_mapp_get(chunk, run_ind+run_pages));
 
 		size += nrun_size;
 		run_pages += nrun_pages;
 
-		chunk->map[run_ind-map_bias].bits = size |
-		    (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK);
-		chunk->map[run_ind+run_pages-1-map_bias].bits = size |
-		    (chunk->map[run_ind+run_pages-1-map_bias].bits &
-		    CHUNK_MAP_FLAGS_MASK);
+		arena_mapbits_unallocated_size_set(chunk, run_ind, size);
+		arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1,
+		    size);
 	}
 
 	/* Try to coalesce backward. */
-	if (run_ind > map_bias && (chunk->map[run_ind-1-map_bias].bits &
-	    CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[run_ind-1-map_bias].bits &
-	    CHUNK_MAP_DIRTY) == flag_dirty) {
-		size_t prun_size = chunk->map[run_ind-1-map_bias].bits &
-		    ~PAGE_MASK;
-		size_t prun_pages = prun_size >> PAGE_SHIFT;
+	if (run_ind > map_bias && arena_mapbits_allocated_get(chunk, run_ind-1)
+	    == 0 && arena_mapbits_dirty_get(chunk, run_ind-1) == flag_dirty) {
+		size_t prun_size = arena_mapbits_unallocated_size_get(chunk,
+		    run_ind-1);
+		size_t prun_pages = prun_size >> LG_PAGE;
 
 		run_ind -= prun_pages;
 
@@ -1062,31 +912,26 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
 		 * Remove predecessor from runs_avail; the coalesced run is
 		 * inserted later.
 		 */
-		assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK)
-		    == prun_size);
-		assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_ALLOCATED)
-		    == 0);
-		assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY)
-		    == flag_dirty);
-		arena_avail_tree_remove(runs_avail,
-		    &chunk->map[run_ind-map_bias]);
+		assert(arena_mapbits_unallocated_size_get(chunk, run_ind) ==
+		    prun_size);
+		assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty);
+		arena_avail_tree_remove(runs_avail, arena_mapp_get(chunk,
+		    run_ind));
 
 		size += prun_size;
 		run_pages += prun_pages;
 
-		chunk->map[run_ind-map_bias].bits = size |
-		    (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK);
-		chunk->map[run_ind+run_pages-1-map_bias].bits = size |
-		    (chunk->map[run_ind+run_pages-1-map_bias].bits &
-		    CHUNK_MAP_FLAGS_MASK);
+		arena_mapbits_unallocated_size_set(chunk, run_ind, size);
+		arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1,
+		    size);
 	}
 
 	/* Insert into runs_avail, now that coalescing is complete. */
-	assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) ==
-	    (chunk->map[run_ind+run_pages-1-map_bias].bits & ~PAGE_MASK));
-	assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) ==
-	    (chunk->map[run_ind+run_pages-1-map_bias].bits & CHUNK_MAP_DIRTY));
-	arena_avail_tree_insert(runs_avail, &chunk->map[run_ind-map_bias]);
+	assert(arena_mapbits_unallocated_size_get(chunk, run_ind) ==
+	    arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1));
+	assert(arena_mapbits_dirty_get(chunk, run_ind) ==
+	    arena_mapbits_dirty_get(chunk, run_ind+run_pages-1));
+	arena_avail_tree_insert(runs_avail, arena_mapp_get(chunk, run_ind));
 
 	if (dirty) {
 		/*
@@ -1100,14 +945,12 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
 		}
 	}
 
-	/*
-	 * Deallocate chunk if it is now completely unused.  The bit
-	 * manipulation checks whether the first run is unallocated and extends
-	 * to the end of the chunk.
-	 */
-	if ((chunk->map[0].bits & (~PAGE_MASK | CHUNK_MAP_ALLOCATED)) ==
-	    arena_maxclass)
+	/* Deallocate chunk if it is now completely unused. */
+	if (size == arena_maxclass) {
+		assert(run_ind == map_bias);
+		assert(run_pages == (arena_maxclass >> LG_PAGE));
 		arena_chunk_dealloc(arena, chunk);
+	}
 
 	/*
 	 * It is okay to do dirty page processing here even if the chunk was
@@ -1124,9 +967,9 @@ static void
 arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
     size_t oldsize, size_t newsize)
 {
-	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
-	size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT;
-	size_t flag_dirty = chunk->map[pageind-map_bias].bits & CHUNK_MAP_DIRTY;
+	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
+	size_t head_npages = (oldsize - newsize) >> LG_PAGE;
+	size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind);
 
 	assert(oldsize > newsize);
 
@@ -1135,31 +978,19 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
 	 * leading run as separately allocated.  Set the last element of each
 	 * run first, in case of single-page runs.
 	 */
-	assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0);
-	assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0);
-	chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty |
-	    (chunk->map[pageind+head_npages-1-map_bias].bits &
-	    CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
-	chunk->map[pageind-map_bias].bits = (oldsize - newsize)
-	    | flag_dirty | (chunk->map[pageind-map_bias].bits &
-	    CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
-
-#ifdef JEMALLOC_DEBUG
-	{
-		size_t tail_npages = newsize >> PAGE_SHIFT;
-		assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias]
-		    .bits & ~PAGE_MASK) == 0);
-		assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias]
-		    .bits & CHUNK_MAP_DIRTY) == flag_dirty);
-		assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias]
-		    .bits & CHUNK_MAP_LARGE) != 0);
-		assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias]
-		    .bits & CHUNK_MAP_ALLOCATED) != 0);
+	assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize);
+	arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty);
+	arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty);
+
+	if (config_debug) {
+		UNUSED size_t tail_npages = newsize >> LG_PAGE;
+		assert(arena_mapbits_large_size_get(chunk,
+		    pageind+head_npages+tail_npages-1) == 0);
+		assert(arena_mapbits_dirty_get(chunk,
+		    pageind+head_npages+tail_npages-1) == flag_dirty);
 	}
-#endif
-	chunk->map[pageind+head_npages-map_bias].bits = newsize | flag_dirty |
-	    (chunk->map[pageind+head_npages-map_bias].bits &
-	    CHUNK_MAP_FLAGS_MASK) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+	arena_mapbits_large_set(chunk, pageind+head_npages, newsize,
+	    flag_dirty);
 
 	arena_run_dalloc(arena, run, false);
 }
@@ -1168,11 +999,9 @@ static void
 arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
     size_t oldsize, size_t newsize, bool dirty)
 {
-	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
-	size_t head_npages = newsize >> PAGE_SHIFT;
-	size_t tail_npages = (oldsize - newsize) >> PAGE_SHIFT;
-	size_t flag_dirty = chunk->map[pageind-map_bias].bits &
-	    CHUNK_MAP_DIRTY;
+	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
+	size_t head_npages = newsize >> LG_PAGE;
+	size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind);
 
 	assert(oldsize > newsize);
 
@@ -1181,61 +1010,92 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
 	 * trailing run as separately allocated.  Set the last element of each
 	 * run first, in case of single-page runs.
 	 */
-	assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0);
-	assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0);
-	chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty |
-	    (chunk->map[pageind+head_npages-1-map_bias].bits &
-	    CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
-	chunk->map[pageind-map_bias].bits = newsize | flag_dirty |
-	    (chunk->map[pageind-map_bias].bits & CHUNK_MAP_UNZEROED) |
-	    CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
-
-	assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits &
-	    ~PAGE_MASK) == 0);
-	assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits &
-	    CHUNK_MAP_LARGE) != 0);
-	assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits &
-	    CHUNK_MAP_ALLOCATED) != 0);
-	chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits =
-	    flag_dirty |
-	    (chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits &
-	    CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
-	chunk->map[pageind+head_npages-map_bias].bits = (oldsize - newsize) |
-	    flag_dirty | (chunk->map[pageind+head_npages-map_bias].bits &
-	    CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
+	assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize);
+	arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty);
+	arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty);
+
+	if (config_debug) {
+		UNUSED size_t tail_npages = (oldsize - newsize) >> LG_PAGE;
+		assert(arena_mapbits_large_size_get(chunk,
+		    pageind+head_npages+tail_npages-1) == 0);
+		assert(arena_mapbits_dirty_get(chunk,
+		    pageind+head_npages+tail_npages-1) == flag_dirty);
+	}
+	arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize,
+	    flag_dirty);
 
 	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
 	    dirty);
 }
 
 static arena_run_t *
-arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
+arena_bin_runs_first(arena_bin_t *bin)
 {
-	arena_chunk_map_t *mapelm;
-	arena_run_t *run;
-	size_t binind;
-	arena_bin_info_t *bin_info;
-
-	/* Look for a usable run. */
-	mapelm = arena_run_tree_first(&bin->runs);
+	arena_chunk_map_t *mapelm = arena_run_tree_first(&bin->runs);
 	if (mapelm != NULL) {
 		arena_chunk_t *chunk;
 		size_t pageind;
-
-		/* run is guaranteed to have available space. */
-		arena_run_tree_remove(&bin->runs, mapelm);
+		arena_run_t *run;	/* Derived from mapelm's page index. */
 
 		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm);
 		pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) /
 		    sizeof(arena_chunk_map_t))) + map_bias;
 		run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
-		    (mapelm->bits >> PAGE_SHIFT))
-		    << PAGE_SHIFT));
-#ifdef JEMALLOC_STATS
-		bin->stats.reruns++;
-#endif
+		    arena_mapbits_small_runind_get(chunk, pageind)) <<
+		    LG_PAGE));
 		return (run);
 	}
+
+	return (NULL);	/* bin->runs has no first element. */
+}
+
+static void
+arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run)
+{
+	arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
+	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
+	arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind);
+
+	assert(arena_run_tree_search(&bin->runs, mapelm) == NULL);
+	/* Link the map element for run's first page into bin->runs. */
+	arena_run_tree_insert(&bin->runs, mapelm);
+}
+
+static void
+arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run)
+{
+	arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
+	size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE;
+	arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind);
+
+	assert(arena_run_tree_search(&bin->runs, mapelm) != NULL);
+	/* Unlink the map element for run's first page from bin->runs. */
+	arena_run_tree_remove(&bin->runs, mapelm);
+}
+
+static arena_run_t *
+arena_bin_nonfull_run_tryget(arena_bin_t *bin)
+{
+	arena_run_t *run = arena_bin_runs_first(bin);
+	if (run != NULL) {	/* NULL when bin->runs is empty. */
+		arena_bin_runs_remove(bin, run);
+		if (config_stats)
+			bin->stats.reruns++;
+	}
+	return (run);	/* Already removed from bin->runs, or NULL. */
+}
+
+static arena_run_t *
+arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
+{
+	arena_run_t *run;
+	size_t binind;
+	arena_bin_info_t *bin_info;
+
+	/* Look for a usable run. */
+	run = arena_bin_nonfull_run_tryget(bin);
+	if (run != NULL)
+		return (run);
 	/* No existing runs have any space available. */
 
 	binind = arena_bin_index(arena, bin);
@@ -1245,30 +1105,27 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
 	malloc_mutex_unlock(&bin->lock);
 	/******************************/
 	malloc_mutex_lock(&arena->lock);
-	run = arena_run_alloc(arena, bin_info->run_size, false, false);
+	run = arena_run_alloc(arena, bin_info->run_size, false, binind, false);
 	if (run != NULL) {
 		bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run +
 		    (uintptr_t)bin_info->bitmap_offset);
 
 		/* Initialize run internals. */
+		VALGRIND_MAKE_MEM_UNDEFINED(run, bin_info->reg0_offset -
+		    bin_info->redzone_size);
 		run->bin = bin;
 		run->nextind = 0;
 		run->nfree = bin_info->nregs;
 		bitmap_init(bitmap, &bin_info->bitmap_info);
-#ifdef JEMALLOC_DEBUG
-		run->magic = ARENA_RUN_MAGIC;
-#endif
 	}
 	malloc_mutex_unlock(&arena->lock);
 	/********************************/
 	malloc_mutex_lock(&bin->lock);
 	if (run != NULL) {
-#ifdef JEMALLOC_STATS
-		bin->stats.nruns++;
-		bin->stats.curruns++;
-		if (bin->stats.curruns > bin->stats.highruns)
-			bin->stats.highruns = bin->stats.curruns;
-#endif
+		if (config_stats) {
+			bin->stats.nruns++;
+			bin->stats.curruns++;
+		}
 		return (run);
 	}
 
@@ -1277,25 +1134,9 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
 	 * sufficient memory available while this one dropped bin->lock above,
 	 * so search one more time.
 	 */
-	mapelm = arena_run_tree_first(&bin->runs);
-	if (mapelm != NULL) {
-		arena_chunk_t *chunk;
-		size_t pageind;
-
-		/* run is guaranteed to have available space. */
-		arena_run_tree_remove(&bin->runs, mapelm);
-
-		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm);
-		pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) /
-		    sizeof(arena_chunk_map_t))) + map_bias;
-		run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
-		    (mapelm->bits >> PAGE_SHIFT))
-		    << PAGE_SHIFT));
-#ifdef JEMALLOC_STATS
-		bin->stats.reruns++;
-#endif
+	run = arena_bin_nonfull_run_tryget(bin);
+	if (run != NULL)
 		return (run);
-	}
 
 	return (NULL);
 }
@@ -1318,7 +1159,6 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
 		 * Another thread updated runcur while this one ran without the
 		 * bin lock in arena_bin_nonfull_run_get().
 		 */
-		dassert(bin->runcur->magic == ARENA_RUN_MAGIC);
 		assert(bin->runcur->nfree > 0);
 		ret = arena_run_reg_alloc(bin->runcur, bin_info);
 		if (run != NULL) {
@@ -1346,18 +1186,18 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
 
 	bin->runcur = run;
 
-	dassert(bin->runcur->magic == ARENA_RUN_MAGIC);
 	assert(bin->runcur->nfree > 0);
 
 	return (arena_run_reg_alloc(bin->runcur, bin_info));
 }
 
-#ifdef JEMALLOC_PROF
 void
 arena_prof_accum(arena_t *arena, uint64_t accumbytes)
 {
 
-	if (prof_interval != 0) {
+	cassert(config_prof);
+
+	if (config_prof && prof_interval != 0) {
 		arena->prof_accumbytes += accumbytes;
 		if (arena->prof_accumbytes >= prof_interval) {
 			prof_idump();
@@ -1365,15 +1205,10 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes)
 		}
 	}
 }
-#endif
 
-#ifdef JEMALLOC_TCACHE
 void
-arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind
-#  ifdef JEMALLOC_PROF
-    , uint64_t prof_accumbytes
-#  endif
-    )
+arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind,
+    uint64_t prof_accumbytes)
 {
 	unsigned i, nfill;
 	arena_bin_t *bin;
@@ -1382,11 +1217,11 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind
 
 	assert(tbin->ncached == 0);
 
-#ifdef JEMALLOC_PROF
-	malloc_mutex_lock(&arena->lock);
-	arena_prof_accum(arena, prof_accumbytes);
-	malloc_mutex_unlock(&arena->lock);
-#endif
+	if (config_prof) {
+		malloc_mutex_lock(&arena->lock);
+		arena_prof_accum(arena, prof_accumbytes);
+		malloc_mutex_unlock(&arena->lock);
+	}
 	bin = &arena->bins[binind];
 	malloc_mutex_lock(&bin->lock);
 	for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >>
@@ -1397,20 +1232,72 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind
 			ptr = arena_bin_malloc_hard(arena, bin);
 		if (ptr == NULL)
 			break;
+		if (config_fill && opt_junk) {
+			arena_alloc_junk_small(ptr, &arena_bin_info[binind],
+			    true);
+		}
 		/* Insert such that low regions get used first. */
 		tbin->avail[nfill - 1 - i] = ptr;
 	}
-#ifdef JEMALLOC_STATS
-	bin->stats.allocated += i * arena_bin_info[binind].reg_size;
-	bin->stats.nmalloc += i;
-	bin->stats.nrequests += tbin->tstats.nrequests;
-	bin->stats.nfills++;
-	tbin->tstats.nrequests = 0;
-#endif
+	if (config_stats) {
+		bin->stats.allocated += i * arena_bin_info[binind].reg_size;
+		bin->stats.nmalloc += i;
+		bin->stats.nrequests += tbin->tstats.nrequests;
+		bin->stats.nfills++;
+		tbin->tstats.nrequests = 0;
+	}
 	malloc_mutex_unlock(&bin->lock);
 	tbin->ncached = i;
 }
-#endif
+
+void
+arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero)
+{
+	/* Write the 0xa5 junk pattern around (and, if !zero, over) ptr. */
+	if (zero) {	/* Only the redzones before and after the region. */
+		size_t redzone_size = bin_info->redzone_size;
+		memset((void *)((uintptr_t)ptr - redzone_size), 0xa5,
+		    redzone_size);
+		memset((void *)((uintptr_t)ptr + bin_info->reg_size), 0xa5,
+		    redzone_size);
+	} else {	/* reg_interval bytes, starting at the leading redzone. */
+		memset((void *)((uintptr_t)ptr - bin_info->redzone_size), 0xa5,
+		    bin_info->reg_interval);
+	}
+}
+
+void
+arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info)
+{
+	size_t size = bin_info->reg_size;
+	size_t redzone_size = bin_info->redzone_size;
+	size_t i;
+	bool error = false;
+
+	for (i = 1; i <= redzone_size; i++) {	/* Bytes before ptr. */
+		unsigned byte;
+		if ((byte = *(uint8_t *)((uintptr_t)ptr - i)) != 0xa5) {
+			error = true;
+			malloc_printf("<jemalloc>: Corrupt redzone "
+			    "%zu byte%s before %p (size %zu), byte=%#x\n", i,
+			    (i == 1) ? "" : "s", ptr, size, byte);
+		}
+	}
+	for (i = 0; i < redzone_size; i++) {	/* Bytes after ptr+size. */
+		unsigned byte;
+		if ((byte = *(uint8_t *)((uintptr_t)ptr + size + i)) != 0xa5) {
+			error = true;
+			malloc_printf("<jemalloc>: Corrupt redzone "
+			    "%zu byte%s after end of %p (size %zu), byte=%#x\n",
+			    i, (i == 1) ? "" : "s", ptr, size, byte);
+		}
+	}
+	if (opt_abort && error)	/* Otherwise corruption is only reported. */
+		abort();
+	/* Refill reg_interval bytes from the leading redzone with 0x5a. */
+	memset((void *)((uintptr_t)ptr - redzone_size), 0x5a,
+	    bin_info->reg_interval);
+}
 
 void *
 arena_malloc_small(arena_t *arena, size_t size, bool zero)
@@ -1421,7 +1308,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
 	size_t binind;
 
 	binind = SMALL_SIZE2BIN(size);
-	assert(binind < nbins);
+	assert(binind < NBINS);
 	bin = &arena->bins[binind];
 	size = arena_bin_info[binind].reg_size;
 
@@ -1436,29 +1323,34 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
 		return (NULL);
 	}
 
-#ifdef JEMALLOC_STATS
-	bin->stats.allocated += size;
-	bin->stats.nmalloc++;
-	bin->stats.nrequests++;
-#endif
+	if (config_stats) {
+		bin->stats.allocated += size;
+		bin->stats.nmalloc++;
+		bin->stats.nrequests++;
+	}
 	malloc_mutex_unlock(&bin->lock);
-#ifdef JEMALLOC_PROF
-	if (isthreaded == false) {
+	if (config_prof && isthreaded == false) {
 		malloc_mutex_lock(&arena->lock);
 		arena_prof_accum(arena, size);
 		malloc_mutex_unlock(&arena->lock);
 	}
-#endif
 
 	if (zero == false) {
-#ifdef JEMALLOC_FILL
-		if (opt_junk)
-			memset(ret, 0xa5, size);
-		else if (opt_zero)
-			memset(ret, 0, size);
-#endif
-	} else
+		if (config_fill) {
+			if (opt_junk) {
+				arena_alloc_junk_small(ret,
+				    &arena_bin_info[binind], false);
+			} else if (opt_zero)
+				memset(ret, 0, size);
+		}
+	} else {
+		if (config_fill && opt_junk) {
+			arena_alloc_junk_small(ret, &arena_bin_info[binind],
+			    true);
+		}
+		VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
 		memset(ret, 0, size);
+	}
 
 	return (ret);
 }
@@ -1471,243 +1363,112 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero)
 	/* Large allocation. */
 	size = PAGE_CEILING(size);
 	malloc_mutex_lock(&arena->lock);
-	ret = (void *)arena_run_alloc(arena, size, true, zero);
+	ret = (void *)arena_run_alloc(arena, size, true, BININD_INVALID, zero);
 	if (ret == NULL) {
 		malloc_mutex_unlock(&arena->lock);
 		return (NULL);
 	}
-#ifdef JEMALLOC_STATS
-	arena->stats.nmalloc_large++;
-	arena->stats.nrequests_large++;
-	arena->stats.allocated_large += size;
-	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++;
-	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
-	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
-	if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
-	    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
-		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
-		    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
+	if (config_stats) {
+		arena->stats.nmalloc_large++;
+		arena->stats.nrequests_large++;
+		arena->stats.allocated_large += size;
+		arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
+		arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
+		arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
 	}
-#endif
-#ifdef JEMALLOC_PROF
-	arena_prof_accum(arena, size);
-#endif
+	if (config_prof)
+		arena_prof_accum(arena, size);
 	malloc_mutex_unlock(&arena->lock);
 
 	if (zero == false) {
-#ifdef JEMALLOC_FILL
-		if (opt_junk)
-			memset(ret, 0xa5, size);
-		else if (opt_zero)
-			memset(ret, 0, size);
-#endif
+		if (config_fill) {
+			if (opt_junk)
+				memset(ret, 0xa5, size);
+			else if (opt_zero)
+				memset(ret, 0, size);
+		}
 	}
 
 	return (ret);
 }
 
-void *
-arena_malloc(size_t size, bool zero)
-{
-
-	assert(size != 0);
-	assert(QUANTUM_CEILING(size) <= arena_maxclass);
-
-	if (size <= small_maxclass) {
-#ifdef JEMALLOC_TCACHE
-		tcache_t *tcache;
-
-		if ((tcache = tcache_get()) != NULL)
-			return (tcache_alloc_small(tcache, size, zero));
-		else
-
-#endif
-			return (arena_malloc_small(choose_arena(), size, zero));
-	} else {
-#ifdef JEMALLOC_TCACHE
-		if (size <= tcache_maxclass) {
-			tcache_t *tcache;
-
-			if ((tcache = tcache_get()) != NULL)
-				return (tcache_alloc_large(tcache, size, zero));
-			else {
-				return (arena_malloc_large(choose_arena(),
-				    size, zero));
-			}
-		} else
-#endif
-			return (arena_malloc_large(choose_arena(), size, zero));
-	}
-}
-
 /* Only handles large allocations that require more than page alignment. */
 void *
-arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment,
-    bool zero)
+arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero)
 {
 	void *ret;
-	size_t offset;
+	size_t alloc_size, leadsize, trailsize;
+	arena_run_t *run;
 	arena_chunk_t *chunk;
 
 	assert((size & PAGE_MASK) == 0);
 
 	alignment = PAGE_CEILING(alignment);
+	alloc_size = size + alignment - PAGE;	/* Slack for aligning. */
 
 	malloc_mutex_lock(&arena->lock);
-	ret = (void *)arena_run_alloc(arena, alloc_size, true, zero);
-	if (ret == NULL) {
+	run = arena_run_alloc(arena, alloc_size, true, BININD_INVALID, zero);
+	if (run == NULL) {
 		malloc_mutex_unlock(&arena->lock);
 		return (NULL);
 	}
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
 
-	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
-
-	offset = (uintptr_t)ret & (alignment - 1);
-	assert((offset & PAGE_MASK) == 0);
-	assert(offset < alloc_size);
-	if (offset == 0)
-		arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false);
-	else {
-		size_t leadsize, trailsize;
-
-		leadsize = alignment - offset;
-		if (leadsize > 0) {
-			arena_run_trim_head(arena, chunk, ret, alloc_size,
-			    alloc_size - leadsize);
-			ret = (void *)((uintptr_t)ret + leadsize);
-		}
-
-		trailsize = alloc_size - leadsize - size;
-		if (trailsize != 0) {
-			/* Trim trailing space. */
-			assert(trailsize < alloc_size);
-			arena_run_trim_tail(arena, chunk, ret, size + trailsize,
-			    size, false);
-		}
+	leadsize = ALIGNMENT_CEILING((uintptr_t)run, alignment) -
+	    (uintptr_t)run;
+	assert(alloc_size >= leadsize + size);
+	trailsize = alloc_size - leadsize - size;
+	ret = (void *)((uintptr_t)run + leadsize);	/* Aligned start. */
+	if (leadsize != 0) {	/* Release the pages before ret. */
+		arena_run_trim_head(arena, chunk, run, alloc_size, alloc_size -
+		    leadsize);
+	}
+	if (trailsize != 0) {	/* Release the pages past ret+size. */
+		arena_run_trim_tail(arena, chunk, ret, size + trailsize, size,
+		    false);
 	}
 
-#ifdef JEMALLOC_STATS
-	arena->stats.nmalloc_large++;
-	arena->stats.nrequests_large++;
-	arena->stats.allocated_large += size;
-	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++;
-	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
-	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
-	if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
-	    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
-		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
-		    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
+	if (config_stats) {
+		arena->stats.nmalloc_large++;
+		arena->stats.nrequests_large++;
+		arena->stats.allocated_large += size;
+		arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
+		arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
+		arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
 	}
-#endif
 	malloc_mutex_unlock(&arena->lock);
 
-#ifdef JEMALLOC_FILL
-	if (zero == false) {
+	if (config_fill && zero == false) {
 		if (opt_junk)
 			memset(ret, 0xa5, size);
 		else if (opt_zero)
 			memset(ret, 0, size);
 	}
-#endif
 	return (ret);
 }
 
-/* Return the size of the allocation pointed to by ptr. */
-size_t
-arena_salloc(const void *ptr)
-{
-	size_t ret;
-	arena_chunk_t *chunk;
-	size_t pageind, mapbits;
-
-	assert(ptr != NULL);
-	assert(CHUNK_ADDR2BASE(ptr) != ptr);
-
-	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
-	mapbits = chunk->map[pageind-map_bias].bits;
-	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
-	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
-		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
-		    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
-		    PAGE_SHIFT));
-		dassert(run->magic == ARENA_RUN_MAGIC);
-		size_t binind = arena_bin_index(chunk->arena, run->bin);
-		arena_bin_info_t *bin_info = &arena_bin_info[binind];
-		assert(((uintptr_t)ptr - ((uintptr_t)run +
-		    (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size ==
-		    0);
-		ret = bin_info->reg_size;
-	} else {
-		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
-		ret = mapbits & ~PAGE_MASK;
-		assert(ret != 0);
-	}
-
-	return (ret);
-}
-
-#ifdef JEMALLOC_PROF
 void
 arena_prof_promoted(const void *ptr, size_t size)
 {
 	arena_chunk_t *chunk;
 	size_t pageind, binind;
 
+	cassert(config_prof);
 	assert(ptr != NULL);
 	assert(CHUNK_ADDR2BASE(ptr) != ptr);
-	assert(isalloc(ptr) == PAGE_SIZE);
-	assert(size <= small_maxclass);
+	assert(isalloc(ptr, false) == PAGE);
+	assert(isalloc(ptr, true) == PAGE);
+	assert(size <= SMALL_MAXCLASS);
 
 	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 	binind = SMALL_SIZE2BIN(size);
-	assert(binind < nbins);
-	chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits &
-	    ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT);
-}
-
-size_t
-arena_salloc_demote(const void *ptr)
-{
-	size_t ret;
-	arena_chunk_t *chunk;
-	size_t pageind, mapbits;
-
-	assert(ptr != NULL);
-	assert(CHUNK_ADDR2BASE(ptr) != ptr);
-
-	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
-	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
-	mapbits = chunk->map[pageind-map_bias].bits;
-	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
-	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
-		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
-		    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
-		    PAGE_SHIFT));
-		dassert(run->magic == ARENA_RUN_MAGIC);
-		size_t binind = arena_bin_index(chunk->arena, run->bin);
-		arena_bin_info_t *bin_info = &arena_bin_info[binind];
-		assert(((uintptr_t)ptr - ((uintptr_t)run +
-		    (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size ==
-		    0);
-		ret = bin_info->reg_size;
-	} else {
-		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
-		ret = mapbits & ~PAGE_MASK;
-		if (prof_promote && ret == PAGE_SIZE && (mapbits &
-		    CHUNK_MAP_CLASS_MASK) != 0) {
-			size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >>
-			    CHUNK_MAP_CLASS_SHIFT) - 1;
-			assert(binind < nbins);
-			ret = arena_bin_info[binind].reg_size;
-		}
-		assert(ret != 0);
-	}
+	assert(binind < NBINS);
+	arena_mapbits_large_binind_set(chunk, pageind, binind);
 
-	return (ret);
+	assert(isalloc(ptr, false) == PAGE);	/* Same as before the set. */
+	assert(isalloc(ptr, true) == size);	/* Was PAGE before the set. */
 }
-#endif
 
 static void
 arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
@@ -1722,16 +1483,12 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
 		arena_bin_info_t *bin_info = &arena_bin_info[binind];
 
 		if (bin_info->nregs != 1) {
-			size_t run_pageind = (((uintptr_t)run -
-			    (uintptr_t)chunk)) >> PAGE_SHIFT;
-			arena_chunk_map_t *run_mapelm =
-			    &chunk->map[run_pageind-map_bias];
 			/*
 			 * This block's conditional is necessary because if the
 			 * run only contains one region, then it never gets
 			 * inserted into the non-full runs tree.
 			 */
-			arena_run_tree_remove(&bin->runs, run_mapelm);
+			arena_bin_runs_remove(bin, run);
 		}
 	}
 }
@@ -1745,19 +1502,21 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
 	size_t npages, run_ind, past;
 
 	assert(run != bin->runcur);
-	assert(arena_run_tree_search(&bin->runs, &chunk->map[
-	    (((uintptr_t)run-(uintptr_t)chunk)>>PAGE_SHIFT)-map_bias]) == NULL);
+	assert(arena_run_tree_search(&bin->runs,
+	    arena_mapp_get(chunk, ((uintptr_t)run-(uintptr_t)chunk)>>LG_PAGE))
+	    == NULL);
 
 	binind = arena_bin_index(chunk->arena, run->bin);
 	bin_info = &arena_bin_info[binind];
 
 	malloc_mutex_unlock(&bin->lock);
 	/******************************/
-	npages = bin_info->run_size >> PAGE_SHIFT;
-	run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT);
+	npages = bin_info->run_size >> LG_PAGE;
+	run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> LG_PAGE);
 	past = (size_t)(PAGE_CEILING((uintptr_t)run +
 	    (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind *
-	    bin_info->reg_size) - (uintptr_t)chunk) >> PAGE_SHIFT);
+	    bin_info->reg_interval - bin_info->redzone_size) -
+	    (uintptr_t)chunk) >> LG_PAGE);
 	malloc_mutex_lock(&arena->lock);
 
 	/*
@@ -1765,32 +1524,24 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
 	 * trim the clean pages before deallocating the dirty portion of the
 	 * run.
 	 */
-	if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == 0 && past
-	    - run_ind < npages) {
-		/*
-		 * Trim clean pages.  Convert to large run beforehand.  Set the
-		 * last map element first, in case this is a one-page run.
-		 */
-		chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE |
-		    (chunk->map[run_ind+npages-1-map_bias].bits &
-		    CHUNK_MAP_FLAGS_MASK);
-		chunk->map[run_ind-map_bias].bits = bin_info->run_size |
-		    CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits &
-		    CHUNK_MAP_FLAGS_MASK);
-		arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT),
-		    ((past - run_ind) << PAGE_SHIFT), false);
+	assert(arena_mapbits_dirty_get(chunk, run_ind) ==
+	    arena_mapbits_dirty_get(chunk, run_ind+npages-1));
+	if (arena_mapbits_dirty_get(chunk, run_ind) == 0 && past - run_ind <
+	    npages) {
+		/* Trim clean pages.  Convert to large run beforehand. */
+		assert(npages > 0);
+		arena_mapbits_large_set(chunk, run_ind, bin_info->run_size, 0);
+		arena_mapbits_large_set(chunk, run_ind+npages-1, 0, 0);
+		arena_run_trim_tail(arena, chunk, run, (npages << LG_PAGE),
+		    ((past - run_ind) << LG_PAGE), false);
 		/* npages = past - run_ind; */
 	}
-#ifdef JEMALLOC_DEBUG
-	run->magic = 0;
-#endif
 	arena_run_dalloc(arena, run, true);
 	malloc_mutex_unlock(&arena->lock);
 	/****************************/
 	malloc_mutex_lock(&bin->lock);
-#ifdef JEMALLOC_STATS
-	bin->stats.curruns--;
-#endif
+	if (config_stats)
+		bin->stats.curruns--;
 }
 
 static void
@@ -1799,62 +1550,42 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
 {
 
 	/*
-	 * Make sure that bin->runcur always refers to the lowest non-full run,
-	 * if one exists.
+	 * Make sure that if bin->runcur is non-NULL, it refers to the lowest
+	 * non-full run.  It is okay to NULL runcur out rather than proactively
+	 * keeping it pointing at the lowest non-full run.
 	 */
-	if (bin->runcur == NULL)
-		bin->runcur = run;
-	else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
+	if ((uintptr_t)run < (uintptr_t)bin->runcur) {
 		/* Switch runcur. */
-		if (bin->runcur->nfree > 0) {
-			arena_chunk_t *runcur_chunk =
-			    CHUNK_ADDR2BASE(bin->runcur);
-			size_t runcur_pageind = (((uintptr_t)bin->runcur -
-			    (uintptr_t)runcur_chunk)) >> PAGE_SHIFT;
-			arena_chunk_map_t *runcur_mapelm =
-			    &runcur_chunk->map[runcur_pageind-map_bias];
-
-			/* Insert runcur. */
-			arena_run_tree_insert(&bin->runs, runcur_mapelm);
-		}
+		if (bin->runcur->nfree > 0)
+			arena_bin_runs_insert(bin, bin->runcur);
 		bin->runcur = run;
-	} else {
-		size_t run_pageind = (((uintptr_t)run -
-		    (uintptr_t)chunk)) >> PAGE_SHIFT;
-		arena_chunk_map_t *run_mapelm =
-		    &chunk->map[run_pageind-map_bias];
-
-		assert(arena_run_tree_search(&bin->runs, run_mapelm) == NULL);
-		arena_run_tree_insert(&bin->runs, run_mapelm);
-	}
+		if (config_stats)
+			bin->stats.reruns++;
+	} else
+		arena_bin_runs_insert(bin, run);
 }
 
 void
-arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+arena_dalloc_bin_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr,
     arena_chunk_map_t *mapelm)
 {
 	size_t pageind;
 	arena_run_t *run;
 	arena_bin_t *bin;
-#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS))
-	size_t size;
-#endif
+	arena_bin_info_t *bin_info;
+	size_t size, binind;
 
-	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
 	run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
-	    (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT));
-	dassert(run->magic == ARENA_RUN_MAGIC);
+	    arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE));
 	bin = run->bin;
-	size_t binind = arena_bin_index(arena, bin);
-	arena_bin_info_t *bin_info = &arena_bin_info[binind];
-#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS))
-	size = bin_info->reg_size;
-#endif
+	binind = arena_ptr_small_binind_get(ptr, mapelm->bits);
+	bin_info = &arena_bin_info[binind];
+	if (config_fill || config_stats)
+		size = bin_info->reg_size;
 
-#ifdef JEMALLOC_FILL
-	if (opt_junk)
-		memset(ptr, 0x5a, size);
-#endif
+	if (config_fill && opt_junk)
+		arena_dalloc_junk_small(ptr, bin_info);
 
 	arena_run_reg_dalloc(run, ptr);
 	if (run->nfree == bin_info->nregs) {
@@ -1863,13 +1594,41 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
 	} else if (run->nfree == 1 && run != bin->runcur)
 		arena_bin_lower_run(arena, chunk, run, bin);
 
-#ifdef JEMALLOC_STATS
-	bin->stats.allocated -= size;
-	bin->stats.ndalloc++;
-#endif
+	if (config_stats) {
+		bin->stats.allocated -= size;
+		bin->stats.ndalloc++;
+	}
 }
 
-#ifdef JEMALLOC_STATS
+void
+arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+    size_t pageind, arena_chunk_map_t *mapelm)
+{
+	arena_run_t *run;
+	arena_bin_t *bin;
+
+	run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
+	    arena_mapbits_small_runind_get(chunk, pageind)) << LG_PAGE));
+	bin = run->bin;
+	malloc_mutex_lock(&bin->lock);
+	arena_dalloc_bin_locked(arena, chunk, ptr, mapelm);
+	malloc_mutex_unlock(&bin->lock);
+}
+
+void
+arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
+    size_t pageind)
+{
+	arena_chunk_map_t *mapelm;
+
+	if (config_debug) {
+		/* arena_ptr_small_binind_get() does extra sanity checking. */
+		assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk,
+		    pageind)) != BININD_INVALID);
+	}
+	mapelm = arena_mapp_get(chunk, pageind);
+	arena_dalloc_bin(arena, chunk, ptr, pageind, mapelm);
+}
 void
 arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
     arena_stats_t *astats, malloc_bin_stats_t *bstats,
@@ -1894,12 +1653,11 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
 		lstats[i].nmalloc += arena->stats.lstats[i].nmalloc;
 		lstats[i].ndalloc += arena->stats.lstats[i].ndalloc;
 		lstats[i].nrequests += arena->stats.lstats[i].nrequests;
-		lstats[i].highruns += arena->stats.lstats[i].highruns;
 		lstats[i].curruns += arena->stats.lstats[i].curruns;
 	}
 	malloc_mutex_unlock(&arena->lock);
 
-	for (i = 0; i < nbins; i++) {
+	for (i = 0; i < NBINS; i++) {
 		arena_bin_t *bin = &arena->bins[i];
 
 		malloc_mutex_lock(&bin->lock);
@@ -1907,53 +1665,47 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
 		bstats[i].nmalloc += bin->stats.nmalloc;
 		bstats[i].ndalloc += bin->stats.ndalloc;
 		bstats[i].nrequests += bin->stats.nrequests;
-#ifdef JEMALLOC_TCACHE
-		bstats[i].nfills += bin->stats.nfills;
-		bstats[i].nflushes += bin->stats.nflushes;
-#endif
+		if (config_tcache) {
+			bstats[i].nfills += bin->stats.nfills;
+			bstats[i].nflushes += bin->stats.nflushes;
+		}
 		bstats[i].nruns += bin->stats.nruns;
 		bstats[i].reruns += bin->stats.reruns;
-		bstats[i].highruns += bin->stats.highruns;
 		bstats[i].curruns += bin->stats.curruns;
 		malloc_mutex_unlock(&bin->lock);
 	}
 }
-#endif
 
 void
-arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
+arena_dalloc_large_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr)
 {
 
-	/* Large allocation. */
-#ifdef JEMALLOC_FILL
-#  ifndef JEMALLOC_STATS
-	if (opt_junk)
-#  endif
-#endif
-	{
-#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS))
-		size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
-		    PAGE_SHIFT;
-		size_t size = chunk->map[pageind-map_bias].bits & ~PAGE_MASK;
-#endif
+	if (config_fill || config_stats) {
+		size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+		size_t size = arena_mapbits_large_size_get(chunk, pageind);
 
-#ifdef JEMALLOC_FILL
-#  ifdef JEMALLOC_STATS
-		if (opt_junk)
-#  endif
+		if (config_fill && config_stats && opt_junk)
 			memset(ptr, 0x5a, size);
-#endif
-#ifdef JEMALLOC_STATS
-		arena->stats.ndalloc_large++;
-		arena->stats.allocated_large -= size;
-		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].ndalloc++;
-		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns--;
-#endif
+		if (config_stats) {
+			arena->stats.ndalloc_large++;
+			arena->stats.allocated_large -= size;
+			arena->stats.lstats[(size >> LG_PAGE) - 1].ndalloc++;
+			arena->stats.lstats[(size >> LG_PAGE) - 1].curruns--;
+		}
 	}
 
 	arena_run_dalloc(arena, (arena_run_t *)ptr, true);
 }
 
+void
+arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
+{
+
+	malloc_mutex_lock(&arena->lock);
+	arena_dalloc_large_locked(arena, chunk, ptr);
+	malloc_mutex_unlock(&arena->lock);
+}
+
 static void
 arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
     size_t oldsize, size_t size)
@@ -1968,24 +1720,19 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
 	malloc_mutex_lock(&arena->lock);
 	arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
 	    true);
-#ifdef JEMALLOC_STATS
-	arena->stats.ndalloc_large++;
-	arena->stats.allocated_large -= oldsize;
-	arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++;
-	arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
-
-	arena->stats.nmalloc_large++;
-	arena->stats.nrequests_large++;
-	arena->stats.allocated_large += size;
-	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++;
-	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
-	arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
-	if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
-	    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
-		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
-		    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
+	if (config_stats) {
+		arena->stats.ndalloc_large++;
+		arena->stats.allocated_large -= oldsize;
+		arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++;
+		arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--;
+
+		arena->stats.nmalloc_large++;
+		arena->stats.nrequests_large++;
+		arena->stats.allocated_large += size;
+		arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
+		arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
+		arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
 	}
-#endif
 	malloc_mutex_unlock(&arena->lock);
 }
 
@@ -1993,20 +1740,19 @@ static bool
 arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
     size_t oldsize, size_t size, size_t extra, bool zero)
 {
-	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
-	size_t npages = oldsize >> PAGE_SHIFT;
+	size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE;
+	size_t npages = oldsize >> LG_PAGE;
 	size_t followsize;
 
-	assert(oldsize == (chunk->map[pageind-map_bias].bits & ~PAGE_MASK));
+	assert(oldsize == arena_mapbits_large_size_get(chunk, pageind));
 
 	/* Try to extend the run. */
 	assert(size + extra > oldsize);
 	malloc_mutex_lock(&arena->lock);
 	if (pageind + npages < chunk_npages &&
-	    (chunk->map[pageind+npages-map_bias].bits
-	    & CHUNK_MAP_ALLOCATED) == 0 && (followsize =
-	    chunk->map[pageind+npages-map_bias].bits & ~PAGE_MASK) >= size -
-	    oldsize) {
+	    arena_mapbits_allocated_get(chunk, pageind+npages) == 0 &&
+	    (followsize = arena_mapbits_unallocated_size_get(chunk,
+	    pageind+npages)) >= size - oldsize) {
 		/*
 		 * The next run is available and sufficiently large.  Split the
 		 * following run, then merge the first part with the existing
@@ -2016,10 +1762,11 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
 		size_t splitsize = (oldsize + followsize <= size + extra)
 		    ? followsize : size + extra - oldsize;
 		arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk +
-		    ((pageind+npages) << PAGE_SHIFT)), splitsize, true, zero);
+		    ((pageind+npages) << LG_PAGE)), splitsize, true,
+		    BININD_INVALID, zero);
 
 		size = oldsize + splitsize;
-		npages = size >> PAGE_SHIFT;
+		npages = size >> LG_PAGE;
 
 		/*
 		 * Mark the extended run as dirty if either portion of the run
@@ -2029,34 +1776,24 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
 		 * arena_run_dalloc() with the dirty argument set to false
 		 * (which is when dirty flag consistency would really matter).
 		 */
-		flag_dirty = (chunk->map[pageind-map_bias].bits &
-		    CHUNK_MAP_DIRTY) |
-		    (chunk->map[pageind+npages-1-map_bias].bits &
-		    CHUNK_MAP_DIRTY);
-		chunk->map[pageind-map_bias].bits = size | flag_dirty
-		    | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
-		chunk->map[pageind+npages-1-map_bias].bits = flag_dirty |
-		    CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
-
-#ifdef JEMALLOC_STATS
-		arena->stats.ndalloc_large++;
-		arena->stats.allocated_large -= oldsize;
-		arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++;
-		arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
-
-		arena->stats.nmalloc_large++;
-		arena->stats.nrequests_large++;
-		arena->stats.allocated_large += size;
-		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++;
-		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
-		arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
-		if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
-		    arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
-			arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
-			    arena->stats.lstats[(size >> PAGE_SHIFT) -
-			    1].curruns;
+		flag_dirty = arena_mapbits_dirty_get(chunk, pageind) |
+		    arena_mapbits_dirty_get(chunk, pageind+npages-1);
+		arena_mapbits_large_set(chunk, pageind, size, flag_dirty);
+		arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty);
+
+		if (config_stats) {
+			arena->stats.ndalloc_large++;
+			arena->stats.allocated_large -= oldsize;
+			arena->stats.lstats[(oldsize >> LG_PAGE) - 1].ndalloc++;
+			arena->stats.lstats[(oldsize >> LG_PAGE) - 1].curruns--;
+
+			arena->stats.nmalloc_large++;
+			arena->stats.nrequests_large++;
+			arena->stats.allocated_large += size;
+			arena->stats.lstats[(size >> LG_PAGE) - 1].nmalloc++;
+			arena->stats.lstats[(size >> LG_PAGE) - 1].nrequests++;
+			arena->stats.lstats[(size >> LG_PAGE) - 1].curruns++;
 		}
-#endif
 		malloc_mutex_unlock(&arena->lock);
 		return (false);
 	}
@@ -2078,12 +1815,10 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra,
 	psize = PAGE_CEILING(size + extra);
 	if (psize == oldsize) {
 		/* Same size class. */
-#ifdef JEMALLOC_FILL
-		if (opt_junk && size < oldsize) {
+		if (config_fill && opt_junk && size < oldsize) {
 			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize -
 			    size);
 		}
-#endif
 		return (false);
 	} else {
 		arena_chunk_t *chunk;
@@ -2091,16 +1826,13 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra,
 
 		chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 		arena = chunk->arena;
-		dassert(arena->magic == ARENA_MAGIC);
 
 		if (psize < oldsize) {
-#ifdef JEMALLOC_FILL
 			/* Fill before shrinking in order avoid a race. */
-			if (opt_junk) {
+			if (config_fill && opt_junk) {
 				memset((void *)((uintptr_t)ptr + size), 0x5a,
 				    oldsize - size);
 			}
-#endif
 			arena_ralloc_large_shrink(arena, chunk, ptr, oldsize,
 			    psize);
 			return (false);
@@ -2108,12 +1840,11 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra,
 			bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
 			    oldsize, PAGE_CEILING(size),
 			    psize - PAGE_CEILING(size), zero);
-#ifdef JEMALLOC_FILL
-			if (ret == false && zero == false && opt_zero) {
+			if (config_fill && ret == false && zero == false &&
+			    opt_zero) {
 				memset((void *)((uintptr_t)ptr + oldsize), 0,
 				    size - oldsize);
 			}
-#endif
 			return (ret);
 		}
 	}
@@ -2128,24 +1859,22 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra,
 	 * Avoid moving the allocation if the size class can be left the same.
 	 */
 	if (oldsize <= arena_maxclass) {
-		if (oldsize <= small_maxclass) {
+		if (oldsize <= SMALL_MAXCLASS) {
 			assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size
 			    == oldsize);
-			if ((size + extra <= small_maxclass &&
+			if ((size + extra <= SMALL_MAXCLASS &&
 			    SMALL_SIZE2BIN(size + extra) ==
 			    SMALL_SIZE2BIN(oldsize)) || (size <= oldsize &&
 			    size + extra >= oldsize)) {
-#ifdef JEMALLOC_FILL
-				if (opt_junk && size < oldsize) {
+				if (config_fill && opt_junk && size < oldsize) {
 					memset((void *)((uintptr_t)ptr + size),
 					    0x5a, oldsize - size);
 				}
-#endif
 				return (ptr);
 			}
 		} else {
 			assert(size <= arena_maxclass);
-			if (size + extra > small_maxclass) {
+			if (size + extra > SMALL_MAXCLASS) {
 				if (arena_ralloc_large(ptr, oldsize, size,
 				    extra, zero) == false)
 					return (ptr);
@@ -2159,7 +1888,7 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra,
 
 void *
 arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
-    size_t alignment, bool zero)
+    size_t alignment, bool zero, bool try_tcache)
 {
 	void *ret;
 	size_t copysize;
@@ -2175,24 +1904,24 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
 	 * copying.
 	 */
 	if (alignment != 0) {
-		size_t usize = sa2u(size + extra, alignment, NULL);
+		size_t usize = sa2u(size + extra, alignment);
 		if (usize == 0)
 			return (NULL);
 		ret = ipalloc(usize, alignment, zero);
 	} else
-		ret = arena_malloc(size + extra, zero);
+		ret = arena_malloc(NULL, size + extra, zero, try_tcache);
 
 	if (ret == NULL) {
 		if (extra == 0)
 			return (NULL);
 		/* Try again, this time without extra. */
 		if (alignment != 0) {
-			size_t usize = sa2u(size, alignment, NULL);
+			size_t usize = sa2u(size, alignment);
 			if (usize == 0)
 				return (NULL);
 			ret = ipalloc(usize, alignment, zero);
 		} else
-			ret = arena_malloc(size, zero);
+			ret = arena_malloc(NULL, size, zero, try_tcache);
 
 		if (ret == NULL)
 			return (NULL);
@@ -2205,8 +1934,9 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
 	 * expectation that the extra bytes will be reliably preserved.
 	 */
 	copysize = (size < oldsize) ? size : oldsize;
+	VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize);
 	memcpy(ret, ptr, copysize);
-	idalloc(ptr);
+	iqalloc(ptr);
 	return (ret);
 }
 
@@ -2222,22 +1952,21 @@ arena_new(arena_t *arena, unsigned ind)
 	if (malloc_mutex_init(&arena->lock))
 		return (true);
 
-#ifdef JEMALLOC_STATS
-	memset(&arena->stats, 0, sizeof(arena_stats_t));
-	arena->stats.lstats = (malloc_large_stats_t *)base_alloc(nlclasses *
-	    sizeof(malloc_large_stats_t));
-	if (arena->stats.lstats == NULL)
-		return (true);
-	memset(arena->stats.lstats, 0, nlclasses *
-	    sizeof(malloc_large_stats_t));
-#  ifdef JEMALLOC_TCACHE
-	ql_new(&arena->tcache_ql);
-#  endif
-#endif
+	if (config_stats) {
+		memset(&arena->stats, 0, sizeof(arena_stats_t));
+		arena->stats.lstats =
+		    (malloc_large_stats_t *)base_alloc(nlclasses *
+		    sizeof(malloc_large_stats_t));
+		if (arena->stats.lstats == NULL)
+			return (true);
+		memset(arena->stats.lstats, 0, nlclasses *
+		    sizeof(malloc_large_stats_t));
+		if (config_tcache)
+			ql_new(&arena->tcache_ql);
+	}
 
-#ifdef JEMALLOC_PROF
-	arena->prof_accumbytes = 0;
-#endif
+	if (config_prof)
+		arena->prof_accumbytes = 0;
 
 	/* Initialize chunks. */
 	ql_new(&arena->chunks_dirty);
@@ -2251,183 +1980,17 @@ arena_new(arena_t *arena, unsigned ind)
 	arena_avail_tree_new(&arena->runs_avail_dirty);
 
 	/* Initialize bins. */
-	i = 0;
-#ifdef JEMALLOC_TINY
-	/* (2^n)-spaced tiny bins. */
-	for (; i < ntbins; i++) {
+	for (i = 0; i < NBINS; i++) {
 		bin = &arena->bins[i];
 		if (malloc_mutex_init(&bin->lock))
 			return (true);
 		bin->runcur = NULL;
 		arena_run_tree_new(&bin->runs);
-#ifdef JEMALLOC_STATS
-		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
-#endif
+		if (config_stats)
+			memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
 	}
-#endif
 
-	/* Quantum-spaced bins. */
-	for (; i < ntbins + nqbins; i++) {
-		bin = &arena->bins[i];
-		if (malloc_mutex_init(&bin->lock))
-			return (true);
-		bin->runcur = NULL;
-		arena_run_tree_new(&bin->runs);
-#ifdef JEMALLOC_STATS
-		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
-#endif
-	}
-
-	/* Cacheline-spaced bins. */
-	for (; i < ntbins + nqbins + ncbins; i++) {
-		bin = &arena->bins[i];
-		if (malloc_mutex_init(&bin->lock))
-			return (true);
-		bin->runcur = NULL;
-		arena_run_tree_new(&bin->runs);
-#ifdef JEMALLOC_STATS
-		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
-#endif
-	}
-
-	/* Subpage-spaced bins. */
-	for (; i < nbins; i++) {
-		bin = &arena->bins[i];
-		if (malloc_mutex_init(&bin->lock))
-			return (true);
-		bin->runcur = NULL;
-		arena_run_tree_new(&bin->runs);
-#ifdef JEMALLOC_STATS
-		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
-#endif
-	}
-
-#ifdef JEMALLOC_DEBUG
-	arena->magic = ARENA_MAGIC;
-#endif
-
-	return (false);
-}
-
-#ifdef JEMALLOC_DEBUG
-static void
-small_size2bin_validate(void)
-{
-	size_t i, size, binind;
-
-	i = 1;
-#  ifdef JEMALLOC_TINY
-	/* Tiny. */
-	for (; i < (1U << LG_TINY_MIN); i++) {
-		size = pow2_ceil(1U << LG_TINY_MIN);
-		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
-		assert(SMALL_SIZE2BIN(i) == binind);
-	}
-	for (; i < qspace_min; i++) {
-		size = pow2_ceil(i);
-		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
-		assert(SMALL_SIZE2BIN(i) == binind);
-	}
-#  endif
-	/* Quantum-spaced. */
-	for (; i <= qspace_max; i++) {
-		size = QUANTUM_CEILING(i);
-		binind = ntbins + (size >> LG_QUANTUM) - 1;
-		assert(SMALL_SIZE2BIN(i) == binind);
-	}
-	/* Cacheline-spaced. */
-	for (; i <= cspace_max; i++) {
-		size = CACHELINE_CEILING(i);
-		binind = ntbins + nqbins + ((size - cspace_min) >>
-		    LG_CACHELINE);
-		assert(SMALL_SIZE2BIN(i) == binind);
-	}
-	/* Sub-page. */
-	for (; i <= sspace_max; i++) {
-		size = SUBPAGE_CEILING(i);
-		binind = ntbins + nqbins + ncbins + ((size - sspace_min)
-		    >> LG_SUBPAGE);
-		assert(SMALL_SIZE2BIN(i) == binind);
-	}
-}
-#endif
-
-static bool
-small_size2bin_init(void)
-{
-
-	if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
-	    || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
-	    || (sizeof(const_small_size2bin) != ((small_maxclass-1) >>
-	    LG_TINY_MIN) + 1))
-		return (small_size2bin_init_hard());
-
-	small_size2bin = const_small_size2bin;
-#ifdef JEMALLOC_DEBUG
-	small_size2bin_validate();
-#endif
-	return (false);
-}
-
-static bool
-small_size2bin_init_hard(void)
-{
-	size_t i, size, binind;
-	uint8_t *custom_small_size2bin;
-#define	CUSTOM_SMALL_SIZE2BIN(s)					\
-    custom_small_size2bin[(s-1) >> LG_TINY_MIN]
-
-	assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
-	    || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
-	    || (sizeof(const_small_size2bin) != ((small_maxclass-1) >>
-	    LG_TINY_MIN) + 1));
-
-	custom_small_size2bin = (uint8_t *)
-	    base_alloc(small_maxclass >> LG_TINY_MIN);
-	if (custom_small_size2bin == NULL)
-		return (true);
-
-	i = 1;
-#ifdef JEMALLOC_TINY
-	/* Tiny. */
-	for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) {
-		size = pow2_ceil(1U << LG_TINY_MIN);
-		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
-		CUSTOM_SMALL_SIZE2BIN(i) = binind;
-	}
-	for (; i < qspace_min; i += TINY_MIN) {
-		size = pow2_ceil(i);
-		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
-		CUSTOM_SMALL_SIZE2BIN(i) = binind;
-	}
-#endif
-	/* Quantum-spaced. */
-	for (; i <= qspace_max; i += TINY_MIN) {
-		size = QUANTUM_CEILING(i);
-		binind = ntbins + (size >> LG_QUANTUM) - 1;
-		CUSTOM_SMALL_SIZE2BIN(i) = binind;
-	}
-	/* Cacheline-spaced. */
-	for (; i <= cspace_max; i += TINY_MIN) {
-		size = CACHELINE_CEILING(i);
-		binind = ntbins + nqbins + ((size - cspace_min) >>
-		    LG_CACHELINE);
-		CUSTOM_SMALL_SIZE2BIN(i) = binind;
-	}
-	/* Sub-page. */
-	for (; i <= sspace_max; i += TINY_MIN) {
-		size = SUBPAGE_CEILING(i);
-		binind = ntbins + nqbins + ncbins + ((size - sspace_min) >>
-		    LG_SUBPAGE);
-		CUSTOM_SMALL_SIZE2BIN(i) = binind;
-	}
-
-	small_size2bin = custom_small_size2bin;
-#ifdef JEMALLOC_DEBUG
-	small_size2bin_validate();
-#endif
 	return (false);
-#undef CUSTOM_SMALL_SIZE2BIN
 }
 
 /*
@@ -2444,18 +2007,40 @@ small_size2bin_init_hard(void)
 static size_t
 bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
 {
+	size_t pad_size;
 	size_t try_run_size, good_run_size;
 	uint32_t try_nregs, good_nregs;
 	uint32_t try_hdr_size, good_hdr_size;
 	uint32_t try_bitmap_offset, good_bitmap_offset;
-#ifdef JEMALLOC_PROF
 	uint32_t try_ctx0_offset, good_ctx0_offset;
-#endif
-	uint32_t try_reg0_offset, good_reg0_offset;
+	uint32_t try_redzone0_offset, good_redzone0_offset;
 
-	assert(min_run_size >= PAGE_SIZE);
+	assert(min_run_size >= PAGE);
 	assert(min_run_size <= arena_maxclass);
 
+	/*
+	 * Determine redzone size based on minimum alignment and minimum
+	 * redzone size.  Add padding to the end of the run if it is needed to
+	 * align the regions.  The padding allows each redzone to be half the
+	 * minimum alignment; without the padding, each redzone would have to
+	 * be twice as large in order to maintain alignment.
+	 */
+	if (config_fill && opt_redzone) {
+		size_t align_min = ZU(1) << (ffs(bin_info->reg_size) - 1);
+		if (align_min <= REDZONE_MINSIZE) {
+			bin_info->redzone_size = REDZONE_MINSIZE;
+			pad_size = 0;
+		} else {
+			bin_info->redzone_size = align_min >> 1;
+			pad_size = bin_info->redzone_size;
+		}
+	} else {
+		bin_info->redzone_size = 0;
+		pad_size = 0;
+	}
+	bin_info->reg_interval = bin_info->reg_size +
+	    (bin_info->redzone_size << 1);
+
 	/*
 	 * Calculate known-valid settings before entering the run_size
 	 * expansion loop, so that the first part of the loop always copies
@@ -2467,7 +2052,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
 	 * header's mask length and the number of regions.
 	 */
 	try_run_size = min_run_size;
-	try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin_info->reg_size)
+	try_nregs = ((try_run_size - sizeof(arena_run_t)) /
+	    bin_info->reg_interval)
 	    + 1; /* Counter-act try_nregs-- in loop. */
 	if (try_nregs > RUN_MAXREGS) {
 		try_nregs = RUN_MAXREGS
@@ -2481,8 +2067,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
 		try_bitmap_offset = try_hdr_size;
 		/* Add space for bitmap. */
 		try_hdr_size += bitmap_size(try_nregs);
-#ifdef JEMALLOC_PROF
-		if (opt_prof && prof_promote == false) {
+		if (config_prof && opt_prof && prof_promote == false) {
 			/* Pad to a quantum boundary. */
 			try_hdr_size = QUANTUM_CEILING(try_hdr_size);
 			try_ctx0_offset = try_hdr_size;
@@ -2490,10 +2075,9 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
 			try_hdr_size += try_nregs * sizeof(prof_ctx_t *);
 		} else
 			try_ctx0_offset = 0;
-#endif
-		try_reg0_offset = try_run_size - (try_nregs *
-		    bin_info->reg_size);
-	} while (try_hdr_size > try_reg0_offset);
+		try_redzone0_offset = try_run_size - (try_nregs *
+		    bin_info->reg_interval) - pad_size;
+	} while (try_hdr_size > try_redzone0_offset);
 
 	/* run_size expansion loop. */
 	do {
@@ -2504,15 +2088,13 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
 		good_nregs = try_nregs;
 		good_hdr_size = try_hdr_size;
 		good_bitmap_offset = try_bitmap_offset;
-#ifdef JEMALLOC_PROF
 		good_ctx0_offset = try_ctx0_offset;
-#endif
-		good_reg0_offset = try_reg0_offset;
+		good_redzone0_offset = try_redzone0_offset;
 
 		/* Try more aggressive settings. */
-		try_run_size += PAGE_SIZE;
-		try_nregs = ((try_run_size - sizeof(arena_run_t)) /
-		    bin_info->reg_size)
+		try_run_size += PAGE;
+		try_nregs = ((try_run_size - sizeof(arena_run_t) - pad_size) /
+		    bin_info->reg_interval)
 		    + 1; /* Counter-act try_nregs-- in loop. */
 		if (try_nregs > RUN_MAXREGS) {
 			try_nregs = RUN_MAXREGS
@@ -2526,8 +2108,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
 			try_bitmap_offset = try_hdr_size;
 			/* Add space for bitmap. */
 			try_hdr_size += bitmap_size(try_nregs);
-#ifdef JEMALLOC_PROF
-			if (opt_prof && prof_promote == false) {
+			if (config_prof && opt_prof && prof_promote == false) {
 				/* Pad to a quantum boundary. */
 				try_hdr_size = QUANTUM_CEILING(try_hdr_size);
 				try_ctx0_offset = try_hdr_size;
@@ -2537,140 +2118,52 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size)
 				try_hdr_size += try_nregs *
 				    sizeof(prof_ctx_t *);
 			}
-#endif
-			try_reg0_offset = try_run_size - (try_nregs *
-			    bin_info->reg_size);
-		} while (try_hdr_size > try_reg0_offset);
+			try_redzone0_offset = try_run_size - (try_nregs *
+			    bin_info->reg_interval) - pad_size;
+		} while (try_hdr_size > try_redzone0_offset);
 	} while (try_run_size <= arena_maxclass
 	    && try_run_size <= arena_maxclass
-	    && RUN_MAX_OVRHD * (bin_info->reg_size << 3) > RUN_MAX_OVRHD_RELAX
-	    && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
+	    && RUN_MAX_OVRHD * (bin_info->reg_interval << 3) >
+	    RUN_MAX_OVRHD_RELAX
+	    && (try_redzone0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
 	    && try_nregs < RUN_MAXREGS);
 
-	assert(good_hdr_size <= good_reg0_offset);
+	assert(good_hdr_size <= good_redzone0_offset);
 
 	/* Copy final settings. */
 	bin_info->run_size = good_run_size;
 	bin_info->nregs = good_nregs;
 	bin_info->bitmap_offset = good_bitmap_offset;
-#ifdef JEMALLOC_PROF
 	bin_info->ctx0_offset = good_ctx0_offset;
-#endif
-	bin_info->reg0_offset = good_reg0_offset;
+	bin_info->reg0_offset = good_redzone0_offset + bin_info->redzone_size;
+
+	assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs
+	    * bin_info->reg_interval) + pad_size == bin_info->run_size);
 
 	return (good_run_size);
 }
 
-static bool
+static void
 bin_info_init(void)
 {
 	arena_bin_info_t *bin_info;
-	unsigned i;
-	size_t prev_run_size;
-
-	arena_bin_info = base_alloc(sizeof(arena_bin_info_t) * nbins);
-	if (arena_bin_info == NULL)
-		return (true);
-
-	prev_run_size = PAGE_SIZE;
-	i = 0;
-#ifdef JEMALLOC_TINY
-	/* (2^n)-spaced tiny bins. */
-	for (; i < ntbins; i++) {
-		bin_info = &arena_bin_info[i];
-		bin_info->reg_size = (1U << (LG_TINY_MIN + i));
-		prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
-		bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
-	}
-#endif
-
-	/* Quantum-spaced bins. */
-	for (; i < ntbins + nqbins; i++) {
-		bin_info = &arena_bin_info[i];
-		bin_info->reg_size = (i - ntbins + 1) << LG_QUANTUM;
-		prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
-		bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
-	}
-
-	/* Cacheline-spaced bins. */
-	for (; i < ntbins + nqbins + ncbins; i++) {
-		bin_info = &arena_bin_info[i];
-		bin_info->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
-		    LG_CACHELINE);
-		prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
-		bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
-	}
-
-	/* Subpage-spaced bins. */
-	for (; i < nbins; i++) {
-		bin_info = &arena_bin_info[i];
-		bin_info->reg_size = sspace_min + ((i - (ntbins + nqbins +
-		    ncbins)) << LG_SUBPAGE);
-		prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
-		bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
-	}
-
-	return (false);
+	size_t prev_run_size = PAGE;
+
+#define	SIZE_CLASS(bin, delta, size)					\
+	bin_info = &arena_bin_info[bin];				\
+	bin_info->reg_size = size;					\
+	prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);\
+	bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
+	SIZE_CLASSES
+#undef SIZE_CLASS
 }
 
-bool
+void
 arena_boot(void)
 {
 	size_t header_size;
 	unsigned i;
 
-	/* Set variables according to the value of opt_lg_[qc]space_max. */
-	qspace_max = (1U << opt_lg_qspace_max);
-	cspace_min = CACHELINE_CEILING(qspace_max);
-	if (cspace_min == qspace_max)
-		cspace_min += CACHELINE;
-	cspace_max = (1U << opt_lg_cspace_max);
-	sspace_min = SUBPAGE_CEILING(cspace_max);
-	if (sspace_min == cspace_max)
-		sspace_min += SUBPAGE;
-	assert(sspace_min < PAGE_SIZE);
-	sspace_max = PAGE_SIZE - SUBPAGE;
-
-#ifdef JEMALLOC_TINY
-	assert(LG_QUANTUM >= LG_TINY_MIN);
-#endif
-	assert(ntbins <= LG_QUANTUM);
-	nqbins = qspace_max >> LG_QUANTUM;
-	ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1;
-	nsbins = ((sspace_max - sspace_min) >> LG_SUBPAGE) + 1;
-	nbins = ntbins + nqbins + ncbins + nsbins;
-
-	/*
-	 * The small_size2bin lookup table uses uint8_t to encode each bin
-	 * index, so we cannot support more than 256 small size classes.  This
-	 * limit is difficult to exceed (not even possible with 16B quantum and
-	 * 4KiB pages), and such configurations are impractical, but
-	 * nonetheless we need to protect against this case in order to avoid
-	 * undefined behavior.
-	 *
-	 * Further constrain nbins to 255 if prof_promote is true, since all
-	 * small size classes, plus a "not small" size class must be stored in
-	 * 8 bits of arena_chunk_map_t's bits field.
-	 */
-#ifdef JEMALLOC_PROF
-	if (opt_prof && prof_promote) {
-		if (nbins > 255) {
-		    char line_buf[UMAX2S_BUFSIZE];
-		    malloc_write("<jemalloc>: Too many small size classes (");
-		    malloc_write(u2s(nbins, 10, line_buf));
-		    malloc_write(" > max 255)\n");
-		    abort();
-		}
-	} else
-#endif
-	if (nbins > 256) {
-	    char line_buf[UMAX2S_BUFSIZE];
-	    malloc_write("<jemalloc>: Too many small size classes (");
-	    malloc_write(u2s(nbins, 10, line_buf));
-	    malloc_write(" > max 256)\n");
-	    abort();
-	}
-
 	/*
 	 * Compute the header size such that it is large enough to contain the
 	 * page map.  The page map is biased to omit entries for the header
@@ -2685,20 +2178,44 @@ arena_boot(void)
 	 */
 	map_bias = 0;
 	for (i = 0; i < 3; i++) {
-		header_size = offsetof(arena_chunk_t, map)
-			+ (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias));
-		map_bias = (header_size >> PAGE_SHIFT) + ((header_size &
-		    PAGE_MASK) != 0);
+		header_size = offsetof(arena_chunk_t, map) +
+		    (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias));
+		map_bias = (header_size >> LG_PAGE) + ((header_size & PAGE_MASK)
+		    != 0);
 	}
 	assert(map_bias > 0);
 
-	arena_maxclass = chunksize - (map_bias << PAGE_SHIFT);
+	arena_maxclass = chunksize - (map_bias << LG_PAGE);
 
-	if (small_size2bin_init())
-		return (true);
+	bin_info_init();
+}
 
-	if (bin_info_init())
-		return (true);
+void
+arena_prefork(arena_t *arena)
+{
+	unsigned i;
 
-	return (false);
+	malloc_mutex_prefork(&arena->lock);
+	for (i = 0; i < NBINS; i++)
+		malloc_mutex_prefork(&arena->bins[i].lock);
+}
+
+void
+arena_postfork_parent(arena_t *arena)
+{
+	unsigned i;
+
+	for (i = 0; i < NBINS; i++)
+		malloc_mutex_postfork_parent(&arena->bins[i].lock);
+	malloc_mutex_postfork_parent(&arena->lock);
+}
+
+void
+arena_postfork_child(arena_t *arena)
+{
+	unsigned i;
+
+	for (i = 0; i < NBINS; i++)
+		malloc_mutex_postfork_child(&arena->bins[i].lock);
+	malloc_mutex_postfork_child(&arena->lock);
 }
diff --git a/deps/jemalloc/src/base.c b/deps/jemalloc/src/base.c
index cc85e849..bafaa743 100644
--- a/deps/jemalloc/src/base.c
+++ b/deps/jemalloc/src/base.c
@@ -4,7 +4,7 @@
 /******************************************************************************/
 /* Data. */
 
-malloc_mutex_t	base_mtx;
+static malloc_mutex_t	base_mtx;
 
 /*
  * Current pages that are being used for internal memory allocations.  These
@@ -32,7 +32,7 @@ base_pages_alloc(size_t minsize)
 	assert(minsize != 0);
 	csize = CHUNK_CEILING(minsize);
 	zero = false;
-	base_pages = chunk_alloc(csize, true, &zero);
+	base_pages = chunk_alloc(csize, chunksize, true, &zero);
 	if (base_pages == NULL)
 		return (true);
 	base_next_addr = base_pages;
@@ -66,6 +66,17 @@ base_alloc(size_t size)
 	return (ret);
 }
 
+void *
+base_calloc(size_t number, size_t size)
+{
+	void *ret = base_alloc(number * size);
+
+	if (ret != NULL)
+		memset(ret, 0, number * size);
+
+	return (ret);
+}
+
 extent_node_t *
 base_node_alloc(void)
 {
@@ -104,3 +115,24 @@ base_boot(void)
 
 	return (false);
 }
+
+void
+base_prefork(void)
+{
+
+	malloc_mutex_prefork(&base_mtx);
+}
+
+void
+base_postfork_parent(void)
+{
+
+	malloc_mutex_postfork_parent(&base_mtx);
+}
+
+void
+base_postfork_child(void)
+{
+
+	malloc_mutex_postfork_child(&base_mtx);
+}
diff --git a/deps/jemalloc/src/chunk.c b/deps/jemalloc/src/chunk.c
index d190c6f4..6bc24544 100644
--- a/deps/jemalloc/src/chunk.c
+++ b/deps/jemalloc/src/chunk.c
@@ -5,18 +5,20 @@
 /* Data. */
 
 size_t	opt_lg_chunk = LG_CHUNK_DEFAULT;
-#ifdef JEMALLOC_SWAP
-bool	opt_overcommit = true;
-#endif
 
-#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
 malloc_mutex_t	chunks_mtx;
 chunk_stats_t	stats_chunks;
-#endif
 
-#ifdef JEMALLOC_IVSALLOC
+/*
+ * Trees of chunks that were previously allocated (trees differ only in node
+ * ordering).  These are used when allocating chunks, in an attempt to re-use
+ * address space.  Depending on function, different tree orderings are needed,
+ * which is why there are two trees with the same contents.
+ */
+static extent_tree_t	chunks_szad;
+static extent_tree_t	chunks_ad;
+
 rtree_t		*chunks_rtree;
-#endif
 
 /* Various chunk-related settings. */
 size_t		chunksize;
@@ -26,6 +28,98 @@ size_t		map_bias;
 size_t		arena_maxclass; /* Max size class for arenas. */
 
 /******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static void	*chunk_recycle(size_t size, size_t alignment, bool base,
+    bool *zero);
+static void	chunk_record(void *chunk, size_t size);
+
+/******************************************************************************/
+
+static void *
+chunk_recycle(size_t size, size_t alignment, bool base, bool *zero)
+{
+	void *ret;
+	extent_node_t *node;
+	extent_node_t key;
+	size_t alloc_size, leadsize, trailsize;
+
+	if (base) {
+		/*
+		 * This function may need to call base_node_{,de}alloc(), but
+		 * the current chunk allocation request is on behalf of the
+		 * base allocator.  Avoid deadlock (and if that weren't an
+		 * issue, potential for infinite recursion) by returning NULL.
+		 */
+		return (NULL);
+	}
+
+	alloc_size = size + alignment - chunksize;
+	/* Beware size_t wrap-around. */
+	if (alloc_size < size)
+		return (NULL);
+	key.addr = NULL;
+	key.size = alloc_size;
+	malloc_mutex_lock(&chunks_mtx);
+	node = extent_tree_szad_nsearch(&chunks_szad, &key);
+	if (node == NULL) {
+		malloc_mutex_unlock(&chunks_mtx);
+		return (NULL);
+	}
+	leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) -
+	    (uintptr_t)node->addr;
+	assert(node->size >= leadsize + size);
+	trailsize = node->size - leadsize - size;
+	ret = (void *)((uintptr_t)node->addr + leadsize);
+	/* Remove node from the tree. */
+	extent_tree_szad_remove(&chunks_szad, node);
+	extent_tree_ad_remove(&chunks_ad, node);
+	if (leadsize != 0) {
+		/* Insert the leading space as a smaller chunk. */
+		node->size = leadsize;
+		extent_tree_szad_insert(&chunks_szad, node);
+		extent_tree_ad_insert(&chunks_ad, node);
+		node = NULL;
+	}
+	if (trailsize != 0) {
+		/* Insert the trailing space as a smaller chunk. */
+		if (node == NULL) {
+			/*
+			 * An additional node is required, but
+			 * base_node_alloc() can cause a new base chunk to be
+			 * allocated.  Drop chunks_mtx in order to avoid
+			 * deadlock, and if node allocation fails, deallocate
+			 * the result before returning an error.
+			 */
+			malloc_mutex_unlock(&chunks_mtx);
+			node = base_node_alloc();
+			if (node == NULL) {
+				chunk_dealloc(ret, size, true);
+				return (NULL);
+			}
+			malloc_mutex_lock(&chunks_mtx);
+		}
+		node->addr = (void *)((uintptr_t)(ret) + size);
+		node->size = trailsize;
+		extent_tree_szad_insert(&chunks_szad, node);
+		extent_tree_ad_insert(&chunks_ad, node);
+		node = NULL;
+	}
+	malloc_mutex_unlock(&chunks_mtx);
+
+	if (node != NULL)
+		base_node_dealloc(node);
+#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
+	/* Pages are zeroed as a side effect of pages_purge(). */
+	*zero = true;
+#else
+	if (*zero) {
+		VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
+		memset(ret, 0, size);
+	}
+#endif
+	return (ret);
+}
 
 /*
  * If the caller specifies (*zero == false), it is still possible to receive
@@ -34,79 +128,138 @@ size_t		arena_maxclass; /* Max size class for arenas. */
  * advantage of them if they are returned.
  */
 void *
-chunk_alloc(size_t size, bool base, bool *zero)
+chunk_alloc(size_t size, size_t alignment, bool base, bool *zero)
 {
 	void *ret;
 
 	assert(size != 0);
 	assert((size & chunksize_mask) == 0);
+	assert(alignment != 0);
+	assert((alignment & chunksize_mask) == 0);
 
-#ifdef JEMALLOC_SWAP
-	if (swap_enabled) {
-		ret = chunk_alloc_swap(size, zero);
-		if (ret != NULL)
-			goto RETURN;
-	}
+	ret = chunk_recycle(size, alignment, base, zero);
+	if (ret != NULL)
+		goto label_return;
 
-	if (swap_enabled == false || opt_overcommit) {
-#endif
-#ifdef JEMALLOC_DSS
-		ret = chunk_alloc_dss(size, zero);
+	ret = chunk_alloc_mmap(size, alignment, zero);
+	if (ret != NULL)
+		goto label_return;
+
+	if (config_dss) {
+		ret = chunk_alloc_dss(size, alignment, zero);
 		if (ret != NULL)
-			goto RETURN;
-#endif
-		ret = chunk_alloc_mmap(size);
-		if (ret != NULL) {
-			*zero = true;
-			goto RETURN;
-		}
-#ifdef JEMALLOC_SWAP
+			goto label_return;
 	}
-#endif
 
 	/* All strategies for allocation failed. */
 	ret = NULL;
-RETURN:
-#ifdef JEMALLOC_IVSALLOC
-	if (base == false && ret != NULL) {
+label_return:
+	if (config_ivsalloc && base == false && ret != NULL) {
 		if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) {
 			chunk_dealloc(ret, size, true);
 			return (NULL);
 		}
 	}
-#endif
-#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
-	if (ret != NULL) {
-#  ifdef JEMALLOC_PROF
+	if ((config_stats || config_prof) && ret != NULL) {
 		bool gdump;
-#  endif
 		malloc_mutex_lock(&chunks_mtx);
-#  ifdef JEMALLOC_STATS
-		stats_chunks.nchunks += (size / chunksize);
-#  endif
+		if (config_stats)
+			stats_chunks.nchunks += (size / chunksize);
 		stats_chunks.curchunks += (size / chunksize);
 		if (stats_chunks.curchunks > stats_chunks.highchunks) {
 			stats_chunks.highchunks = stats_chunks.curchunks;
-#  ifdef JEMALLOC_PROF
-			gdump = true;
-#  endif
-		}
-#  ifdef JEMALLOC_PROF
-		else
+			if (config_prof)
+				gdump = true;
+		} else if (config_prof)
 			gdump = false;
-#  endif
 		malloc_mutex_unlock(&chunks_mtx);
-#  ifdef JEMALLOC_PROF
-		if (opt_prof && opt_prof_gdump && gdump)
+		if (config_prof && opt_prof && opt_prof_gdump && gdump)
 			prof_gdump();
-#  endif
 	}
-#endif
+	if (config_debug && *zero && ret != NULL) {
+		size_t i;
+		size_t *p = (size_t *)(uintptr_t)ret;
 
+		VALGRIND_MAKE_MEM_DEFINED(ret, size);
+		for (i = 0; i < size / sizeof(size_t); i++)
+			assert(p[i] == 0);
+	}
 	assert(CHUNK_ADDR2BASE(ret) == ret);
 	return (ret);
 }
 
+static void
+chunk_record(void *chunk, size_t size)
+{
+	extent_node_t *xnode, *node, *prev, key;
+
+	pages_purge(chunk, size);
+
+	/*
+	 * Allocate a node before acquiring chunks_mtx even though it might not
+	 * be needed, because base_node_alloc() may cause a new base chunk to
+	 * be allocated, which could cause deadlock if chunks_mtx were already
+	 * held.
+	 */
+	xnode = base_node_alloc();
+
+	malloc_mutex_lock(&chunks_mtx);
+	key.addr = (void *)((uintptr_t)chunk + size);
+	node = extent_tree_ad_nsearch(&chunks_ad, &key);
+	/* Try to coalesce forward. */
+	if (node != NULL && node->addr == key.addr) {
+		/*
+		 * Coalesce chunk with the following address range.  This does
+		 * not change the position within chunks_ad, so only
+		 * remove/insert from/into chunks_szad.
+		 */
+		extent_tree_szad_remove(&chunks_szad, node);
+		node->addr = chunk;
+		node->size += size;
+		extent_tree_szad_insert(&chunks_szad, node);
+		if (xnode != NULL)
+			base_node_dealloc(xnode);
+	} else {
+		/* Coalescing forward failed, so insert a new node. */
+		if (xnode == NULL) {
+			/*
+			 * base_node_alloc() failed, which is an exceedingly
+			 * unlikely failure.  Leak chunk; its pages have
+			 * already been purged, so this is only a virtual
+			 * memory leak.
+			 */
+			malloc_mutex_unlock(&chunks_mtx);
+			return;
+		}
+		node = xnode;
+		node->addr = chunk;
+		node->size = size;
+		extent_tree_ad_insert(&chunks_ad, node);
+		extent_tree_szad_insert(&chunks_szad, node);
+	}
+
+	/* Try to coalesce backward. */
+	prev = extent_tree_ad_prev(&chunks_ad, node);
+	if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
+	    chunk) {
+		/*
+		 * Coalesce chunk with the previous address range.  This does
+		 * not change the position within chunks_ad, so only
+		 * remove/insert node from/into chunks_szad.
+		 */
+		extent_tree_szad_remove(&chunks_szad, prev);
+		extent_tree_ad_remove(&chunks_ad, prev);
+
+		extent_tree_szad_remove(&chunks_szad, node);
+		node->addr = prev->addr;
+		node->size += prev->size;
+		extent_tree_szad_insert(&chunks_szad, node);
+
+		base_node_dealloc(prev);
+	}
+	malloc_mutex_unlock(&chunks_mtx);
+}
+
 void
 chunk_dealloc(void *chunk, size_t size, bool unmap)
 {
@@ -116,25 +269,18 @@ chunk_dealloc(void *chunk, size_t size, bool unmap)
 	assert(size != 0);
 	assert((size & chunksize_mask) == 0);
 
-#ifdef JEMALLOC_IVSALLOC
-	rtree_set(chunks_rtree, (uintptr_t)chunk, NULL);
-#endif
-#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
-	malloc_mutex_lock(&chunks_mtx);
-	stats_chunks.curchunks -= (size / chunksize);
-	malloc_mutex_unlock(&chunks_mtx);
-#endif
+	if (config_ivsalloc)
+		rtree_set(chunks_rtree, (uintptr_t)chunk, NULL);
+	if (config_stats || config_prof) {
+		malloc_mutex_lock(&chunks_mtx);
+		stats_chunks.curchunks -= (size / chunksize);
+		malloc_mutex_unlock(&chunks_mtx);
+	}
 
 	if (unmap) {
-#ifdef JEMALLOC_SWAP
-		if (swap_enabled && chunk_dealloc_swap(chunk, size) == false)
-			return;
-#endif
-#ifdef JEMALLOC_DSS
-		if (chunk_dealloc_dss(chunk, size) == false)
-			return;
-#endif
-		chunk_dealloc_mmap(chunk, size);
+		if ((config_dss && chunk_in_dss(chunk)) ||
+		    chunk_dealloc_mmap(chunk, size))
+			chunk_record(chunk, size);
 	}
 }
 
@@ -144,30 +290,25 @@ chunk_boot(void)
 
 	/* Set variables according to the value of opt_lg_chunk. */
 	chunksize = (ZU(1) << opt_lg_chunk);
-	assert(chunksize >= PAGE_SIZE);
+	assert(chunksize >= PAGE);
 	chunksize_mask = chunksize - 1;
-	chunk_npages = (chunksize >> PAGE_SHIFT);
+	chunk_npages = (chunksize >> LG_PAGE);
 
-#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
-	if (malloc_mutex_init(&chunks_mtx))
-		return (true);
-	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
-#endif
-#ifdef JEMALLOC_SWAP
-	if (chunk_swap_boot())
-		return (true);
-#endif
-	if (chunk_mmap_boot())
-		return (true);
-#ifdef JEMALLOC_DSS
-	if (chunk_dss_boot())
-		return (true);
-#endif
-#ifdef JEMALLOC_IVSALLOC
-	chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk);
-	if (chunks_rtree == NULL)
+	/* chunks_mtx also guards chunks_szad/chunks_ad; always set it up. */
+	if (malloc_mutex_init(&chunks_mtx))
+		return (true);
+	if (config_stats || config_prof)
+		memset(&stats_chunks, 0, sizeof(chunk_stats_t));
+	if (config_dss && chunk_dss_boot())
 		return (true);
-#endif
+	extent_tree_szad_new(&chunks_szad);
+	extent_tree_ad_new(&chunks_ad);
+	if (config_ivsalloc) {
+		chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) -
+		    opt_lg_chunk);
+		if (chunks_rtree == NULL)
+			return (true);
+	}
 
 	return (false);
 }
diff --git a/deps/jemalloc/src/chunk_dss.c b/deps/jemalloc/src/chunk_dss.c
index 5c0e290e..2d68e480 100644
--- a/deps/jemalloc/src/chunk_dss.c
+++ b/deps/jemalloc/src/chunk_dss.c
@@ -1,82 +1,42 @@
 #define	JEMALLOC_CHUNK_DSS_C_
 #include "jemalloc/internal/jemalloc_internal.h"
-#ifdef JEMALLOC_DSS
 /******************************************************************************/
 /* Data. */
 
-malloc_mutex_t	dss_mtx;
+/*
+ * Protects sbrk() calls.  This avoids malloc races among threads, though it
+ * does not protect against races with threads that call sbrk() directly.
+ */
+static malloc_mutex_t	dss_mtx;
 
 /* Base address of the DSS. */
-static void	*dss_base;
+static void		*dss_base;
 /* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
-static void	*dss_prev;
+static void		*dss_prev;
 /* Current upper limit on DSS addresses. */
-static void	*dss_max;
-
-/*
- * Trees of chunks that were previously allocated (trees differ only in node
- * ordering).  These are used when allocating chunks, in an attempt to re-use
- * address space.  Depending on function, different tree orderings are needed,
- * which is why there are two trees with the same contents.
- */
-static extent_tree_t	dss_chunks_szad;
-static extent_tree_t	dss_chunks_ad;
-
-/******************************************************************************/
-/* Function prototypes for non-inline static functions. */
-
-static void	*chunk_recycle_dss(size_t size, bool *zero);
-static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size);
+static void		*dss_max;
 
 /******************************************************************************/
 
+#ifndef JEMALLOC_HAVE_SBRK
 static void *
-chunk_recycle_dss(size_t size, bool *zero)
+sbrk(intptr_t increment)
 {
-	extent_node_t *node, key;
-
-	key.addr = NULL;
-	key.size = size;
-	malloc_mutex_lock(&dss_mtx);
-	node = extent_tree_szad_nsearch(&dss_chunks_szad, &key);
-	if (node != NULL) {
-		void *ret = node->addr;
 
-		/* Remove node from the tree. */
-		extent_tree_szad_remove(&dss_chunks_szad, node);
-		if (node->size == size) {
-			extent_tree_ad_remove(&dss_chunks_ad, node);
-			base_node_dealloc(node);
-		} else {
-			/*
-			 * Insert the remainder of node's address range as a
-			 * smaller chunk.  Its position within dss_chunks_ad
-			 * does not change.
-			 */
-			assert(node->size > size);
-			node->addr = (void *)((uintptr_t)node->addr + size);
-			node->size -= size;
-			extent_tree_szad_insert(&dss_chunks_szad, node);
-		}
-		malloc_mutex_unlock(&dss_mtx);
-
-		if (*zero)
-			memset(ret, 0, size);
-		return (ret);
-	}
-	malloc_mutex_unlock(&dss_mtx);
+	not_implemented();
 
 	return (NULL);
 }
+#endif
 
 void *
-chunk_alloc_dss(size_t size, bool *zero)
+chunk_alloc_dss(size_t size, size_t alignment, bool *zero)
 {
 	void *ret;
 
-	ret = chunk_recycle_dss(size, zero);
-	if (ret != NULL)
-		return (ret);
+	cassert(config_dss);
+	assert(size > 0 && (size & chunksize_mask) == 0);
+	assert(alignment > 0 && (alignment & chunksize_mask) == 0);
 
 	/*
 	 * sbrk() uses a signed increment argument, so take care not to
@@ -87,6 +47,8 @@ chunk_alloc_dss(size_t size, bool *zero)
 
 	malloc_mutex_lock(&dss_mtx);
 	if (dss_prev != (void *)-1) {
+		size_t gap_size, cpad_size;
+		void *cpad, *dss_next;
 		intptr_t incr;
 
 		/*
@@ -97,26 +59,40 @@ chunk_alloc_dss(size_t size, bool *zero)
 		do {
 			/* Get the current end of the DSS. */
 			dss_max = sbrk(0);
-
 			/*
 			 * Calculate how much padding is necessary to
 			 * chunk-align the end of the DSS.
 			 */
-			incr = (intptr_t)size
-			    - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
-			if (incr == (intptr_t)size)
-				ret = dss_max;
-			else {
-				ret = (void *)((intptr_t)dss_max + incr);
-				incr += size;
+			gap_size = (chunksize - CHUNK_ADDR2OFFSET(dss_max)) &
+			    chunksize_mask;
+			/*
+			 * Compute how much chunk-aligned pad space (if any) is
+			 * necessary to satisfy alignment.  This space can be
+			 * recycled for later use.
+			 */
+			cpad = (void *)((uintptr_t)dss_max + gap_size);
+			ret = (void *)ALIGNMENT_CEILING((uintptr_t)dss_max,
+			    alignment);
+			cpad_size = (uintptr_t)ret - (uintptr_t)cpad;
+			dss_next = (void *)((uintptr_t)ret + size);
+			if ((uintptr_t)ret < (uintptr_t)dss_max ||
+			    (uintptr_t)dss_next < (uintptr_t)dss_max) {
+				/* Wrap-around. */
+				malloc_mutex_unlock(&dss_mtx);
+				return (NULL);
 			}
-
+			incr = gap_size + cpad_size + size;
 			dss_prev = sbrk(incr);
 			if (dss_prev == dss_max) {
 				/* Success. */
-				dss_max = (void *)((intptr_t)dss_prev + incr);
+				dss_max = dss_next;
 				malloc_mutex_unlock(&dss_mtx);
-				*zero = true;
+				if (cpad_size != 0)
+					chunk_dealloc(cpad, cpad_size, true);
+				if (*zero) {
+					VALGRIND_MAKE_MEM_UNDEFINED(ret, size);
+					memset(ret, 0, size);
+				}
 				return (ret);
 			}
 		} while (dss_prev != (void *)-1);
@@ -126,84 +102,13 @@ chunk_alloc_dss(size_t size, bool *zero)
 	return (NULL);
 }
 
-static extent_node_t *
-chunk_dealloc_dss_record(void *chunk, size_t size)
-{
-	extent_node_t *xnode, *node, *prev, key;
-
-	xnode = NULL;
-	while (true) {
-		key.addr = (void *)((uintptr_t)chunk + size);
-		node = extent_tree_ad_nsearch(&dss_chunks_ad, &key);
-		/* Try to coalesce forward. */
-		if (node != NULL && node->addr == key.addr) {
-			/*
-			 * Coalesce chunk with the following address range.
-			 * This does not change the position within
-			 * dss_chunks_ad, so only remove/insert from/into
-			 * dss_chunks_szad.
-			 */
-			extent_tree_szad_remove(&dss_chunks_szad, node);
-			node->addr = chunk;
-			node->size += size;
-			extent_tree_szad_insert(&dss_chunks_szad, node);
-			break;
-		} else if (xnode == NULL) {
-			/*
-			 * It is possible that base_node_alloc() will cause a
-			 * new base chunk to be allocated, so take care not to
-			 * deadlock on dss_mtx, and recover if another thread
-			 * deallocates an adjacent chunk while this one is busy
-			 * allocating xnode.
-			 */
-			malloc_mutex_unlock(&dss_mtx);
-			xnode = base_node_alloc();
-			malloc_mutex_lock(&dss_mtx);
-			if (xnode == NULL)
-				return (NULL);
-		} else {
-			/* Coalescing forward failed, so insert a new node. */
-			node = xnode;
-			xnode = NULL;
-			node->addr = chunk;
-			node->size = size;
-			extent_tree_ad_insert(&dss_chunks_ad, node);
-			extent_tree_szad_insert(&dss_chunks_szad, node);
-			break;
-		}
-	}
-	/* Discard xnode if it ended up unused do to a race. */
-	if (xnode != NULL)
-		base_node_dealloc(xnode);
-
-	/* Try to coalesce backward. */
-	prev = extent_tree_ad_prev(&dss_chunks_ad, node);
-	if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
-	    chunk) {
-		/*
-		 * Coalesce chunk with the previous address range.  This does
-		 * not change the position within dss_chunks_ad, so only
-		 * remove/insert node from/into dss_chunks_szad.
-		 */
-		extent_tree_szad_remove(&dss_chunks_szad, prev);
-		extent_tree_ad_remove(&dss_chunks_ad, prev);
-
-		extent_tree_szad_remove(&dss_chunks_szad, node);
-		node->addr = prev->addr;
-		node->size += prev->size;
-		extent_tree_szad_insert(&dss_chunks_szad, node);
-
-		base_node_dealloc(prev);
-	}
-
-	return (node);
-}
-
 bool
 chunk_in_dss(void *chunk)
 {
 	bool ret;
 
+	cassert(config_dss);
+
 	malloc_mutex_lock(&dss_mtx);
 	if ((uintptr_t)chunk >= (uintptr_t)dss_base
 	    && (uintptr_t)chunk < (uintptr_t)dss_max)
@@ -216,69 +121,42 @@ chunk_in_dss(void *chunk)
 }
 
 bool
-chunk_dealloc_dss(void *chunk, size_t size)
+chunk_dss_boot(void)
 {
-	bool ret;
 
-	malloc_mutex_lock(&dss_mtx);
-	if ((uintptr_t)chunk >= (uintptr_t)dss_base
-	    && (uintptr_t)chunk < (uintptr_t)dss_max) {
-		extent_node_t *node;
+	cassert(config_dss);
 
-		/* Try to coalesce with other unused chunks. */
-		node = chunk_dealloc_dss_record(chunk, size);
-		if (node != NULL) {
-			chunk = node->addr;
-			size = node->size;
-		}
+	if (malloc_mutex_init(&dss_mtx))
+		return (true);
+	dss_base = sbrk(0);
+	dss_prev = dss_base;
+	dss_max = dss_base;
 
-		/* Get the current end of the DSS. */
-		dss_max = sbrk(0);
+	return (false);
+}
 
-		/*
-		 * Try to shrink the DSS if this chunk is at the end of the
-		 * DSS.  The sbrk() call here is subject to a race condition
-		 * with threads that use brk(2) or sbrk(2) directly, but the
-		 * alternative would be to leak memory for the sake of poorly
-		 * designed multi-threaded programs.
-		 */
-		if ((void *)((uintptr_t)chunk + size) == dss_max
-		    && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) {
-			/* Success. */
-			dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size);
+void
+chunk_dss_prefork(void)
+{
 
-			if (node != NULL) {
-				extent_tree_szad_remove(&dss_chunks_szad, node);
-				extent_tree_ad_remove(&dss_chunks_ad, node);
-				base_node_dealloc(node);
-			}
-		} else
-			madvise(chunk, size, MADV_DONTNEED);
+	if (config_dss)
+		malloc_mutex_prefork(&dss_mtx);
+}
 
-		ret = false;
-		goto RETURN;
-	}
+void
+chunk_dss_postfork_parent(void)
+{
 
-	ret = true;
-RETURN:
-	malloc_mutex_unlock(&dss_mtx);
-	return (ret);
+	if (config_dss)
+		malloc_mutex_postfork_parent(&dss_mtx);
 }
 
-bool
-chunk_dss_boot(void)
+void
+chunk_dss_postfork_child(void)
 {
 
-	if (malloc_mutex_init(&dss_mtx))
-		return (true);
-	dss_base = sbrk(0);
-	dss_prev = dss_base;
-	dss_max = dss_base;
-	extent_tree_szad_new(&dss_chunks_szad);
-	extent_tree_ad_new(&dss_chunks_ad);
-
-	return (false);
+	if (config_dss)
+		malloc_mutex_postfork_child(&dss_mtx);
 }
 
 /******************************************************************************/
-#endif /* JEMALLOC_DSS */
diff --git a/deps/jemalloc/src/chunk_mmap.c b/deps/jemalloc/src/chunk_mmap.c
index 164e86e7..c8da6556 100644
--- a/deps/jemalloc/src/chunk_mmap.c
+++ b/deps/jemalloc/src/chunk_mmap.c
@@ -1,54 +1,37 @@
 #define	JEMALLOC_CHUNK_MMAP_C_
 #include "jemalloc/internal/jemalloc_internal.h"
 
-/******************************************************************************/
-/* Data. */
-
-/*
- * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and
- * potentially avoid some system calls.
- */
-#ifndef NO_TLS
-static __thread bool	mmap_unaligned_tls
-    JEMALLOC_ATTR(tls_model("initial-exec"));
-#define	MMAP_UNALIGNED_GET()	mmap_unaligned_tls
-#define	MMAP_UNALIGNED_SET(v)	do {					\
-	mmap_unaligned_tls = (v);					\
-} while (0)
-#else
-static pthread_key_t	mmap_unaligned_tsd;
-#define	MMAP_UNALIGNED_GET()	((bool)pthread_getspecific(mmap_unaligned_tsd))
-#define	MMAP_UNALIGNED_SET(v)	do {					\
-	pthread_setspecific(mmap_unaligned_tsd, (void *)(v));		\
-} while (0)
-#endif
-
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
 
-static void	*pages_map(void *addr, size_t size, bool noreserve);
+static void	*pages_map(void *addr, size_t size);
 static void	pages_unmap(void *addr, size_t size);
-static void	*chunk_alloc_mmap_slow(size_t size, bool unaligned,
-    bool noreserve);
-static void	*chunk_alloc_mmap_internal(size_t size, bool noreserve);
+static void	*chunk_alloc_mmap_slow(size_t size, size_t alignment,
+    bool *zero);
 
 /******************************************************************************/
 
 static void *
-pages_map(void *addr, size_t size, bool noreserve)
+pages_map(void *addr, size_t size)
 {
 	void *ret;
 
+	assert(size != 0);
+
+#ifdef _WIN32
+	/*
+	 * If VirtualAlloc can't allocate at the given address when one is
+	 * given, it fails and returns NULL.
+	 */
+	ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE,
+	    PAGE_READWRITE);
+#else
 	/*
 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
 	 * of existing mappings, and we only want to create new mappings.
 	 */
-	int flags = MAP_PRIVATE | MAP_ANON;
-#ifdef MAP_NORESERVE
-	if (noreserve)
-		flags |= MAP_NORESERVE;
-#endif
-	ret = mmap(addr, size, PROT_READ | PROT_WRITE, flags, -1, 0);
+	ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
+	    -1, 0);
 	assert(ret != NULL);
 
 	if (ret == MAP_FAILED)
@@ -60,16 +43,15 @@ pages_map(void *addr, size_t size, bool noreserve)
 		if (munmap(ret, size) == -1) {
 			char buf[BUFERROR_BUF];
 
-			buferror(errno, buf, sizeof(buf));
-			malloc_write("<jemalloc>: Error in munmap(): ");
-			malloc_write(buf);
-			malloc_write("\n");
+			buferror(buf, sizeof(buf));
+			malloc_printf("<jemalloc>: Error in munmap(): %s\n",
+			    buf);
 			if (opt_abort)
 				abort();
 		}
 		ret = NULL;
 	}
-
+#endif
 	assert(ret == NULL || (addr == NULL && ret != addr)
 	    || (addr != NULL && ret == addr));
 	return (ret);
@@ -79,161 +61,142 @@ static void
 pages_unmap(void *addr, size_t size)
 {
 
-	if (munmap(addr, size) == -1) {
+#ifdef _WIN32
+	if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
+#else
+	if (munmap(addr, size) == -1)
+#endif
+	{
 		char buf[BUFERROR_BUF];
 
-		buferror(errno, buf, sizeof(buf));
-		malloc_write("<jemalloc>: Error in munmap(): ");
-		malloc_write(buf);
-		malloc_write("\n");
+		buferror(buf, sizeof(buf));
+		malloc_printf("<jemalloc>: Error in "
+#ifdef _WIN32
+		              "VirtualFree"
+#else
+		              "munmap"
+#endif
+		              "(): %s\n", buf);
 		if (opt_abort)
 			abort();
 	}
 }
 
 static void *
-chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve)
+pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size)
 {
-	void *ret;
-	size_t offset;
-
-	/* Beware size_t wrap-around. */
-	if (size + chunksize <= size)
+	void *ret = (void *)((uintptr_t)addr + leadsize);
+
+	assert(alloc_size >= leadsize + size);
+#ifdef _WIN32
+	{
+		void *new_addr;
+
+		pages_unmap(addr, alloc_size);
+		new_addr = pages_map(ret, size);
+		if (new_addr == ret)
+			return (ret);
+		if (new_addr)
+			pages_unmap(new_addr, size);
 		return (NULL);
+	}
+#else
+	{
+		size_t trailsize = alloc_size - leadsize - size;
+
+		if (leadsize != 0)
+			pages_unmap(addr, leadsize);
+		if (trailsize != 0)
+			pages_unmap((void *)((uintptr_t)ret + size), trailsize);
+		return (ret);
+	}
+#endif
+}
 
-	ret = pages_map(NULL, size + chunksize, noreserve);
-	if (ret == NULL)
-		return (NULL);
+void
+pages_purge(void *addr, size_t length)
+{
 
-	/* Clean up unneeded leading/trailing space. */
-	offset = CHUNK_ADDR2OFFSET(ret);
-	if (offset != 0) {
-		/* Note that mmap() returned an unaligned mapping. */
-		unaligned = true;
-
-		/* Leading space. */
-		pages_unmap(ret, chunksize - offset);
-
-		ret = (void *)((uintptr_t)ret +
-		    (chunksize - offset));
-
-		/* Trailing space. */
-		pages_unmap((void *)((uintptr_t)ret + size),
-		    offset);
-	} else {
-		/* Trailing space only. */
-		pages_unmap((void *)((uintptr_t)ret + size),
-		    chunksize);
-	}
+#ifdef _WIN32
+	VirtualAlloc(addr, length, MEM_RESET, PAGE_READWRITE);
+#else
+#  ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
+#    define JEMALLOC_MADV_PURGE MADV_DONTNEED
+#  elif defined(JEMALLOC_PURGE_MADVISE_FREE)
+#    define JEMALLOC_MADV_PURGE MADV_FREE
+#  else
+#    error "No method defined for purging unused dirty pages."
+#  endif
+	madvise(addr, length, JEMALLOC_MADV_PURGE);
+#endif
+}
 
-	/*
-	 * If mmap() returned an aligned mapping, reset mmap_unaligned so that
-	 * the next chunk_alloc_mmap() execution tries the fast allocation
-	 * method.
-	 */
-	if (unaligned == false)
-		MMAP_UNALIGNED_SET(false);
+static void *
+chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero)
+{
+	void *ret, *pages;
+	size_t alloc_size, leadsize;
 
+	alloc_size = size + alignment - PAGE;
+	/* Beware size_t wrap-around. */
+	if (alloc_size < size)
+		return (NULL);
+	do {
+		pages = pages_map(NULL, alloc_size);
+		if (pages == NULL)
+			return (NULL);
+		leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) -
+		    (uintptr_t)pages;
+		ret = pages_trim(pages, alloc_size, leadsize, size);
+	} while (ret == NULL);
+
+	assert(ret != NULL);
+	*zero = true;
 	return (ret);
 }
 
-static void *
-chunk_alloc_mmap_internal(size_t size, bool noreserve)
+void *
+chunk_alloc_mmap(size_t size, size_t alignment, bool *zero)
 {
 	void *ret;
+	size_t offset;
 
 	/*
 	 * Ideally, there would be a way to specify alignment to mmap() (like
 	 * NetBSD has), but in the absence of such a feature, we have to work
 	 * hard to efficiently create aligned mappings.  The reliable, but
 	 * slow method is to create a mapping that is over-sized, then trim the
-	 * excess.  However, that always results in at least one call to
+	 * excess.  However, that always results in one or two calls to
 	 * pages_unmap().
 	 *
-	 * A more optimistic approach is to try mapping precisely the right
-	 * amount, then try to append another mapping if alignment is off.  In
-	 * practice, this works out well as long as the application is not
-	 * interleaving mappings via direct mmap() calls.  If we do run into a
-	 * situation where there is an interleaved mapping and we are unable to
-	 * extend an unaligned mapping, our best option is to switch to the
-	 * slow method until mmap() returns another aligned mapping.  This will
-	 * tend to leave a gap in the memory map that is too small to cause
-	 * later problems for the optimistic method.
-	 *
-	 * Another possible confounding factor is address space layout
-	 * randomization (ASLR), which causes mmap(2) to disregard the
-	 * requested address.  mmap_unaligned tracks whether the previous
-	 * chunk_alloc_mmap() execution received any unaligned or relocated
-	 * mappings, and if so, the current execution will immediately fall
-	 * back to the slow method.  However, we keep track of whether the fast
-	 * method would have succeeded, and if so, we make a note to try the
-	 * fast method next time.
+	 * Optimistically try mapping precisely the right amount before falling
+	 * back to the slow method, with the expectation that the optimistic
+	 * approach works most of the time.
 	 */
 
-	if (MMAP_UNALIGNED_GET() == false) {
-		size_t offset;
+	assert(alignment != 0);
+	assert((alignment & chunksize_mask) == 0);
 
-		ret = pages_map(NULL, size, noreserve);
-		if (ret == NULL)
-			return (NULL);
-
-		offset = CHUNK_ADDR2OFFSET(ret);
-		if (offset != 0) {
-			MMAP_UNALIGNED_SET(true);
-			/* Try to extend chunk boundary. */
-			if (pages_map((void *)((uintptr_t)ret + size),
-			    chunksize - offset, noreserve) == NULL) {
-				/*
-				 * Extension failed.  Clean up, then revert to
-				 * the reliable-but-expensive method.
-				 */
-				pages_unmap(ret, size);
-				ret = chunk_alloc_mmap_slow(size, true,
-				    noreserve);
-			} else {
-				/* Clean up unneeded leading space. */
-				pages_unmap(ret, chunksize - offset);
-				ret = (void *)((uintptr_t)ret + (chunksize -
-				    offset));
-			}
-		}
-	} else
-		ret = chunk_alloc_mmap_slow(size, false, noreserve);
+	ret = pages_map(NULL, size);
+	if (ret == NULL)
+		return (NULL);
+	offset = ALIGNMENT_ADDR2OFFSET(ret, alignment);
+	if (offset != 0) {
+		pages_unmap(ret, size);
+		return (chunk_alloc_mmap_slow(size, alignment, zero));
+	}
 
+	assert(ret != NULL);
+	*zero = true;
 	return (ret);
 }
 
-void *
-chunk_alloc_mmap(size_t size)
-{
-
-	return (chunk_alloc_mmap_internal(size, false));
-}
-
-void *
-chunk_alloc_mmap_noreserve(size_t size)
-{
-
-	return (chunk_alloc_mmap_internal(size, true));
-}
-
-void
-chunk_dealloc_mmap(void *chunk, size_t size)
-{
-
-	pages_unmap(chunk, size);
-}
-
 bool
-chunk_mmap_boot(void)
+chunk_dealloc_mmap(void *chunk, size_t size)
 {
 
-#ifdef NO_TLS
-	if (pthread_key_create(&mmap_unaligned_tsd, NULL) != 0) {
-		malloc_write("<jemalloc>: Error in pthread_key_create()\n");
-		return (true);
-	}
-#endif
+	if (config_munmap)
+		pages_unmap(chunk, size);
 
-	return (false);
+	return (config_munmap == false);
 }
diff --git a/deps/jemalloc/src/chunk_swap.c b/deps/jemalloc/src/chunk_swap.c
deleted file mode 100644
index cb25ae0d..00000000
--- a/deps/jemalloc/src/chunk_swap.c
+++ /dev/null
@@ -1,402 +0,0 @@
-#define	JEMALLOC_CHUNK_SWAP_C_
-#include "jemalloc/internal/jemalloc_internal.h"
-#ifdef JEMALLOC_SWAP
-/******************************************************************************/
-/* Data. */
-
-malloc_mutex_t	swap_mtx;
-bool		swap_enabled;
-bool		swap_prezeroed;
-size_t		swap_nfds;
-int		*swap_fds;
-#ifdef JEMALLOC_STATS
-size_t		swap_avail;
-#endif
-
-/* Base address of the mmap()ed file(s). */
-static void	*swap_base;
-/* Current end of the space in use (<= swap_max). */
-static void	*swap_end;
-/* Absolute upper limit on file-backed addresses. */
-static void	*swap_max;
-
-/*
- * Trees of chunks that were previously allocated (trees differ only in node
- * ordering).  These are used when allocating chunks, in an attempt to re-use
- * address space.  Depending on function, different tree orderings are needed,
- * which is why there are two trees with the same contents.
- */
-static extent_tree_t	swap_chunks_szad;
-static extent_tree_t	swap_chunks_ad;
-
-/******************************************************************************/
-/* Function prototypes for non-inline static functions. */
-
-static void	*chunk_recycle_swap(size_t size, bool *zero);
-static extent_node_t *chunk_dealloc_swap_record(void *chunk, size_t size);
-
-/******************************************************************************/
-
-static void *
-chunk_recycle_swap(size_t size, bool *zero)
-{
-	extent_node_t *node, key;
-
-	key.addr = NULL;
-	key.size = size;
-	malloc_mutex_lock(&swap_mtx);
-	node = extent_tree_szad_nsearch(&swap_chunks_szad, &key);
-	if (node != NULL) {
-		void *ret = node->addr;
-
-		/* Remove node from the tree. */
-		extent_tree_szad_remove(&swap_chunks_szad, node);
-		if (node->size == size) {
-			extent_tree_ad_remove(&swap_chunks_ad, node);
-			base_node_dealloc(node);
-		} else {
-			/*
-			 * Insert the remainder of node's address range as a
-			 * smaller chunk.  Its position within swap_chunks_ad
-			 * does not change.
-			 */
-			assert(node->size > size);
-			node->addr = (void *)((uintptr_t)node->addr + size);
-			node->size -= size;
-			extent_tree_szad_insert(&swap_chunks_szad, node);
-		}
-#ifdef JEMALLOC_STATS
-		swap_avail -= size;
-#endif
-		malloc_mutex_unlock(&swap_mtx);
-
-		if (*zero)
-			memset(ret, 0, size);
-		return (ret);
-	}
-	malloc_mutex_unlock(&swap_mtx);
-
-	return (NULL);
-}
-
-void *
-chunk_alloc_swap(size_t size, bool *zero)
-{
-	void *ret;
-
-	assert(swap_enabled);
-
-	ret = chunk_recycle_swap(size, zero);
-	if (ret != NULL)
-		return (ret);
-
-	malloc_mutex_lock(&swap_mtx);
-	if ((uintptr_t)swap_end + size <= (uintptr_t)swap_max) {
-		ret = swap_end;
-		swap_end = (void *)((uintptr_t)swap_end + size);
-#ifdef JEMALLOC_STATS
-		swap_avail -= size;
-#endif
-		malloc_mutex_unlock(&swap_mtx);
-
-		if (swap_prezeroed)
-			*zero = true;
-		else if (*zero)
-			memset(ret, 0, size);
-	} else {
-		malloc_mutex_unlock(&swap_mtx);
-		return (NULL);
-	}
-
-	return (ret);
-}
-
-static extent_node_t *
-chunk_dealloc_swap_record(void *chunk, size_t size)
-{
-	extent_node_t *xnode, *node, *prev, key;
-
-	xnode = NULL;
-	while (true) {
-		key.addr = (void *)((uintptr_t)chunk + size);
-		node = extent_tree_ad_nsearch(&swap_chunks_ad, &key);
-		/* Try to coalesce forward. */
-		if (node != NULL && node->addr == key.addr) {
-			/*
-			 * Coalesce chunk with the following address range.
-			 * This does not change the position within
-			 * swap_chunks_ad, so only remove/insert from/into
-			 * swap_chunks_szad.
-			 */
-			extent_tree_szad_remove(&swap_chunks_szad, node);
-			node->addr = chunk;
-			node->size += size;
-			extent_tree_szad_insert(&swap_chunks_szad, node);
-			break;
-		} else if (xnode == NULL) {
-			/*
-			 * It is possible that base_node_alloc() will cause a
-			 * new base chunk to be allocated, so take care not to
-			 * deadlock on swap_mtx, and recover if another thread
-			 * deallocates an adjacent chunk while this one is busy
-			 * allocating xnode.
-			 */
-			malloc_mutex_unlock(&swap_mtx);
-			xnode = base_node_alloc();
-			malloc_mutex_lock(&swap_mtx);
-			if (xnode == NULL)
-				return (NULL);
-		} else {
-			/* Coalescing forward failed, so insert a new node. */
-			node = xnode;
-			xnode = NULL;
-			node->addr = chunk;
-			node->size = size;
-			extent_tree_ad_insert(&swap_chunks_ad, node);
-			extent_tree_szad_insert(&swap_chunks_szad, node);
-			break;
-		}
-	}
-	/* Discard xnode if it ended up unused do to a race. */
-	if (xnode != NULL)
-		base_node_dealloc(xnode);
-
-	/* Try to coalesce backward. */
-	prev = extent_tree_ad_prev(&swap_chunks_ad, node);
-	if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
-	    chunk) {
-		/*
-		 * Coalesce chunk with the previous address range.  This does
-		 * not change the position within swap_chunks_ad, so only
-		 * remove/insert node from/into swap_chunks_szad.
-		 */
-		extent_tree_szad_remove(&swap_chunks_szad, prev);
-		extent_tree_ad_remove(&swap_chunks_ad, prev);
-
-		extent_tree_szad_remove(&swap_chunks_szad, node);
-		node->addr = prev->addr;
-		node->size += prev->size;
-		extent_tree_szad_insert(&swap_chunks_szad, node);
-
-		base_node_dealloc(prev);
-	}
-
-	return (node);
-}
-
-bool
-chunk_in_swap(void *chunk)
-{
-	bool ret;
-
-	assert(swap_enabled);
-
-	malloc_mutex_lock(&swap_mtx);
-	if ((uintptr_t)chunk >= (uintptr_t)swap_base
-	    && (uintptr_t)chunk < (uintptr_t)swap_max)
-		ret = true;
-	else
-		ret = false;
-	malloc_mutex_unlock(&swap_mtx);
-
-	return (ret);
-}
-
-bool
-chunk_dealloc_swap(void *chunk, size_t size)
-{
-	bool ret;
-
-	assert(swap_enabled);
-
-	malloc_mutex_lock(&swap_mtx);
-	if ((uintptr_t)chunk >= (uintptr_t)swap_base
-	    && (uintptr_t)chunk < (uintptr_t)swap_max) {
-		extent_node_t *node;
-
-		/* Try to coalesce with other unused chunks. */
-		node = chunk_dealloc_swap_record(chunk, size);
-		if (node != NULL) {
-			chunk = node->addr;
-			size = node->size;
-		}
-
-		/*
-		 * Try to shrink the in-use memory if this chunk is at the end
-		 * of the in-use memory.
-		 */
-		if ((void *)((uintptr_t)chunk + size) == swap_end) {
-			swap_end = (void *)((uintptr_t)swap_end - size);
-
-			if (node != NULL) {
-				extent_tree_szad_remove(&swap_chunks_szad,
-				    node);
-				extent_tree_ad_remove(&swap_chunks_ad, node);
-				base_node_dealloc(node);
-			}
-		} else
-			madvise(chunk, size, MADV_DONTNEED);
-
-#ifdef JEMALLOC_STATS
-		swap_avail += size;
-#endif
-		ret = false;
-		goto RETURN;
-	}
-
-	ret = true;
-RETURN:
-	malloc_mutex_unlock(&swap_mtx);
-	return (ret);
-}
-
-bool
-chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed)
-{
-	bool ret;
-	unsigned i;
-	off_t off;
-	void *vaddr;
-	size_t cumsize, voff;
-	size_t sizes[nfds];
-
-	malloc_mutex_lock(&swap_mtx);
-
-	/* Get file sizes. */
-	for (i = 0, cumsize = 0; i < nfds; i++) {
-		off = lseek(fds[i], 0, SEEK_END);
-		if (off == ((off_t)-1)) {
-			ret = true;
-			goto RETURN;
-		}
-		if (PAGE_CEILING(off) != off) {
-			/* Truncate to a multiple of the page size. */
-			off &= ~PAGE_MASK;
-			if (ftruncate(fds[i], off) != 0) {
-				ret = true;
-				goto RETURN;
-			}
-		}
-		sizes[i] = off;
-		if (cumsize + off < cumsize) {
-			/*
-			 * Cumulative file size is greater than the total
-			 * address space.  Bail out while it's still obvious
-			 * what the problem is.
-			 */
-			ret = true;
-			goto RETURN;
-		}
-		cumsize += off;
-	}
-
-	/* Round down to a multiple of the chunk size. */
-	cumsize &= ~chunksize_mask;
-	if (cumsize == 0) {
-		ret = true;
-		goto RETURN;
-	}
-
-	/*
-	 * Allocate a chunk-aligned region of anonymous memory, which will
-	 * be the final location for the memory-mapped files.
-	 */
-	vaddr = chunk_alloc_mmap_noreserve(cumsize);
-	if (vaddr == NULL) {
-		ret = true;
-		goto RETURN;
-	}
-
-	/* Overlay the files onto the anonymous mapping. */
-	for (i = 0, voff = 0; i < nfds; i++) {
-		void *addr = mmap((void *)((uintptr_t)vaddr + voff), sizes[i],
-		    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fds[i], 0);
-		if (addr == MAP_FAILED) {
-			char buf[BUFERROR_BUF];
-
-
-			buferror(errno, buf, sizeof(buf));
-			malloc_write(
-			    "<jemalloc>: Error in mmap(..., MAP_FIXED, ...): ");
-			malloc_write(buf);
-			malloc_write("\n");
-			if (opt_abort)
-				abort();
-			if (munmap(vaddr, voff) == -1) {
-				buferror(errno, buf, sizeof(buf));
-				malloc_write("<jemalloc>: Error in munmap(): ");
-				malloc_write(buf);
-				malloc_write("\n");
-			}
-			ret = true;
-			goto RETURN;
-		}
-		assert(addr == (void *)((uintptr_t)vaddr + voff));
-
-		/*
-		 * Tell the kernel that the mapping will be accessed randomly,
-		 * and that it should not gratuitously sync pages to the
-		 * filesystem.
-		 */
-#ifdef MADV_RANDOM
-		madvise(addr, sizes[i], MADV_RANDOM);
-#endif
-#ifdef MADV_NOSYNC
-		madvise(addr, sizes[i], MADV_NOSYNC);
-#endif
-
-		voff += sizes[i];
-	}
-
-	swap_prezeroed = prezeroed;
-	swap_base = vaddr;
-	swap_end = swap_base;
-	swap_max = (void *)((uintptr_t)vaddr + cumsize);
-
-	/* Copy the fds array for mallctl purposes. */
-	swap_fds = (int *)base_alloc(nfds * sizeof(int));
-	if (swap_fds == NULL) {
-		ret = true;
-		goto RETURN;
-	}
-	memcpy(swap_fds, fds, nfds * sizeof(int));
-	swap_nfds = nfds;
-
-#ifdef JEMALLOC_STATS
-	swap_avail = cumsize;
-#endif
-
-	swap_enabled = true;
-
-	ret = false;
-RETURN:
-	malloc_mutex_unlock(&swap_mtx);
-	return (ret);
-}
-
-bool
-chunk_swap_boot(void)
-{
-
-	if (malloc_mutex_init(&swap_mtx))
-		return (true);
-
-	swap_enabled = false;
-	swap_prezeroed = false; /* swap.* mallctl's depend on this. */
-	swap_nfds = 0;
-	swap_fds = NULL;
-#ifdef JEMALLOC_STATS
-	swap_avail = 0;
-#endif
-	swap_base = NULL;
-	swap_end = NULL;
-	swap_max = NULL;
-
-	extent_tree_szad_new(&swap_chunks_szad);
-	extent_tree_ad_new(&swap_chunks_ad);
-
-	return (false);
-}
-
-/******************************************************************************/
-#endif /* JEMALLOC_SWAP */
diff --git a/deps/jemalloc/src/ckh.c b/deps/jemalloc/src/ckh.c
index 43fcc252..742a950b 100644
--- a/deps/jemalloc/src/ckh.c
+++ b/deps/jemalloc/src/ckh.c
@@ -73,7 +73,6 @@ ckh_isearch(ckh_t *ckh, const void *key)
 	size_t hash1, hash2, bucket, cell;
 
 	assert(ckh != NULL);
-	dassert(ckh->magic == CKH_MAGIC);
 
 	ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2);
 
@@ -100,7 +99,7 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key,
 	 * Cycle through the cells in the bucket, starting at a random position.
 	 * The randomness avoids worst-case search overhead as buckets fill up.
 	 */
-	prn32(offset, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
+	prng32(offset, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C);
 	for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) {
 		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) +
 		    ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))];
@@ -142,7 +141,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey,
 		 * were an item for which both hashes indicated the same
 		 * bucket.
 		 */
-		prn32(i, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C);
+		prng32(i, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C);
 		cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i];
 		assert(cell->key != NULL);
 
@@ -265,15 +264,15 @@ ckh_grow(ckh_t *ckh)
 		size_t usize;
 
 		lg_curcells++;
-		usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL);
+		usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
 		if (usize == 0) {
 			ret = true;
-			goto RETURN;
+			goto label_return;
 		}
 		tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
 		if (tab == NULL) {
 			ret = true;
-			goto RETURN;
+			goto label_return;
 		}
 		/* Swap in new table. */
 		ttab = ckh->tab;
@@ -293,7 +292,7 @@ ckh_grow(ckh_t *ckh)
 	}
 
 	ret = false;
-RETURN:
+label_return:
 	return (ret);
 }
 
@@ -310,7 +309,7 @@ ckh_shrink(ckh_t *ckh)
 	 */
 	lg_prevbuckets = ckh->lg_curbuckets;
 	lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1;
-	usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL);
+	usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE);
 	if (usize == 0)
 		return;
 	tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
@@ -362,7 +361,7 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
 	ckh->ninserts = 0;
 	ckh->nrelocs = 0;
 #endif
-	ckh->prn_state = 42; /* Value doesn't really matter. */
+	ckh->prng_state = 42; /* Value doesn't really matter. */
 	ckh->count = 0;
 
 	/*
@@ -383,23 +382,19 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp)
 	ckh->hash = hash;
 	ckh->keycomp = keycomp;
 
-	usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE, NULL);
+	usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE);
 	if (usize == 0) {
 		ret = true;
-		goto RETURN;
+		goto label_return;
 	}
 	ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true);
 	if (ckh->tab == NULL) {
 		ret = true;
-		goto RETURN;
+		goto label_return;
 	}
 
-#ifdef JEMALLOC_DEBUG
-	ckh->magic = CKH_MAGIC;
-#endif
-
 	ret = false;
-RETURN:
+label_return:
 	return (ret);
 }
 
@@ -408,7 +403,6 @@ ckh_delete(ckh_t *ckh)
 {
 
 	assert(ckh != NULL);
-	dassert(ckh->magic == CKH_MAGIC);
 
 #ifdef CKH_VERBOSE
 	malloc_printf(
@@ -433,7 +427,6 @@ ckh_count(ckh_t *ckh)
 {
 
 	assert(ckh != NULL);
-	dassert(ckh->magic == CKH_MAGIC);
 
 	return (ckh->count);
 }
@@ -464,7 +457,6 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data)
 	bool ret;
 
 	assert(ckh != NULL);
-	dassert(ckh->magic == CKH_MAGIC);
 	assert(ckh_search(ckh, key, NULL, NULL));
 
 #ifdef CKH_COUNT
@@ -474,12 +466,12 @@ ckh_insert(ckh_t *ckh, const void *key, const void *data)
 	while (ckh_try_insert(ckh, &key, &data)) {
 		if (ckh_grow(ckh)) {
 			ret = true;
-			goto RETURN;
+			goto label_return;
 		}
 	}
 
 	ret = false;
-RETURN:
+label_return:
 	return (ret);
 }
 
@@ -489,7 +481,6 @@ ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data)
 	size_t cell;
 
 	assert(ckh != NULL);
-	dassert(ckh->magic == CKH_MAGIC);
 
 	cell = ckh_isearch(ckh, searchkey);
 	if (cell != SIZE_T_MAX) {
@@ -521,7 +512,6 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data)
 	size_t cell;
 
 	assert(ckh != NULL);
-	dassert(ckh->magic == CKH_MAGIC);
 
 	cell = ckh_isearch(ckh, searchkey);
 	if (cell != SIZE_T_MAX) {
@@ -545,7 +535,7 @@ ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
 	assert(hash1 != NULL);
 	assert(hash2 != NULL);
 
-	h = hash(key, strlen((const char *)key), 0x94122f335b332aeaLLU);
+	h = hash(key, strlen((const char *)key), UINT64_C(0x94122f335b332aea));
 	if (minbits <= 32) {
 		/*
 		 * Avoid doing multiple hashes, since a single hash provides
@@ -556,7 +546,7 @@ ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
 	} else {
 		ret1 = h;
 		ret2 = hash(key, strlen((const char *)key),
-		    0x8432a476666bbc13LLU);
+		    UINT64_C(0x8432a476666bbc13));
 	}
 
 	*hash1 = ret1;
@@ -593,7 +583,7 @@ ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1,
 	u.i = 0;
 #endif
 	u.v = key;
-	h = hash(&u.i, sizeof(u.i), 0xd983396e68886082LLU);
+	h = hash(&u.i, sizeof(u.i), UINT64_C(0xd983396e68886082));
 	if (minbits <= 32) {
 		/*
 		 * Avoid doing multiple hashes, since a single hash provides
@@ -604,7 +594,7 @@ ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1,
 	} else {
 		assert(SIZEOF_PTR == 8);
 		ret1 = h;
-		ret2 = hash(&u.i, sizeof(u.i), 0x5e2be9aff8709a5dLLU);
+		ret2 = hash(&u.i, sizeof(u.i), UINT64_C(0x5e2be9aff8709a5d));
 	}
 
 	*hash1 = ret1;
diff --git a/deps/jemalloc/src/ctl.c b/deps/jemalloc/src/ctl.c
index e5336d36..55e76677 100644
--- a/deps/jemalloc/src/ctl.c
+++ b/deps/jemalloc/src/ctl.c
@@ -8,14 +8,38 @@
  * ctl_mtx protects the following:
  * - ctl_stats.*
  * - opt_prof_active
- * - swap_enabled
- * - swap_prezeroed
  */
 static malloc_mutex_t	ctl_mtx;
 static bool		ctl_initialized;
 static uint64_t		ctl_epoch;
 static ctl_stats_t	ctl_stats;
 
+/******************************************************************************/
+/* Helpers for named and indexed nodes. */
+
+static inline const ctl_named_node_t *
+ctl_named_node(const ctl_node_t *node)
+{
+
+	return ((node->named) ? (const ctl_named_node_t *)node : NULL);
+}
+
+static inline const ctl_named_node_t *
+ctl_named_children(const ctl_named_node_t *node, int index)
+{
+	const ctl_named_node_t *children = ctl_named_node(node->children);
+
+	return (children ? &children[index] : NULL);
+}
+
+static inline const ctl_indexed_node_t *
+ctl_indexed_node(const ctl_node_t *node)
+{
+
+	return ((node->named == false) ? (const ctl_indexed_node_t *)node :
+	    NULL);
+}
+
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
 
@@ -24,19 +48,15 @@ static int	n##_ctl(const size_t *mib, size_t miblen, void *oldp,	\
     size_t *oldlenp, void *newp, size_t newlen);
 
 #define	INDEX_PROTO(n)							\
-const ctl_node_t	*n##_index(const size_t *mib, size_t miblen,	\
+const ctl_named_node_t	*n##_index(const size_t *mib, size_t miblen,	\
     size_t i);
 
-#ifdef JEMALLOC_STATS
 static bool	ctl_arena_init(ctl_arena_stats_t *astats);
-#endif
 static void	ctl_arena_clear(ctl_arena_stats_t *astats);
-#ifdef JEMALLOC_STATS
 static void	ctl_arena_stats_amerge(ctl_arena_stats_t *cstats,
     arena_t *arena);
 static void	ctl_arena_stats_smerge(ctl_arena_stats_t *sstats,
     ctl_arena_stats_t *astats);
-#endif
 static void	ctl_arena_refresh(arena_t *arena, unsigned i);
 static void	ctl_refresh(void);
 static bool	ctl_init(void);
@@ -45,67 +65,51 @@ static int	ctl_lookup(const char *name, ctl_node_t const **nodesp,
 
 CTL_PROTO(version)
 CTL_PROTO(epoch)
-#ifdef JEMALLOC_TCACHE
-CTL_PROTO(tcache_flush)
-#endif
+CTL_PROTO(thread_tcache_enabled)
+CTL_PROTO(thread_tcache_flush)
 CTL_PROTO(thread_arena)
-#ifdef JEMALLOC_STATS
 CTL_PROTO(thread_allocated)
 CTL_PROTO(thread_allocatedp)
 CTL_PROTO(thread_deallocated)
 CTL_PROTO(thread_deallocatedp)
-#endif
 CTL_PROTO(config_debug)
 CTL_PROTO(config_dss)
-CTL_PROTO(config_dynamic_page_shift)
 CTL_PROTO(config_fill)
 CTL_PROTO(config_lazy_lock)
+CTL_PROTO(config_mremap)
+CTL_PROTO(config_munmap)
 CTL_PROTO(config_prof)
 CTL_PROTO(config_prof_libgcc)
 CTL_PROTO(config_prof_libunwind)
 CTL_PROTO(config_stats)
-CTL_PROTO(config_swap)
-CTL_PROTO(config_sysv)
 CTL_PROTO(config_tcache)
-CTL_PROTO(config_tiny)
 CTL_PROTO(config_tls)
+CTL_PROTO(config_utrace)
+CTL_PROTO(config_valgrind)
 CTL_PROTO(config_xmalloc)
 CTL_PROTO(opt_abort)
-CTL_PROTO(opt_lg_qspace_max)
-CTL_PROTO(opt_lg_cspace_max)
 CTL_PROTO(opt_lg_chunk)
 CTL_PROTO(opt_narenas)
 CTL_PROTO(opt_lg_dirty_mult)
 CTL_PROTO(opt_stats_print)
-#ifdef JEMALLOC_FILL
 CTL_PROTO(opt_junk)
 CTL_PROTO(opt_zero)
-#endif
-#ifdef JEMALLOC_SYSV
-CTL_PROTO(opt_sysv)
-#endif
-#ifdef JEMALLOC_XMALLOC
+CTL_PROTO(opt_quarantine)
+CTL_PROTO(opt_redzone)
+CTL_PROTO(opt_utrace)
+CTL_PROTO(opt_valgrind)
 CTL_PROTO(opt_xmalloc)
-#endif
-#ifdef JEMALLOC_TCACHE
 CTL_PROTO(opt_tcache)
-CTL_PROTO(opt_lg_tcache_gc_sweep)
-#endif
-#ifdef JEMALLOC_PROF
+CTL_PROTO(opt_lg_tcache_max)
 CTL_PROTO(opt_prof)
 CTL_PROTO(opt_prof_prefix)
 CTL_PROTO(opt_prof_active)
-CTL_PROTO(opt_lg_prof_bt_max)
 CTL_PROTO(opt_lg_prof_sample)
 CTL_PROTO(opt_lg_prof_interval)
 CTL_PROTO(opt_prof_gdump)
+CTL_PROTO(opt_prof_final)
 CTL_PROTO(opt_prof_leak)
 CTL_PROTO(opt_prof_accum)
-CTL_PROTO(opt_lg_prof_tcmax)
-#endif
-#ifdef JEMALLOC_SWAP
-CTL_PROTO(opt_overcommit)
-#endif
 CTL_PROTO(arenas_bin_i_size)
 CTL_PROTO(arenas_bin_i_nregs)
 CTL_PROTO(arenas_bin_i_run_size)
@@ -115,39 +119,15 @@ INDEX_PROTO(arenas_lrun_i)
 CTL_PROTO(arenas_narenas)
 CTL_PROTO(arenas_initialized)
 CTL_PROTO(arenas_quantum)
-CTL_PROTO(arenas_cacheline)
-CTL_PROTO(arenas_subpage)
-CTL_PROTO(arenas_pagesize)
-CTL_PROTO(arenas_chunksize)
-#ifdef JEMALLOC_TINY
-CTL_PROTO(arenas_tspace_min)
-CTL_PROTO(arenas_tspace_max)
-#endif
-CTL_PROTO(arenas_qspace_min)
-CTL_PROTO(arenas_qspace_max)
-CTL_PROTO(arenas_cspace_min)
-CTL_PROTO(arenas_cspace_max)
-CTL_PROTO(arenas_sspace_min)
-CTL_PROTO(arenas_sspace_max)
-#ifdef JEMALLOC_TCACHE
+CTL_PROTO(arenas_page)
 CTL_PROTO(arenas_tcache_max)
-#endif
-CTL_PROTO(arenas_ntbins)
-CTL_PROTO(arenas_nqbins)
-CTL_PROTO(arenas_ncbins)
-CTL_PROTO(arenas_nsbins)
 CTL_PROTO(arenas_nbins)
-#ifdef JEMALLOC_TCACHE
 CTL_PROTO(arenas_nhbins)
-#endif
 CTL_PROTO(arenas_nlruns)
 CTL_PROTO(arenas_purge)
-#ifdef JEMALLOC_PROF
 CTL_PROTO(prof_active)
 CTL_PROTO(prof_dump)
 CTL_PROTO(prof_interval)
-#endif
-#ifdef JEMALLOC_STATS
 CTL_PROTO(stats_chunks_current)
 CTL_PROTO(stats_chunks_total)
 CTL_PROTO(stats_chunks_high)
@@ -166,46 +146,29 @@ CTL_PROTO(stats_arenas_i_bins_j_allocated)
 CTL_PROTO(stats_arenas_i_bins_j_nmalloc)
 CTL_PROTO(stats_arenas_i_bins_j_ndalloc)
 CTL_PROTO(stats_arenas_i_bins_j_nrequests)
-#ifdef JEMALLOC_TCACHE
 CTL_PROTO(stats_arenas_i_bins_j_nfills)
 CTL_PROTO(stats_arenas_i_bins_j_nflushes)
-#endif
 CTL_PROTO(stats_arenas_i_bins_j_nruns)
 CTL_PROTO(stats_arenas_i_bins_j_nreruns)
-CTL_PROTO(stats_arenas_i_bins_j_highruns)
 CTL_PROTO(stats_arenas_i_bins_j_curruns)
 INDEX_PROTO(stats_arenas_i_bins_j)
 CTL_PROTO(stats_arenas_i_lruns_j_nmalloc)
 CTL_PROTO(stats_arenas_i_lruns_j_ndalloc)
 CTL_PROTO(stats_arenas_i_lruns_j_nrequests)
-CTL_PROTO(stats_arenas_i_lruns_j_highruns)
 CTL_PROTO(stats_arenas_i_lruns_j_curruns)
 INDEX_PROTO(stats_arenas_i_lruns_j)
-#endif
 CTL_PROTO(stats_arenas_i_nthreads)
 CTL_PROTO(stats_arenas_i_pactive)
 CTL_PROTO(stats_arenas_i_pdirty)
-#ifdef JEMALLOC_STATS
 CTL_PROTO(stats_arenas_i_mapped)
 CTL_PROTO(stats_arenas_i_npurge)
 CTL_PROTO(stats_arenas_i_nmadvise)
 CTL_PROTO(stats_arenas_i_purged)
-#endif
 INDEX_PROTO(stats_arenas_i)
-#ifdef JEMALLOC_STATS
 CTL_PROTO(stats_cactive)
 CTL_PROTO(stats_allocated)
 CTL_PROTO(stats_active)
 CTL_PROTO(stats_mapped)
-#endif
-#ifdef JEMALLOC_SWAP
-#  ifdef JEMALLOC_STATS
-CTL_PROTO(swap_avail)
-#  endif
-CTL_PROTO(swap_prezeroed)
-CTL_PROTO(swap_nfds)
-CTL_PROTO(swap_fds)
-#endif
 
 /******************************************************************************/
 /* mallctl tree. */
@@ -213,296 +176,223 @@ CTL_PROTO(swap_fds)
 /* Maximum tree depth. */
 #define	CTL_MAX_DEPTH	6
 
-#define	NAME(n)	true,	{.named = {n
-#define	CHILD(c) sizeof(c##_node) / sizeof(ctl_node_t),	c##_node}},	NULL
-#define	CTL(c)	0,				NULL}},		c##_ctl
+#define	NAME(n)	{true},	n
+#define	CHILD(t, c)							\
+	sizeof(c##_node) / sizeof(ctl_##t##_node_t),			\
+	(ctl_node_t *)c##_node,						\
+	NULL
+#define	CTL(c)	0, NULL, c##_ctl
 
 /*
  * Only handles internal indexed nodes, since there are currently no external
  * ones.
  */
-#define	INDEX(i)	false,	{.indexed = {i##_index}},		NULL
+#define	INDEX(i)	{false},	i##_index
 
-#ifdef JEMALLOC_TCACHE
-static const ctl_node_t	tcache_node[] = {
-	{NAME("flush"),		CTL(tcache_flush)}
+static const ctl_named_node_t	tcache_node[] = {
+	{NAME("enabled"),	CTL(thread_tcache_enabled)},
+	{NAME("flush"),		CTL(thread_tcache_flush)}
 };
-#endif
 
-static const ctl_node_t	thread_node[] = {
-	{NAME("arena"),		CTL(thread_arena)}
-#ifdef JEMALLOC_STATS
-	,
+static const ctl_named_node_t	thread_node[] = {
+	{NAME("arena"),		CTL(thread_arena)},
 	{NAME("allocated"),	CTL(thread_allocated)},
 	{NAME("allocatedp"),	CTL(thread_allocatedp)},
 	{NAME("deallocated"),	CTL(thread_deallocated)},
-	{NAME("deallocatedp"),	CTL(thread_deallocatedp)}
-#endif
+	{NAME("deallocatedp"),	CTL(thread_deallocatedp)},
+	{NAME("tcache"),	CHILD(named, tcache)}
 };
 
-static const ctl_node_t	config_node[] = {
+static const ctl_named_node_t	config_node[] = {
 	{NAME("debug"),			CTL(config_debug)},
 	{NAME("dss"),			CTL(config_dss)},
-	{NAME("dynamic_page_shift"),	CTL(config_dynamic_page_shift)},
 	{NAME("fill"),			CTL(config_fill)},
 	{NAME("lazy_lock"),		CTL(config_lazy_lock)},
+	{NAME("mremap"),		CTL(config_mremap)},
+	{NAME("munmap"),		CTL(config_munmap)},
 	{NAME("prof"),			CTL(config_prof)},
 	{NAME("prof_libgcc"),		CTL(config_prof_libgcc)},
 	{NAME("prof_libunwind"),	CTL(config_prof_libunwind)},
 	{NAME("stats"),			CTL(config_stats)},
-	{NAME("swap"),			CTL(config_swap)},
-	{NAME("sysv"),			CTL(config_sysv)},
 	{NAME("tcache"),		CTL(config_tcache)},
-	{NAME("tiny"),			CTL(config_tiny)},
 	{NAME("tls"),			CTL(config_tls)},
+	{NAME("utrace"),		CTL(config_utrace)},
+	{NAME("valgrind"),		CTL(config_valgrind)},
 	{NAME("xmalloc"),		CTL(config_xmalloc)}
 };
 
-static const ctl_node_t opt_node[] = {
+static const ctl_named_node_t opt_node[] = {
 	{NAME("abort"),			CTL(opt_abort)},
-	{NAME("lg_qspace_max"),		CTL(opt_lg_qspace_max)},
-	{NAME("lg_cspace_max"),		CTL(opt_lg_cspace_max)},
 	{NAME("lg_chunk"),		CTL(opt_lg_chunk)},
 	{NAME("narenas"),		CTL(opt_narenas)},
 	{NAME("lg_dirty_mult"),		CTL(opt_lg_dirty_mult)},
-	{NAME("stats_print"),		CTL(opt_stats_print)}
-#ifdef JEMALLOC_FILL
-	,
+	{NAME("stats_print"),		CTL(opt_stats_print)},
 	{NAME("junk"),			CTL(opt_junk)},
-	{NAME("zero"),			CTL(opt_zero)}
-#endif
-#ifdef JEMALLOC_SYSV
-	,
-	{NAME("sysv"),			CTL(opt_sysv)}
-#endif
-#ifdef JEMALLOC_XMALLOC
-	,
-	{NAME("xmalloc"),		CTL(opt_xmalloc)}
-#endif
-#ifdef JEMALLOC_TCACHE
-	,
+	{NAME("zero"),			CTL(opt_zero)},
+	{NAME("quarantine"),		CTL(opt_quarantine)},
+	{NAME("redzone"),		CTL(opt_redzone)},
+	{NAME("utrace"),		CTL(opt_utrace)},
+	{NAME("valgrind"),		CTL(opt_valgrind)},
+	{NAME("xmalloc"),		CTL(opt_xmalloc)},
 	{NAME("tcache"),		CTL(opt_tcache)},
-	{NAME("lg_tcache_gc_sweep"),	CTL(opt_lg_tcache_gc_sweep)}
-#endif
-#ifdef JEMALLOC_PROF
-	,
+	{NAME("lg_tcache_max"),		CTL(opt_lg_tcache_max)},
 	{NAME("prof"),			CTL(opt_prof)},
 	{NAME("prof_prefix"),		CTL(opt_prof_prefix)},
 	{NAME("prof_active"),		CTL(opt_prof_active)},
-	{NAME("lg_prof_bt_max"),	CTL(opt_lg_prof_bt_max)},
 	{NAME("lg_prof_sample"),	CTL(opt_lg_prof_sample)},
 	{NAME("lg_prof_interval"),	CTL(opt_lg_prof_interval)},
 	{NAME("prof_gdump"),		CTL(opt_prof_gdump)},
+	{NAME("prof_final"),		CTL(opt_prof_final)},
 	{NAME("prof_leak"),		CTL(opt_prof_leak)},
-	{NAME("prof_accum"),		CTL(opt_prof_accum)},
-	{NAME("lg_prof_tcmax"),		CTL(opt_lg_prof_tcmax)}
-#endif
-#ifdef JEMALLOC_SWAP
-	,
-	{NAME("overcommit"),		CTL(opt_overcommit)}
-#endif
+	{NAME("prof_accum"),		CTL(opt_prof_accum)}
 };
 
-static const ctl_node_t arenas_bin_i_node[] = {
+static const ctl_named_node_t arenas_bin_i_node[] = {
 	{NAME("size"),			CTL(arenas_bin_i_size)},
 	{NAME("nregs"),			CTL(arenas_bin_i_nregs)},
 	{NAME("run_size"),		CTL(arenas_bin_i_run_size)}
 };
-static const ctl_node_t super_arenas_bin_i_node[] = {
-	{NAME(""),			CHILD(arenas_bin_i)}
+static const ctl_named_node_t super_arenas_bin_i_node[] = {
+	{NAME(""),			CHILD(named, arenas_bin_i)}
 };
 
-static const ctl_node_t arenas_bin_node[] = {
+static const ctl_indexed_node_t arenas_bin_node[] = {
 	{INDEX(arenas_bin_i)}
 };
 
-static const ctl_node_t arenas_lrun_i_node[] = {
+static const ctl_named_node_t arenas_lrun_i_node[] = {
 	{NAME("size"),			CTL(arenas_lrun_i_size)}
 };
-static const ctl_node_t super_arenas_lrun_i_node[] = {
-	{NAME(""),			CHILD(arenas_lrun_i)}
+static const ctl_named_node_t super_arenas_lrun_i_node[] = {
+	{NAME(""),			CHILD(named, arenas_lrun_i)}
 };
 
-static const ctl_node_t arenas_lrun_node[] = {
+static const ctl_indexed_node_t arenas_lrun_node[] = {
 	{INDEX(arenas_lrun_i)}
 };
 
-static const ctl_node_t arenas_node[] = {
+static const ctl_named_node_t arenas_node[] = {
 	{NAME("narenas"),		CTL(arenas_narenas)},
 	{NAME("initialized"),		CTL(arenas_initialized)},
 	{NAME("quantum"),		CTL(arenas_quantum)},
-	{NAME("cacheline"),		CTL(arenas_cacheline)},
-	{NAME("subpage"),		CTL(arenas_subpage)},
-	{NAME("pagesize"),		CTL(arenas_pagesize)},
-	{NAME("chunksize"),		CTL(arenas_chunksize)},
-#ifdef JEMALLOC_TINY
-	{NAME("tspace_min"),		CTL(arenas_tspace_min)},
-	{NAME("tspace_max"),		CTL(arenas_tspace_max)},
-#endif
-	{NAME("qspace_min"),		CTL(arenas_qspace_min)},
-	{NAME("qspace_max"),		CTL(arenas_qspace_max)},
-	{NAME("cspace_min"),		CTL(arenas_cspace_min)},
-	{NAME("cspace_max"),		CTL(arenas_cspace_max)},
-	{NAME("sspace_min"),		CTL(arenas_sspace_min)},
-	{NAME("sspace_max"),		CTL(arenas_sspace_max)},
-#ifdef JEMALLOC_TCACHE
+	{NAME("page"),			CTL(arenas_page)},
 	{NAME("tcache_max"),		CTL(arenas_tcache_max)},
-#endif
-	{NAME("ntbins"),		CTL(arenas_ntbins)},
-	{NAME("nqbins"),		CTL(arenas_nqbins)},
-	{NAME("ncbins"),		CTL(arenas_ncbins)},
-	{NAME("nsbins"),		CTL(arenas_nsbins)},
 	{NAME("nbins"),			CTL(arenas_nbins)},
-#ifdef JEMALLOC_TCACHE
 	{NAME("nhbins"),		CTL(arenas_nhbins)},
-#endif
-	{NAME("bin"),			CHILD(arenas_bin)},
+	{NAME("bin"),			CHILD(indexed, arenas_bin)},
 	{NAME("nlruns"),		CTL(arenas_nlruns)},
-	{NAME("lrun"),			CHILD(arenas_lrun)},
+	{NAME("lrun"),			CHILD(indexed, arenas_lrun)},
 	{NAME("purge"),			CTL(arenas_purge)}
 };
 
-#ifdef JEMALLOC_PROF
-static const ctl_node_t	prof_node[] = {
+static const ctl_named_node_t	prof_node[] = {
 	{NAME("active"),	CTL(prof_active)},
 	{NAME("dump"),		CTL(prof_dump)},
 	{NAME("interval"),	CTL(prof_interval)}
 };
-#endif
 
-#ifdef JEMALLOC_STATS
-static const ctl_node_t stats_chunks_node[] = {
+static const ctl_named_node_t stats_chunks_node[] = {
 	{NAME("current"),		CTL(stats_chunks_current)},
 	{NAME("total"),			CTL(stats_chunks_total)},
 	{NAME("high"),			CTL(stats_chunks_high)}
 };
 
-static const ctl_node_t stats_huge_node[] = {
+static const ctl_named_node_t stats_huge_node[] = {
 	{NAME("allocated"),		CTL(stats_huge_allocated)},
 	{NAME("nmalloc"),		CTL(stats_huge_nmalloc)},
 	{NAME("ndalloc"),		CTL(stats_huge_ndalloc)}
 };
 
-static const ctl_node_t stats_arenas_i_small_node[] = {
+static const ctl_named_node_t stats_arenas_i_small_node[] = {
 	{NAME("allocated"),		CTL(stats_arenas_i_small_allocated)},
 	{NAME("nmalloc"),		CTL(stats_arenas_i_small_nmalloc)},
 	{NAME("ndalloc"),		CTL(stats_arenas_i_small_ndalloc)},
 	{NAME("nrequests"),		CTL(stats_arenas_i_small_nrequests)}
 };
 
-static const ctl_node_t stats_arenas_i_large_node[] = {
+static const ctl_named_node_t stats_arenas_i_large_node[] = {
 	{NAME("allocated"),		CTL(stats_arenas_i_large_allocated)},
 	{NAME("nmalloc"),		CTL(stats_arenas_i_large_nmalloc)},
 	{NAME("ndalloc"),		CTL(stats_arenas_i_large_ndalloc)},
 	{NAME("nrequests"),		CTL(stats_arenas_i_large_nrequests)}
 };
 
-static const ctl_node_t stats_arenas_i_bins_j_node[] = {
+static const ctl_named_node_t stats_arenas_i_bins_j_node[] = {
 	{NAME("allocated"),		CTL(stats_arenas_i_bins_j_allocated)},
 	{NAME("nmalloc"),		CTL(stats_arenas_i_bins_j_nmalloc)},
 	{NAME("ndalloc"),		CTL(stats_arenas_i_bins_j_ndalloc)},
 	{NAME("nrequests"),		CTL(stats_arenas_i_bins_j_nrequests)},
-#ifdef JEMALLOC_TCACHE
 	{NAME("nfills"),		CTL(stats_arenas_i_bins_j_nfills)},
 	{NAME("nflushes"),		CTL(stats_arenas_i_bins_j_nflushes)},
-#endif
 	{NAME("nruns"),			CTL(stats_arenas_i_bins_j_nruns)},
 	{NAME("nreruns"),		CTL(stats_arenas_i_bins_j_nreruns)},
-	{NAME("highruns"),		CTL(stats_arenas_i_bins_j_highruns)},
 	{NAME("curruns"),		CTL(stats_arenas_i_bins_j_curruns)}
 };
-static const ctl_node_t super_stats_arenas_i_bins_j_node[] = {
-	{NAME(""),			CHILD(stats_arenas_i_bins_j)}
+static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = {
+	{NAME(""),			CHILD(named, stats_arenas_i_bins_j)}
 };
 
-static const ctl_node_t stats_arenas_i_bins_node[] = {
+static const ctl_indexed_node_t stats_arenas_i_bins_node[] = {
 	{INDEX(stats_arenas_i_bins_j)}
 };
 
-static const ctl_node_t stats_arenas_i_lruns_j_node[] = {
+static const ctl_named_node_t stats_arenas_i_lruns_j_node[] = {
 	{NAME("nmalloc"),		CTL(stats_arenas_i_lruns_j_nmalloc)},
 	{NAME("ndalloc"),		CTL(stats_arenas_i_lruns_j_ndalloc)},
 	{NAME("nrequests"),		CTL(stats_arenas_i_lruns_j_nrequests)},
-	{NAME("highruns"),		CTL(stats_arenas_i_lruns_j_highruns)},
 	{NAME("curruns"),		CTL(stats_arenas_i_lruns_j_curruns)}
 };
-static const ctl_node_t super_stats_arenas_i_lruns_j_node[] = {
-	{NAME(""),			CHILD(stats_arenas_i_lruns_j)}
+static const ctl_named_node_t super_stats_arenas_i_lruns_j_node[] = {
+	{NAME(""),			CHILD(named, stats_arenas_i_lruns_j)}
 };
 
-static const ctl_node_t stats_arenas_i_lruns_node[] = {
+static const ctl_indexed_node_t stats_arenas_i_lruns_node[] = {
 	{INDEX(stats_arenas_i_lruns_j)}
 };
-#endif
 
-static const ctl_node_t stats_arenas_i_node[] = {
+static const ctl_named_node_t stats_arenas_i_node[] = {
 	{NAME("nthreads"),		CTL(stats_arenas_i_nthreads)},
 	{NAME("pactive"),		CTL(stats_arenas_i_pactive)},
-	{NAME("pdirty"),		CTL(stats_arenas_i_pdirty)}
-#ifdef JEMALLOC_STATS
-	,
+	{NAME("pdirty"),		CTL(stats_arenas_i_pdirty)},
 	{NAME("mapped"),		CTL(stats_arenas_i_mapped)},
 	{NAME("npurge"),		CTL(stats_arenas_i_npurge)},
 	{NAME("nmadvise"),		CTL(stats_arenas_i_nmadvise)},
 	{NAME("purged"),		CTL(stats_arenas_i_purged)},
-	{NAME("small"),			CHILD(stats_arenas_i_small)},
-	{NAME("large"),			CHILD(stats_arenas_i_large)},
-	{NAME("bins"),			CHILD(stats_arenas_i_bins)},
-	{NAME("lruns"),		CHILD(stats_arenas_i_lruns)}
-#endif
+	{NAME("small"),			CHILD(named, stats_arenas_i_small)},
+	{NAME("large"),			CHILD(named, stats_arenas_i_large)},
+	{NAME("bins"),			CHILD(indexed, stats_arenas_i_bins)},
+	{NAME("lruns"),			CHILD(indexed, stats_arenas_i_lruns)}
 };
-static const ctl_node_t super_stats_arenas_i_node[] = {
-	{NAME(""),			CHILD(stats_arenas_i)}
+static const ctl_named_node_t super_stats_arenas_i_node[] = {
+	{NAME(""),			CHILD(named, stats_arenas_i)}
 };
 
-static const ctl_node_t stats_arenas_node[] = {
+static const ctl_indexed_node_t stats_arenas_node[] = {
 	{INDEX(stats_arenas_i)}
 };
 
-static const ctl_node_t stats_node[] = {
-#ifdef JEMALLOC_STATS
+static const ctl_named_node_t stats_node[] = {
 	{NAME("cactive"),		CTL(stats_cactive)},
 	{NAME("allocated"),		CTL(stats_allocated)},
 	{NAME("active"),		CTL(stats_active)},
 	{NAME("mapped"),		CTL(stats_mapped)},
-	{NAME("chunks"),		CHILD(stats_chunks)},
-	{NAME("huge"),			CHILD(stats_huge)},
-#endif
-	{NAME("arenas"),		CHILD(stats_arenas)}
+	{NAME("chunks"),		CHILD(named, stats_chunks)},
+	{NAME("huge"),			CHILD(named, stats_huge)},
+	{NAME("arenas"),		CHILD(indexed, stats_arenas)}
 };
 
-#ifdef JEMALLOC_SWAP
-static const ctl_node_t swap_node[] = {
-#  ifdef JEMALLOC_STATS
-	{NAME("avail"),			CTL(swap_avail)},
-#  endif
-	{NAME("prezeroed"),		CTL(swap_prezeroed)},
-	{NAME("nfds"),			CTL(swap_nfds)},
-	{NAME("fds"),			CTL(swap_fds)}
-};
-#endif
-
-static const ctl_node_t	root_node[] = {
+static const ctl_named_node_t	root_node[] = {
 	{NAME("version"),	CTL(version)},
 	{NAME("epoch"),		CTL(epoch)},
-#ifdef JEMALLOC_TCACHE
-	{NAME("tcache"),	CHILD(tcache)},
-#endif
-	{NAME("thread"),	CHILD(thread)},
-	{NAME("config"),	CHILD(config)},
-	{NAME("opt"),		CHILD(opt)},
-	{NAME("arenas"),	CHILD(arenas)},
-#ifdef JEMALLOC_PROF
-	{NAME("prof"),		CHILD(prof)},
-#endif
-	{NAME("stats"),		CHILD(stats)}
-#ifdef JEMALLOC_SWAP
-	,
-	{NAME("swap"),		CHILD(swap)}
-#endif
+	{NAME("thread"),	CHILD(named, thread)},
+	{NAME("config"),	CHILD(named, config)},
+	{NAME("opt"),		CHILD(named, opt)},
+	{NAME("arenas"),	CHILD(named, arenas)},
+	{NAME("prof"),		CHILD(named, prof)},
+	{NAME("stats"),		CHILD(named, stats)}
 };
-static const ctl_node_t super_root_node[] = {
-	{NAME(""),		CHILD(root)}
+static const ctl_named_node_t super_root_node[] = {
+	{NAME(""),		CHILD(named, root)}
 };
 
 #undef NAME
@@ -512,17 +402,10 @@ static const ctl_node_t super_root_node[] = {
 
 /******************************************************************************/
 
-#ifdef JEMALLOC_STATS
 static bool
 ctl_arena_init(ctl_arena_stats_t *astats)
 {
 
-	if (astats->bstats == NULL) {
-		astats->bstats = (malloc_bin_stats_t *)base_alloc(nbins *
-		    sizeof(malloc_bin_stats_t));
-		if (astats->bstats == NULL)
-			return (true);
-	}
 	if (astats->lstats == NULL) {
 		astats->lstats = (malloc_large_stats_t *)base_alloc(nlclasses *
 		    sizeof(malloc_large_stats_t));
@@ -532,7 +415,6 @@ ctl_arena_init(ctl_arena_stats_t *astats)
 
 	return (false);
 }
-#endif
 
 static void
 ctl_arena_clear(ctl_arena_stats_t *astats)
@@ -540,18 +422,18 @@ ctl_arena_clear(ctl_arena_stats_t *astats)
 
 	astats->pactive = 0;
 	astats->pdirty = 0;
-#ifdef JEMALLOC_STATS
-	memset(&astats->astats, 0, sizeof(arena_stats_t));
-	astats->allocated_small = 0;
-	astats->nmalloc_small = 0;
-	astats->ndalloc_small = 0;
-	astats->nrequests_small = 0;
-	memset(astats->bstats, 0, nbins * sizeof(malloc_bin_stats_t));
-	memset(astats->lstats, 0, nlclasses * sizeof(malloc_large_stats_t));
-#endif
+	if (config_stats) {
+		memset(&astats->astats, 0, sizeof(arena_stats_t));
+		astats->allocated_small = 0;
+		astats->nmalloc_small = 0;
+		astats->ndalloc_small = 0;
+		astats->nrequests_small = 0;
+		memset(astats->bstats, 0, NBINS * sizeof(malloc_bin_stats_t));
+		memset(astats->lstats, 0, nlclasses *
+		    sizeof(malloc_large_stats_t));
+	}
 }
 
-#ifdef JEMALLOC_STATS
 static void
 ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena)
 {
@@ -560,7 +442,7 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena)
 	arena_stats_merge(arena, &cstats->pactive, &cstats->pdirty,
 	    &cstats->astats, cstats->bstats, cstats->lstats);
 
-	for (i = 0; i < nbins; i++) {
+	for (i = 0; i < NBINS; i++) {
 		cstats->allocated_small += cstats->bstats[i].allocated;
 		cstats->nmalloc_small += cstats->bstats[i].nmalloc;
 		cstats->ndalloc_small += cstats->bstats[i].ndalloc;
@@ -595,26 +477,24 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats)
 		sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc;
 		sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc;
 		sstats->lstats[i].nrequests += astats->lstats[i].nrequests;
-		sstats->lstats[i].highruns += astats->lstats[i].highruns;
 		sstats->lstats[i].curruns += astats->lstats[i].curruns;
 	}
 
-	for (i = 0; i < nbins; i++) {
+	for (i = 0; i < NBINS; i++) {
 		sstats->bstats[i].allocated += astats->bstats[i].allocated;
 		sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc;
 		sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc;
 		sstats->bstats[i].nrequests += astats->bstats[i].nrequests;
-#ifdef JEMALLOC_TCACHE
-		sstats->bstats[i].nfills += astats->bstats[i].nfills;
-		sstats->bstats[i].nflushes += astats->bstats[i].nflushes;
-#endif
+		if (config_tcache) {
+			sstats->bstats[i].nfills += astats->bstats[i].nfills;
+			sstats->bstats[i].nflushes +=
+			    astats->bstats[i].nflushes;
+		}
 		sstats->bstats[i].nruns += astats->bstats[i].nruns;
 		sstats->bstats[i].reruns += astats->bstats[i].reruns;
-		sstats->bstats[i].highruns += astats->bstats[i].highruns;
 		sstats->bstats[i].curruns += astats->bstats[i].curruns;
 	}
 }
-#endif
 
 static void
 ctl_arena_refresh(arena_t *arena, unsigned i)
@@ -625,38 +505,38 @@ ctl_arena_refresh(arena_t *arena, unsigned i)
 	ctl_arena_clear(astats);
 
 	sstats->nthreads += astats->nthreads;
-#ifdef JEMALLOC_STATS
-	ctl_arena_stats_amerge(astats, arena);
-	/* Merge into sum stats as well. */
-	ctl_arena_stats_smerge(sstats, astats);
-#else
-	astats->pactive += arena->nactive;
-	astats->pdirty += arena->ndirty;
-	/* Merge into sum stats as well. */
-	sstats->pactive += arena->nactive;
-	sstats->pdirty += arena->ndirty;
-#endif
+	if (config_stats) {
+		ctl_arena_stats_amerge(astats, arena);
+		/* Merge into sum stats as well. */
+		ctl_arena_stats_smerge(sstats, astats);
+	} else {
+		astats->pactive += arena->nactive;
+		astats->pdirty += arena->ndirty;
+		/* Merge into sum stats as well. */
+		sstats->pactive += arena->nactive;
+		sstats->pdirty += arena->ndirty;
+	}
 }
 
 static void
 ctl_refresh(void)
 {
 	unsigned i;
-	arena_t *tarenas[narenas];
-
-#ifdef JEMALLOC_STATS
-	malloc_mutex_lock(&chunks_mtx);
-	ctl_stats.chunks.current = stats_chunks.curchunks;
-	ctl_stats.chunks.total = stats_chunks.nchunks;
-	ctl_stats.chunks.high = stats_chunks.highchunks;
-	malloc_mutex_unlock(&chunks_mtx);
-
-	malloc_mutex_lock(&huge_mtx);
-	ctl_stats.huge.allocated = huge_allocated;
-	ctl_stats.huge.nmalloc = huge_nmalloc;
-	ctl_stats.huge.ndalloc = huge_ndalloc;
-	malloc_mutex_unlock(&huge_mtx);
-#endif
+	VARIABLE_ARRAY(arena_t *, tarenas, narenas);
+
+	if (config_stats) {
+		malloc_mutex_lock(&chunks_mtx);
+		ctl_stats.chunks.current = stats_chunks.curchunks;
+		ctl_stats.chunks.total = stats_chunks.nchunks;
+		ctl_stats.chunks.high = stats_chunks.highchunks;
+		malloc_mutex_unlock(&chunks_mtx);
+
+		malloc_mutex_lock(&huge_mtx);
+		ctl_stats.huge.allocated = huge_allocated;
+		ctl_stats.huge.nmalloc = huge_nmalloc;
+		ctl_stats.huge.ndalloc = huge_ndalloc;
+		malloc_mutex_unlock(&huge_mtx);
+	}
 
 	/*
 	 * Clear sum stats, since they will be merged into by
@@ -682,20 +562,14 @@ ctl_refresh(void)
 			ctl_arena_refresh(tarenas[i], i);
 	}
 
-#ifdef JEMALLOC_STATS
-	ctl_stats.allocated = ctl_stats.arenas[narenas].allocated_small
-	    + ctl_stats.arenas[narenas].astats.allocated_large
-	    + ctl_stats.huge.allocated;
-	ctl_stats.active = (ctl_stats.arenas[narenas].pactive << PAGE_SHIFT)
-	    + ctl_stats.huge.allocated;
-	ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk);
-
-#  ifdef JEMALLOC_SWAP
-	malloc_mutex_lock(&swap_mtx);
-	ctl_stats.swap_avail = swap_avail;
-	malloc_mutex_unlock(&swap_mtx);
-#  endif
-#endif
+	if (config_stats) {
+		ctl_stats.allocated = ctl_stats.arenas[narenas].allocated_small
+		    + ctl_stats.arenas[narenas].astats.allocated_large
+		    + ctl_stats.huge.allocated;
+		ctl_stats.active = (ctl_stats.arenas[narenas].pactive <<
+		    LG_PAGE) + ctl_stats.huge.allocated;
+		ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk);
+	}
 
 	ctl_epoch++;
 }
@@ -707,10 +581,6 @@ ctl_init(void)
 
 	malloc_mutex_lock(&ctl_mtx);
 	if (ctl_initialized == false) {
-#ifdef JEMALLOC_STATS
-		unsigned i;
-#endif
-
 		/*
 		 * Allocate space for one extra arena stats element, which
 		 * contains summed stats across all arenas.
@@ -719,7 +589,7 @@ ctl_init(void)
 		    (narenas + 1) * sizeof(ctl_arena_stats_t));
 		if (ctl_stats.arenas == NULL) {
 			ret = true;
-			goto RETURN;
+			goto label_return;
 		}
 		memset(ctl_stats.arenas, 0, (narenas + 1) *
 		    sizeof(ctl_arena_stats_t));
@@ -729,14 +599,15 @@ ctl_init(void)
 		 * ever get used.  Lazy initialization would allow errors to
 		 * cause inconsistent state to be viewable by the application.
 		 */
-#ifdef JEMALLOC_STATS
-		for (i = 0; i <= narenas; i++) {
-			if (ctl_arena_init(&ctl_stats.arenas[i])) {
-				ret = true;
-				goto RETURN;
+		if (config_stats) {
+			unsigned i;
+			for (i = 0; i <= narenas; i++) {
+				if (ctl_arena_init(&ctl_stats.arenas[i])) {
+					ret = true;
+					goto label_return;
+				}
 			}
 		}
-#endif
 		ctl_stats.arenas[narenas].initialized = true;
 
 		ctl_epoch = 0;
@@ -745,7 +616,7 @@ ctl_init(void)
 	}
 
 	ret = false;
-RETURN:
+label_return:
 	malloc_mutex_unlock(&ctl_mtx);
 	return (ret);
 }
@@ -757,7 +628,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp,
 	int ret;
 	const char *elm, *tdot, *dot;
 	size_t elen, i, j;
-	const ctl_node_t *node;
+	const ctl_named_node_t *node;
 
 	elm = name;
 	/* Equivalent to strchrnul(). */
@@ -765,54 +636,53 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp,
 	elen = (size_t)((uintptr_t)dot - (uintptr_t)elm);
 	if (elen == 0) {
 		ret = ENOENT;
-		goto RETURN;
+		goto label_return;
 	}
 	node = super_root_node;
 	for (i = 0; i < *depthp; i++) {
-		assert(node->named);
-		assert(node->u.named.nchildren > 0);
-		if (node->u.named.children[0].named) {
-			const ctl_node_t *pnode = node;
+		assert(node);
+		assert(node->nchildren > 0);
+		if (ctl_named_node(node->children) != NULL) {
+			const ctl_named_node_t *pnode = node;
 
 			/* Children are named. */
-			for (j = 0; j < node->u.named.nchildren; j++) {
-				const ctl_node_t *child =
-				    &node->u.named.children[j];
-				if (strlen(child->u.named.name) == elen
-				    && strncmp(elm, child->u.named.name,
-				    elen) == 0) {
+			for (j = 0; j < node->nchildren; j++) {
+				const ctl_named_node_t *child =
+				    ctl_named_children(node, j);
+				if (strlen(child->name) == elen &&
+				    strncmp(elm, child->name, elen) == 0) {
 					node = child;
 					if (nodesp != NULL)
-						nodesp[i] = node;
+						nodesp[i] =
+						    (const ctl_node_t *)node;
 					mibp[i] = j;
 					break;
 				}
 			}
 			if (node == pnode) {
 				ret = ENOENT;
-				goto RETURN;
+				goto label_return;
 			}
 		} else {
-			unsigned long index;
-			const ctl_node_t *inode;
+			uintmax_t index;
+			const ctl_indexed_node_t *inode;
 
 			/* Children are indexed. */
-			index = strtoul(elm, NULL, 10);
-			if (index == ULONG_MAX) {
+			index = malloc_strtoumax(elm, NULL, 10);
+			if (index == UINTMAX_MAX || index > SIZE_T_MAX) {
 				ret = ENOENT;
-				goto RETURN;
+				goto label_return;
 			}
 
-			inode = &node->u.named.children[0];
-			node = inode->u.indexed.index(mibp, *depthp,
-			    index);
+			inode = ctl_indexed_node(node->children);
+			node = inode->index(mibp, *depthp, (size_t)index);
 			if (node == NULL) {
 				ret = ENOENT;
-				goto RETURN;
+				goto label_return;
 			}
 
 			if (nodesp != NULL)
-				nodesp[i] = node;
+				nodesp[i] = (const ctl_node_t *)node;
 			mibp[i] = (size_t)index;
 		}
 
@@ -824,7 +694,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp,
 				 * in this path through the tree.
 				 */
 				ret = ENOENT;
-				goto RETURN;
+				goto label_return;
 			}
 			/* Complete lookup successful. */
 			*depthp = i + 1;
@@ -835,7 +705,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp,
 		if (*dot == '\0') {
 			/* No more elements. */
 			ret = ENOENT;
-			goto RETURN;
+			goto label_return;
 		}
 		elm = &dot[1];
 		dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot :
@@ -844,7 +714,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp,
 	}
 
 	ret = 0;
-RETURN:
+label_return:
 	return (ret);
 }
 
@@ -856,25 +726,27 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp,
 	size_t depth;
 	ctl_node_t const *nodes[CTL_MAX_DEPTH];
 	size_t mib[CTL_MAX_DEPTH];
+	const ctl_named_node_t *node;
 
 	if (ctl_initialized == false && ctl_init()) {
 		ret = EAGAIN;
-		goto RETURN;
+		goto label_return;
 	}
 
 	depth = CTL_MAX_DEPTH;
 	ret = ctl_lookup(name, nodes, mib, &depth);
 	if (ret != 0)
-		goto RETURN;
+		goto label_return;
 
-	if (nodes[depth-1]->ctl == NULL) {
+	node = ctl_named_node(nodes[depth-1]);
+	if (node != NULL && node->ctl)
+		ret = node->ctl(mib, depth, oldp, oldlenp, newp, newlen);
+	else {
 		/* The name refers to a partial path through the ctl tree. */
 		ret = ENOENT;
-		goto RETURN;
 	}
 
-	ret = nodes[depth-1]->ctl(mib, depth, oldp, oldlenp, newp, newlen);
-RETURN:
+label_return:
 	return(ret);
 }
 
@@ -885,11 +757,11 @@ ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp)
 
 	if (ctl_initialized == false && ctl_init()) {
 		ret = EAGAIN;
-		goto RETURN;
+		goto label_return;
 	}
 
 	ret = ctl_lookup(name, NULL, mibp, miblenp);
-RETURN:
+label_return:
 	return(ret);
 }
 
@@ -898,46 +770,48 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
     void *newp, size_t newlen)
 {
 	int ret;
-	const ctl_node_t *node;
+	const ctl_named_node_t *node;
 	size_t i;
 
 	if (ctl_initialized == false && ctl_init()) {
 		ret = EAGAIN;
-		goto RETURN;
+		goto label_return;
 	}
 
 	/* Iterate down the tree. */
 	node = super_root_node;
 	for (i = 0; i < miblen; i++) {
-		if (node->u.named.children[0].named) {
+		assert(node);
+		assert(node->nchildren > 0);
+		if (ctl_named_node(node->children) != NULL) {
 			/* Children are named. */
-			if (node->u.named.nchildren <= mib[i]) {
+			if (node->nchildren <= mib[i]) {
 				ret = ENOENT;
-				goto RETURN;
+				goto label_return;
 			}
-			node = &node->u.named.children[mib[i]];
+			node = ctl_named_children(node, mib[i]);
 		} else {
-			const ctl_node_t *inode;
+			const ctl_indexed_node_t *inode;
 
 			/* Indexed element. */
-			inode = &node->u.named.children[0];
-			node = inode->u.indexed.index(mib, miblen, mib[i]);
+			inode = ctl_indexed_node(node->children);
+			node = inode->index(mib, miblen, mib[i]);
 			if (node == NULL) {
 				ret = ENOENT;
-				goto RETURN;
+				goto label_return;
 			}
 		}
 	}
 
 	/* Call the ctl function. */
-	if (node->ctl == NULL) {
+	if (node && node->ctl)
+		ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen);
+	else {
 		/* Partial MIB. */
 		ret = ENOENT;
-		goto RETURN;
 	}
-	ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen);
 
-RETURN:
+label_return:
 	return(ret);
 }
 
@@ -959,22 +833,17 @@ ctl_boot(void)
 #define	READONLY()	do {						\
 	if (newp != NULL || newlen != 0) {				\
 		ret = EPERM;						\
-		goto RETURN;						\
+		goto label_return;					\
 	}								\
 } while (0)
 
 #define	WRITEONLY()	do {						\
 	if (oldp != NULL || oldlenp != NULL) {				\
 		ret = EPERM;						\
-		goto RETURN;						\
+		goto label_return;					\
 	}								\
 } while (0)
 
-#define	VOID()	do {							\
-	READONLY();							\
-	WRITEONLY();							\
-} while (0)
-
 #define	READ(v, t)	do {						\
 	if (oldp != NULL && oldlenp != NULL) {				\
 		if (*oldlenp != sizeof(t)) {				\
@@ -982,7 +851,7 @@ ctl_boot(void)
 			    ? sizeof(t) : *oldlenp;			\
 			memcpy(oldp, (void *)&v, copylen);		\
 			ret = EINVAL;					\
-			goto RETURN;					\
+			goto label_return;				\
 		} else							\
 			*(t *)oldp = v;					\
 	}								\
@@ -992,12 +861,60 @@ ctl_boot(void)
 	if (newp != NULL) {						\
 		if (newlen != sizeof(t)) {				\
 			ret = EINVAL;					\
-			goto RETURN;					\
+			goto label_return;				\
 		}							\
 		v = *(t *)newp;						\
 	}								\
 } while (0)
 
+/*
+ * There's a lot of code duplication in the following macros due to limitations
+ * in how nested cpp macros are expanded.
+ */
+#define	CTL_RO_CLGEN(c, l, n, v, t)					\
+static int								\
+n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
+    void *newp, size_t newlen)						\
+{									\
+	int ret;							\
+	t oldval;							\
+									\
+	if ((c) == false)						\
+		return (ENOENT);					\
+	if (l)								\
+		malloc_mutex_lock(&ctl_mtx);				\
+	READONLY();							\
+	oldval = v;							\
+	READ(oldval, t);						\
+									\
+	ret = 0;							\
+label_return:								\
+	if (l)								\
+		malloc_mutex_unlock(&ctl_mtx);				\
+	return (ret);							\
+}
+
+#define	CTL_RO_CGEN(c, n, v, t)						\
+static int								\
+n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
+    void *newp, size_t newlen)						\
+{									\
+	int ret;							\
+	t oldval;							\
+									\
+	if ((c) == false)						\
+		return (ENOENT);					\
+	malloc_mutex_lock(&ctl_mtx);					\
+	READONLY();							\
+	oldval = v;							\
+	READ(oldval, t);						\
+									\
+	ret = 0;							\
+label_return:								\
+	malloc_mutex_unlock(&ctl_mtx);					\
+	return (ret);							\
+}
+
 #define	CTL_RO_GEN(n, v, t)						\
 static int								\
 n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
@@ -1012,7 +929,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
 	READ(oldval, t);						\
 									\
 	ret = 0;							\
-RETURN:									\
+label_return:								\
 	malloc_mutex_unlock(&ctl_mtx);					\
 	return (ret);							\
 }
@@ -1021,7 +938,7 @@ RETURN:									\
  * ctl_mtx is not acquired, under the assumption that no pertinent data will
  * mutate during the call.
  */
-#define	CTL_RO_NL_GEN(n, v, t)					\
+#define	CTL_RO_NL_CGEN(c, n, v, t)					\
 static int								\
 n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
     void *newp, size_t newlen)						\
@@ -1029,33 +946,35 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
 	int ret;							\
 	t oldval;							\
 									\
+	if ((c) == false)						\
+		return (ENOENT);					\
 	READONLY();							\
 	oldval = v;							\
 	READ(oldval, t);						\
 									\
 	ret = 0;							\
-RETURN:									\
+label_return:								\
 	return (ret);							\
 }
 
-#define	CTL_RO_TRUE_GEN(n)						\
+#define	CTL_RO_NL_GEN(n, v, t)						\
 static int								\
 n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
     void *newp, size_t newlen)						\
 {									\
 	int ret;							\
-	bool oldval;							\
+	t oldval;							\
 									\
 	READONLY();							\
-	oldval = true;							\
-	READ(oldval, bool);						\
+	oldval = v;							\
+	READ(oldval, t);						\
 									\
 	ret = 0;							\
-RETURN:									\
+label_return:								\
 	return (ret);							\
 }
 
-#define	CTL_RO_FALSE_GEN(n)						\
+#define	CTL_RO_BOOL_CONFIG_GEN(n)					\
 static int								\
 n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
     void *newp, size_t newlen)						\
@@ -1064,11 +983,11 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
 	bool oldval;							\
 									\
 	READONLY();							\
-	oldval = false;							\
+	oldval = n;							\
 	READ(oldval, bool);						\
 									\
 	ret = 0;							\
-RETURN:									\
+label_return:								\
 	return (ret);							\
 }
 
@@ -1082,41 +1001,60 @@ epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 	uint64_t newval;
 
 	malloc_mutex_lock(&ctl_mtx);
-	newval = 0;
 	WRITE(newval, uint64_t);
-	if (newval != 0)
+	if (newp != NULL)
 		ctl_refresh();
 	READ(ctl_epoch, uint64_t);
 
 	ret = 0;
-RETURN:
+label_return:
 	malloc_mutex_unlock(&ctl_mtx);
 	return (ret);
 }
 
-#ifdef JEMALLOC_TCACHE
 static int
-tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
+thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
 {
 	int ret;
-	tcache_t *tcache;
+	bool oldval;
 
-	VOID();
+	if (config_tcache == false)
+		return (ENOENT);
 
-	tcache = TCACHE_GET();
-	if (tcache == NULL) {
-		ret = 0;
-		goto RETURN;
+	oldval = tcache_enabled_get();
+	if (newp != NULL) {
+		if (newlen != sizeof(bool)) {
+			ret = EINVAL;
+			goto label_return;
+		}
+		tcache_enabled_set(*(bool *)newp);
 	}
-	tcache_destroy(tcache);
-	TCACHE_SET(NULL);
+	READ(oldval, bool);
 
+label_return:
 	ret = 0;
-RETURN:
 	return (ret);
 }
-#endif
+
+static int
+thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
+{
+	int ret;
+
+	if (config_tcache == false)
+		return (ENOENT);
+
+	READONLY();
+	WRITEONLY();
+
+	tcache_flush();
+
+	ret = 0;
+label_return:
+	return (ret);
+}
 
 static int
 thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
@@ -1125,7 +1063,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 	int ret;
 	unsigned newind, oldind;
 
-	newind = oldind = choose_arena()->ind;
+	newind = oldind = choose_arena(NULL)->ind;
 	WRITE(newind, unsigned);
 	READ(oldind, unsigned);
 	if (newind != oldind) {
@@ -1134,191 +1072,108 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 		if (newind >= narenas) {
 			/* New arena index is out of range. */
 			ret = EFAULT;
-			goto RETURN;
+			goto label_return;
 		}
 
 		/* Initialize arena if necessary. */
 		malloc_mutex_lock(&arenas_lock);
-		if ((arena = arenas[newind]) == NULL)
-			arena = arenas_extend(newind);
+		if ((arena = arenas[newind]) == NULL && (arena =
+		    arenas_extend(newind)) == NULL) {
+			malloc_mutex_unlock(&arenas_lock);
+			ret = EAGAIN;
+			goto label_return;
+		}
+		assert(arena == arenas[newind]);
 		arenas[oldind]->nthreads--;
 		arenas[newind]->nthreads++;
 		malloc_mutex_unlock(&arenas_lock);
-		if (arena == NULL) {
-			ret = EAGAIN;
-			goto RETURN;
-		}
 
 		/* Set new arena association. */
-		ARENA_SET(arena);
-#ifdef JEMALLOC_TCACHE
-		{
-			tcache_t *tcache = TCACHE_GET();
-			if (tcache != NULL)
-				tcache->arena = arena;
+		if (config_tcache) {
+			tcache_t *tcache;
+			if ((uintptr_t)(tcache = *tcache_tsd_get()) >
+			    (uintptr_t)TCACHE_STATE_MAX) {
+				tcache_arena_dissociate(tcache);
+				tcache_arena_associate(tcache, arena);
+			}
 		}
-#endif
+		arenas_tsd_set(&arena);
 	}
 
 	ret = 0;
-RETURN:
+label_return:
 	return (ret);
 }
 
-#ifdef JEMALLOC_STATS
-CTL_RO_NL_GEN(thread_allocated, ALLOCATED_GET(), uint64_t);
-CTL_RO_NL_GEN(thread_allocatedp, ALLOCATEDP_GET(), uint64_t *);
-CTL_RO_NL_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t);
-CTL_RO_NL_GEN(thread_deallocatedp, DEALLOCATEDP_GET(), uint64_t *);
-#endif
+CTL_RO_NL_CGEN(config_stats, thread_allocated,
+    thread_allocated_tsd_get()->allocated, uint64_t)
+CTL_RO_NL_CGEN(config_stats, thread_allocatedp,
+    &thread_allocated_tsd_get()->allocated, uint64_t *)
+CTL_RO_NL_CGEN(config_stats, thread_deallocated,
+    thread_allocated_tsd_get()->deallocated, uint64_t)
+CTL_RO_NL_CGEN(config_stats, thread_deallocatedp,
+    &thread_allocated_tsd_get()->deallocated, uint64_t *)
 
 /******************************************************************************/
 
-#ifdef JEMALLOC_DEBUG
-CTL_RO_TRUE_GEN(config_debug)
-#else
-CTL_RO_FALSE_GEN(config_debug)
-#endif
-
-#ifdef JEMALLOC_DSS
-CTL_RO_TRUE_GEN(config_dss)
-#else
-CTL_RO_FALSE_GEN(config_dss)
-#endif
-
-#ifdef JEMALLOC_DYNAMIC_PAGE_SHIFT
-CTL_RO_TRUE_GEN(config_dynamic_page_shift)
-#else
-CTL_RO_FALSE_GEN(config_dynamic_page_shift)
-#endif
-
-#ifdef JEMALLOC_FILL
-CTL_RO_TRUE_GEN(config_fill)
-#else
-CTL_RO_FALSE_GEN(config_fill)
-#endif
-
-#ifdef JEMALLOC_LAZY_LOCK
-CTL_RO_TRUE_GEN(config_lazy_lock)
-#else
-CTL_RO_FALSE_GEN(config_lazy_lock)
-#endif
-
-#ifdef JEMALLOC_PROF
-CTL_RO_TRUE_GEN(config_prof)
-#else
-CTL_RO_FALSE_GEN(config_prof)
-#endif
-
-#ifdef JEMALLOC_PROF_LIBGCC
-CTL_RO_TRUE_GEN(config_prof_libgcc)
-#else
-CTL_RO_FALSE_GEN(config_prof_libgcc)
-#endif
-
-#ifdef JEMALLOC_PROF_LIBUNWIND
-CTL_RO_TRUE_GEN(config_prof_libunwind)
-#else
-CTL_RO_FALSE_GEN(config_prof_libunwind)
-#endif
-
-#ifdef JEMALLOC_STATS
-CTL_RO_TRUE_GEN(config_stats)
-#else
-CTL_RO_FALSE_GEN(config_stats)
-#endif
-
-#ifdef JEMALLOC_SWAP
-CTL_RO_TRUE_GEN(config_swap)
-#else
-CTL_RO_FALSE_GEN(config_swap)
-#endif
-
-#ifdef JEMALLOC_SYSV
-CTL_RO_TRUE_GEN(config_sysv)
-#else
-CTL_RO_FALSE_GEN(config_sysv)
-#endif
-
-#ifdef JEMALLOC_TCACHE
-CTL_RO_TRUE_GEN(config_tcache)
-#else
-CTL_RO_FALSE_GEN(config_tcache)
-#endif
-
-#ifdef JEMALLOC_TINY
-CTL_RO_TRUE_GEN(config_tiny)
-#else
-CTL_RO_FALSE_GEN(config_tiny)
-#endif
-
-#ifdef JEMALLOC_TLS
-CTL_RO_TRUE_GEN(config_tls)
-#else
-CTL_RO_FALSE_GEN(config_tls)
-#endif
-
-#ifdef JEMALLOC_XMALLOC
-CTL_RO_TRUE_GEN(config_xmalloc)
-#else
-CTL_RO_FALSE_GEN(config_xmalloc)
-#endif
+CTL_RO_BOOL_CONFIG_GEN(config_debug)
+CTL_RO_BOOL_CONFIG_GEN(config_dss)
+CTL_RO_BOOL_CONFIG_GEN(config_fill)
+CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock)
+CTL_RO_BOOL_CONFIG_GEN(config_mremap)
+CTL_RO_BOOL_CONFIG_GEN(config_munmap)
+CTL_RO_BOOL_CONFIG_GEN(config_prof)
+CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc)
+CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind)
+CTL_RO_BOOL_CONFIG_GEN(config_stats)
+CTL_RO_BOOL_CONFIG_GEN(config_tcache)
+CTL_RO_BOOL_CONFIG_GEN(config_tls)
+CTL_RO_BOOL_CONFIG_GEN(config_utrace)
+CTL_RO_BOOL_CONFIG_GEN(config_valgrind)
+CTL_RO_BOOL_CONFIG_GEN(config_xmalloc)
 
 /******************************************************************************/
 
 CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
-CTL_RO_NL_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t)
-CTL_RO_NL_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t)
 CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t)
 CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t)
 CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t)
 CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool)
-#ifdef JEMALLOC_FILL
-CTL_RO_NL_GEN(opt_junk, opt_junk, bool)
-CTL_RO_NL_GEN(opt_zero, opt_zero, bool)
-#endif
-#ifdef JEMALLOC_SYSV
-CTL_RO_NL_GEN(opt_sysv, opt_sysv, bool)
-#endif
-#ifdef JEMALLOC_XMALLOC
-CTL_RO_NL_GEN(opt_xmalloc, opt_xmalloc, bool)
-#endif
-#ifdef JEMALLOC_TCACHE
-CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool)
-CTL_RO_NL_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t)
-#endif
-#ifdef JEMALLOC_PROF
-CTL_RO_NL_GEN(opt_prof, opt_prof, bool)
-CTL_RO_NL_GEN(opt_prof_prefix, opt_prof_prefix, const char *)
-CTL_RO_GEN(opt_prof_active, opt_prof_active, bool) /* Mutable. */
-CTL_RO_NL_GEN(opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t)
-CTL_RO_NL_GEN(opt_lg_prof_sample, opt_lg_prof_sample, size_t)
-CTL_RO_NL_GEN(opt_lg_prof_interval, opt_lg_prof_interval, ssize_t)
-CTL_RO_NL_GEN(opt_prof_gdump, opt_prof_gdump, bool)
-CTL_RO_NL_GEN(opt_prof_leak, opt_prof_leak, bool)
-CTL_RO_NL_GEN(opt_prof_accum, opt_prof_accum, bool)
-CTL_RO_NL_GEN(opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t)
-#endif
-#ifdef JEMALLOC_SWAP
-CTL_RO_NL_GEN(opt_overcommit, opt_overcommit, bool)
-#endif
+CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, bool)
+CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool)
+CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t)
+CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool)
+CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool)
+CTL_RO_NL_CGEN(config_valgrind, opt_valgrind, opt_valgrind, bool)
+CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool)
+CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool)
+CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t)
+CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool)
+CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *)
+CTL_RO_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) /* Mutable. */
+CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t)
+CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t)
+CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool)
+CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool)
+CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool)
+CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool)
 
 /******************************************************************************/
 
 CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t)
 CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t)
 CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t)
-const ctl_node_t *
+const ctl_named_node_t *
 arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i)
 {
 
-	if (i > nbins)
+	if (i >= NBINS)	/* Bins are indexed 0..NBINS-1. */
 		return (NULL);
 	return (super_arenas_bin_i_node);
 }
 
-CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << PAGE_SHIFT), size_t)
-const ctl_node_t *
+CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << LG_PAGE), size_t)
+const ctl_named_node_t *
 arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i)
 {
 
@@ -1350,37 +1205,16 @@ arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp,
 	for (i = 0; i < nread; i++)
 		((bool *)oldp)[i] = ctl_stats.arenas[i].initialized;
 
-RETURN:
+label_return:
 	malloc_mutex_unlock(&ctl_mtx);
 	return (ret);
 }
 
 CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t)
-CTL_RO_NL_GEN(arenas_cacheline, CACHELINE, size_t)
-CTL_RO_NL_GEN(arenas_subpage, SUBPAGE, size_t)
-CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t)
-CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t)
-#ifdef JEMALLOC_TINY
-CTL_RO_NL_GEN(arenas_tspace_min, (1U << LG_TINY_MIN), size_t)
-CTL_RO_NL_GEN(arenas_tspace_max, (qspace_min >> 1), size_t)
-#endif
-CTL_RO_NL_GEN(arenas_qspace_min, qspace_min, size_t)
-CTL_RO_NL_GEN(arenas_qspace_max, qspace_max, size_t)
-CTL_RO_NL_GEN(arenas_cspace_min, cspace_min, size_t)
-CTL_RO_NL_GEN(arenas_cspace_max, cspace_max, size_t)
-CTL_RO_NL_GEN(arenas_sspace_min, sspace_min, size_t)
-CTL_RO_NL_GEN(arenas_sspace_max, sspace_max, size_t)
-#ifdef JEMALLOC_TCACHE
-CTL_RO_NL_GEN(arenas_tcache_max, tcache_maxclass, size_t)
-#endif
-CTL_RO_NL_GEN(arenas_ntbins, ntbins, unsigned)
-CTL_RO_NL_GEN(arenas_nqbins, nqbins, unsigned)
-CTL_RO_NL_GEN(arenas_ncbins, ncbins, unsigned)
-CTL_RO_NL_GEN(arenas_nsbins, nsbins, unsigned)
-CTL_RO_NL_GEN(arenas_nbins, nbins, unsigned)
-#ifdef JEMALLOC_TCACHE
-CTL_RO_NL_GEN(arenas_nhbins, nhbins, unsigned)
-#endif
+CTL_RO_NL_GEN(arenas_page, PAGE, size_t)
+CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t)
+CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned)
+CTL_RO_NL_CGEN(config_tcache, arenas_nhbins, nhbins, unsigned)
 CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t)
 
 static int
@@ -1395,9 +1229,9 @@ arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 	WRITE(arena, unsigned);
 	if (newp != NULL && arena >= narenas) {
 		ret = EFAULT;
-		goto RETURN;
+		goto label_return;
 	} else {
-		arena_t *tarenas[narenas];
+		VARIABLE_ARRAY(arena_t *, tarenas, narenas);
 
 		malloc_mutex_lock(&arenas_lock);
 		memcpy(tarenas, arenas, sizeof(arena_t *) * narenas);
@@ -1417,13 +1251,12 @@ arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 	}
 
 	ret = 0;
-RETURN:
+label_return:
 	return (ret);
 }
 
 /******************************************************************************/
 
-#ifdef JEMALLOC_PROF
 static int
 prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
     void *newp, size_t newlen)
@@ -1431,6 +1264,9 @@ prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 	int ret;
 	bool oldval;
 
+	if (config_prof == false)
+		return (ENOENT);
+
 	malloc_mutex_lock(&ctl_mtx); /* Protect opt_prof_active. */
 	oldval = opt_prof_active;
 	if (newp != NULL) {
@@ -1445,7 +1281,7 @@ prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 	READ(oldval, bool);
 
 	ret = 0;
-RETURN:
+label_return:
 	malloc_mutex_unlock(&ctl_mtx);
 	return (ret);
 }
@@ -1457,92 +1293,88 @@ prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
 	int ret;
 	const char *filename = NULL;
 
+	if (config_prof == false)
+		return (ENOENT);
+
 	WRITEONLY();
 	WRITE(filename, const char *);
 
 	if (prof_mdump(filename)) {
 		ret = EFAULT;
-		goto RETURN;
+		goto label_return;
 	}
 
 	ret = 0;
-RETURN:
+label_return:
 	return (ret);
 }
 
-CTL_RO_NL_GEN(prof_interval, prof_interval, uint64_t)
-#endif
+CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t)
 
 /******************************************************************************/
 
-#ifdef JEMALLOC_STATS
-CTL_RO_GEN(stats_chunks_current, ctl_stats.chunks.current, size_t)
-CTL_RO_GEN(stats_chunks_total, ctl_stats.chunks.total, uint64_t)
-CTL_RO_GEN(stats_chunks_high, ctl_stats.chunks.high, size_t)
-CTL_RO_GEN(stats_huge_allocated, huge_allocated, size_t)
-CTL_RO_GEN(stats_huge_nmalloc, huge_nmalloc, uint64_t)
-CTL_RO_GEN(stats_huge_ndalloc, huge_ndalloc, uint64_t)
-CTL_RO_GEN(stats_arenas_i_small_allocated,
+CTL_RO_CGEN(config_stats, stats_chunks_current, ctl_stats.chunks.current,
+    size_t)
+CTL_RO_CGEN(config_stats, stats_chunks_total, ctl_stats.chunks.total, uint64_t)
+CTL_RO_CGEN(config_stats, stats_chunks_high, ctl_stats.chunks.high, size_t)
+CTL_RO_CGEN(config_stats, stats_huge_allocated, huge_allocated, size_t)
+CTL_RO_CGEN(config_stats, stats_huge_nmalloc, huge_nmalloc, uint64_t)
+CTL_RO_CGEN(config_stats, stats_huge_ndalloc, huge_ndalloc, uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated,
     ctl_stats.arenas[mib[2]].allocated_small, size_t)
-CTL_RO_GEN(stats_arenas_i_small_nmalloc,
+CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc,
     ctl_stats.arenas[mib[2]].nmalloc_small, uint64_t)
-CTL_RO_GEN(stats_arenas_i_small_ndalloc,
+CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc,
     ctl_stats.arenas[mib[2]].ndalloc_small, uint64_t)
-CTL_RO_GEN(stats_arenas_i_small_nrequests,
+CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests,
     ctl_stats.arenas[mib[2]].nrequests_small, uint64_t)
-CTL_RO_GEN(stats_arenas_i_large_allocated,
+CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated,
     ctl_stats.arenas[mib[2]].astats.allocated_large, size_t)
-CTL_RO_GEN(stats_arenas_i_large_nmalloc,
+CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc,
     ctl_stats.arenas[mib[2]].astats.nmalloc_large, uint64_t)
-CTL_RO_GEN(stats_arenas_i_large_ndalloc,
+CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc,
     ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t)
-CTL_RO_GEN(stats_arenas_i_large_nrequests,
+CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests,
     ctl_stats.arenas[mib[2]].astats.nrequests_large, uint64_t)
 
-CTL_RO_GEN(stats_arenas_i_bins_j_allocated,
+CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_allocated,
     ctl_stats.arenas[mib[2]].bstats[mib[4]].allocated, size_t)
-CTL_RO_GEN(stats_arenas_i_bins_j_nmalloc,
+CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc,
     ctl_stats.arenas[mib[2]].bstats[mib[4]].nmalloc, uint64_t)
-CTL_RO_GEN(stats_arenas_i_bins_j_ndalloc,
+CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_ndalloc,
     ctl_stats.arenas[mib[2]].bstats[mib[4]].ndalloc, uint64_t)
-CTL_RO_GEN(stats_arenas_i_bins_j_nrequests,
+CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nrequests,
     ctl_stats.arenas[mib[2]].bstats[mib[4]].nrequests, uint64_t)
-#ifdef JEMALLOC_TCACHE
-CTL_RO_GEN(stats_arenas_i_bins_j_nfills,
+CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nfills,
     ctl_stats.arenas[mib[2]].bstats[mib[4]].nfills, uint64_t)
-CTL_RO_GEN(stats_arenas_i_bins_j_nflushes,
+CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nflushes,
     ctl_stats.arenas[mib[2]].bstats[mib[4]].nflushes, uint64_t)
-#endif
-CTL_RO_GEN(stats_arenas_i_bins_j_nruns,
+CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nruns,
     ctl_stats.arenas[mib[2]].bstats[mib[4]].nruns, uint64_t)
-CTL_RO_GEN(stats_arenas_i_bins_j_nreruns,
+CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns,
     ctl_stats.arenas[mib[2]].bstats[mib[4]].reruns, uint64_t)
-CTL_RO_GEN(stats_arenas_i_bins_j_highruns,
-    ctl_stats.arenas[mib[2]].bstats[mib[4]].highruns, size_t)
-CTL_RO_GEN(stats_arenas_i_bins_j_curruns,
+CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns,
     ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t)
 
-const ctl_node_t *
+const ctl_named_node_t *
 stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j)
 {
 
-	if (j > nbins)
+	if (j > NBINS)
 		return (NULL);
 	return (super_stats_arenas_i_bins_j_node);
 }
 
-CTL_RO_GEN(stats_arenas_i_lruns_j_nmalloc,
+CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nmalloc,
     ctl_stats.arenas[mib[2]].lstats[mib[4]].nmalloc, uint64_t)
-CTL_RO_GEN(stats_arenas_i_lruns_j_ndalloc,
+CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_ndalloc,
     ctl_stats.arenas[mib[2]].lstats[mib[4]].ndalloc, uint64_t)
-CTL_RO_GEN(stats_arenas_i_lruns_j_nrequests,
+CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests,
     ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t)
-CTL_RO_GEN(stats_arenas_i_lruns_j_curruns,
+CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns,
     ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t)
-CTL_RO_GEN(stats_arenas_i_lruns_j_highruns,
-    ctl_stats.arenas[mib[2]].lstats[mib[4]].highruns, size_t)
 
-const ctl_node_t *
+const ctl_named_node_t *
 stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j)
 {
 
@@ -1551,120 +1383,36 @@ stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j)
 	return (super_stats_arenas_i_lruns_j_node);
 }
 
-#endif
 CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned)
 CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t)
 CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t)
-#ifdef JEMALLOC_STATS
-CTL_RO_GEN(stats_arenas_i_mapped, ctl_stats.arenas[mib[2]].astats.mapped,
-    size_t)
-CTL_RO_GEN(stats_arenas_i_npurge, ctl_stats.arenas[mib[2]].astats.npurge,
-    uint64_t)
-CTL_RO_GEN(stats_arenas_i_nmadvise, ctl_stats.arenas[mib[2]].astats.nmadvise,
-    uint64_t)
-CTL_RO_GEN(stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged,
-    uint64_t)
-#endif
-
-const ctl_node_t *
+CTL_RO_CGEN(config_stats, stats_arenas_i_mapped,
+    ctl_stats.arenas[mib[2]].astats.mapped, size_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_npurge,
+    ctl_stats.arenas[mib[2]].astats.npurge, uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise,
+    ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t)
+CTL_RO_CGEN(config_stats, stats_arenas_i_purged,
+    ctl_stats.arenas[mib[2]].astats.purged, uint64_t)
+
+const ctl_named_node_t *
 stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i)
 {
-	const ctl_node_t * ret;
+	const ctl_named_node_t * ret;
 
 	malloc_mutex_lock(&ctl_mtx);
 	if (ctl_stats.arenas[i].initialized == false) {
 		ret = NULL;
-		goto RETURN;
+		goto label_return;
 	}
 
 	ret = super_stats_arenas_i_node;
-RETURN:
-	malloc_mutex_unlock(&ctl_mtx);
-	return (ret);
-}
-
-#ifdef JEMALLOC_STATS
-CTL_RO_GEN(stats_cactive, &stats_cactive, size_t *)
-CTL_RO_GEN(stats_allocated, ctl_stats.allocated, size_t)
-CTL_RO_GEN(stats_active, ctl_stats.active, size_t)
-CTL_RO_GEN(stats_mapped, ctl_stats.mapped, size_t)
-#endif
-
-/******************************************************************************/
-
-#ifdef JEMALLOC_SWAP
-#  ifdef JEMALLOC_STATS
-CTL_RO_GEN(swap_avail, ctl_stats.swap_avail, size_t)
-#  endif
-
-static int
-swap_prezeroed_ctl(const size_t *mib, size_t miblen, void *oldp,
-    size_t *oldlenp, void *newp, size_t newlen)
-{
-	int ret;
-
-	malloc_mutex_lock(&ctl_mtx);
-	if (swap_enabled) {
-		READONLY();
-	} else {
-		/*
-		 * swap_prezeroed isn't actually used by the swap code until it
-		 * is set during a successful chunk_swap_enabled() call.  We
-		 * use it here to store the value that we'll pass to
-		 * chunk_swap_enable() in a swap.fds mallctl().  This is not
-		 * very clean, but the obvious alternatives are even worse.
-		 */
-		WRITE(swap_prezeroed, bool);
-	}
-
-	READ(swap_prezeroed, bool);
-
-	ret = 0;
-RETURN:
+label_return:
 	malloc_mutex_unlock(&ctl_mtx);
 	return (ret);
 }
 
-CTL_RO_GEN(swap_nfds, swap_nfds, size_t)
-
-static int
-swap_fds_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
-    void *newp, size_t newlen)
-{
-	int ret;
-
-	malloc_mutex_lock(&ctl_mtx);
-	if (swap_enabled) {
-		READONLY();
-	} else if (newp != NULL) {
-		size_t nfds = newlen / sizeof(int);
-
-		{
-			int fds[nfds];
-
-			memcpy(fds, newp, nfds * sizeof(int));
-			if (chunk_swap_enable(fds, nfds, swap_prezeroed)) {
-				ret = EFAULT;
-				goto RETURN;
-			}
-		}
-	}
-
-	if (oldp != NULL && oldlenp != NULL) {
-		if (*oldlenp != swap_nfds * sizeof(int)) {
-			size_t copylen = (swap_nfds * sizeof(int) <= *oldlenp)
-			    ? swap_nfds * sizeof(int) : *oldlenp;
-
-			memcpy(oldp, swap_fds, copylen);
-			ret = EINVAL;
-			goto RETURN;
-		} else
-			memcpy(oldp, swap_fds, *oldlenp);
-	}
-
-	ret = 0;
-RETURN:
-	malloc_mutex_unlock(&ctl_mtx);
-	return (ret);
-}
-#endif
+CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *)
+CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t)
+CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t)
+CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t)
diff --git a/deps/jemalloc/src/extent.c b/deps/jemalloc/src/extent.c
index 3c04d3aa..8c09b486 100644
--- a/deps/jemalloc/src/extent.c
+++ b/deps/jemalloc/src/extent.c
@@ -3,7 +3,6 @@
 
 /******************************************************************************/
 
-#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS))
 static inline int
 extent_szad_comp(extent_node_t *a, extent_node_t *b)
 {
@@ -25,7 +24,6 @@ extent_szad_comp(extent_node_t *a, extent_node_t *b)
 /* Generate red-black tree functions. */
 rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, link_szad,
     extent_szad_comp)
-#endif
 
 static inline int
 extent_ad_comp(extent_node_t *a, extent_node_t *b)
diff --git a/deps/jemalloc/src/huge.c b/deps/jemalloc/src/huge.c
index a4f9b054..8a4ec942 100644
--- a/deps/jemalloc/src/huge.c
+++ b/deps/jemalloc/src/huge.c
@@ -4,11 +4,9 @@
 /******************************************************************************/
 /* Data. */
 
-#ifdef JEMALLOC_STATS
 uint64_t	huge_nmalloc;
 uint64_t	huge_ndalloc;
 size_t		huge_allocated;
-#endif
 
 malloc_mutex_t	huge_mtx;
 
@@ -19,10 +17,18 @@ static extent_tree_t	huge;
 
 void *
 huge_malloc(size_t size, bool zero)
+{
+
+	return (huge_palloc(size, chunksize, zero));
+}
+
+void *
+huge_palloc(size_t size, size_t alignment, bool zero)
 {
 	void *ret;
 	size_t csize;
 	extent_node_t *node;
+	bool is_zeroed;
 
 	/* Allocate one or more contiguous chunks for this request. */
 
@@ -37,7 +43,12 @@ huge_malloc(size_t size, bool zero)
 	if (node == NULL)
 		return (NULL);
 
-	ret = chunk_alloc(csize, false, &zero);
+	/*
+	 * Copy zero into is_zeroed and pass the copy to chunk_alloc(), so that
+	 * it is possible to make correct junk/zero fill decisions below.
+	 */
+	is_zeroed = zero;
+	ret = chunk_alloc(csize, alignment, false, &is_zeroed);
 	if (ret == NULL) {
 		base_node_dealloc(node);
 		return (NULL);
@@ -49,106 +60,19 @@ huge_malloc(size_t size, bool zero)
 
 	malloc_mutex_lock(&huge_mtx);
 	extent_tree_ad_insert(&huge, node);
-#ifdef JEMALLOC_STATS
-	stats_cactive_add(csize);
-	huge_nmalloc++;
-	huge_allocated += csize;
-#endif
+	if (config_stats) {
+		stats_cactive_add(csize);
+		huge_nmalloc++;
+		huge_allocated += csize;
+	}
 	malloc_mutex_unlock(&huge_mtx);
 
-#ifdef JEMALLOC_FILL
-	if (zero == false) {
+	if (config_fill && zero == false) {
 		if (opt_junk)
 			memset(ret, 0xa5, csize);
-		else if (opt_zero)
+		else if (opt_zero && is_zeroed == false)
 			memset(ret, 0, csize);
 	}
-#endif
-
-	return (ret);
-}
-
-/* Only handles large allocations that require more than chunk alignment. */
-void *
-huge_palloc(size_t size, size_t alignment, bool zero)
-{
-	void *ret;
-	size_t alloc_size, chunk_size, offset;
-	extent_node_t *node;
-
-	/*
-	 * This allocation requires alignment that is even larger than chunk
-	 * alignment.  This means that huge_malloc() isn't good enough.
-	 *
-	 * Allocate almost twice as many chunks as are demanded by the size or
-	 * alignment, in order to assure the alignment can be achieved, then
-	 * unmap leading and trailing chunks.
-	 */
-	assert(alignment > chunksize);
-
-	chunk_size = CHUNK_CEILING(size);
-
-	if (size >= alignment)
-		alloc_size = chunk_size + alignment - chunksize;
-	else
-		alloc_size = (alignment << 1) - chunksize;
-
-	/* Allocate an extent node with which to track the chunk. */
-	node = base_node_alloc();
-	if (node == NULL)
-		return (NULL);
-
-	ret = chunk_alloc(alloc_size, false, &zero);
-	if (ret == NULL) {
-		base_node_dealloc(node);
-		return (NULL);
-	}
-
-	offset = (uintptr_t)ret & (alignment - 1);
-	assert((offset & chunksize_mask) == 0);
-	assert(offset < alloc_size);
-	if (offset == 0) {
-		/* Trim trailing space. */
-		chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
-		    - chunk_size, true);
-	} else {
-		size_t trailsize;
-
-		/* Trim leading space. */
-		chunk_dealloc(ret, alignment - offset, true);
-
-		ret = (void *)((uintptr_t)ret + (alignment - offset));
-
-		trailsize = alloc_size - (alignment - offset) - chunk_size;
-		if (trailsize != 0) {
-		    /* Trim trailing space. */
-		    assert(trailsize < alloc_size);
-		    chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
-			trailsize, true);
-		}
-	}
-
-	/* Insert node into huge. */
-	node->addr = ret;
-	node->size = chunk_size;
-
-	malloc_mutex_lock(&huge_mtx);
-	extent_tree_ad_insert(&huge, node);
-#ifdef JEMALLOC_STATS
-	stats_cactive_add(chunk_size);
-	huge_nmalloc++;
-	huge_allocated += chunk_size;
-#endif
-	malloc_mutex_unlock(&huge_mtx);
-
-#ifdef JEMALLOC_FILL
-	if (zero == false) {
-		if (opt_junk)
-			memset(ret, 0xa5, chunk_size);
-		else if (opt_zero)
-			memset(ret, 0, chunk_size);
-	}
-#endif
 
 	return (ret);
 }
@@ -164,12 +88,10 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra)
 	    && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size)
 	    && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) {
 		assert(CHUNK_CEILING(oldsize) == oldsize);
-#ifdef JEMALLOC_FILL
-		if (opt_junk && size < oldsize) {
+		if (config_fill && opt_junk && size < oldsize) {
 			memset((void *)((uintptr_t)ptr + size), 0x5a,
 			    oldsize - size);
 		}
-#endif
 		return (ptr);
 	}
 
@@ -218,20 +140,13 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
 	 */
 	copysize = (size < oldsize) ? size : oldsize;
 
+#ifdef JEMALLOC_MREMAP
 	/*
 	 * Use mremap(2) if this is a huge-->huge reallocation, and neither the
-	 * source nor the destination are in swap or dss.
+	 * source nor the destination are in dss.
 	 */
-#ifdef JEMALLOC_MREMAP_FIXED
-	if (oldsize >= chunksize
-#  ifdef JEMALLOC_SWAP
-	    && (swap_enabled == false || (chunk_in_swap(ptr) == false &&
-	    chunk_in_swap(ret) == false))
-#  endif
-#  ifdef JEMALLOC_DSS
-	    && chunk_in_dss(ptr) == false && chunk_in_dss(ret) == false
-#  endif
-	    ) {
+	if (oldsize >= chunksize && (config_dss == false || (chunk_in_dss(ptr)
+	    == false && chunk_in_dss(ret) == false))) {
 		size_t newsize = huge_salloc(ret);
 
 		/*
@@ -253,10 +168,9 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
 			 */
 			char buf[BUFERROR_BUF];
 
-			buferror(errno, buf, sizeof(buf));
-			malloc_write("<jemalloc>: Error in mremap(): ");
-			malloc_write(buf);
-			malloc_write("\n");
+			buferror(buf, sizeof(buf));
+			malloc_printf("<jemalloc>: Error in mremap(): %s\n",
+			    buf);
 			if (opt_abort)
 				abort();
 			memcpy(ret, ptr, copysize);
@@ -266,7 +180,7 @@ huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra,
 #endif
 	{
 		memcpy(ret, ptr, copysize);
-		idalloc(ptr);
+		iqalloc(ptr);
 	}
 	return (ret);
 }
@@ -285,23 +199,16 @@ huge_dalloc(void *ptr, bool unmap)
 	assert(node->addr == ptr);
 	extent_tree_ad_remove(&huge, node);
 
-#ifdef JEMALLOC_STATS
-	stats_cactive_sub(node->size);
-	huge_ndalloc++;
-	huge_allocated -= node->size;
-#endif
+	if (config_stats) {
+		stats_cactive_sub(node->size);
+		huge_ndalloc++;
+		huge_allocated -= node->size;
+	}
 
 	malloc_mutex_unlock(&huge_mtx);
 
-	if (unmap) {
-	/* Unmap chunk. */
-#ifdef JEMALLOC_FILL
-#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS))
-		if (opt_junk)
-			memset(node->addr, 0x5a, node->size);
-#endif
-#endif
-	}
+	if (unmap && config_fill && config_dss && opt_junk)
+		memset(node->addr, 0x5a, node->size);
 
 	chunk_dealloc(node->addr, node->size, unmap);
 
@@ -328,7 +235,6 @@ huge_salloc(const void *ptr)
 	return (ret);
 }
 
-#ifdef JEMALLOC_PROF
 prof_ctx_t *
 huge_prof_ctx_get(const void *ptr)
 {
@@ -365,7 +271,6 @@ huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 
 	malloc_mutex_unlock(&huge_mtx);
 }
-#endif
 
 bool
 huge_boot(void)
@@ -376,11 +281,32 @@ huge_boot(void)
 		return (true);
 	extent_tree_ad_new(&huge);
 
-#ifdef JEMALLOC_STATS
-	huge_nmalloc = 0;
-	huge_ndalloc = 0;
-	huge_allocated = 0;
-#endif
+	if (config_stats) {
+		huge_nmalloc = 0;
+		huge_ndalloc = 0;
+		huge_allocated = 0;
+	}
 
 	return (false);
 }
+
+void
+huge_prefork(void)
+{
+
+	malloc_mutex_prefork(&huge_mtx);
+}
+
+void
+huge_postfork_parent(void)
+{
+
+	malloc_mutex_postfork_parent(&huge_mtx);
+}
+
+void
+huge_postfork_child(void)
+{
+
+	malloc_mutex_postfork_child(&huge_mtx);
+}
diff --git a/deps/jemalloc/src/jemalloc.c b/deps/jemalloc/src/jemalloc.c
index a161c2e2..bc54cd7c 100644
--- a/deps/jemalloc/src/jemalloc.c
+++ b/deps/jemalloc/src/jemalloc.c
@@ -4,111 +4,108 @@
 /******************************************************************************/
 /* Data. */
 
-malloc_mutex_t		arenas_lock;
-arena_t			**arenas;
-unsigned		narenas;
-
-pthread_key_t		arenas_tsd;
-#ifndef NO_TLS
-__thread arena_t	*arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
-#endif
+malloc_tsd_data(, arenas, arena_t *, NULL)
+malloc_tsd_data(, thread_allocated, thread_allocated_t,
+    THREAD_ALLOCATED_INITIALIZER)
 
-#ifdef JEMALLOC_STATS
-#  ifndef NO_TLS
-__thread thread_allocated_t	thread_allocated_tls;
+/* Runtime configuration options. */
+const char	*je_malloc_conf;
+#ifdef JEMALLOC_DEBUG
+bool	opt_abort = true;
+#  ifdef JEMALLOC_FILL
+bool	opt_junk = true;
 #  else
-pthread_key_t		thread_allocated_tsd;
+bool	opt_junk = false;
 #  endif
+#else
+bool	opt_abort = false;
+bool	opt_junk = false;
 #endif
+size_t	opt_quarantine = ZU(0);
+bool	opt_redzone = false;
+bool	opt_utrace = false;
+bool	opt_valgrind = false;
+bool	opt_xmalloc = false;
+bool	opt_zero = false;
+size_t	opt_narenas = 0;
+
+unsigned	ncpus;
+
+malloc_mutex_t		arenas_lock;
+arena_t			**arenas;
+unsigned		narenas;
 
 /* Set to true once the allocator has been initialized. */
 static bool		malloc_initialized = false;
 
+#ifdef JEMALLOC_THREADED_INIT
 /* Used to let the initializing thread recursively allocate. */
-static pthread_t	malloc_initializer = (unsigned long)0;
-
-/* Used to avoid initialization races. */
-static malloc_mutex_t	init_lock =
-#ifdef JEMALLOC_OSSPIN
-    0
+#  define NO_INITIALIZER	((unsigned long)0)
+#  define INITIALIZER		pthread_self()
+#  define IS_INITIALIZER	(malloc_initializer == pthread_self())
+static pthread_t		malloc_initializer = NO_INITIALIZER;
 #else
-    MALLOC_MUTEX_INITIALIZER
+#  define NO_INITIALIZER	false
+#  define INITIALIZER		true
+#  define IS_INITIALIZER	malloc_initializer
+static bool			malloc_initializer = NO_INITIALIZER;
 #endif
-    ;
 
-#ifdef DYNAMIC_PAGE_SHIFT
-size_t		pagesize;
-size_t		pagesize_mask;
-size_t		lg_pagesize;
-#endif
+/* Used to avoid initialization races. */
+#ifdef _WIN32
+static malloc_mutex_t	init_lock;
 
-unsigned	ncpus;
+JEMALLOC_ATTR(constructor)
+static void WINAPI
+_init_init_lock(void)
+{
 
-/* Runtime configuration options. */
-const char	*JEMALLOC_P(malloc_conf) JEMALLOC_ATTR(visibility("default"));
-#ifdef JEMALLOC_DEBUG
-bool	opt_abort = true;
-#  ifdef JEMALLOC_FILL
-bool	opt_junk = true;
-#  endif
-#else
-bool	opt_abort = false;
-#  ifdef JEMALLOC_FILL
-bool	opt_junk = false;
-#  endif
-#endif
-#ifdef JEMALLOC_SYSV
-bool	opt_sysv = false;
-#endif
-#ifdef JEMALLOC_XMALLOC
-bool	opt_xmalloc = false;
+	malloc_mutex_init(&init_lock);
+}
+
+#ifdef _MSC_VER
+#  pragma section(".CRT$XCU", read)
+JEMALLOC_SECTION(".CRT$XCU") JEMALLOC_ATTR(used)
+static const void (WINAPI *init_init_lock)(void) = _init_init_lock;
 #endif
-#ifdef JEMALLOC_FILL
-bool	opt_zero = false;
+
+#else
+static malloc_mutex_t	init_lock = MALLOC_MUTEX_INITIALIZER;
+#endif
+
+typedef struct {
+	void	*p;	/* Input pointer (as in realloc(p, s)). */
+	size_t	s;	/* Request size. */
+	void	*r;	/* Result pointer. */
+} malloc_utrace_t;
+
+#ifdef JEMALLOC_UTRACE
+#  define UTRACE(a, b, c) do {						\
+	if (opt_utrace) {						\
+		malloc_utrace_t ut;					\
+		ut.p = (a);						\
+		ut.s = (b);						\
+		ut.r = (c);						\
+		utrace(&ut, sizeof(ut));				\
+	}								\
+} while (0)
+#else
+#  define UTRACE(a, b, c)
 #endif
-size_t	opt_narenas = 0;
 
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
 
-static void	wrtmessage(void *cbopaque, const char *s);
 static void	stats_print_atexit(void);
 static unsigned	malloc_ncpus(void);
-static void	arenas_cleanup(void *arg);
-#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
-static void	thread_allocated_cleanup(void *arg);
-#endif
 static bool	malloc_conf_next(char const **opts_p, char const **k_p,
     size_t *klen_p, char const **v_p, size_t *vlen_p);
 static void	malloc_conf_error(const char *msg, const char *k, size_t klen,
     const char *v, size_t vlen);
 static void	malloc_conf_init(void);
 static bool	malloc_init_hard(void);
-static int	imemalign(void **memptr, size_t alignment, size_t size);
-
-/******************************************************************************/
-/* malloc_message() setup. */
-
-#ifdef JEMALLOC_HAVE_ATTR
-JEMALLOC_ATTR(visibility("hidden"))
-#else
-static
-#endif
-void
-wrtmessage(void *cbopaque, const char *s)
-{
-#ifdef JEMALLOC_CC_SILENCE
-	int result =
-#endif
-	    write(STDERR_FILENO, s, strlen(s));
-#ifdef JEMALLOC_CC_SILENCE
-	if (result < 0)
-		result = errno;
-#endif
-}
-
-void	(*JEMALLOC_P(malloc_message))(void *, const char *s)
-    JEMALLOC_ATTR(visibility("default")) = wrtmessage;
+static int	imemalign(void **memptr, size_t alignment, size_t size,
+    size_t min_alignment);
 
 /******************************************************************************/
 /*
@@ -121,9 +118,7 @@ arenas_extend(unsigned ind)
 {
 	arena_t *ret;
 
-	/* Allocate enough space for trailing bins. */
-	ret = (arena_t *)base_alloc(offsetof(arena_t, bins)
-	    + (sizeof(arena_bin_t) * nbins));
+	ret = (arena_t *)base_alloc(sizeof(arena_t));
 	if (ret != NULL && arena_new(ret, ind) == false) {
 		arenas[ind] = ret;
 		return (ret);
@@ -143,10 +138,7 @@ arenas_extend(unsigned ind)
 	return (arenas[0]);
 }
 
-/*
- * Choose an arena based on a per-thread value (slow-path code only, called
- * only by choose_arena()).
- */
+/* Slow path, called only by choose_arena(). */
 arena_t *
 choose_arena_hard(void)
 {
@@ -182,7 +174,7 @@ choose_arena_hard(void)
 			}
 		}
 
-		if (arenas[choose] == 0 || first_null == narenas) {
+		if (arenas[choose]->nthreads == 0 || first_null == narenas) {
 			/*
 			 * Use an unloaded arena, or the least loaded arena if
 			 * all arenas are already initialized.
@@ -201,85 +193,46 @@ choose_arena_hard(void)
 		malloc_mutex_unlock(&arenas_lock);
 	}
 
-	ARENA_SET(ret);
+	arenas_tsd_set(&ret);
 
 	return (ret);
 }
 
-/*
- * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so
- * provide a wrapper.
- */
-int
-buferror(int errnum, char *buf, size_t buflen)
-{
-#ifdef _GNU_SOURCE
-	char *b = strerror_r(errno, buf, buflen);
-	if (b != buf) {
-		strncpy(buf, b, buflen);
-		buf[buflen-1] = '\0';
-	}
-	return (0);
-#else
-	return (strerror_r(errno, buf, buflen));
-#endif
-}
-
 static void
 stats_print_atexit(void)
 {
 
-#if (defined(JEMALLOC_TCACHE) && defined(JEMALLOC_STATS))
-	unsigned i;
+	if (config_tcache && config_stats) {
+		unsigned i;
 
-	/*
-	 * Merge stats from extant threads.  This is racy, since individual
-	 * threads do not lock when recording tcache stats events.  As a
-	 * consequence, the final stats may be slightly out of date by the time
-	 * they are reported, if other threads continue to allocate.
-	 */
-	for (i = 0; i < narenas; i++) {
-		arena_t *arena = arenas[i];
-		if (arena != NULL) {
-			tcache_t *tcache;
+		/*
+		 * Merge stats from extant threads.  This is racy, since
+		 * individual threads do not lock when recording tcache stats
+		 * events.  As a consequence, the final stats may be slightly
+		 * out of date by the time they are reported, if other threads
+		 * continue to allocate.
+		 */
+		for (i = 0; i < narenas; i++) {
+			arena_t *arena = arenas[i];
+			if (arena != NULL) {
+				tcache_t *tcache;
 
-			/*
-			 * tcache_stats_merge() locks bins, so if any code is
-			 * introduced that acquires both arena and bin locks in
-			 * the opposite order, deadlocks may result.
-			 */
-			malloc_mutex_lock(&arena->lock);
-			ql_foreach(tcache, &arena->tcache_ql, link) {
-				tcache_stats_merge(tcache, arena);
+				/*
+				 * tcache_stats_merge() locks bins, so if any
+				 * code is introduced that acquires both arena
+				 * and bin locks in the opposite order,
+				 * deadlocks may result.
+				 */
+				malloc_mutex_lock(&arena->lock);
+				ql_foreach(tcache, &arena->tcache_ql, link) {
+					tcache_stats_merge(tcache, arena);
+				}
+				malloc_mutex_unlock(&arena->lock);
 			}
-			malloc_mutex_unlock(&arena->lock);
 		}
 	}
-#endif
-	JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL);
-}
-
-#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
-thread_allocated_t *
-thread_allocated_get_hard(void)
-{
-	thread_allocated_t *thread_allocated = (thread_allocated_t *)
-	    imalloc(sizeof(thread_allocated_t));
-	if (thread_allocated == NULL) {
-		static thread_allocated_t static_thread_allocated = {0, 0};
-		malloc_write("<jemalloc>: Error allocating TSD;"
-		    " mallctl(\"thread.{de,}allocated[p]\", ...)"
-		    " will be inaccurate\n");
-		if (opt_abort)
-			abort();
-		return (&static_thread_allocated);
-	}
-	pthread_setspecific(thread_allocated_tsd, thread_allocated);
-	thread_allocated->allocated = 0;
-	thread_allocated->deallocated = 0;
-	return (thread_allocated);
+	je_malloc_stats_print(NULL, NULL, NULL);
 }
-#endif
 
 /*
  * End miscellaneous support functions.
@@ -295,42 +248,32 @@ malloc_ncpus(void)
 	unsigned ret;
 	long result;
 
+#ifdef _WIN32
+	SYSTEM_INFO si;
+	GetSystemInfo(&si);
+	result = si.dwNumberOfProcessors;
+#else
 	result = sysconf(_SC_NPROCESSORS_ONLN);
 	if (result == -1) {
 		/* Error. */
 		ret = 1;
 	}
+#endif
 	ret = (unsigned)result;
 
 	return (ret);
 }
 
-static void
+void
 arenas_cleanup(void *arg)
 {
-	arena_t *arena = (arena_t *)arg;
+	arena_t *arena = *(arena_t **)arg;
 
 	malloc_mutex_lock(&arenas_lock);
 	arena->nthreads--;
 	malloc_mutex_unlock(&arenas_lock);
 }
 
-#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
-static void
-thread_allocated_cleanup(void *arg)
-{
-	uint64_t *allocated = (uint64_t *)arg;
-
-	if (allocated != NULL)
-		idalloc(allocated);
-}
-#endif
-
-/*
- * FreeBSD's pthreads implementation calls malloc(3), so the malloc
- * implementation has to take pains to avoid infinite recursion during
- * initialization.
- */
 static inline bool
 malloc_init(void)
 {
@@ -352,68 +295,64 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p,
 
 	for (accept = false; accept == false;) {
 		switch (*opts) {
-			case 'A': case 'B': case 'C': case 'D': case 'E':
-			case 'F': case 'G': case 'H': case 'I': case 'J':
-			case 'K': case 'L': case 'M': case 'N': case 'O':
-			case 'P': case 'Q': case 'R': case 'S': case 'T':
-			case 'U': case 'V': case 'W': case 'X': case 'Y':
-			case 'Z':
-			case 'a': case 'b': case 'c': case 'd': case 'e':
-			case 'f': case 'g': case 'h': case 'i': case 'j':
-			case 'k': case 'l': case 'm': case 'n': case 'o':
-			case 'p': case 'q': case 'r': case 's': case 't':
-			case 'u': case 'v': case 'w': case 'x': case 'y':
-			case 'z':
-			case '0': case '1': case '2': case '3': case '4':
-			case '5': case '6': case '7': case '8': case '9':
-			case '_':
-				opts++;
-				break;
-			case ':':
-				opts++;
-				*klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p;
-				*v_p = opts;
-				accept = true;
-				break;
-			case '\0':
-				if (opts != *opts_p) {
-					malloc_write("<jemalloc>: Conf string "
-					    "ends with key\n");
-				}
-				return (true);
-			default:
-				malloc_write("<jemalloc>: Malformed conf "
-				    "string\n");
-				return (true);
+		case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+		case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+		case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+		case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+		case 'Y': case 'Z':
+		case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+		case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+		case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+		case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+		case 'y': case 'z':
+		case '0': case '1': case '2': case '3': case '4': case '5':
+		case '6': case '7': case '8': case '9':
+		case '_':
+			opts++;
+			break;
+		case ':':
+			opts++;
+			*klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p;
+			*v_p = opts;
+			accept = true;
+			break;
+		case '\0':
+			if (opts != *opts_p) {
+				malloc_write("<jemalloc>: Conf string ends "
+				    "with key\n");
+			}
+			return (true);
+		default:
+			malloc_write("<jemalloc>: Malformed conf string\n");
+			return (true);
 		}
 	}
 
 	for (accept = false; accept == false;) {
 		switch (*opts) {
-			case ',':
-				opts++;
-				/*
-				 * Look ahead one character here, because the
-				 * next time this function is called, it will
-				 * assume that end of input has been cleanly
-				 * reached if no input remains, but we have
-				 * optimistically already consumed the comma if
-				 * one exists.
-				 */
-				if (*opts == '\0') {
-					malloc_write("<jemalloc>: Conf string "
-					    "ends with comma\n");
-				}
-				*vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p;
-				accept = true;
-				break;
-			case '\0':
-				*vlen_p = (uintptr_t)opts - (uintptr_t)*v_p;
-				accept = true;
-				break;
-			default:
-				opts++;
-				break;
+		case ',':
+			opts++;
+			/*
+			 * Look ahead one character here, because the next time
+			 * this function is called, it will assume that end of
+			 * input has been cleanly reached if no input remains,
+			 * but we have optimistically already consumed the
+			 * comma if one exists.
+			 */
+			if (*opts == '\0') {
+				malloc_write("<jemalloc>: Conf string ends "
+				    "with comma\n");
+			}
+			*vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p;
+			accept = true;
+			break;
+		case '\0':
+			*vlen_p = (uintptr_t)opts - (uintptr_t)*v_p;
+			accept = true;
+			break;
+		default:
+			opts++;
+			break;
 		}
 	}
 
@@ -425,17 +364,9 @@ static void
 malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v,
     size_t vlen)
 {
-	char buf[PATH_MAX + 1];
 
-	malloc_write("<jemalloc>: ");
-	malloc_write(msg);
-	malloc_write(": ");
-	memcpy(buf, k, klen);
-	memcpy(&buf[klen], ":", 1);
-	memcpy(&buf[klen+1], v, vlen);
-	buf[klen+1+vlen] = '\0';
-	malloc_write(buf);
-	malloc_write("\n");
+	malloc_printf("<jemalloc>: %s: %.*s:%.*s\n", msg, (int)klen, k,
+	    (int)vlen, v);
 }
 
 static void
@@ -450,12 +381,12 @@ malloc_conf_init(void)
 		/* Get runtime configuration. */
 		switch (i) {
 		case 0:
-			if (JEMALLOC_P(malloc_conf) != NULL) {
+			if (je_malloc_conf != NULL) {
 				/*
 				 * Use options that were compiled into the
 				 * program.
 				 */
-				opts = JEMALLOC_P(malloc_conf);
+				opts = je_malloc_conf;
 			} else {
 				/* No configuration specified. */
 				buf[0] = '\0';
@@ -463,13 +394,14 @@ malloc_conf_init(void)
 			}
 			break;
 		case 1: {
+#ifndef _WIN32
 			int linklen;
 			const char *linkname =
-#ifdef JEMALLOC_PREFIX
+#  ifdef JEMALLOC_PREFIX
 			    "/etc/"JEMALLOC_PREFIX"malloc.conf"
-#else
+#  else
 			    "/etc/malloc.conf"
-#endif
+#  endif
 			    ;
 
 			if ((linklen = readlink(linkname, buf,
@@ -480,14 +412,15 @@ malloc_conf_init(void)
 				 */
 				buf[linklen] = '\0';
 				opts = buf;
-			} else {
+			} else
+#endif
+			{
 				/* No configuration specified. */
 				buf[0] = '\0';
 				opts = buf;
 			}
 			break;
-		}
-		case 2: {
+		} case 2: {
 			const char *envname =
 #ifdef JEMALLOC_PREFIX
 			    JEMALLOC_CPREFIX"MALLOC_CONF"
@@ -508,8 +441,7 @@ malloc_conf_init(void)
 				opts = buf;
 			}
 			break;
-		}
-		default:
+		} default:
 			/* NOTREACHED */
 			assert(false);
 			buf[0] = '\0';
@@ -518,52 +450,59 @@ malloc_conf_init(void)
 
 		while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v,
 		    &vlen) == false) {
-#define	CONF_HANDLE_BOOL(n)						\
-			if (sizeof(#n)-1 == klen && strncmp(#n, k,	\
+#define	CONF_HANDLE_BOOL_HIT(o, n, hit)					\
+			if (sizeof(n)-1 == klen && strncmp(n, k,	\
 			    klen) == 0) {				\
 				if (strncmp("true", v, vlen) == 0 &&	\
 				    vlen == sizeof("true")-1)		\
-					opt_##n = true;			\
+					o = true;			\
 				else if (strncmp("false", v, vlen) ==	\
 				    0 && vlen == sizeof("false")-1)	\
-					opt_##n = false;		\
+					o = false;			\
 				else {					\
 					malloc_conf_error(		\
 					    "Invalid conf value",	\
 					    k, klen, v, vlen);		\
 				}					\
+				hit = true;				\
+			} else						\
+				hit = false;
+#define	CONF_HANDLE_BOOL(o, n) {					\
+			bool hit;					\
+			CONF_HANDLE_BOOL_HIT(o, n, hit);		\
+			if (hit)					\
 				continue;				\
-			}
-#define	CONF_HANDLE_SIZE_T(n, min, max)					\
-			if (sizeof(#n)-1 == klen && strncmp(#n, k,	\
+}
+#define	CONF_HANDLE_SIZE_T(o, n, min, max)				\
+			if (sizeof(n)-1 == klen && strncmp(n, k,	\
 			    klen) == 0) {				\
-				unsigned long ul;			\
+				uintmax_t um;				\
 				char *end;				\
 									\
-				errno = 0;				\
-				ul = strtoul(v, &end, 0);		\
-				if (errno != 0 || (uintptr_t)end -	\
+				set_errno(0);				\
+				um = malloc_strtoumax(v, &end, 0);	\
+				if (get_errno() != 0 || (uintptr_t)end -\
 				    (uintptr_t)v != vlen) {		\
 					malloc_conf_error(		\
 					    "Invalid conf value",	\
 					    k, klen, v, vlen);		\
-				} else if (ul < min || ul > max) {	\
+				} else if (um < min || um > max) {	\
 					malloc_conf_error(		\
 					    "Out-of-range conf value",	\
 					    k, klen, v, vlen);		\
 				} else					\
-					opt_##n = ul;			\
+					o = um;				\
 				continue;				\
 			}
-#define	CONF_HANDLE_SSIZE_T(n, min, max)				\
-			if (sizeof(#n)-1 == klen && strncmp(#n, k,	\
+#define	CONF_HANDLE_SSIZE_T(o, n, min, max)				\
+			if (sizeof(n)-1 == klen && strncmp(n, k,	\
 			    klen) == 0) {				\
 				long l;					\
 				char *end;				\
 									\
-				errno = 0;				\
+				set_errno(0);				\
 				l = strtol(v, &end, 0);			\
-				if (errno != 0 || (uintptr_t)end -	\
+				if (get_errno() != 0 || (uintptr_t)end -\
 				    (uintptr_t)v != vlen) {		\
 					malloc_conf_error(		\
 					    "Invalid conf value",	\
@@ -574,70 +513,86 @@ malloc_conf_init(void)
 					    "Out-of-range conf value",	\
 					    k, klen, v, vlen);		\
 				} else					\
-					opt_##n = l;			\
+					o = l;				\
 				continue;				\
 			}
-#define	CONF_HANDLE_CHAR_P(n, d)					\
-			if (sizeof(#n)-1 == klen && strncmp(#n, k,	\
+#define	CONF_HANDLE_CHAR_P(o, n, d)					\
+			if (sizeof(n)-1 == klen && strncmp(n, k,	\
 			    klen) == 0) {				\
 				size_t cpylen = (vlen <=		\
-				    sizeof(opt_##n)-1) ? vlen :		\
-				    sizeof(opt_##n)-1;			\
-				strncpy(opt_##n, v, cpylen);		\
-				opt_##n[cpylen] = '\0';			\
+				    sizeof(o)-1) ? vlen :		\
+				    sizeof(o)-1;			\
+				strncpy(o, v, cpylen);			\
+				o[cpylen] = '\0';			\
 				continue;				\
 			}
 
-			CONF_HANDLE_BOOL(abort)
-			CONF_HANDLE_SIZE_T(lg_qspace_max, LG_QUANTUM,
-			    PAGE_SHIFT-1)
-			CONF_HANDLE_SIZE_T(lg_cspace_max, LG_QUANTUM,
-			    PAGE_SHIFT-1)
+			CONF_HANDLE_BOOL(opt_abort, "abort")
 			/*
-			 * Chunks always require at least one * header page,
-			 * plus one data page.
+			 * Chunks always require at least one header page, plus
+			 * one data page in the absence of redzones, or three
+			 * pages in the presence of redzones.  In order to
+			 * simplify options processing, fix the limit based on
+			 * config_fill.
 			 */
-			CONF_HANDLE_SIZE_T(lg_chunk, PAGE_SHIFT+1,
-			    (sizeof(size_t) << 3) - 1)
-			CONF_HANDLE_SIZE_T(narenas, 1, SIZE_T_MAX)
-			CONF_HANDLE_SSIZE_T(lg_dirty_mult, -1,
-			    (sizeof(size_t) << 3) - 1)
-			CONF_HANDLE_BOOL(stats_print)
-#ifdef JEMALLOC_FILL
-			CONF_HANDLE_BOOL(junk)
-			CONF_HANDLE_BOOL(zero)
-#endif
-#ifdef JEMALLOC_SYSV
-			CONF_HANDLE_BOOL(sysv)
-#endif
-#ifdef JEMALLOC_XMALLOC
-			CONF_HANDLE_BOOL(xmalloc)
-#endif
-#ifdef JEMALLOC_TCACHE
-			CONF_HANDLE_BOOL(tcache)
-			CONF_HANDLE_SSIZE_T(lg_tcache_gc_sweep, -1,
-			    (sizeof(size_t) << 3) - 1)
-			CONF_HANDLE_SSIZE_T(lg_tcache_max, -1,
-			    (sizeof(size_t) << 3) - 1)
-#endif
-#ifdef JEMALLOC_PROF
-			CONF_HANDLE_BOOL(prof)
-			CONF_HANDLE_CHAR_P(prof_prefix, "jeprof")
-			CONF_HANDLE_SIZE_T(lg_prof_bt_max, 0, LG_PROF_BT_MAX)
-			CONF_HANDLE_BOOL(prof_active)
-			CONF_HANDLE_SSIZE_T(lg_prof_sample, 0,
-			    (sizeof(uint64_t) << 3) - 1)
-			CONF_HANDLE_BOOL(prof_accum)
-			CONF_HANDLE_SSIZE_T(lg_prof_tcmax, -1,
-			    (sizeof(size_t) << 3) - 1)
-			CONF_HANDLE_SSIZE_T(lg_prof_interval, -1,
-			    (sizeof(uint64_t) << 3) - 1)
-			CONF_HANDLE_BOOL(prof_gdump)
-			CONF_HANDLE_BOOL(prof_leak)
-#endif
-#ifdef JEMALLOC_SWAP
-			CONF_HANDLE_BOOL(overcommit)
-#endif
+			CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE +
+			    (config_fill ? 2 : 1), (sizeof(size_t) << 3) - 1)
+			CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1,
+			    SIZE_T_MAX)
+			CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult",
+			    -1, (sizeof(size_t) << 3) - 1)
+			CONF_HANDLE_BOOL(opt_stats_print, "stats_print")
+			if (config_fill) {
+				CONF_HANDLE_BOOL(opt_junk, "junk")
+				CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine",
+				    0, SIZE_T_MAX)
+				CONF_HANDLE_BOOL(opt_redzone, "redzone")
+				CONF_HANDLE_BOOL(opt_zero, "zero")
+			}
+			if (config_utrace) {
+				CONF_HANDLE_BOOL(opt_utrace, "utrace")
+			}
+			if (config_valgrind) {
+				bool hit;
+				CONF_HANDLE_BOOL_HIT(opt_valgrind,
+				    "valgrind", hit)
+				if (config_fill && opt_valgrind && hit) {
+					opt_junk = false;
+					opt_zero = false;
+					if (opt_quarantine == 0) {
+						opt_quarantine =
+						    JEMALLOC_VALGRIND_QUARANTINE_DEFAULT;
+					}
+					opt_redzone = true;
+				}
+				if (hit)
+					continue;
+			}
+			if (config_xmalloc) {
+				CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc")
+			}
+			if (config_tcache) {
+				CONF_HANDLE_BOOL(opt_tcache, "tcache")
+				CONF_HANDLE_SSIZE_T(opt_lg_tcache_max,
+				    "lg_tcache_max", -1,
+				    (sizeof(size_t) << 3) - 1)
+			}
+			if (config_prof) {
+				CONF_HANDLE_BOOL(opt_prof, "prof")
+				CONF_HANDLE_CHAR_P(opt_prof_prefix,
+				    "prof_prefix", "jeprof")
+				CONF_HANDLE_BOOL(opt_prof_active, "prof_active")
+				CONF_HANDLE_SSIZE_T(opt_lg_prof_sample,
+				    "lg_prof_sample", 0,
+				    (sizeof(uint64_t) << 3) - 1)
+				CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum")
+				CONF_HANDLE_SSIZE_T(opt_lg_prof_interval,
+				    "lg_prof_interval", -1,
+				    (sizeof(uint64_t) << 3) - 1)
+				CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump")
+				CONF_HANDLE_BOOL(opt_prof_final, "prof_final")
+				CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak")
+			}
 			malloc_conf_error("Invalid conf pair", k, klen, v,
 			    vlen);
 #undef CONF_HANDLE_BOOL
@@ -645,14 +600,6 @@ malloc_conf_init(void)
 #undef CONF_HANDLE_SSIZE_T
 #undef CONF_HANDLE_CHAR_P
 		}
-
-		/* Validate configuration of options that are inter-related. */
-		if (opt_lg_qspace_max+1 >= opt_lg_cspace_max) {
-			malloc_write("<jemalloc>: Invalid lg_[qc]space_max "
-			    "relationship; restoring defaults\n");
-			opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT;
-			opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT;
-		}
 	}
 }
 
@@ -662,7 +609,7 @@ malloc_init_hard(void)
 	arena_t *init_arenas[1];
 
 	malloc_mutex_lock(&init_lock);
-	if (malloc_initialized || malloc_initializer == pthread_self()) {
+	if (malloc_initialized || IS_INITIALIZER) {
 		/*
 		 * Another thread initialized the allocator before this one
 		 * acquired init_lock, or this thread is the initializing
@@ -671,7 +618,8 @@ malloc_init_hard(void)
 		malloc_mutex_unlock(&init_lock);
 		return (false);
 	}
-	if (malloc_initializer != (unsigned long)0) {
+#ifdef JEMALLOC_THREADED_INIT
+	if (malloc_initializer != NO_INITIALIZER && IS_INITIALIZER == false) {
 		/* Busy-wait until the initializing thread completes. */
 		do {
 			malloc_mutex_unlock(&init_lock);
@@ -681,44 +629,25 @@ malloc_init_hard(void)
 		malloc_mutex_unlock(&init_lock);
 		return (false);
 	}
-
-#ifdef DYNAMIC_PAGE_SHIFT
-	/* Get page size. */
-	{
-		long result;
-
-		result = sysconf(_SC_PAGESIZE);
-		assert(result != -1);
-		pagesize = (size_t)result;
-
-		/*
-		 * We assume that pagesize is a power of 2 when calculating
-		 * pagesize_mask and lg_pagesize.
-		 */
-		assert(((result - 1) & result) == 0);
-		pagesize_mask = result - 1;
-		lg_pagesize = ffs((int)result) - 1;
-	}
 #endif
+	malloc_initializer = INITIALIZER;
 
-#ifdef JEMALLOC_PROF
-	prof_boot0();
-#endif
+	malloc_tsd_boot();
+	if (config_prof)
+		prof_boot0();
 
 	malloc_conf_init();
 
+#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \
+    && !defined(_WIN32))
 	/* Register fork handlers. */
-	if (pthread_atfork(jemalloc_prefork, jemalloc_postfork,
-	    jemalloc_postfork) != 0) {
+	if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent,
+	    jemalloc_postfork_child) != 0) {
 		malloc_write("<jemalloc>: Error in pthread_atfork()\n");
 		if (opt_abort)
 			abort();
 	}
-
-	if (ctl_boot()) {
-		malloc_mutex_unlock(&init_lock);
-		return (true);
-	}
+#endif
 
 	if (opt_stats_print) {
 		/* Print statistics at exit. */
@@ -729,54 +658,39 @@ malloc_init_hard(void)
 		}
 	}
 
-	if (chunk_boot()) {
-		malloc_mutex_unlock(&init_lock);
-		return (true);
-	}
-
 	if (base_boot()) {
 		malloc_mutex_unlock(&init_lock);
 		return (true);
 	}
 
-#ifdef JEMALLOC_PROF
-	prof_boot1();
-#endif
-
-	if (arena_boot()) {
+	if (chunk_boot()) {
 		malloc_mutex_unlock(&init_lock);
 		return (true);
 	}
 
-#ifdef JEMALLOC_TCACHE
-	if (tcache_boot()) {
+	if (ctl_boot()) {
 		malloc_mutex_unlock(&init_lock);
 		return (true);
 	}
-#endif
 
-	if (huge_boot()) {
+	if (config_prof)
+		prof_boot1();
+
+	arena_boot();
+
+	if (config_tcache && tcache_boot0()) {
 		malloc_mutex_unlock(&init_lock);
 		return (true);
 	}
 
-#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
-	/* Initialize allocation counters before any allocations can occur. */
-	if (pthread_key_create(&thread_allocated_tsd, thread_allocated_cleanup)
-	    != 0) {
+	if (huge_boot()) {
 		malloc_mutex_unlock(&init_lock);
 		return (true);
 	}
-#endif
 
 	if (malloc_mutex_init(&arenas_lock))
 		return (true);
 
-	if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) {
-		malloc_mutex_unlock(&init_lock);
-		return (true);
-	}
-
 	/*
 	 * Create enough scaffolding to allow recursive allocation in
 	 * malloc_ncpus().
@@ -795,27 +709,42 @@ malloc_init_hard(void)
 		return (true);
 	}
 
-	/*
-	 * Assign the initial arena to the initial thread, in order to avoid
-	 * spurious creation of an extra arena if the application switches to
-	 * threaded mode.
-	 */
-	ARENA_SET(arenas[0]);
-	arenas[0]->nthreads++;
+	/* Initialize allocation counters before any allocations can occur. */
+	if (config_stats && thread_allocated_tsd_boot()) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
 
-#ifdef JEMALLOC_PROF
-	if (prof_boot2()) {
+	if (arenas_tsd_boot()) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
+
+	if (config_tcache && tcache_boot1()) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
+
+	if (config_fill && quarantine_boot()) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
+
+	if (config_prof && prof_boot2()) {
 		malloc_mutex_unlock(&init_lock);
 		return (true);
 	}
-#endif
 
 	/* Get number of CPUs. */
-	malloc_initializer = pthread_self();
 	malloc_mutex_unlock(&init_lock);
 	ncpus = malloc_ncpus();
 	malloc_mutex_lock(&init_lock);
 
+	if (mutex_boot()) {
+		malloc_mutex_unlock(&init_lock);
+		return (true);
+	}
+
 	if (opt_narenas == 0) {
 		/*
 		 * For SMP systems, create more than one arena per CPU by
@@ -833,12 +762,9 @@ malloc_init_hard(void)
 	 * machinery will fail to allocate memory at far lower limits.
 	 */
 	if (narenas > chunksize / sizeof(arena_t *)) {
-		char buf[UMAX2S_BUFSIZE];
-
 		narenas = chunksize / sizeof(arena_t *);
-		malloc_write("<jemalloc>: Reducing narenas to limit (");
-		malloc_write(u2s(narenas, 10, buf));
-		malloc_write(")\n");
+		malloc_printf("<jemalloc>: Reducing narenas to limit (%d)\n",
+		    narenas);
 	}
 
 	/* Allocate and initialize arenas. */
@@ -855,34 +781,11 @@ malloc_init_hard(void)
 	/* Copy the pointer to the one arena that was already initialized. */
 	arenas[0] = init_arenas[0];
 
-#ifdef JEMALLOC_ZONE
-	/* Register the custom zone. */
-	malloc_zone_register(create_zone());
-
-	/*
-	 * Convert the default szone to an "overlay zone" that is capable of
-	 * deallocating szone-allocated objects, but allocating new objects
-	 * from jemalloc.
-	 */
-	szone2ozone(malloc_default_zone());
-#endif
-
 	malloc_initialized = true;
 	malloc_mutex_unlock(&init_lock);
 	return (false);
 }
 
-#ifdef JEMALLOC_ZONE
-JEMALLOC_ATTR(constructor)
-void
-jemalloc_darwin_init(void)
-{
-
-	if (malloc_init_hard())
-		abort();
-}
-#endif
-
 /*
  * End initialization functions.
  */
@@ -891,191 +794,118 @@ jemalloc_darwin_init(void)
  * Begin malloc(3)-compatible functions.
  */
 
-JEMALLOC_ATTR(malloc)
-JEMALLOC_ATTR(visibility("default"))
 void *
-JEMALLOC_P(malloc)(size_t size)
+je_malloc(size_t size)
 {
 	void *ret;
-#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
-	size_t usize
-#  ifdef JEMALLOC_CC_SILENCE
-	    = 0
-#  endif
-	    ;
-#endif
-#ifdef JEMALLOC_PROF
-	prof_thr_cnt_t *cnt
-#  ifdef JEMALLOC_CC_SILENCE
-	    = NULL
-#  endif
-	    ;
-#endif
+	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
+	prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL);
 
 	if (malloc_init()) {
 		ret = NULL;
-		goto OOM;
+		goto label_oom;
 	}
 
-	if (size == 0) {
-#ifdef JEMALLOC_SYSV
-		if (opt_sysv == false)
-#endif
-			size = 1;
-#ifdef JEMALLOC_SYSV
-		else {
-#  ifdef JEMALLOC_XMALLOC
-			if (opt_xmalloc) {
-				malloc_write("<jemalloc>: Error in malloc(): "
-				    "invalid size 0\n");
-				abort();
-			}
-#  endif
-			ret = NULL;
-			goto RETURN;
-		}
-#endif
-	}
+	if (size == 0)
+		size = 1;
 
-#ifdef JEMALLOC_PROF
-	if (opt_prof) {
+	if (config_prof && opt_prof) {
 		usize = s2u(size);
 		PROF_ALLOC_PREP(1, usize, cnt);
 		if (cnt == NULL) {
 			ret = NULL;
-			goto OOM;
+			goto label_oom;
 		}
 		if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <=
-		    small_maxclass) {
-			ret = imalloc(small_maxclass+1);
+		    SMALL_MAXCLASS) {
+			ret = imalloc(SMALL_MAXCLASS+1);
 			if (ret != NULL)
 				arena_prof_promoted(ret, usize);
 		} else
 			ret = imalloc(size);
-	} else
-#endif
-	{
-#ifdef JEMALLOC_STATS
-		usize = s2u(size);
-#endif
+	} else {
+		if (config_stats || (config_valgrind && opt_valgrind))
+			usize = s2u(size);
 		ret = imalloc(size);
 	}
 
-OOM:
+label_oom:
 	if (ret == NULL) {
-#ifdef JEMALLOC_XMALLOC
-		if (opt_xmalloc) {
+		if (config_xmalloc && opt_xmalloc) {
 			malloc_write("<jemalloc>: Error in malloc(): "
 			    "out of memory\n");
 			abort();
 		}
-#endif
-		errno = ENOMEM;
+		set_errno(ENOMEM);
 	}
-
-#ifdef JEMALLOC_SYSV
-RETURN:
-#endif
-#ifdef JEMALLOC_PROF
-	if (opt_prof && ret != NULL)
+	if (config_prof && opt_prof && ret != NULL)
 		prof_malloc(ret, usize, cnt);
-#endif
-#ifdef JEMALLOC_STATS
-	if (ret != NULL) {
-		assert(usize == isalloc(ret));
-		ALLOCATED_ADD(usize, 0);
+	if (config_stats && ret != NULL) {
+		assert(usize == isalloc(ret, config_prof));
+		thread_allocated_tsd_get()->allocated += usize;
 	}
-#endif
+	UTRACE(0, size, ret);
+	JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false);
 	return (ret);
 }
 
 JEMALLOC_ATTR(nonnull(1))
 #ifdef JEMALLOC_PROF
 /*
- * Avoid any uncertainty as to how many backtrace frames to ignore in 
+ * Avoid any uncertainty as to how many backtrace frames to ignore in
  * PROF_ALLOC_PREP().
  */
 JEMALLOC_ATTR(noinline)
 #endif
 static int
-imemalign(void **memptr, size_t alignment, size_t size)
+imemalign(void **memptr, size_t alignment, size_t size,
+    size_t min_alignment)
 {
 	int ret;
-	size_t usize
-#ifdef JEMALLOC_CC_SILENCE
-	    = 0
-#endif
-	    ;
+	size_t usize;
 	void *result;
-#ifdef JEMALLOC_PROF
-	prof_thr_cnt_t *cnt
-#  ifdef JEMALLOC_CC_SILENCE
-	    = NULL
-#  endif
-	    ;
-#endif
+	prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL);
+
+	assert(min_alignment != 0);
 
 	if (malloc_init())
 		result = NULL;
 	else {
-		if (size == 0) {
-#ifdef JEMALLOC_SYSV
-			if (opt_sysv == false)
-#endif
-				size = 1;
-#ifdef JEMALLOC_SYSV
-			else {
-#  ifdef JEMALLOC_XMALLOC
-				if (opt_xmalloc) {
-					malloc_write("<jemalloc>: Error in "
-					    "posix_memalign(): invalid size "
-					    "0\n");
-					abort();
-				}
-#  endif
-				result = NULL;
-				*memptr = NULL;
-				ret = 0;
-				goto RETURN;
-			}
-#endif
-		}
+		if (size == 0)
+			size = 1;
 
 		/* Make sure that alignment is a large enough power of 2. */
 		if (((alignment - 1) & alignment) != 0
-		    || alignment < sizeof(void *)) {
-#ifdef JEMALLOC_XMALLOC
-			if (opt_xmalloc) {
-				malloc_write("<jemalloc>: Error in "
-				    "posix_memalign(): invalid alignment\n");
+		    || (alignment < min_alignment)) {
+			if (config_xmalloc && opt_xmalloc) {
+				malloc_write("<jemalloc>: Error allocating "
+				    "aligned memory: invalid alignment\n");
 				abort();
 			}
-#endif
 			result = NULL;
 			ret = EINVAL;
-			goto RETURN;
+			goto label_return;
 		}
 
-		usize = sa2u(size, alignment, NULL);
+		usize = sa2u(size, alignment);
 		if (usize == 0) {
 			result = NULL;
 			ret = ENOMEM;
-			goto RETURN;
+			goto label_return;
 		}
 
-#ifdef JEMALLOC_PROF
-		if (opt_prof) {
+		if (config_prof && opt_prof) {
 			PROF_ALLOC_PREP(2, usize, cnt);
 			if (cnt == NULL) {
 				result = NULL;
 				ret = EINVAL;
 			} else {
 				if (prof_promote && (uintptr_t)cnt !=
-				    (uintptr_t)1U && usize <= small_maxclass) {
-					assert(sa2u(small_maxclass+1,
-					    alignment, NULL) != 0);
-					result = ipalloc(sa2u(small_maxclass+1,
-					    alignment, NULL), alignment, false);
+				    (uintptr_t)1U && usize <= SMALL_MAXCLASS) {
+					assert(sa2u(SMALL_MAXCLASS+1,
+					    alignment) != 0);
+					result = ipalloc(sa2u(SMALL_MAXCLASS+1,
+					    alignment), alignment, false);
 					if (result != NULL) {
 						arena_prof_promoted(result,
 						    usize);
@@ -1086,88 +916,79 @@ imemalign(void **memptr, size_t alignment, size_t size)
 				}
 			}
 		} else
-#endif
 			result = ipalloc(usize, alignment, false);
 	}
 
 	if (result == NULL) {
-#ifdef JEMALLOC_XMALLOC
-		if (opt_xmalloc) {
-			malloc_write("<jemalloc>: Error in posix_memalign(): "
-			    "out of memory\n");
+		if (config_xmalloc && opt_xmalloc) {
+			malloc_write("<jemalloc>: Error allocating aligned "
+			    "memory: out of memory\n");
 			abort();
 		}
-#endif
 		ret = ENOMEM;
-		goto RETURN;
+		goto label_return;
 	}
 
 	*memptr = result;
 	ret = 0;
 
-RETURN:
-#ifdef JEMALLOC_STATS
-	if (result != NULL) {
-		assert(usize == isalloc(result));
-		ALLOCATED_ADD(usize, 0);
+label_return:
+	if (config_stats && result != NULL) {
+		assert(usize == isalloc(result, config_prof));
+		thread_allocated_tsd_get()->allocated += usize;
 	}
-#endif
-#ifdef JEMALLOC_PROF
-	if (opt_prof && result != NULL)
+	if (config_prof && opt_prof && result != NULL)
 		prof_malloc(result, usize, cnt);
-#endif
+	UTRACE(0, size, result);
 	return (ret);
 }
 
-JEMALLOC_ATTR(nonnull(1))
-JEMALLOC_ATTR(visibility("default"))
 int
-JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
+je_posix_memalign(void **memptr, size_t alignment, size_t size)
+{
+	int ret = imemalign(memptr, alignment, size, sizeof(void *));
+	JEMALLOC_VALGRIND_MALLOC(ret == 0, *memptr, isalloc(*memptr,
+	    config_prof), false);
+	return (ret);
+}
+
+void *
+je_aligned_alloc(size_t alignment, size_t size)
 {
+	void *ret;
+	int err;
 
-	return imemalign(memptr, alignment, size);
+	if ((err = imemalign(&ret, alignment, size, 1)) != 0) {
+		ret = NULL;
+		set_errno(err);
+	}
+	JEMALLOC_VALGRIND_MALLOC(err == 0, ret, isalloc(ret, config_prof),
+	    false);
+	return (ret);
 }
 
-JEMALLOC_ATTR(malloc)
-JEMALLOC_ATTR(visibility("default"))
 void *
-JEMALLOC_P(calloc)(size_t num, size_t size)
+je_calloc(size_t num, size_t size)
 {
 	void *ret;
 	size_t num_size;
-#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
-	size_t usize
-#  ifdef JEMALLOC_CC_SILENCE
-	    = 0
-#  endif
-	    ;
-#endif
-#ifdef JEMALLOC_PROF
-	prof_thr_cnt_t *cnt
-#  ifdef JEMALLOC_CC_SILENCE
-	    = NULL
-#  endif
-	    ;
-#endif
+	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
+	prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL);
 
 	if (malloc_init()) {
 		num_size = 0;
 		ret = NULL;
-		goto RETURN;
+		goto label_return;
 	}
 
 	num_size = num * size;
 	if (num_size == 0) {
-#ifdef JEMALLOC_SYSV
-		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
-#endif
+		if (num == 0 || size == 0)
 			num_size = 1;
-#ifdef JEMALLOC_SYSV
 		else {
 			ret = NULL;
-			goto RETURN;
+			goto label_return;
 		}
-#endif
 	/*
 	 * Try to avoid division here.  We know that it isn't possible to
 	 * overflow during multiplication if neither operand uses any of the
@@ -1177,135 +998,113 @@ JEMALLOC_P(calloc)(size_t num, size_t size)
 	    && (num_size / size != num)) {
 		/* size_t overflow. */
 		ret = NULL;
-		goto RETURN;
+		goto label_return;
 	}
 
-#ifdef JEMALLOC_PROF
-	if (opt_prof) {
+	if (config_prof && opt_prof) {
 		usize = s2u(num_size);
 		PROF_ALLOC_PREP(1, usize, cnt);
 		if (cnt == NULL) {
 			ret = NULL;
-			goto RETURN;
+			goto label_return;
 		}
 		if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize
-		    <= small_maxclass) {
-			ret = icalloc(small_maxclass+1);
+		    <= SMALL_MAXCLASS) {
+			ret = icalloc(SMALL_MAXCLASS+1);
 			if (ret != NULL)
 				arena_prof_promoted(ret, usize);
 		} else
 			ret = icalloc(num_size);
-	} else
-#endif
-	{
-#ifdef JEMALLOC_STATS
-		usize = s2u(num_size);
-#endif
+	} else {
+		if (config_stats || (config_valgrind && opt_valgrind))
+			usize = s2u(num_size);
 		ret = icalloc(num_size);
 	}
 
-RETURN:
+label_return:
 	if (ret == NULL) {
-#ifdef JEMALLOC_XMALLOC
-		if (opt_xmalloc) {
+		if (config_xmalloc && opt_xmalloc) {
 			malloc_write("<jemalloc>: Error in calloc(): out of "
 			    "memory\n");
 			abort();
 		}
-#endif
-		errno = ENOMEM;
+		set_errno(ENOMEM);
 	}
 
-#ifdef JEMALLOC_PROF
-	if (opt_prof && ret != NULL)
+	if (config_prof && opt_prof && ret != NULL)
 		prof_malloc(ret, usize, cnt);
-#endif
-#ifdef JEMALLOC_STATS
-	if (ret != NULL) {
-		assert(usize == isalloc(ret));
-		ALLOCATED_ADD(usize, 0);
+	if (config_stats && ret != NULL) {
+		assert(usize == isalloc(ret, config_prof));
+		thread_allocated_tsd_get()->allocated += usize;
 	}
-#endif
+	UTRACE(0, num_size, ret);
+	JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, true);
 	return (ret);
 }
 
-JEMALLOC_ATTR(visibility("default"))
 void *
-JEMALLOC_P(realloc)(void *ptr, size_t size)
+je_realloc(void *ptr, size_t size)
 {
 	void *ret;
-#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
-	size_t usize
-#  ifdef JEMALLOC_CC_SILENCE
-	    = 0
-#  endif
-	    ;
+	size_t usize JEMALLOC_CC_SILENCE_INIT(0);
 	size_t old_size = 0;
-#endif
-#ifdef JEMALLOC_PROF
-	prof_thr_cnt_t *cnt
-#  ifdef JEMALLOC_CC_SILENCE
-	    = NULL
-#  endif
-	    ;
-	prof_ctx_t *old_ctx
-#  ifdef JEMALLOC_CC_SILENCE
-	    = NULL
-#  endif
-	    ;
-#endif
+	size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
+	prof_thr_cnt_t *cnt JEMALLOC_CC_SILENCE_INIT(NULL);
+	prof_ctx_t *old_ctx JEMALLOC_CC_SILENCE_INIT(NULL);
 
 	if (size == 0) {
-#ifdef JEMALLOC_SYSV
-		if (opt_sysv == false)
-#endif
-			size = 1;
-#ifdef JEMALLOC_SYSV
-		else {
-			if (ptr != NULL) {
-#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
-				old_size = isalloc(ptr);
-#endif
-#ifdef JEMALLOC_PROF
-				if (opt_prof) {
-					old_ctx = prof_ctx_get(ptr);
-					cnt = NULL;
-				}
-#endif
-				idalloc(ptr);
+		if (ptr != NULL) {
+			/* realloc(ptr, 0) is equivalent to free(ptr). */
+			if (config_prof) {
+				old_size = isalloc(ptr, true);
+				if (config_valgrind && opt_valgrind)
+					old_rzsize = p2rz(ptr);
+			} else if (config_stats) {
+				old_size = isalloc(ptr, false);
+				if (config_valgrind && opt_valgrind)
+					old_rzsize = u2rz(old_size);
+			} else if (config_valgrind && opt_valgrind) {
+				old_size = isalloc(ptr, false);
+				old_rzsize = u2rz(old_size);
 			}
-#ifdef JEMALLOC_PROF
-			else if (opt_prof) {
-				old_ctx = NULL;
+			if (config_prof && opt_prof) {
+				old_ctx = prof_ctx_get(ptr);
 				cnt = NULL;
 			}
-#endif
+			iqalloc(ptr);
 			ret = NULL;
-			goto RETURN;
-		}
-#endif
+			goto label_return;
+		} else
+			size = 1;
 	}
 
 	if (ptr != NULL) {
-		assert(malloc_initialized || malloc_initializer ==
-		    pthread_self());
-
-#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
-		old_size = isalloc(ptr);
-#endif
-#ifdef JEMALLOC_PROF
-		if (opt_prof) {
+		assert(malloc_initialized || IS_INITIALIZER);
+
+		if (config_prof) {
+			old_size = isalloc(ptr, true);
+			if (config_valgrind && opt_valgrind)
+				old_rzsize = p2rz(ptr);
+		} else if (config_stats) {
+			old_size = isalloc(ptr, false);
+			if (config_valgrind && opt_valgrind)
+				old_rzsize = u2rz(old_size);
+		} else if (config_valgrind && opt_valgrind) {
+			old_size = isalloc(ptr, false);
+			old_rzsize = u2rz(old_size);
+		}
+		if (config_prof && opt_prof) {
 			usize = s2u(size);
 			old_ctx = prof_ctx_get(ptr);
 			PROF_ALLOC_PREP(1, usize, cnt);
 			if (cnt == NULL) {
 				old_ctx = NULL;
 				ret = NULL;
-				goto OOM;
+				goto label_oom;
 			}
 			if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U &&
-			    usize <= small_maxclass) {
-				ret = iralloc(ptr, small_maxclass+1, 0, 0,
+			    usize <= SMALL_MAXCLASS) {
+				ret = iralloc(ptr, SMALL_MAXCLASS+1, 0, 0,
 				    false, false);
 				if (ret != NULL)
 					arena_prof_promoted(ret, usize);
@@ -1316,42 +1115,31 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
 				if (ret == NULL)
 					old_ctx = NULL;
 			}
-		} else
-#endif
-		{
-#ifdef JEMALLOC_STATS
-			usize = s2u(size);
-#endif
+		} else {
+			if (config_stats || (config_valgrind && opt_valgrind))
+				usize = s2u(size);
 			ret = iralloc(ptr, size, 0, 0, false, false);
 		}
 
-#ifdef JEMALLOC_PROF
-OOM:
-#endif
+label_oom:
 		if (ret == NULL) {
-#ifdef JEMALLOC_XMALLOC
-			if (opt_xmalloc) {
+			if (config_xmalloc && opt_xmalloc) {
 				malloc_write("<jemalloc>: Error in realloc(): "
 				    "out of memory\n");
 				abort();
 			}
-#endif
-			errno = ENOMEM;
+			set_errno(ENOMEM);
 		}
 	} else {
-#ifdef JEMALLOC_PROF
-		if (opt_prof)
+		/* realloc(NULL, size) is equivalent to malloc(size). */
+		if (config_prof && opt_prof)
 			old_ctx = NULL;
-#endif
 		if (malloc_init()) {
-#ifdef JEMALLOC_PROF
-			if (opt_prof)
+			if (config_prof && opt_prof)
 				cnt = NULL;
-#endif
 			ret = NULL;
 		} else {
-#ifdef JEMALLOC_PROF
-			if (opt_prof) {
+			if (config_prof && opt_prof) {
 				usize = s2u(size);
 				PROF_ALLOC_PREP(1, usize, cnt);
 				if (cnt == NULL)
@@ -1359,8 +1147,8 @@ OOM:
 				else {
 					if (prof_promote && (uintptr_t)cnt !=
 					    (uintptr_t)1U && usize <=
-					    small_maxclass) {
-						ret = imalloc(small_maxclass+1);
+					    SMALL_MAXCLASS) {
+						ret = imalloc(SMALL_MAXCLASS+1);
 						if (ret != NULL) {
 							arena_prof_promoted(ret,
 							    usize);
@@ -1368,72 +1156,61 @@ OOM:
 					} else
 						ret = imalloc(size);
 				}
-			} else
-#endif
-			{
-#ifdef JEMALLOC_STATS
-				usize = s2u(size);
-#endif
+			} else {
+				if (config_stats || (config_valgrind &&
+				    opt_valgrind))
+					usize = s2u(size);
 				ret = imalloc(size);
 			}
 		}
 
 		if (ret == NULL) {
-#ifdef JEMALLOC_XMALLOC
-			if (opt_xmalloc) {
+			if (config_xmalloc && opt_xmalloc) {
 				malloc_write("<jemalloc>: Error in realloc(): "
 				    "out of memory\n");
 				abort();
 			}
-#endif
-			errno = ENOMEM;
+			set_errno(ENOMEM);
 		}
 	}
 
-#ifdef JEMALLOC_SYSV
-RETURN:
-#endif
-#ifdef JEMALLOC_PROF
-	if (opt_prof)
+label_return:
+	if (config_prof && opt_prof)
 		prof_realloc(ret, usize, cnt, old_size, old_ctx);
-#endif
-#ifdef JEMALLOC_STATS
-	if (ret != NULL) {
-		assert(usize == isalloc(ret));
-		ALLOCATED_ADD(usize, old_size);
+	if (config_stats && ret != NULL) {
+		thread_allocated_t *ta;
+		assert(usize == isalloc(ret, config_prof));
+		ta = thread_allocated_tsd_get();
+		ta->allocated += usize;
+		ta->deallocated += old_size;
 	}
-#endif
+	UTRACE(ptr, size, ret);
+	JEMALLOC_VALGRIND_REALLOC(ret, usize, ptr, old_size, old_rzsize, false);
 	return (ret);
 }
 
-JEMALLOC_ATTR(visibility("default"))
 void
-JEMALLOC_P(free)(void *ptr)
+je_free(void *ptr)
 {
 
+	UTRACE(ptr, 0, 0);
 	if (ptr != NULL) {
-#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
 		size_t usize;
-#endif
+		size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
 
-		assert(malloc_initialized || malloc_initializer ==
-		    pthread_self());
+		assert(malloc_initialized || IS_INITIALIZER);
 
-#ifdef JEMALLOC_STATS
-		usize = isalloc(ptr);
-#endif
-#ifdef JEMALLOC_PROF
-		if (opt_prof) {
-#  ifndef JEMALLOC_STATS
-			usize = isalloc(ptr);
-#  endif
+		if (config_prof && opt_prof) {
+			usize = isalloc(ptr, config_prof);
 			prof_free(ptr, usize);
-		}
-#endif
-#ifdef JEMALLOC_STATS
-		ALLOCATED_ADD(0, usize);
-#endif
-		idalloc(ptr);
+		} else if (config_stats || config_valgrind)
+			usize = isalloc(ptr, config_prof);
+		if (config_stats)
+			thread_allocated_tsd_get()->deallocated += usize;
+		if (config_valgrind && opt_valgrind)
+			rzsize = p2rz(ptr);
+		iqalloc(ptr);
+		JEMALLOC_VALGRIND_FREE(ptr, rzsize);
 	}
 }
 
@@ -1443,51 +1220,56 @@ JEMALLOC_P(free)(void *ptr)
 /******************************************************************************/
 /*
  * Begin non-standard override functions.
- *
- * These overrides are omitted if the JEMALLOC_PREFIX is defined, since the
- * entire point is to avoid accidental mixed allocator usage.
  */
-#ifndef JEMALLOC_PREFIX
 
 #ifdef JEMALLOC_OVERRIDE_MEMALIGN
-JEMALLOC_ATTR(malloc)
-JEMALLOC_ATTR(visibility("default"))
 void *
-JEMALLOC_P(memalign)(size_t alignment, size_t size)
+je_memalign(size_t alignment, size_t size)
 {
-	void *ret;
-#ifdef JEMALLOC_CC_SILENCE
-	int result =
-#endif
-	    imemalign(&ret, alignment, size);
-#ifdef JEMALLOC_CC_SILENCE
-	if (result != 0)
-		return (NULL);
-#endif
+	void *ret JEMALLOC_CC_SILENCE_INIT(NULL);
+	imemalign(&ret, alignment, size, 1);
+	JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false);
 	return (ret);
 }
 #endif
 
 #ifdef JEMALLOC_OVERRIDE_VALLOC
-JEMALLOC_ATTR(malloc)
-JEMALLOC_ATTR(visibility("default"))
 void *
-JEMALLOC_P(valloc)(size_t size)
+je_valloc(size_t size)
 {
-	void *ret;
-#ifdef JEMALLOC_CC_SILENCE
-	int result =
-#endif
-	    imemalign(&ret, PAGE_SIZE, size);
-#ifdef JEMALLOC_CC_SILENCE
-	if (result != 0)
-		return (NULL);
-#endif
+	void *ret JEMALLOC_CC_SILENCE_INIT(NULL);
+	imemalign(&ret, PAGE, size, 1);
+	JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false);
 	return (ret);
 }
 #endif
 
-#endif /* JEMALLOC_PREFIX */
+/*
+ * is_malloc(je_malloc) is some macro magic to detect if jemalloc_defs.h has
+ * #define je_malloc malloc
+ */
+#define	malloc_is_malloc 1
+#define	is_malloc_(a) malloc_is_ ## a
+#define	is_malloc(a) is_malloc_(a)
+
+#if ((is_malloc(je_malloc) == 1) && defined(__GLIBC__) && !defined(__UCLIBC__))
+/*
+ * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible
+ * to inconsistently reference libc's malloc(3)-compatible functions
+ * (https://bugzilla.mozilla.org/show_bug.cgi?id=493541).
+ *
+ * These definitions interpose hooks in glibc.  The functions are actually
+ * passed an extra argument for the caller return address, which will be
+ * ignored.
+ */
+JEMALLOC_EXPORT void (* const __free_hook)(void *ptr) = je_free;
+JEMALLOC_EXPORT void *(* const __malloc_hook)(size_t size) = je_malloc;
+JEMALLOC_EXPORT void *(* const __realloc_hook)(void *ptr, size_t size) =
+    je_realloc;
+JEMALLOC_EXPORT void *(* const __memalign_hook)(size_t alignment, size_t size) =
+    je_memalign;
+#endif
+
 /*
  * End non-standard override functions.
  */
@@ -1496,36 +1278,31 @@ JEMALLOC_P(valloc)(size_t size)
  * Begin non-standard functions.
  */
 
-JEMALLOC_ATTR(visibility("default"))
 size_t
-JEMALLOC_P(malloc_usable_size)(const void *ptr)
+je_malloc_usable_size(const void *ptr)
 {
 	size_t ret;
 
-	assert(malloc_initialized || malloc_initializer == pthread_self());
+	assert(malloc_initialized || IS_INITIALIZER);
 
-#ifdef JEMALLOC_IVSALLOC
-	ret = ivsalloc(ptr);
-#else
-	assert(ptr != NULL);
-	ret = isalloc(ptr);
-#endif
+	if (config_ivsalloc)
+		ret = ivsalloc(ptr, config_prof);
+	else
+		ret = (ptr != NULL) ? isalloc(ptr, config_prof) : 0;
 
 	return (ret);
 }
 
-JEMALLOC_ATTR(visibility("default"))
 void
-JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *),
-    void *cbopaque, const char *opts)
+je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *opts)
 {
 
 	stats_print(write_cb, cbopaque, opts);
 }
 
-JEMALLOC_ATTR(visibility("default"))
 int
-JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, void *newp,
+je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp,
     size_t newlen)
 {
 
@@ -1535,9 +1312,8 @@ JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, void *newp,
 	return (ctl_byname(name, oldp, oldlenp, newp, newlen));
 }
 
-JEMALLOC_ATTR(visibility("default"))
 int
-JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, size_t *miblenp)
+je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp)
 {
 
 	if (malloc_init())
@@ -1546,10 +1322,9 @@ JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, size_t *miblenp)
 	return (ctl_nametomib(name, mibp, miblenp));
 }
 
-JEMALLOC_ATTR(visibility("default"))
 int
-JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp,
-    size_t *oldlenp, void *newp, size_t newlen)
+je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+  void *newp, size_t newlen)
 {
 
 	if (malloc_init())
@@ -1558,12 +1333,21 @@ JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp,
 	return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen));
 }
 
+/*
+ * End non-standard functions.
+ */
+/******************************************************************************/
+/*
+ * Begin experimental functions.
+ */
+#ifdef JEMALLOC_EXPERIMENTAL
+
 JEMALLOC_INLINE void *
 iallocm(size_t usize, size_t alignment, bool zero)
 {
 
-	assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, alignment,
-	    NULL)));
+	assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize,
+	    alignment)));
 
 	if (alignment != 0)
 		return (ipalloc(usize, alignment, zero));
@@ -1573,116 +1357,96 @@ iallocm(size_t usize, size_t alignment, bool zero)
 		return (imalloc(usize));
 }
 
-JEMALLOC_ATTR(nonnull(1))
-JEMALLOC_ATTR(visibility("default"))
 int
-JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
+je_allocm(void **ptr, size_t *rsize, size_t size, int flags)
 {
 	void *p;
 	size_t usize;
 	size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK)
 	    & (SIZE_T_MAX-1));
 	bool zero = flags & ALLOCM_ZERO;
-#ifdef JEMALLOC_PROF
-	prof_thr_cnt_t *cnt;
-#endif
 
 	assert(ptr != NULL);
 	assert(size != 0);
 
 	if (malloc_init())
-		goto OOM;
+		goto label_oom;
 
-	usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL);
+	usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
 	if (usize == 0)
-		goto OOM;
+		goto label_oom;
+
+	if (config_prof && opt_prof) {
+		prof_thr_cnt_t *cnt;
 
-#ifdef JEMALLOC_PROF
-	if (opt_prof) {
 		PROF_ALLOC_PREP(1, usize, cnt);
 		if (cnt == NULL)
-			goto OOM;
+			goto label_oom;
 		if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <=
-		    small_maxclass) {
+		    SMALL_MAXCLASS) {
 			size_t usize_promoted = (alignment == 0) ?
-			    s2u(small_maxclass+1) : sa2u(small_maxclass+1,
-			    alignment, NULL);
+			    s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1,
+			    alignment);
 			assert(usize_promoted != 0);
 			p = iallocm(usize_promoted, alignment, zero);
 			if (p == NULL)
-				goto OOM;
+				goto label_oom;
 			arena_prof_promoted(p, usize);
 		} else {
 			p = iallocm(usize, alignment, zero);
 			if (p == NULL)
-				goto OOM;
+				goto label_oom;
 		}
 		prof_malloc(p, usize, cnt);
-		if (rsize != NULL)
-			*rsize = usize;
-	} else
-#endif
-	{
+	} else {
 		p = iallocm(usize, alignment, zero);
 		if (p == NULL)
-			goto OOM;
-#ifndef JEMALLOC_STATS
-		if (rsize != NULL)
-#endif
-		{
-#ifdef JEMALLOC_STATS
-			if (rsize != NULL)
-#endif
-				*rsize = usize;
-		}
+			goto label_oom;
 	}
+	if (rsize != NULL)
+		*rsize = usize;
 
 	*ptr = p;
-#ifdef JEMALLOC_STATS
-	assert(usize == isalloc(p));
-	ALLOCATED_ADD(usize, 0);
-#endif
+	if (config_stats) {
+		assert(usize == isalloc(p, config_prof));
+		thread_allocated_tsd_get()->allocated += usize;
+	}
+	UTRACE(0, size, p);
+	JEMALLOC_VALGRIND_MALLOC(true, p, usize, zero);
 	return (ALLOCM_SUCCESS);
-OOM:
-#ifdef JEMALLOC_XMALLOC
-	if (opt_xmalloc) {
+label_oom:
+	if (config_xmalloc && opt_xmalloc) {
 		malloc_write("<jemalloc>: Error in allocm(): "
 		    "out of memory\n");
 		abort();
 	}
-#endif
 	*ptr = NULL;
+	UTRACE(0, size, 0);
 	return (ALLOCM_ERR_OOM);
 }
 
-JEMALLOC_ATTR(nonnull(1))
-JEMALLOC_ATTR(visibility("default"))
 int
-JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
-    int flags)
+je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags)
 {
 	void *p, *q;
 	size_t usize;
-#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
 	size_t old_size;
-#endif
+	size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
 	size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK)
 	    & (SIZE_T_MAX-1));
 	bool zero = flags & ALLOCM_ZERO;
 	bool no_move = flags & ALLOCM_NO_MOVE;
-#ifdef JEMALLOC_PROF
-	prof_thr_cnt_t *cnt;
-#endif
 
 	assert(ptr != NULL);
 	assert(*ptr != NULL);
 	assert(size != 0);
 	assert(SIZE_T_MAX - size >= extra);
-	assert(malloc_initialized || malloc_initializer == pthread_self());
+	assert(malloc_initialized || IS_INITIALIZER);
 
 	p = *ptr;
-#ifdef JEMALLOC_PROF
-	if (opt_prof) {
+	if (config_prof && opt_prof) {
+		prof_thr_cnt_t *cnt;
+
 		/*
 		 * usize isn't knowable before iralloc() returns when extra is
 		 * non-zero.  Therefore, compute its maximum possible value and
@@ -1691,191 +1455,284 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
 		 * decide whether to sample.
 		 */
 		size_t max_usize = (alignment == 0) ? s2u(size+extra) :
-		    sa2u(size+extra, alignment, NULL);
+		    sa2u(size+extra, alignment);
 		prof_ctx_t *old_ctx = prof_ctx_get(p);
-		old_size = isalloc(p);
+		old_size = isalloc(p, true);
+		if (config_valgrind && opt_valgrind)
+			old_rzsize = p2rz(p);
 		PROF_ALLOC_PREP(1, max_usize, cnt);
 		if (cnt == NULL)
-			goto OOM;
+			goto label_oom;
 		/*
 		 * Use minimum usize to determine whether promotion may happen.
 		 */
 		if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U
-		    && ((alignment == 0) ? s2u(size) : sa2u(size,
-		    alignment, NULL)) <= small_maxclass) {
-			q = iralloc(p, small_maxclass+1, (small_maxclass+1 >=
-			    size+extra) ? 0 : size+extra - (small_maxclass+1),
+		    && ((alignment == 0) ? s2u(size) : sa2u(size, alignment))
+		    <= SMALL_MAXCLASS) {
+			q = iralloc(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >=
+			    size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1),
 			    alignment, zero, no_move);
 			if (q == NULL)
-				goto ERR;
-			if (max_usize < PAGE_SIZE) {
+				goto label_err;
+			if (max_usize < PAGE) {
 				usize = max_usize;
 				arena_prof_promoted(q, usize);
 			} else
-				usize = isalloc(q);
+				usize = isalloc(q, config_prof);
 		} else {
 			q = iralloc(p, size, extra, alignment, zero, no_move);
 			if (q == NULL)
-				goto ERR;
-			usize = isalloc(q);
+				goto label_err;
+			usize = isalloc(q, config_prof);
 		}
 		prof_realloc(q, usize, cnt, old_size, old_ctx);
 		if (rsize != NULL)
 			*rsize = usize;
-	} else
-#endif
-	{
-#ifdef JEMALLOC_STATS
-		old_size = isalloc(p);
-#endif
+	} else {
+		if (config_stats) {
+			old_size = isalloc(p, false);
+			if (config_valgrind && opt_valgrind)
+				old_rzsize = u2rz(old_size);
+		} else if (config_valgrind && opt_valgrind) {
+			old_size = isalloc(p, false);
+			old_rzsize = u2rz(old_size);
+		}
 		q = iralloc(p, size, extra, alignment, zero, no_move);
 		if (q == NULL)
-			goto ERR;
-#ifndef JEMALLOC_STATS
-		if (rsize != NULL)
-#endif
-		{
-			usize = isalloc(q);
-#ifdef JEMALLOC_STATS
-			if (rsize != NULL)
-#endif
-				*rsize = usize;
+			goto label_err;
+		if (config_stats)
+			usize = isalloc(q, config_prof);
+		if (rsize != NULL) {
+			if (config_stats == false)
+				usize = isalloc(q, config_prof);
+			*rsize = usize;
 		}
 	}
 
 	*ptr = q;
-#ifdef JEMALLOC_STATS
-	ALLOCATED_ADD(usize, old_size);
-#endif
+	if (config_stats) {
+		thread_allocated_t *ta;
+		ta = thread_allocated_tsd_get();
+		ta->allocated += usize;
+		ta->deallocated += old_size;
+	}
+	UTRACE(p, size, q);
+	JEMALLOC_VALGRIND_REALLOC(q, usize, p, old_size, old_rzsize, zero);
 	return (ALLOCM_SUCCESS);
-ERR:
-	if (no_move)
+label_err:
+	if (no_move) {
+		UTRACE(p, size, q);
 		return (ALLOCM_ERR_NOT_MOVED);
-#ifdef JEMALLOC_PROF
-OOM:
-#endif
-#ifdef JEMALLOC_XMALLOC
-	if (opt_xmalloc) {
+	}
+label_oom:
+	if (config_xmalloc && opt_xmalloc) {
 		malloc_write("<jemalloc>: Error in rallocm(): "
 		    "out of memory\n");
 		abort();
 	}
-#endif
+	UTRACE(p, size, 0);
 	return (ALLOCM_ERR_OOM);
 }
 
-JEMALLOC_ATTR(nonnull(1))
-JEMALLOC_ATTR(visibility("default"))
 int
-JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags)
+je_sallocm(const void *ptr, size_t *rsize, int flags)
 {
 	size_t sz;
 
-	assert(malloc_initialized || malloc_initializer == pthread_self());
+	assert(malloc_initialized || IS_INITIALIZER);
 
-#ifdef JEMALLOC_IVSALLOC
-	sz = ivsalloc(ptr);
-#else
-	assert(ptr != NULL);
-	sz = isalloc(ptr);
-#endif
+	if (config_ivsalloc)
+		sz = ivsalloc(ptr, config_prof);
+	else {
+		assert(ptr != NULL);
+		sz = isalloc(ptr, config_prof);
+	}
 	assert(rsize != NULL);
 	*rsize = sz;
 
 	return (ALLOCM_SUCCESS);
 }
 
-JEMALLOC_ATTR(nonnull(1))
-JEMALLOC_ATTR(visibility("default"))
 int
-JEMALLOC_P(dallocm)(void *ptr, int flags)
+je_dallocm(void *ptr, int flags)
 {
-#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
 	size_t usize;
-#endif
+	size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
 
 	assert(ptr != NULL);
-	assert(malloc_initialized || malloc_initializer == pthread_self());
-
-#ifdef JEMALLOC_STATS
-	usize = isalloc(ptr);
-#endif
-#ifdef JEMALLOC_PROF
-	if (opt_prof) {
-#  ifndef JEMALLOC_STATS
-		usize = isalloc(ptr);
-#  endif
+	assert(malloc_initialized || IS_INITIALIZER);
+
+	UTRACE(ptr, 0, 0);
+	if (config_stats || config_valgrind)
+		usize = isalloc(ptr, config_prof);
+	if (config_prof && opt_prof) {
+		if (config_stats == false && config_valgrind == false)
+			usize = isalloc(ptr, config_prof);
 		prof_free(ptr, usize);
 	}
-#endif
-#ifdef JEMALLOC_STATS
-	ALLOCATED_ADD(0, usize);
-#endif
-	idalloc(ptr);
+	if (config_stats)
+		thread_allocated_tsd_get()->deallocated += usize;
+	if (config_valgrind && opt_valgrind)
+		rzsize = p2rz(ptr);
+	iqalloc(ptr);
+	JEMALLOC_VALGRIND_FREE(ptr, rzsize);
 
 	return (ALLOCM_SUCCESS);
 }
 
+int
+je_nallocm(size_t *rsize, size_t size, int flags)
+{
+	size_t usize;
+	size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK)
+	    & (SIZE_T_MAX-1));
+	/* Size query: computes usize for (size, flags); returns no memory. */
+	assert(size != 0);
+
+	if (malloc_init())
+		return (ALLOCM_ERR_OOM);
+	/* alignment == 0 takes the s2u() path; else sa2u() gets alignment. */
+	usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
+	if (usize == 0)
+		return (ALLOCM_ERR_OOM);
+	/* rsize is optional; the call succeeds whether or not it is given. */
+	if (rsize != NULL)
+		*rsize = usize;
+	return (ALLOCM_SUCCESS);
+}
+
+#endif
 /*
- * End non-standard functions.
+ * End experimental functions.
  */
 /******************************************************************************/
-
 /*
  * The following functions are used by threading libraries for protection of
  * malloc during fork().
  */
 
+#ifndef JEMALLOC_MUTEX_INIT_CB
 void
 jemalloc_prefork(void)
+#else
+JEMALLOC_EXPORT void
+_malloc_prefork(void)
+#endif
 {
 	unsigned i;
 
-	/* Acquire all mutexes in a safe order. */
+#ifdef JEMALLOC_MUTEX_INIT_CB
+	if (malloc_initialized == false)
+		return;
+#endif
+	assert(malloc_initialized);
 
-	malloc_mutex_lock(&arenas_lock);
+	/* Acquire all mutexes in a safe order. */
+	malloc_mutex_prefork(&arenas_lock);
 	for (i = 0; i < narenas; i++) {
 		if (arenas[i] != NULL)
-			malloc_mutex_lock(&arenas[i]->lock);
+			arena_prefork(arenas[i]);
 	}
+	base_prefork();
+	huge_prefork();
+	chunk_dss_prefork();
+}
 
-	malloc_mutex_lock(&base_mtx);
-
-	malloc_mutex_lock(&huge_mtx);
-
-#ifdef JEMALLOC_DSS
-	malloc_mutex_lock(&dss_mtx);
+#ifndef JEMALLOC_MUTEX_INIT_CB
+void
+jemalloc_postfork_parent(void)
+#else
+JEMALLOC_EXPORT void
+_malloc_postfork(void)
 #endif
+{
+	unsigned i;
 
-#ifdef JEMALLOC_SWAP
-	malloc_mutex_lock(&swap_mtx);
+#ifdef JEMALLOC_MUTEX_INIT_CB
+	if (malloc_initialized == false)
+		return;
 #endif
+	assert(malloc_initialized);
+
+	/* Release all mutexes, now that fork() has completed. */
+	chunk_dss_postfork_parent();
+	huge_postfork_parent();
+	base_postfork_parent();
+	for (i = 0; i < narenas; i++) {
+		if (arenas[i] != NULL)
+			arena_postfork_parent(arenas[i]);
+	}
+	malloc_mutex_postfork_parent(&arenas_lock);
 }
 
 void
-jemalloc_postfork(void)
+jemalloc_postfork_child(void)
 {
 	unsigned i;
 
+	assert(malloc_initialized);
+
 	/* Release all mutexes, now that fork() has completed. */
+	chunk_dss_postfork_child();
+	huge_postfork_child();
+	base_postfork_child();
+	for (i = 0; i < narenas; i++) {
+		if (arenas[i] != NULL)
+			arena_postfork_child(arenas[i]);
+	}
+	malloc_mutex_postfork_child(&arenas_lock);
+}
 
-#ifdef JEMALLOC_SWAP
-	malloc_mutex_unlock(&swap_mtx);
-#endif
+/******************************************************************************/
+/*
+ * The following functions are used for TLS allocation/deallocation in static
+ * binaries on FreeBSD.  The primary difference between these and i[mcd]alloc()
+ * is that these avoid accessing TLS variables.
+ */
 
-#ifdef JEMALLOC_DSS
-	malloc_mutex_unlock(&dss_mtx);
-#endif
+static void *
+a0alloc(size_t size, bool zero)
+{
+	/* Serves the request from arenas[0], or huge_malloc() if too large. */
+	if (malloc_init())
+		return (NULL);
 
-	malloc_mutex_unlock(&huge_mtx);
+	if (size == 0)
+		size = 1;	/* zero-byte requests are bumped to one byte */
 
-	malloc_mutex_unlock(&base_mtx);
+	if (size <= arena_maxclass)
+		return (arena_malloc(arenas[0], size, zero, false));
+	else
+		return (huge_malloc(size, zero));
+}
 
-	for (i = 0; i < narenas; i++) {
-		if (arenas[i] != NULL)
-			malloc_mutex_unlock(&arenas[i]->lock);
-	}
-	malloc_mutex_unlock(&arenas_lock);
+void *
+a0malloc(size_t size)
+{
+	/* Forwards to a0alloc() with zero == false. */
+	return (a0alloc(size, false));
+}
+
+void *
+a0calloc(size_t num, size_t size)
+{
+	return ((size != 0 && num > SIZE_T_MAX / size) ? NULL /* overflow */ :
+	    a0alloc(num * size, true));	/* zeroed, num * size bytes */
+}
+
+void
+a0free(void *ptr)
+{
+	arena_chunk_t *chunk;
+	/* Frees via arena_dalloc() or huge_dalloc(); NULL is a no-op. */
+	if (ptr == NULL)
+		return;
+	/* A ptr equal to its own chunk base goes down the huge path. */
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	if (chunk != ptr)
+		arena_dalloc(chunk->arena, chunk, ptr, false);
+	else
+		huge_dalloc(ptr, true);
 }
 
 /******************************************************************************/
diff --git a/deps/jemalloc/src/mutex.c b/deps/jemalloc/src/mutex.c
index ca89ef1c..37a843e6 100644
--- a/deps/jemalloc/src/mutex.c
+++ b/deps/jemalloc/src/mutex.c
@@ -1,14 +1,26 @@
 #define	JEMALLOC_MUTEX_C_
 #include "jemalloc/internal/jemalloc_internal.h"
 
+#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32)
+#include <dlfcn.h>
+#endif
+
+#ifndef _CRT_SPINCOUNT
+#define _CRT_SPINCOUNT 4000
+#endif
+
 /******************************************************************************/
 /* Data. */
 
 #ifdef JEMALLOC_LAZY_LOCK
 bool isthreaded = false;
 #endif
+#ifdef JEMALLOC_MUTEX_INIT_CB
+static bool		postpone_init = true;
+static malloc_mutex_t	*postponed_mutexes = NULL;
+#endif
 
-#ifdef JEMALLOC_LAZY_LOCK
+#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32)
 static void	pthread_create_once(void);
 #endif
 
@@ -18,7 +30,7 @@ static void	pthread_create_once(void);
  * process goes multi-threaded.
  */
 
-#ifdef JEMALLOC_LAZY_LOCK
+#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32)
 static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
     void *(*)(void *), void *__restrict);
 
@@ -36,8 +48,7 @@ pthread_create_once(void)
 	isthreaded = true;
 }
 
-JEMALLOC_ATTR(visibility("default"))
-int
+JEMALLOC_EXPORT int
 pthread_create(pthread_t *__restrict thread,
     const pthread_attr_t *__restrict attr, void *(*start_routine)(void *),
     void *__restrict arg)
@@ -52,39 +63,87 @@ pthread_create(pthread_t *__restrict thread,
 
 /******************************************************************************/
 
+#ifdef JEMALLOC_MUTEX_INIT_CB
+int	_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
+    void *(calloc_cb)(size_t, size_t));
+#endif
+
 bool
 malloc_mutex_init(malloc_mutex_t *mutex)
 {
-#ifdef JEMALLOC_OSSPIN
-	*mutex = 0;
+
+#ifdef _WIN32
+	if (!InitializeCriticalSectionAndSpinCount(&mutex->lock,
+	    _CRT_SPINCOUNT))
+		return (true);
+#elif (defined(JEMALLOC_OSSPIN))
+	mutex->lock = 0;
+#elif (defined(JEMALLOC_MUTEX_INIT_CB))
+	if (postpone_init) {
+		mutex->postponed_next = postponed_mutexes;
+		postponed_mutexes = mutex;
+	} else {
+		if (_pthread_mutex_init_calloc_cb(&mutex->lock, base_calloc) !=
+		    0)
+			return (true);
+	}
 #else
 	pthread_mutexattr_t attr;
 
 	if (pthread_mutexattr_init(&attr) != 0)
 		return (true);
-#ifdef PTHREAD_MUTEX_ADAPTIVE_NP
-	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
-#else
-	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT);
-#endif
-	if (pthread_mutex_init(mutex, &attr) != 0) {
+	pthread_mutexattr_settype(&attr, MALLOC_MUTEX_TYPE);
+	if (pthread_mutex_init(&mutex->lock, &attr) != 0) {
 		pthread_mutexattr_destroy(&attr);
 		return (true);
 	}
 	pthread_mutexattr_destroy(&attr);
-
 #endif
 	return (false);
 }
 
 void
-malloc_mutex_destroy(malloc_mutex_t *mutex)
+malloc_mutex_prefork(malloc_mutex_t *mutex)
 {
 
-#ifndef JEMALLOC_OSSPIN
-	if (pthread_mutex_destroy(mutex) != 0) {
-		malloc_write("<jemalloc>: Error in pthread_mutex_destroy()\n");
-		abort();
+	malloc_mutex_lock(mutex);
+}
+
+void
+malloc_mutex_postfork_parent(malloc_mutex_t *mutex)
+{
+	/* Unlocks mutex; malloc_mutex_prefork() is what locked it. */
+	malloc_mutex_unlock(mutex);
+}
+
+void
+malloc_mutex_postfork_child(malloc_mutex_t *mutex)
+{
+
+#ifdef JEMALLOC_MUTEX_INIT_CB
+	malloc_mutex_unlock(mutex);
+#else
+	if (malloc_mutex_init(mutex)) {
+		malloc_printf("<jemalloc>: Error re-initializing mutex in "
+		    "child\n");
+		if (opt_abort)
+			abort();
 	}
 #endif
 }
+
+bool
+mutex_boot(void)
+{
+	/* Init mutexes queued by malloc_mutex_init(); returns true on error. */
+#ifdef JEMALLOC_MUTEX_INIT_CB
+	postpone_init = false;	/* malloc_mutex_init() stops queueing */
+	while (postponed_mutexes != NULL) {
+		if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock,
+		    base_calloc) != 0)
+			return (true);
+		postponed_mutexes = postponed_mutexes->postponed_next;
+	}
+#endif
+	return (false);
+}
diff --git a/deps/jemalloc/src/prof.c b/deps/jemalloc/src/prof.c
index 8a144b4e..de1d3929 100644
--- a/deps/jemalloc/src/prof.c
+++ b/deps/jemalloc/src/prof.c
@@ -1,6 +1,5 @@
 #define	JEMALLOC_PROF_C_
 #include "jemalloc/internal/jemalloc_internal.h"
-#ifdef JEMALLOC_PROF
 /******************************************************************************/
 
 #ifdef JEMALLOC_PROF_LIBUNWIND
@@ -15,27 +14,30 @@
 /******************************************************************************/
 /* Data. */
 
+malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL)
+
 bool		opt_prof = false;
 bool		opt_prof_active = true;
-size_t		opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT;
 size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
 ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
 bool		opt_prof_gdump = false;
+bool		opt_prof_final = true;
 bool		opt_prof_leak = false;
-bool		opt_prof_accum = true;
-ssize_t		opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT;
+bool		opt_prof_accum = false;
 char		opt_prof_prefix[PATH_MAX + 1];
 
 uint64_t	prof_interval;
 bool		prof_promote;
 
-unsigned	prof_bt_max;
-
-#ifndef NO_TLS
-__thread prof_tdata_t	*prof_tdata_tls
-    JEMALLOC_ATTR(tls_model("initial-exec"));
-#endif
-pthread_key_t	prof_tdata_tsd;
+/*
+ * Table of mutexes that are shared among ctx's.  These are leaf locks, so
+ * there is no problem with using them for more than one ctx at the same time.
+ * The primary motivation for this sharing though is that ctx's are ephemeral,
+ * and destroying mutexes causes complications for systems that allocate when
+ * creating/destroying mutexes.
+ */
+static malloc_mutex_t	*ctx_locks;
+static unsigned		cum_ctxs; /* Atomic counter. */
 
 /*
  * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
@@ -55,18 +57,13 @@ static uint64_t		prof_dump_useq;
  * all profile dumps.  The buffer is implicitly protected by bt2ctx_mtx, since
  * it must be locked anyway during dumping.
  */
-static char		prof_dump_buf[PROF_DUMP_BUF_SIZE];
+static char		prof_dump_buf[PROF_DUMP_BUFSIZE];
 static unsigned		prof_dump_buf_end;
 static int		prof_dump_fd;
 
 /* Do not dump any profiles until bootstrapping is complete. */
 static bool		prof_booted = false;
 
-static malloc_mutex_t	enq_mtx;
-static bool		enq;
-static bool		enq_idump;
-static bool		enq_gdump;
-
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
 
@@ -79,22 +76,24 @@ static _Unwind_Reason_Code	prof_unwind_callback(
     struct _Unwind_Context *context, void *arg);
 #endif
 static bool	prof_flush(bool propagate_err);
-static bool	prof_write(const char *s, bool propagate_err);
+static bool	prof_write(bool propagate_err, const char *s);
+static bool	prof_printf(bool propagate_err, const char *format, ...)
+    JEMALLOC_ATTR(format(printf, 2, 3));
 static void	prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
     size_t *leak_nctx);
 static void	prof_ctx_destroy(prof_ctx_t *ctx);
 static void	prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt);
-static bool	prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt,
-    bool propagate_err);
+static bool	prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx,
+    prof_bt_t *bt);
 static bool	prof_dump_maps(bool propagate_err);
-static bool	prof_dump(const char *filename, bool leakcheck,
-    bool propagate_err);
+static bool	prof_dump(bool propagate_err, const char *filename,
+    bool leakcheck);
 static void	prof_dump_filename(char *filename, char v, int64_t vseq);
 static void	prof_fdump(void);
 static void	prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
     size_t *hash2);
 static bool	prof_bt_keycomp(const void *k1, const void *k2);
-static void	prof_tdata_cleanup(void *arg);
+static malloc_mutex_t	*prof_ctx_mutex_choose(void);
 
 /******************************************************************************/
 
@@ -102,6 +101,8 @@ void
 bt_init(prof_bt_t *bt, void **vec)
 {
 
+	cassert(config_prof);
+
 	bt->vec = vec;
 	bt->len = 0;
 }
@@ -110,6 +111,8 @@ static void
 bt_destroy(prof_bt_t *bt)
 {
 
+	cassert(config_prof);
+
 	idalloc(bt);
 }
 
@@ -118,6 +121,8 @@ bt_dup(prof_bt_t *bt)
 {
 	prof_bt_t *ret;
 
+	cassert(config_prof);
+
 	/*
 	 * Create a single allocation that has space for vec immediately
 	 * following the prof_bt_t structure.  The backtraces that get
@@ -138,30 +143,32 @@ bt_dup(prof_bt_t *bt)
 }
 
 static inline void
-prof_enter(void)
+prof_enter(prof_tdata_t *prof_tdata)
 {
 
-	malloc_mutex_lock(&enq_mtx);
-	enq = true;
-	malloc_mutex_unlock(&enq_mtx);
+	cassert(config_prof);
+
+	assert(prof_tdata->enq == false);
+	prof_tdata->enq = true;
 
 	malloc_mutex_lock(&bt2ctx_mtx);
 }
 
 static inline void
-prof_leave(void)
+prof_leave(prof_tdata_t *prof_tdata)
 {
 	bool idump, gdump;
 
+	cassert(config_prof);
+
 	malloc_mutex_unlock(&bt2ctx_mtx);
 
-	malloc_mutex_lock(&enq_mtx);
-	enq = false;
-	idump = enq_idump;
-	enq_idump = false;
-	gdump = enq_gdump;
-	enq_gdump = false;
-	malloc_mutex_unlock(&enq_mtx);
+	assert(prof_tdata->enq);
+	prof_tdata->enq = false;
+	idump = prof_tdata->enq_idump;
+	prof_tdata->enq_idump = false;
+	gdump = prof_tdata->enq_gdump;
+	prof_tdata->enq_gdump = false;
 
 	if (idump)
 		prof_idump();
@@ -171,16 +178,16 @@ prof_leave(void)
 
 #ifdef JEMALLOC_PROF_LIBUNWIND
 void
-prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
+prof_backtrace(prof_bt_t *bt, unsigned nignore)
 {
 	unw_context_t uc;
 	unw_cursor_t cursor;
 	unsigned i;
 	int err;
 
+	cassert(config_prof);
 	assert(bt->len == 0);
 	assert(bt->vec != NULL);
-	assert(max <= (1U << opt_lg_prof_bt_max));
 
 	unw_getcontext(&uc);
 	unw_init_local(&cursor, &uc);
@@ -196,7 +203,7 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 	 * Iterate over stack frames until there are no more, or until no space
 	 * remains in bt.
 	 */
-	for (i = 0; i < max; i++) {
+	for (i = 0; i < PROF_BT_MAX; i++) {
 		unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
 		bt->len++;
 		err = unw_step(&cursor);
@@ -204,12 +211,13 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 			break;
 	}
 }
-#endif
-#ifdef JEMALLOC_PROF_LIBGCC
+#elif (defined(JEMALLOC_PROF_LIBGCC))
 static _Unwind_Reason_Code
 prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
 {
 
+	cassert(config_prof);
+
 	return (_URC_NO_REASON);
 }
 
@@ -218,6 +226,8 @@ prof_unwind_callback(struct _Unwind_Context *context, void *arg)
 {
 	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
 
+	cassert(config_prof);
+
 	if (data->nignore > 0)
 		data->nignore--;
 	else {
@@ -231,19 +241,20 @@ prof_unwind_callback(struct _Unwind_Context *context, void *arg)
 }
 
 void
-prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
+prof_backtrace(prof_bt_t *bt, unsigned nignore)
 {
-	prof_unwind_data_t data = {bt, nignore, max};
+	prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX};
+
+	cassert(config_prof);
 
 	_Unwind_Backtrace(prof_unwind_callback, &data);
 }
-#endif
-#ifdef JEMALLOC_PROF_GCC
+#elif (defined(JEMALLOC_PROF_GCC))
 void
-prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
+prof_backtrace(prof_bt_t *bt, unsigned nignore)
 {
 #define	BT_FRAME(i)							\
-	if ((i) < nignore + max) {					\
+	if ((i) < nignore + PROF_BT_MAX) {				\
 		void *p;						\
 		if (__builtin_frame_address(i) == 0)			\
 			return;						\
@@ -257,8 +268,8 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 	} else								\
 		return;
 
+	cassert(config_prof);
 	assert(nignore <= 3);
-	assert(max <= (1U << opt_lg_prof_bt_max));
 
 	BT_FRAME(0)
 	BT_FRAME(1)
@@ -407,6 +418,14 @@ prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max)
 	BT_FRAME(130)
 #undef BT_FRAME
 }
+#else
+void
+prof_backtrace(prof_bt_t *bt, unsigned nignore)
+{
+
+	cassert(config_prof);
+	assert(false);
+}
 #endif
 
 prof_thr_cnt_t *
@@ -418,12 +437,11 @@ prof_lookup(prof_bt_t *bt)
 	} ret;
 	prof_tdata_t *prof_tdata;
 
-	prof_tdata = PROF_TCACHE_GET();
-	if (prof_tdata == NULL) {
-		prof_tdata = prof_tdata_init();
-		if (prof_tdata == NULL)
-			return (NULL);
-	}
+	cassert(config_prof);
+
+	prof_tdata = prof_tdata_get();
+	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+		return (NULL);
 
 	if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
 		union {
@@ -440,62 +458,51 @@ prof_lookup(prof_bt_t *bt)
 		 * This thread's cache lacks bt.  Look for it in the global
 		 * cache.
 		 */
-		prof_enter();
+		prof_enter(prof_tdata);
 		if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
 			/* bt has never been seen before.  Insert it. */
 			ctx.v = imalloc(sizeof(prof_ctx_t));
 			if (ctx.v == NULL) {
-				prof_leave();
+				prof_leave(prof_tdata);
 				return (NULL);
 			}
 			btkey.p = bt_dup(bt);
 			if (btkey.v == NULL) {
-				prof_leave();
+				prof_leave(prof_tdata);
 				idalloc(ctx.v);
 				return (NULL);
 			}
 			ctx.p->bt = btkey.p;
-			if (malloc_mutex_init(&ctx.p->lock)) {
-				prof_leave();
-				idalloc(btkey.v);
-				idalloc(ctx.v);
-				return (NULL);
-			}
+			ctx.p->lock = prof_ctx_mutex_choose();
+			/*
+			 * Set nlimbo to 1, in order to avoid a race condition
+			 * with prof_ctx_merge()/prof_ctx_destroy().
+			 */
+			ctx.p->nlimbo = 1;
 			memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t));
 			ql_new(&ctx.p->cnts_ql);
 			if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
 				/* OOM. */
-				prof_leave();
-				malloc_mutex_destroy(&ctx.p->lock);
+				prof_leave(prof_tdata);
 				idalloc(btkey.v);
 				idalloc(ctx.v);
 				return (NULL);
 			}
-			/*
-			 * Artificially raise curobjs, in order to avoid a race
-			 * condition with prof_ctx_merge()/prof_ctx_destroy().
-			 *
-			 * No locking is necessary for ctx here because no other
-			 * threads have had the opportunity to fetch it from
-			 * bt2ctx yet.
-			 */
-			ctx.p->cnt_merged.curobjs++;
 			new_ctx = true;
 		} else {
 			/*
-			 * Artificially raise curobjs, in order to avoid a race
-			 * condition with prof_ctx_merge()/prof_ctx_destroy().
+			 * Increment nlimbo, in order to avoid a race condition
+			 * with prof_ctx_merge()/prof_ctx_destroy().
 			 */
-			malloc_mutex_lock(&ctx.p->lock);
-			ctx.p->cnt_merged.curobjs++;
-			malloc_mutex_unlock(&ctx.p->lock);
+			malloc_mutex_lock(ctx.p->lock);
+			ctx.p->nlimbo++;
+			malloc_mutex_unlock(ctx.p->lock);
 			new_ctx = false;
 		}
-		prof_leave();
+		prof_leave(prof_tdata);
 
 		/* Link a prof_thd_cnt_t into ctx for this thread. */
-		if (opt_lg_prof_tcmax >= 0 && ckh_count(&prof_tdata->bt2cnt)
-		    == (ZU(1) << opt_lg_prof_tcmax)) {
+		if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) {
 			assert(ckh_count(&prof_tdata->bt2cnt) > 0);
 			/*
 			 * Flush the least recently used cnt in order to keep
@@ -510,9 +517,7 @@ prof_lookup(prof_bt_t *bt)
 			prof_ctx_merge(ret.p->ctx, ret.p);
 			/* ret can now be re-used. */
 		} else {
-			assert(opt_lg_prof_tcmax < 0 ||
-			    ckh_count(&prof_tdata->bt2cnt) < (ZU(1) <<
-			    opt_lg_prof_tcmax));
+			assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX);
 			/* Allocate and partially initialize a new cnt. */
 			ret.v = imalloc(sizeof(prof_thr_cnt_t));
 			if (ret.p == NULL) {
@@ -534,10 +539,10 @@ prof_lookup(prof_bt_t *bt)
 			return (NULL);
 		}
 		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
-		malloc_mutex_lock(&ctx.p->lock);
+		malloc_mutex_lock(ctx.p->lock);
 		ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
-		ctx.p->cnt_merged.curobjs--;
-		malloc_mutex_unlock(&ctx.p->lock);
+		ctx.p->nlimbo--;
+		malloc_mutex_unlock(ctx.p->lock);
 	} else {
 		/* Move ret to the front of the LRU. */
 		ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
@@ -553,6 +558,8 @@ prof_flush(bool propagate_err)
 	bool ret = false;
 	ssize_t err;
 
+	cassert(config_prof);
+
 	err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
 	if (err == -1) {
 		if (propagate_err == false) {
@@ -569,24 +576,26 @@ prof_flush(bool propagate_err)
 }
 
 static bool
-prof_write(const char *s, bool propagate_err)
+prof_write(bool propagate_err, const char *s)
 {
 	unsigned i, slen, n;
 
+	cassert(config_prof);
+
 	i = 0;
 	slen = strlen(s);
 	while (i < slen) {
 		/* Flush the buffer if it is full. */
-		if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE)
+		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE)
 			if (prof_flush(propagate_err) && propagate_err)
 				return (true);
 
-		if (prof_dump_buf_end + slen <= PROF_DUMP_BUF_SIZE) {
+		if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
 			/* Finish writing. */
 			n = slen - i;
 		} else {
 			/* Write as much of s as will fit. */
-			n = PROF_DUMP_BUF_SIZE - prof_dump_buf_end;
+			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
 		}
 		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
 		prof_dump_buf_end += n;
@@ -596,13 +605,31 @@ prof_write(const char *s, bool propagate_err)
 	return (false);
 }
 
+/*
+ * printf()-style front end for prof_write(), via a PROF_PRINTF_BUFSIZE buffer.
+ */
+JEMALLOC_ATTR(format(printf, 2, 3))
+static bool
+prof_printf(bool propagate_err, const char *format, ...)
+{
+	va_list args;
+	char text[PROF_PRINTF_BUFSIZE];
+
+	va_start(args, format);
+	malloc_vsnprintf(text, sizeof(text), format, args);
+	va_end(args);
+	return (prof_write(propagate_err, text));
+}
+
 static void
 prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
 {
 	prof_thr_cnt_t *thr_cnt;
 	prof_cnt_t tcnt;
 
-	malloc_mutex_lock(&ctx->lock);
+	cassert(config_prof);
+
+	malloc_mutex_lock(ctx->lock);
 
 	memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
 	ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
@@ -641,43 +668,48 @@ prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
 		cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
 	}
 
-	malloc_mutex_unlock(&ctx->lock);
+	malloc_mutex_unlock(ctx->lock);
 }
 
 static void
 prof_ctx_destroy(prof_ctx_t *ctx)
 {
+	prof_tdata_t *prof_tdata;
+
+	cassert(config_prof);
 
 	/*
 	 * Check that ctx is still unused by any thread cache before destroying
-	 * it.  prof_lookup() artificially raises ctx->cnt_merge.curobjs in
-	 * order to avoid a race condition with this function, as does
-	 * prof_ctx_merge() in order to avoid a race between the main body of
-	 * prof_ctx_merge() and entry into this function.
+	 * it.  prof_lookup() increments ctx->nlimbo in order to avoid a race
+	 * condition with this function, as does prof_ctx_merge() in order to
+	 * avoid a race between the main body of prof_ctx_merge() and entry
+	 * into this function.
 	 */
-	prof_enter();
-	malloc_mutex_lock(&ctx->lock);
-	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) {
+	prof_tdata = *prof_tdata_tsd_get();
+	assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX);
+	prof_enter(prof_tdata);
+	malloc_mutex_lock(ctx->lock);
+	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 &&
+	    ctx->nlimbo == 1) {
 		assert(ctx->cnt_merged.curbytes == 0);
 		assert(ctx->cnt_merged.accumobjs == 0);
 		assert(ctx->cnt_merged.accumbytes == 0);
 		/* Remove ctx from bt2ctx. */
 		if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
 			assert(false);
-		prof_leave();
+		prof_leave(prof_tdata);
 		/* Destroy ctx. */
-		malloc_mutex_unlock(&ctx->lock);
+		malloc_mutex_unlock(ctx->lock);
 		bt_destroy(ctx->bt);
-		malloc_mutex_destroy(&ctx->lock);
 		idalloc(ctx);
 	} else {
 		/*
 		 * Compensate for increment in prof_ctx_merge() or
 		 * prof_lookup().
 		 */
-		ctx->cnt_merged.curobjs--;
-		malloc_mutex_unlock(&ctx->lock);
-		prof_leave();
+		ctx->nlimbo--;
+		malloc_mutex_unlock(ctx->lock);
+		prof_leave(prof_tdata);
 	}
 }
 
@@ -686,20 +718,22 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
 {
 	bool destroy;
 
+	cassert(config_prof);
+
 	/* Merge cnt stats and detach from ctx. */
-	malloc_mutex_lock(&ctx->lock);
+	malloc_mutex_lock(ctx->lock);
 	ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
 	ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
 	ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
 	ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
 	ql_remove(&ctx->cnts_ql, cnt, cnts_link);
 	if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
-	    ctx->cnt_merged.curobjs == 0) {
+	    ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) {
 		/*
-		 * Artificially raise ctx->cnt_merged.curobjs in order to keep
-		 * another thread from winning the race to destroy ctx while
-		 * this one has ctx->lock dropped.  Without this, it would be
-		 * possible for another thread to:
+		 * Increment ctx->nlimbo in order to keep another thread from
+		 * winning the race to destroy ctx while this one has ctx->lock
+		 * dropped.  Without this, it would be possible for another
+		 * thread to:
 		 *
 		 * 1) Sample an allocation associated with ctx.
 		 * 2) Deallocate the sampled object.
@@ -708,49 +742,51 @@ prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
 		 * The result would be that ctx no longer exists by the time
 		 * this thread accesses it in prof_ctx_destroy().
 		 */
-		ctx->cnt_merged.curobjs++;
+		ctx->nlimbo++;
 		destroy = true;
 	} else
 		destroy = false;
-	malloc_mutex_unlock(&ctx->lock);
+	malloc_mutex_unlock(ctx->lock);
 	if (destroy)
 		prof_ctx_destroy(ctx);
 }
 
 static bool
-prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err)
+prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt)
 {
-	char buf[UMAX2S_BUFSIZE];
 	unsigned i;
 
-	if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) {
+	cassert(config_prof);
+
+	/*
+	 * Current statistics can sum to 0 as a result of unmerged per thread
+	 * statistics.  Additionally, interval- and growth-triggered dumps can
+	 * occur between the time a ctx is created and when its statistics are
+	 * filled in.  Avoid dumping any ctx that is an artifact of either
+	 * implementation detail.
+	 */
+	if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) ||
+	    (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) {
+		assert(ctx->cnt_summed.curobjs == 0);
 		assert(ctx->cnt_summed.curbytes == 0);
 		assert(ctx->cnt_summed.accumobjs == 0);
 		assert(ctx->cnt_summed.accumbytes == 0);
 		return (false);
 	}
 
-	if (prof_write(u2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err)
-	    || prof_write(": ", propagate_err)
-	    || prof_write(u2s(ctx->cnt_summed.curbytes, 10, buf),
-	    propagate_err)
-	    || prof_write(" [", propagate_err)
-	    || prof_write(u2s(ctx->cnt_summed.accumobjs, 10, buf),
-	    propagate_err)
-	    || prof_write(": ", propagate_err)
-	    || prof_write(u2s(ctx->cnt_summed.accumbytes, 10, buf),
-	    propagate_err)
-	    || prof_write("] @", propagate_err))
+	if (prof_printf(propagate_err, "%"PRId64": %"PRId64
+	    " [%"PRIu64": %"PRIu64"] @",
+	    ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes,
+	    ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes))
 		return (true);
 
 	for (i = 0; i < bt->len; i++) {
-		if (prof_write(" 0x", propagate_err)
-		    || prof_write(u2s((uintptr_t)bt->vec[i], 16, buf),
-		    propagate_err))
+		if (prof_printf(propagate_err, " %#"PRIxPTR,
+		    (uintptr_t)bt->vec[i]))
 			return (true);
 	}
 
-	if (prof_write("\n", propagate_err))
+	if (prof_write(propagate_err, "\n"))
 		return (true);
 
 	return (false);
@@ -760,49 +796,29 @@ static bool
 prof_dump_maps(bool propagate_err)
 {
 	int mfd;
-	char buf[UMAX2S_BUFSIZE];
-	char *s;
-	unsigned i, slen;
-	/*         /proc/<pid>/maps\0 */
-	char mpath[6     + UMAX2S_BUFSIZE
-			      + 5  + 1];
+	char filename[PATH_MAX + 1];
 
-	i = 0;
+	cassert(config_prof);
 
-	s = "/proc/";
-	slen = strlen(s);
-	memcpy(&mpath[i], s, slen);
-	i += slen;
-
-	s = u2s(getpid(), 10, buf);
-	slen = strlen(s);
-	memcpy(&mpath[i], s, slen);
-	i += slen;
-
-	s = "/maps";
-	slen = strlen(s);
-	memcpy(&mpath[i], s, slen);
-	i += slen;
-
-	mpath[i] = '\0';
-
-	mfd = open(mpath, O_RDONLY);
+	malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps",
+	    (int)getpid());
+	mfd = open(filename, O_RDONLY);
 	if (mfd != -1) {
 		ssize_t nread;
 
-		if (prof_write("\nMAPPED_LIBRARIES:\n", propagate_err) &&
+		if (prof_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
 		    propagate_err)
 			return (true);
 		nread = 0;
 		do {
 			prof_dump_buf_end += nread;
-			if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) {
+			if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
 				/* Make space in prof_dump_buf before read(). */
 				if (prof_flush(propagate_err) && propagate_err)
 					return (true);
 			}
 			nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
-			    PROF_DUMP_BUF_SIZE - prof_dump_buf_end);
+			    PROF_DUMP_BUFSIZE - prof_dump_buf_end);
 		} while (nread > 0);
 		close(mfd);
 	} else
@@ -812,8 +828,9 @@ prof_dump_maps(bool propagate_err)
 }
 
 static bool
-prof_dump(const char *filename, bool leakcheck, bool propagate_err)
+prof_dump(bool propagate_err, const char *filename, bool leakcheck)
 {
+	prof_tdata_t *prof_tdata;
 	prof_cnt_t cnt_all;
 	size_t tabind;
 	union {
@@ -824,20 +841,24 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err)
 		prof_ctx_t	*p;
 		void		*v;
 	} ctx;
-	char buf[UMAX2S_BUFSIZE];
 	size_t leak_nctx;
 
-	prof_enter();
+	cassert(config_prof);
+
+	prof_tdata = prof_tdata_get();
+	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+		return (true);
+	prof_enter(prof_tdata);
 	prof_dump_fd = creat(filename, 0644);
 	if (prof_dump_fd == -1) {
 		if (propagate_err == false) {
-			malloc_write("<jemalloc>: creat(\"");
-			malloc_write(filename);
-			malloc_write("\", 0644) failed\n");
+			malloc_printf(
+			    "<jemalloc>: creat(\"%s\", 0644) failed\n",
+			    filename);
 			if (opt_abort)
 				abort();
 		}
-		goto ERROR;
+		goto label_error;
 	}
 
 	/* Merge per thread profile stats, and sum them in cnt_all. */
@@ -847,131 +868,75 @@ prof_dump(const char *filename, bool leakcheck, bool propagate_err)
 		prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx);
 
 	/* Dump profile header. */
-	if (prof_write("heap profile: ", propagate_err)
-	    || prof_write(u2s(cnt_all.curobjs, 10, buf), propagate_err)
-	    || prof_write(": ", propagate_err)
-	    || prof_write(u2s(cnt_all.curbytes, 10, buf), propagate_err)
-	    || prof_write(" [", propagate_err)
-	    || prof_write(u2s(cnt_all.accumobjs, 10, buf), propagate_err)
-	    || prof_write(": ", propagate_err)
-	    || prof_write(u2s(cnt_all.accumbytes, 10, buf), propagate_err))
-		goto ERROR;
-
 	if (opt_lg_prof_sample == 0) {
-		if (prof_write("] @ heapprofile\n", propagate_err))
-			goto ERROR;
+		if (prof_printf(propagate_err,
+		    "heap profile: %"PRId64": %"PRId64
+		    " [%"PRIu64": %"PRIu64"] @ heapprofile\n",
+		    cnt_all.curobjs, cnt_all.curbytes,
+		    cnt_all.accumobjs, cnt_all.accumbytes))
+			goto label_error;
 	} else {
-		if (prof_write("] @ heap_v2/", propagate_err)
-		    || prof_write(u2s((uint64_t)1U << opt_lg_prof_sample, 10,
-		    buf), propagate_err)
-		    || prof_write("\n", propagate_err))
-			goto ERROR;
+		if (prof_printf(propagate_err,
+		    "heap profile: %"PRId64": %"PRId64
+		    " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n",
+		    cnt_all.curobjs, cnt_all.curbytes,
+		    cnt_all.accumobjs, cnt_all.accumbytes,
+		    ((uint64_t)1U << opt_lg_prof_sample)))
+			goto label_error;
 	}
 
 	/* Dump  per ctx profile stats. */
 	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v)
 	    == false;) {
-		if (prof_dump_ctx(ctx.p, bt.p, propagate_err))
-			goto ERROR;
+		if (prof_dump_ctx(propagate_err, ctx.p, bt.p))
+			goto label_error;
 	}
 
 	/* Dump /proc/<pid>/maps if possible. */
 	if (prof_dump_maps(propagate_err))
-		goto ERROR;
+		goto label_error;
 
 	if (prof_flush(propagate_err))
-		goto ERROR;
+		goto label_error;
 	close(prof_dump_fd);
-	prof_leave();
+	prof_leave(prof_tdata);
 
 	if (leakcheck && cnt_all.curbytes != 0) {
-		malloc_write("<jemalloc>: Leak summary: ");
-		malloc_write(u2s(cnt_all.curbytes, 10, buf));
-		malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, ");
-		malloc_write(u2s(cnt_all.curobjs, 10, buf));
-		malloc_write((cnt_all.curobjs != 1) ? " objects, " :
-		    " object, ");
-		malloc_write(u2s(leak_nctx, 10, buf));
-		malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n");
-		malloc_write("<jemalloc>: Run pprof on \"");
-		malloc_write(filename);
-		malloc_write("\" for leak detail\n");
+		malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
+		    PRId64" object%s, %zu context%s\n",
+		    cnt_all.curbytes, (cnt_all.curbytes != 1) ? "s" : "",
+		    cnt_all.curobjs, (cnt_all.curobjs != 1) ? "s" : "",
+		    leak_nctx, (leak_nctx != 1) ? "s" : "");
+		malloc_printf(
+		    "<jemalloc>: Run pprof on \"%s\" for leak detail\n",
+		    filename);
 	}
 
 	return (false);
-ERROR:
-	prof_leave();
+label_error:
+	prof_leave(prof_tdata);
 	return (true);
 }
 
-#define	DUMP_FILENAME_BUFSIZE	(PATH_MAX+ UMAX2S_BUFSIZE		\
-					       + 1			\
-						+ UMAX2S_BUFSIZE	\
-						     + 2		\
-						       + UMAX2S_BUFSIZE	\
-						             + 5  + 1)
+#define	DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
 static void
 prof_dump_filename(char *filename, char v, int64_t vseq)
 {
-	char buf[UMAX2S_BUFSIZE];
-	char *s;
-	unsigned i, slen;
-
-	/*
-	 * Construct a filename of the form:
-	 *
-	 *   <prefix>.<pid>.<seq>.v<vseq>.heap\0
-	 */
-
-	i = 0;
 
-	s = opt_prof_prefix;
-	slen = strlen(s);
-	memcpy(&filename[i], s, slen);
-	i += slen;
-
-	s = ".";
-	slen = strlen(s);
-	memcpy(&filename[i], s, slen);
-	i += slen;
-
-	s = u2s(getpid(), 10, buf);
-	slen = strlen(s);
-	memcpy(&filename[i], s, slen);
-	i += slen;
+	cassert(config_prof);
 
-	s = ".";
-	slen = strlen(s);
-	memcpy(&filename[i], s, slen);
-	i += slen;
-
-	s = u2s(prof_dump_seq, 10, buf);
-	prof_dump_seq++;
-	slen = strlen(s);
-	memcpy(&filename[i], s, slen);
-	i += slen;
-
-	s = ".";
-	slen = strlen(s);
-	memcpy(&filename[i], s, slen);
-	i += slen;
-
-	filename[i] = v;
-	i++;
-
-	if (vseq != 0xffffffffffffffffLLU) {
-		s = u2s(vseq, 10, buf);
-		slen = strlen(s);
-		memcpy(&filename[i], s, slen);
-		i += slen;
+	if (vseq != UINT64_C(0xffffffffffffffff)) {
+	        /* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
+		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
+		    "%s.%d.%"PRIu64".%c%"PRId64".heap",
+		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq);
+	} else {
+	        /* "<prefix>.<pid>.<seq>.<v>.heap" */
+		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
+		    "%s.%d.%"PRIu64".%c.heap",
+		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v);
 	}
-
-	s = ".heap";
-	slen = strlen(s);
-	memcpy(&filename[i], s, slen);
-	i += slen;
-
-	filename[i] = '\0';
+	prof_dump_seq++;
 }
 
 static void
@@ -979,38 +944,47 @@ prof_fdump(void)
 {
 	char filename[DUMP_FILENAME_BUFSIZE];
 
+	cassert(config_prof);
+
 	if (prof_booted == false)
 		return;
 
-	if (opt_prof_prefix[0] != '\0') {
+	if (opt_prof_final && opt_prof_prefix[0] != '\0') {
 		malloc_mutex_lock(&prof_dump_seq_mtx);
-		prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU);
+		prof_dump_filename(filename, 'f', UINT64_C(0xffffffffffffffff));
 		malloc_mutex_unlock(&prof_dump_seq_mtx);
-		prof_dump(filename, opt_prof_leak, false);
+		prof_dump(false, filename, opt_prof_leak);
 	}
 }
 
 void
 prof_idump(void)
 {
-	char filename[DUMP_FILENAME_BUFSIZE];
+	prof_tdata_t *prof_tdata;
+	char filename[PATH_MAX + 1];
+
+	cassert(config_prof);
 
 	if (prof_booted == false)
 		return;
-	malloc_mutex_lock(&enq_mtx);
-	if (enq) {
-		enq_idump = true;
-		malloc_mutex_unlock(&enq_mtx);
+	/*
+	 * Don't call prof_tdata_get() here, because it could cause recursive
+	 * allocation.
+	 */
+	prof_tdata = *prof_tdata_tsd_get();
+	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+		return;
+	if (prof_tdata->enq) {
+		prof_tdata->enq_idump = true;
 		return;
 	}
-	malloc_mutex_unlock(&enq_mtx);
 
 	if (opt_prof_prefix[0] != '\0') {
 		malloc_mutex_lock(&prof_dump_seq_mtx);
 		prof_dump_filename(filename, 'i', prof_dump_iseq);
 		prof_dump_iseq++;
 		malloc_mutex_unlock(&prof_dump_seq_mtx);
-		prof_dump(filename, false, false);
+		prof_dump(false, filename, false);
 	}
 }
 
@@ -1019,6 +993,8 @@ prof_mdump(const char *filename)
 {
 	char filename_buf[DUMP_FILENAME_BUFSIZE];
 
+	cassert(config_prof);
+
 	if (opt_prof == false || prof_booted == false)
 		return (true);
 
@@ -1032,30 +1008,37 @@ prof_mdump(const char *filename)
 		malloc_mutex_unlock(&prof_dump_seq_mtx);
 		filename = filename_buf;
 	}
-	return (prof_dump(filename, false, true));
+	return (prof_dump(true, filename, false));
 }
 
 void
 prof_gdump(void)
 {
+	prof_tdata_t *prof_tdata;
 	char filename[DUMP_FILENAME_BUFSIZE];
 
+	cassert(config_prof);
+
 	if (prof_booted == false)
 		return;
-	malloc_mutex_lock(&enq_mtx);
-	if (enq) {
-		enq_gdump = true;
-		malloc_mutex_unlock(&enq_mtx);
+	/*
+	 * Don't call prof_tdata_get() here, because it could cause recursive
+	 * allocation.
+	 */
+	prof_tdata = *prof_tdata_tsd_get();
+	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
+		return;
+	if (prof_tdata->enq) {
+		prof_tdata->enq_gdump = true;
 		return;
 	}
-	malloc_mutex_unlock(&enq_mtx);
 
 	if (opt_prof_prefix[0] != '\0') {
 		malloc_mutex_lock(&prof_dump_seq_mtx);
 		prof_dump_filename(filename, 'u', prof_dump_useq);
 		prof_dump_useq++;
 		malloc_mutex_unlock(&prof_dump_seq_mtx);
-		prof_dump(filename, false, false);
+		prof_dump(false, filename, false);
 	}
 }
 
@@ -1066,11 +1049,13 @@ prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
 	uint64_t h;
 	prof_bt_t *bt = (prof_bt_t *)key;
 
+	cassert(config_prof);
 	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
 	assert(hash1 != NULL);
 	assert(hash2 != NULL);
 
-	h = hash(bt->vec, bt->len * sizeof(void *), 0x94122f335b332aeaLLU);
+	h = hash(bt->vec, bt->len * sizeof(void *),
+	    UINT64_C(0x94122f335b332aea));
 	if (minbits <= 32) {
 		/*
 		 * Avoid doing multiple hashes, since a single hash provides
@@ -1081,7 +1066,7 @@ prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
 	} else {
 		ret1 = h;
 		ret2 = hash(bt->vec, bt->len * sizeof(void *),
-		    0x8432a476666bbc13LLU);
+		    UINT64_C(0x8432a476666bbc13));
 	}
 
 	*hash1 = ret1;
@@ -1094,16 +1079,28 @@ prof_bt_keycomp(const void *k1, const void *k2)
 	const prof_bt_t *bt1 = (prof_bt_t *)k1;
 	const prof_bt_t *bt2 = (prof_bt_t *)k2;
 
+	cassert(config_prof);
+
 	if (bt1->len != bt2->len)
 		return (false);
 	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
 }
 
+static malloc_mutex_t *
+prof_ctx_mutex_choose(void)
+{
+	/* Pick a pool lock by cumulative ctx count, modulo the pool size. */
+	unsigned slot = (atomic_add_u(&cum_ctxs, 1) - 1) % PROF_NCTX_LOCKS;
+	return (&ctx_locks[slot]);
+}
+
 prof_tdata_t *
 prof_tdata_init(void)
 {
 	prof_tdata_t *prof_tdata;
 
+	cassert(config_prof);
+
 	/* Initialize an empty cache for this thread. */
 	prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
 	if (prof_tdata == NULL)
@@ -1116,51 +1113,77 @@ prof_tdata_init(void)
 	}
 	ql_new(&prof_tdata->lru_ql);
 
-	prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max);
+	prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX);
 	if (prof_tdata->vec == NULL) {
 		ckh_delete(&prof_tdata->bt2cnt);
 		idalloc(prof_tdata);
 		return (NULL);
 	}
 
-	prof_tdata->prn_state = 0;
+	prof_tdata->prng_state = 0;
 	prof_tdata->threshold = 0;
 	prof_tdata->accum = 0;
 
-	PROF_TCACHE_SET(prof_tdata);
+	prof_tdata->enq = false;
+	prof_tdata->enq_idump = false;
+	prof_tdata->enq_gdump = false;
+
+	prof_tdata_tsd_set(&prof_tdata);
 
 	return (prof_tdata);
 }
 
-static void
+void
 prof_tdata_cleanup(void *arg)
 {
 	prof_thr_cnt_t *cnt;
-	prof_tdata_t *prof_tdata = (prof_tdata_t *)arg;
+	prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg;
 
-	/*
-	 * Delete the hash table.  All of its contents can still be iterated
-	 * over via the LRU.
-	 */
-	ckh_delete(&prof_tdata->bt2cnt);
+	cassert(config_prof);
 
-	/* Iteratively merge cnt's into the global stats and delete them. */
-	while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
-		ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
-		prof_ctx_merge(cnt->ctx, cnt);
-		idalloc(cnt);
+	if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) {
+		/*
+		 * Another destructor deallocated memory after this destructor
+		 * was called.  Reset prof_tdata to PROF_TDATA_STATE_PURGATORY
+		 * in order to receive another callback.
+		 */
+		prof_tdata = PROF_TDATA_STATE_PURGATORY;
+		prof_tdata_tsd_set(&prof_tdata);
+	} else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) {
+		/*
+		 * The previous time this destructor was called, we set the key
+		 * to PROF_TDATA_STATE_PURGATORY so that other destructors
+		 * wouldn't cause re-creation of the prof_tdata.  This time, do
+		 * nothing, so that the destructor will not be called again.
+		 */
+	} else if (prof_tdata != NULL) {
+		/*
+		 * Delete the hash table.  All of its contents can still be
+		 * iterated over via the LRU.
+		 */
+		ckh_delete(&prof_tdata->bt2cnt);
+		/*
+		 * Iteratively merge cnt's into the global stats and delete
+		 * them.
+		 */
+		while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
+			ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
+			prof_ctx_merge(cnt->ctx, cnt);
+			idalloc(cnt);
+		}
+		idalloc(prof_tdata->vec);
+		idalloc(prof_tdata);
+		prof_tdata = PROF_TDATA_STATE_PURGATORY;
+		prof_tdata_tsd_set(&prof_tdata);
 	}
-
-	idalloc(prof_tdata->vec);
-
-	idalloc(prof_tdata);
-	PROF_TCACHE_SET(NULL);
 }
 
 void
 prof_boot0(void)
 {
 
+	cassert(config_prof);
+
 	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
 	    sizeof(PROF_PREFIX_DEFAULT));
 }
@@ -1169,6 +1192,8 @@ void
 prof_boot1(void)
 {
 
+	cassert(config_prof);
+
 	/*
 	 * opt_prof and prof_promote must be in their final state before any
 	 * arenas are initialized, so this function must be executed early.
@@ -1190,41 +1215,46 @@ prof_boot1(void)
 			prof_interval = 0;
 	}
 
-	prof_promote = (opt_prof && opt_lg_prof_sample > PAGE_SHIFT);
+	prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE);
 }
 
 bool
 prof_boot2(void)
 {
 
+	cassert(config_prof);
+
 	if (opt_prof) {
+		unsigned i;
+
 		if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
 		    prof_bt_keycomp))
 			return (true);
 		if (malloc_mutex_init(&bt2ctx_mtx))
 			return (true);
-		if (pthread_key_create(&prof_tdata_tsd, prof_tdata_cleanup)
-		    != 0) {
+		if (prof_tdata_tsd_boot()) {
 			malloc_write(
 			    "<jemalloc>: Error in pthread_key_create()\n");
 			abort();
 		}
 
-		prof_bt_max = (1U << opt_lg_prof_bt_max);
 		if (malloc_mutex_init(&prof_dump_seq_mtx))
 			return (true);
 
-		if (malloc_mutex_init(&enq_mtx))
-			return (true);
-		enq = false;
-		enq_idump = false;
-		enq_gdump = false;
-
 		if (atexit(prof_fdump) != 0) {
 			malloc_write("<jemalloc>: Error in atexit()\n");
 			if (opt_abort)
 				abort();
 		}
+
+		ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
+		    sizeof(malloc_mutex_t));
+		if (ctx_locks == NULL)
+			return (true);
+		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
+			if (malloc_mutex_init(&ctx_locks[i]))
+				return (true);
+		}
 	}
 
 #ifdef JEMALLOC_PROF_LIBGCC
@@ -1241,4 +1271,3 @@ prof_boot2(void)
 }
 
 /******************************************************************************/
-#endif /* JEMALLOC_PROF */
diff --git a/deps/jemalloc/src/quarantine.c b/deps/jemalloc/src/quarantine.c
new file mode 100644
index 00000000..9005ab3b
--- /dev/null
+++ b/deps/jemalloc/src/quarantine.c
@@ -0,0 +1,210 @@
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/*
+ * quarantine pointers close to NULL are used to encode state information that
+ * is used for cleaning up during thread shutdown.
+ */
+#define	QUARANTINE_STATE_REINCARNATED	((quarantine_t *)(uintptr_t)1)
+#define	QUARANTINE_STATE_PURGATORY	((quarantine_t *)(uintptr_t)2)
+#define	QUARANTINE_STATE_MAX		QUARANTINE_STATE_PURGATORY
+
+/******************************************************************************/
+/* Data. */
+
+typedef struct quarantine_obj_s quarantine_obj_t;
+typedef struct quarantine_s quarantine_t;
+
+struct quarantine_obj_s {
+	void	*ptr;	/* Quarantined allocation. */
+	size_t	usize;	/* isalloc(ptr, config_prof) at quarantine time. */
+};
+
+struct quarantine_s {
+	size_t			curbytes; /* Sum of usize over held objs. */
+	size_t			curobjs; /* Number of objs currently held. */
+	size_t			first; /* Index of the oldest held obj. */
+#define	LG_MAXOBJS_INIT 10 /* lg_maxobjs used for a thread's first ring. */
+	size_t			lg_maxobjs; /* objs has 2^lg_maxobjs slots. */
+	quarantine_obj_t	objs[1]; /* Dynamically sized ring buffer. */
+};
+
+static void	quarantine_cleanup(void *arg);
+
+malloc_tsd_data(static, quarantine, quarantine_t *, NULL)
+malloc_tsd_funcs(JEMALLOC_INLINE, quarantine, quarantine_t *, NULL,
+    quarantine_cleanup)
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static quarantine_t	*quarantine_init(size_t lg_maxobjs);
+static quarantine_t	*quarantine_grow(quarantine_t *quarantine);
+static void	quarantine_drain(quarantine_t *quarantine, size_t upper_bound);
+
+/******************************************************************************/
+
+static quarantine_t *
+quarantine_init(size_t lg_maxobjs)
+{
+	/* Allocate header + 2^lg_maxobjs slots, zero counts, publish in TSD. */
+	size_t nslots = ZU(1) << lg_maxobjs;
+	quarantine_t *fresh = (quarantine_t *)imalloc(offsetof(quarantine_t,
+	    objs) + (nslots * sizeof(quarantine_obj_t)));
+
+	if (fresh == NULL)
+		return (NULL);
+	fresh->lg_maxobjs = lg_maxobjs;
+	fresh->first = 0;
+	fresh->curobjs = 0;
+	fresh->curbytes = 0;
+
+	quarantine_tsd_set(&fresh);
+	return (fresh);
+}
+
+/* Double the ring capacity, or free the oldest object if allocation fails. */
+static quarantine_t *
+quarantine_grow(quarantine_t *quarantine)
+{
+	size_t first = quarantine->first, nobjs = quarantine->curobjs;
+	quarantine_t *ret = quarantine_init(quarantine->lg_maxobjs + 1);
+	if (ret == NULL) {
+		quarantine_drain(quarantine, quarantine->curbytes -
+		    quarantine->objs[first].usize);
+		return (quarantine);
+	}
+	ret->curbytes = quarantine->curbytes;
+	ret->curobjs = nobjs;
+	if (first + nobjs <= (ZU(1) << quarantine->lg_maxobjs)) {
+		/* objs ring buffer data are contiguous. */
+		memcpy(ret->objs, &quarantine->objs[first], nobjs *
+		    sizeof(quarantine_obj_t));
+	} else {
+		/* objs ring buffer data wrap around. */
+		size_t ncopy_a = (ZU(1) << quarantine->lg_maxobjs) - first;
+		size_t ncopy_b = nobjs - ncopy_a;
+		memcpy(ret->objs, &quarantine->objs[first], ncopy_a *
+		    sizeof(quarantine_obj_t));
+		memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b *
+		    sizeof(quarantine_obj_t));
+	}
+	/* quarantine_init() repointed the TSD at ret; drop the old ring. */
+	idalloc(quarantine);
+	return (ret);
+}
+
+static void
+quarantine_drain(quarantine_t *quarantine, size_t upper_bound)
+{
+	size_t mask = (ZU(1) << quarantine->lg_maxobjs) - 1;
+	/* Free oldest-first until curbytes <= upper_bound or ring is empty. */
+	while (quarantine->curobjs > 0 && quarantine->curbytes > upper_bound) {
+		quarantine_obj_t *head = &quarantine->objs[quarantine->first];
+		assert(head->usize == isalloc(head->ptr, config_prof));
+		idalloc(head->ptr);
+		quarantine->curbytes -= head->usize;
+		quarantine->curobjs--;
+		quarantine->first = (quarantine->first + 1) & mask;
+	}
+}
+
+void
+quarantine(void *ptr)
+{
+	quarantine_t *quarantine;
+	size_t usize = isalloc(ptr, config_prof);
+	/* Park ptr in the thread's ring for a later drain, or free it now. */
+	cassert(config_fill);
+	assert(opt_quarantine);
+	/* Values <= QUARANTINE_STATE_MAX are NULL or a sentinel, not a ring. */
+	quarantine = *quarantine_tsd_get();
+	if ((uintptr_t)quarantine <= (uintptr_t)QUARANTINE_STATE_MAX) {
+		if (quarantine == NULL) {
+			if ((quarantine = quarantine_init(LG_MAXOBJS_INIT)) ==
+			    NULL) {
+				idalloc(ptr);
+				return;
+			}
+		} else {
+			if (quarantine == QUARANTINE_STATE_PURGATORY) {
+				/*
+				 * Make a note that quarantine() was called
+				 * after quarantine_cleanup() was called.
+				 */
+				quarantine = QUARANTINE_STATE_REINCARNATED;
+				quarantine_tsd_set(&quarantine);
+			}
+			idalloc(ptr);
+			return;
+		}
+	}
+	/*
+	 * Drain one or more objects if the quarantine size limit would be
+	 * exceeded by appending ptr.
+	 */
+	if (quarantine->curbytes + usize > opt_quarantine) {
+		size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine
+		    - usize : 0;
+		quarantine_drain(quarantine, upper_bound);
+	}
+	/* Grow the quarantine ring buffer if it's full. */
+	if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs))
+		quarantine = quarantine_grow(quarantine);
+	/* quarantine_grow() must free a slot if it fails to grow. */
+	assert(quarantine->curobjs < (ZU(1) << quarantine->lg_maxobjs));
+	/* Append ptr if its size doesn't exceed the quarantine size. */
+	if (quarantine->curbytes + usize <= opt_quarantine) {
+		size_t offset = (quarantine->first + quarantine->curobjs) &
+		    ((ZU(1) << quarantine->lg_maxobjs) - 1);
+		quarantine_obj_t *obj = &quarantine->objs[offset];
+		obj->ptr = ptr;
+		obj->usize = usize;
+		quarantine->curbytes += usize;
+		quarantine->curobjs++;
+		if (opt_junk)
+			memset(ptr, 0x5a, usize);
+	} else {
+		assert(quarantine->curbytes == 0);
+		idalloc(ptr);
+	}
+}
+
+static void
+quarantine_cleanup(void *arg)
+{
+	quarantine_t *state = *(quarantine_t **)arg;
+
+	/*
+	 * NULL: nothing to clean up.  PURGATORY: an earlier call already set
+	 * this state; do nothing, so that the destructor will not be called
+	 * again.
+	 */
+	if (state == NULL || state == QUARANTINE_STATE_PURGATORY)
+		return;
+
+	if (state != QUARANTINE_STATE_REINCARNATED) {
+		/* Live quarantine: drain it down to 0 bytes, then free it. */
+		quarantine_drain(state, 0);
+		idalloc(state);
+	}
+	/*
+	 * Either a live quarantine was just destroyed, or quarantine() ran
+	 * after an earlier cleanup (REINCARNATED).  Store PURGATORY so that
+	 * other destructors do not re-create the quarantine and, for the
+	 * REINCARNATED case, another callback is received.
+	 */
+	state = QUARANTINE_STATE_PURGATORY;
+	quarantine_tsd_set(&state);
+}
+
+/* Boot the quarantine TSD; returns true iff quarantine_tsd_boot() does. */
+bool
+quarantine_boot(void)
+{
+	bool tsd_failed;
+
+	cassert(config_fill);
+
+	tsd_failed = quarantine_tsd_boot() ? true : false;
+	return (tsd_failed);
+}
diff --git a/deps/jemalloc/src/stats.c b/deps/jemalloc/src/stats.c
index dc172e42..433b80d1 100644
--- a/deps/jemalloc/src/stats.c
+++ b/deps/jemalloc/src/stats.c
@@ -39,140 +39,40 @@
 
 bool	opt_stats_print = false;
 
-#ifdef JEMALLOC_STATS
 size_t	stats_cactive = 0;
-#endif
 
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
 
-#ifdef JEMALLOC_STATS
-static void	malloc_vcprintf(void (*write_cb)(void *, const char *),
-    void *cbopaque, const char *format, va_list ap);
 static void	stats_arena_bins_print(void (*write_cb)(void *, const char *),
     void *cbopaque, unsigned i);
 static void	stats_arena_lruns_print(void (*write_cb)(void *, const char *),
     void *cbopaque, unsigned i);
 static void	stats_arena_print(void (*write_cb)(void *, const char *),
-    void *cbopaque, unsigned i);
-#endif
+    void *cbopaque, unsigned i, bool bins, bool large);
 
 /******************************************************************************/
 
-/*
- * We don't want to depend on vsnprintf() for production builds, since that can
- * cause unnecessary bloat for static binaries.  u2s() provides minimal integer
- * printing functionality, so that malloc_printf() use can be limited to
- * JEMALLOC_STATS code.
- */
-char *
-u2s(uint64_t x, unsigned base, char *s)
-{
-	unsigned i;
-
-	i = UMAX2S_BUFSIZE - 1;
-	s[i] = '\0';
-	switch (base) {
-	case 10:
-		do {
-			i--;
-			s[i] = "0123456789"[x % (uint64_t)10];
-			x /= (uint64_t)10;
-		} while (x > 0);
-		break;
-	case 16:
-		do {
-			i--;
-			s[i] = "0123456789abcdef"[x & 0xf];
-			x >>= 4;
-		} while (x > 0);
-		break;
-	default:
-		do {
-			i--;
-			s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x %
-			    (uint64_t)base];
-			x /= (uint64_t)base;
-		} while (x > 0);
-	}
-
-	return (&s[i]);
-}
-
-#ifdef JEMALLOC_STATS
-static void
-malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
-    const char *format, va_list ap)
-{
-	char buf[4096];
-
-	if (write_cb == NULL) {
-		/*
-		 * The caller did not provide an alternate write_cb callback
-		 * function, so use the default one.  malloc_write() is an
-		 * inline function, so use malloc_message() directly here.
-		 */
-		write_cb = JEMALLOC_P(malloc_message);
-		cbopaque = NULL;
-	}
-
-	vsnprintf(buf, sizeof(buf), format, ap);
-	write_cb(cbopaque, buf);
-}
-
-/*
- * Print to a callback function in such a way as to (hopefully) avoid memory
- * allocation.
- */
-JEMALLOC_ATTR(format(printf, 3, 4))
-void
-malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
-    const char *format, ...)
-{
-	va_list ap;
-
-	va_start(ap, format);
-	malloc_vcprintf(write_cb, cbopaque, format, ap);
-	va_end(ap);
-}
-
-/*
- * Print to stderr in such a way as to (hopefully) avoid memory allocation.
- */
-JEMALLOC_ATTR(format(printf, 1, 2))
-void
-malloc_printf(const char *format, ...)
-{
-	va_list ap;
-
-	va_start(ap, format);
-	malloc_vcprintf(NULL, NULL, format, ap);
-	va_end(ap);
-}
-#endif
-
-#ifdef JEMALLOC_STATS
 static void
 stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
     unsigned i)
 {
-	size_t pagesize;
+	size_t page;
 	bool config_tcache;
 	unsigned nbins, j, gap_start;
 
-	CTL_GET("arenas.pagesize", &pagesize, size_t);
+	CTL_GET("arenas.page", &page, size_t);
 
 	CTL_GET("config.tcache", &config_tcache, bool);
 	if (config_tcache) {
 		malloc_cprintf(write_cb, cbopaque,
-		    "bins:     bin    size regs pgs    allocated      nmalloc"
+		    "bins:     bin  size regs pgs    allocated      nmalloc"
 		    "      ndalloc    nrequests       nfills     nflushes"
-		    "      newruns       reruns      maxruns      curruns\n");
+		    "      newruns       reruns      curruns\n");
 	} else {
 		malloc_cprintf(write_cb, cbopaque,
-		    "bins:     bin    size regs pgs    allocated      nmalloc"
-		    "      ndalloc      newruns       reruns      maxruns"
-		    "      curruns\n");
+		    "bins:     bin  size regs pgs    allocated      nmalloc"
+		    "      ndalloc      newruns       reruns      curruns\n");
 	}
 	CTL_GET("arenas.nbins", &nbins, unsigned);
 	for (j = 0, gap_start = UINT_MAX; j < nbins; j++) {
@@ -183,12 +83,11 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
 			if (gap_start == UINT_MAX)
 				gap_start = j;
 		} else {
-			unsigned ntbins_, nqbins, ncbins, nsbins;
 			size_t reg_size, run_size, allocated;
 			uint32_t nregs;
 			uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes;
 			uint64_t reruns;
-			size_t highruns, curruns;
+			size_t curruns;
 
 			if (gap_start != UINT_MAX) {
 				if (j > gap_start + 1) {
@@ -203,10 +102,6 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
 				}
 				gap_start = UINT_MAX;
 			}
-			CTL_GET("arenas.ntbins", &ntbins_, unsigned);
-			CTL_GET("arenas.nqbins", &nqbins, unsigned);
-			CTL_GET("arenas.ncbins", &ncbins, unsigned);
-			CTL_GET("arenas.nsbins", &nsbins, unsigned);
 			CTL_J_GET("arenas.bin.0.size", &reg_size, size_t);
 			CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t);
 			CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t);
@@ -226,36 +121,25 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
 			}
 			CTL_IJ_GET("stats.arenas.0.bins.0.nreruns", &reruns,
 			    uint64_t);
-			CTL_IJ_GET("stats.arenas.0.bins.0.highruns", &highruns,
-			    size_t);
 			CTL_IJ_GET("stats.arenas.0.bins.0.curruns", &curruns,
 			    size_t);
 			if (config_tcache) {
 				malloc_cprintf(write_cb, cbopaque,
-				    "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64
+				    "%13u %5zu %4u %3zu %12zu %12"PRIu64
 				    " %12"PRIu64" %12"PRIu64" %12"PRIu64
 				    " %12"PRIu64" %12"PRIu64" %12"PRIu64
-				    " %12zu %12zu\n",
-				    j,
-				    j < ntbins_ ? "T" : j < ntbins_ + nqbins ?
-				    "Q" : j < ntbins_ + nqbins + ncbins ? "C" :
-				    "S",
-				    reg_size, nregs, run_size / pagesize,
+				    " %12zu\n",
+				    j, reg_size, nregs, run_size / page,
 				    allocated, nmalloc, ndalloc, nrequests,
-				    nfills, nflushes, nruns, reruns, highruns,
-				    curruns);
+				    nfills, nflushes, nruns, reruns, curruns);
 			} else {
 				malloc_cprintf(write_cb, cbopaque,
-				    "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64
+				    "%13u %5zu %4u %3zu %12zu %12"PRIu64
 				    " %12"PRIu64" %12"PRIu64" %12"PRIu64
-				    " %12zu %12zu\n",
-				    j,
-				    j < ntbins_ ? "T" : j < ntbins_ + nqbins ?
-				    "Q" : j < ntbins_ + nqbins + ncbins ? "C" :
-				    "S",
-				    reg_size, nregs, run_size / pagesize,
+				    " %12zu\n",
+				    j, reg_size, nregs, run_size / page,
 				    allocated, nmalloc, ndalloc, nruns, reruns,
-				    highruns, curruns);
+				    curruns);
 			}
 		}
 	}
@@ -275,18 +159,18 @@ static void
 stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque,
     unsigned i)
 {
-	size_t pagesize, nlruns, j;
+	size_t page, nlruns, j;
 	ssize_t gap_start;
 
-	CTL_GET("arenas.pagesize", &pagesize, size_t);
+	CTL_GET("arenas.page", &page, size_t);
 
 	malloc_cprintf(write_cb, cbopaque,
 	    "large:   size pages      nmalloc      ndalloc    nrequests"
-	    "      maxruns      curruns\n");
+	    "      curruns\n");
 	CTL_GET("arenas.nlruns", &nlruns, size_t);
 	for (j = 0, gap_start = -1; j < nlruns; j++) {
 		uint64_t nmalloc, ndalloc, nrequests;
-		size_t run_size, highruns, curruns;
+		size_t run_size, curruns;
 
 		CTL_IJ_GET("stats.arenas.0.lruns.0.nmalloc", &nmalloc,
 		    uint64_t);
@@ -299,8 +183,6 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque,
 				gap_start = j;
 		} else {
 			CTL_J_GET("arenas.lrun.0.size", &run_size, size_t);
-			CTL_IJ_GET("stats.arenas.0.lruns.0.highruns", &highruns,
-			    size_t);
 			CTL_IJ_GET("stats.arenas.0.lruns.0.curruns", &curruns,
 			    size_t);
 			if (gap_start != -1) {
@@ -310,9 +192,9 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque,
 			}
 			malloc_cprintf(write_cb, cbopaque,
 			    "%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64
-			    " %12zu %12zu\n",
-			    run_size, run_size / pagesize, nmalloc, ndalloc,
-			    nrequests, highruns, curruns);
+			    " %12zu\n",
+			    run_size, run_size / page, nmalloc, ndalloc,
+			    nrequests, curruns);
 		}
 	}
 	if (gap_start != -1)
@@ -321,17 +203,17 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque,
 
 static void
 stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
-    unsigned i)
+    unsigned i, bool bins, bool large)
 {
 	unsigned nthreads;
-	size_t pagesize, pactive, pdirty, mapped;
+	size_t page, pactive, pdirty, mapped;
 	uint64_t npurge, nmadvise, purged;
 	size_t small_allocated;
 	uint64_t small_nmalloc, small_ndalloc, small_nrequests;
 	size_t large_allocated;
 	uint64_t large_nmalloc, large_ndalloc, large_nrequests;
 
-	CTL_GET("arenas.pagesize", &pagesize, size_t);
+	CTL_GET("arenas.page", &page, size_t);
 
 	CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned);
 	malloc_cprintf(write_cb, cbopaque,
@@ -369,15 +251,15 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
 	    small_nmalloc + large_nmalloc,
 	    small_ndalloc + large_ndalloc,
 	    small_nrequests + large_nrequests);
-	malloc_cprintf(write_cb, cbopaque, "active:  %12zu\n",
-	    pactive * pagesize );
+	malloc_cprintf(write_cb, cbopaque, "active:  %12zu\n", pactive * page);
 	CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t);
 	malloc_cprintf(write_cb, cbopaque, "mapped:  %12zu\n", mapped);
 
-	stats_arena_bins_print(write_cb, cbopaque, i);
-	stats_arena_lruns_print(write_cb, cbopaque, i);
+	if (bins)
+		stats_arena_bins_print(write_cb, cbopaque, i);
+	if (large)
+		stats_arena_lruns_print(write_cb, cbopaque, i);
 }
-#endif
 
 void
 stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
@@ -386,7 +268,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
 	int err;
 	uint64_t epoch;
 	size_t u64sz;
-	char s[UMAX2S_BUFSIZE];
 	bool general = true;
 	bool merged = true;
 	bool unmerged = true;
@@ -402,8 +283,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
 	 * */
 	epoch = 1;
 	u64sz = sizeof(uint64_t);
-	err = JEMALLOC_P(mallctl)("epoch", &epoch, &u64sz, &epoch,
-	    sizeof(uint64_t));
+	err = je_mallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t));
 	if (err != 0) {
 		if (err == EAGAIN) {
 			malloc_write("<jemalloc>: Memory allocation failure in "
@@ -415,42 +295,33 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
 		abort();
 	}
 
-	if (write_cb == NULL) {
-		/*
-		 * The caller did not provide an alternate write_cb callback
-		 * function, so use the default one.  malloc_write() is an
-		 * inline function, so use malloc_message() directly here.
-		 */
-		write_cb = JEMALLOC_P(malloc_message);
-		cbopaque = NULL;
-	}
-
 	if (opts != NULL) {
 		unsigned i;
 
 		for (i = 0; opts[i] != '\0'; i++) {
 			switch (opts[i]) {
-				case 'g':
-					general = false;
-					break;
-				case 'm':
-					merged = false;
-					break;
-				case 'a':
-					unmerged = false;
-					break;
-				case 'b':
-					bins = false;
-					break;
-				case 'l':
-					large = false;
-					break;
-				default:;
+			case 'g':
+				general = false;
+				break;
+			case 'm':
+				merged = false;
+				break;
+			case 'a':
+				unmerged = false;
+				break;
+			case 'b':
+				bins = false;
+				break;
+			case 'l':
+				large = false;
+				break;
+			default:;
 			}
 		}
 	}
 
-	write_cb(cbopaque, "___ Begin jemalloc statistics ___\n");
+	malloc_cprintf(write_cb, cbopaque,
+	    "___ Begin jemalloc statistics ___\n");
 	if (general) {
 		int err;
 		const char *cpv;
@@ -465,229 +336,126 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
 		cpsz = sizeof(const char *);
 
 		CTL_GET("version", &cpv, const char *);
-		write_cb(cbopaque, "Version: ");
-		write_cb(cbopaque, cpv);
-		write_cb(cbopaque, "\n");
+		malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv);
 		CTL_GET("config.debug", &bv, bool);
-		write_cb(cbopaque, "Assertions ");
-		write_cb(cbopaque, bv ? "enabled" : "disabled");
-		write_cb(cbopaque, "\n");
+		malloc_cprintf(write_cb, cbopaque, "Assertions %s\n",
+		    bv ? "enabled" : "disabled");
 
 #define OPT_WRITE_BOOL(n)						\
-		if ((err = JEMALLOC_P(mallctl)("opt."#n, &bv, &bsz,	\
-		    NULL, 0)) == 0) {					\
-			write_cb(cbopaque, "  opt."#n": ");		\
-			write_cb(cbopaque, bv ? "true" : "false");	\
-			write_cb(cbopaque, "\n");			\
+		if ((err = je_mallctl("opt."#n, &bv, &bsz, NULL, 0))	\
+		    == 0) {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "  opt."#n": %s\n", bv ? "true" : "false");	\
 		}
 #define OPT_WRITE_SIZE_T(n)						\
-		if ((err = JEMALLOC_P(mallctl)("opt."#n, &sv, &ssz,	\
-		    NULL, 0)) == 0) {					\
-			write_cb(cbopaque, "  opt."#n": ");		\
-			write_cb(cbopaque, u2s(sv, 10, s));		\
-			write_cb(cbopaque, "\n");			\
+		if ((err = je_mallctl("opt."#n, &sv, &ssz, NULL, 0))	\
+		    == 0) {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			"  opt."#n": %zu\n", sv);			\
 		}
 #define OPT_WRITE_SSIZE_T(n)						\
-		if ((err = JEMALLOC_P(mallctl)("opt."#n, &ssv, &sssz,	\
-		    NULL, 0)) == 0) {					\
-			if (ssv >= 0) {					\
-				write_cb(cbopaque, "  opt."#n": ");	\
-				write_cb(cbopaque, u2s(ssv, 10, s));	\
-			} else {					\
-				write_cb(cbopaque, "  opt."#n": -");	\
-				write_cb(cbopaque, u2s(-ssv, 10, s));	\
-			}						\
-			write_cb(cbopaque, "\n");			\
+		if ((err = je_mallctl("opt."#n, &ssv, &sssz, NULL, 0))	\
+		    == 0) {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "  opt."#n": %zd\n", ssv);			\
 		}
 #define OPT_WRITE_CHAR_P(n)						\
-		if ((err = JEMALLOC_P(mallctl)("opt."#n, &cpv, &cpsz,	\
-		    NULL, 0)) == 0) {					\
-			write_cb(cbopaque, "  opt."#n": \"");		\
-			write_cb(cbopaque, cpv);			\
-			write_cb(cbopaque, "\"\n");			\
+		if ((err = je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0))	\
+		    == 0) {						\
+			malloc_cprintf(write_cb, cbopaque,		\
+			    "  opt."#n": \"%s\"\n", cpv);		\
 		}
 
-		write_cb(cbopaque, "Run-time option settings:\n");
+		malloc_cprintf(write_cb, cbopaque,
+		    "Run-time option settings:\n");
 		OPT_WRITE_BOOL(abort)
-		OPT_WRITE_SIZE_T(lg_qspace_max)
-		OPT_WRITE_SIZE_T(lg_cspace_max)
 		OPT_WRITE_SIZE_T(lg_chunk)
 		OPT_WRITE_SIZE_T(narenas)
 		OPT_WRITE_SSIZE_T(lg_dirty_mult)
 		OPT_WRITE_BOOL(stats_print)
 		OPT_WRITE_BOOL(junk)
+		OPT_WRITE_SIZE_T(quarantine)
+		OPT_WRITE_BOOL(redzone)
 		OPT_WRITE_BOOL(zero)
-		OPT_WRITE_BOOL(sysv)
+		OPT_WRITE_BOOL(utrace)
+		OPT_WRITE_BOOL(valgrind)
 		OPT_WRITE_BOOL(xmalloc)
 		OPT_WRITE_BOOL(tcache)
-		OPT_WRITE_SSIZE_T(lg_tcache_gc_sweep)
 		OPT_WRITE_SSIZE_T(lg_tcache_max)
 		OPT_WRITE_BOOL(prof)
 		OPT_WRITE_CHAR_P(prof_prefix)
-		OPT_WRITE_SIZE_T(lg_prof_bt_max)
 		OPT_WRITE_BOOL(prof_active)
 		OPT_WRITE_SSIZE_T(lg_prof_sample)
 		OPT_WRITE_BOOL(prof_accum)
-		OPT_WRITE_SSIZE_T(lg_prof_tcmax)
 		OPT_WRITE_SSIZE_T(lg_prof_interval)
 		OPT_WRITE_BOOL(prof_gdump)
+		OPT_WRITE_BOOL(prof_final)
 		OPT_WRITE_BOOL(prof_leak)
-		OPT_WRITE_BOOL(overcommit)
 
 #undef OPT_WRITE_BOOL
 #undef OPT_WRITE_SIZE_T
 #undef OPT_WRITE_SSIZE_T
 #undef OPT_WRITE_CHAR_P
 
-		write_cb(cbopaque, "CPUs: ");
-		write_cb(cbopaque, u2s(ncpus, 10, s));
-		write_cb(cbopaque, "\n");
+		malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus);
 
 		CTL_GET("arenas.narenas", &uv, unsigned);
-		write_cb(cbopaque, "Max arenas: ");
-		write_cb(cbopaque, u2s(uv, 10, s));
-		write_cb(cbopaque, "\n");
+		malloc_cprintf(write_cb, cbopaque, "Max arenas: %u\n", uv);
 
-		write_cb(cbopaque, "Pointer size: ");
-		write_cb(cbopaque, u2s(sizeof(void *), 10, s));
-		write_cb(cbopaque, "\n");
+		malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n",
+		    sizeof(void *));
 
 		CTL_GET("arenas.quantum", &sv, size_t);
-		write_cb(cbopaque, "Quantum size: ");
-		write_cb(cbopaque, u2s(sv, 10, s));
-		write_cb(cbopaque, "\n");
-
-		CTL_GET("arenas.cacheline", &sv, size_t);
-		write_cb(cbopaque, "Cacheline size (assumed): ");
-		write_cb(cbopaque, u2s(sv, 10, s));
-		write_cb(cbopaque, "\n");
-
-		CTL_GET("arenas.subpage", &sv, size_t);
-		write_cb(cbopaque, "Subpage spacing: ");
-		write_cb(cbopaque, u2s(sv, 10, s));
-		write_cb(cbopaque, "\n");
-
-		if ((err = JEMALLOC_P(mallctl)("arenas.tspace_min", &sv, &ssz,
-		    NULL, 0)) == 0) {
-			write_cb(cbopaque, "Tiny 2^n-spaced sizes: [");
-			write_cb(cbopaque, u2s(sv, 10, s));
-			write_cb(cbopaque, "..");
-
-			CTL_GET("arenas.tspace_max", &sv, size_t);
-			write_cb(cbopaque, u2s(sv, 10, s));
-			write_cb(cbopaque, "]\n");
-		}
+		malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv);
 
-		CTL_GET("arenas.qspace_min", &sv, size_t);
-		write_cb(cbopaque, "Quantum-spaced sizes: [");
-		write_cb(cbopaque, u2s(sv, 10, s));
-		write_cb(cbopaque, "..");
-		CTL_GET("arenas.qspace_max", &sv, size_t);
-		write_cb(cbopaque, u2s(sv, 10, s));
-		write_cb(cbopaque, "]\n");
-
-		CTL_GET("arenas.cspace_min", &sv, size_t);
-		write_cb(cbopaque, "Cacheline-spaced sizes: [");
-		write_cb(cbopaque, u2s(sv, 10, s));
-		write_cb(cbopaque, "..");
-		CTL_GET("arenas.cspace_max", &sv, size_t);
-		write_cb(cbopaque, u2s(sv, 10, s));
-		write_cb(cbopaque, "]\n");
-
-		CTL_GET("arenas.sspace_min", &sv, size_t);
-		write_cb(cbopaque, "Subpage-spaced sizes: [");
-		write_cb(cbopaque, u2s(sv, 10, s));
-		write_cb(cbopaque, "..");
-		CTL_GET("arenas.sspace_max", &sv, size_t);
-		write_cb(cbopaque, u2s(sv, 10, s));
-		write_cb(cbopaque, "]\n");
+		CTL_GET("arenas.page", &sv, size_t);
+		malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv);
 
 		CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t);
 		if (ssv >= 0) {
-			write_cb(cbopaque,
-			    "Min active:dirty page ratio per arena: ");
-			write_cb(cbopaque, u2s((1U << ssv), 10, s));
-			write_cb(cbopaque, ":1\n");
+			malloc_cprintf(write_cb, cbopaque,
+			    "Min active:dirty page ratio per arena: %u:1\n",
+			    (1U << ssv));
 		} else {
-			write_cb(cbopaque,
+			malloc_cprintf(write_cb, cbopaque,
 			    "Min active:dirty page ratio per arena: N/A\n");
 		}
-		if ((err = JEMALLOC_P(mallctl)("arenas.tcache_max", &sv,
-		    &ssz, NULL, 0)) == 0) {
-			write_cb(cbopaque,
-			    "Maximum thread-cached size class: ");
-			write_cb(cbopaque, u2s(sv, 10, s));
-			write_cb(cbopaque, "\n");
-		}
-		if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_gc_sweep", &ssv,
-		    &ssz, NULL, 0)) == 0) {
-			size_t tcache_gc_sweep = (1U << ssv);
-			bool tcache_enabled;
-			CTL_GET("opt.tcache", &tcache_enabled, bool);
-			write_cb(cbopaque, "Thread cache GC sweep interval: ");
-			write_cb(cbopaque, tcache_enabled && ssv >= 0 ?
-			    u2s(tcache_gc_sweep, 10, s) : "N/A");
-			write_cb(cbopaque, "\n");
+		if ((err = je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0))
+		    == 0) {
+			malloc_cprintf(write_cb, cbopaque,
+			    "Maximum thread-cached size class: %zu\n", sv);
 		}
-		if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0))
-		   == 0 && bv) {
-			CTL_GET("opt.lg_prof_bt_max", &sv, size_t);
-			write_cb(cbopaque, "Maximum profile backtrace depth: ");
-			write_cb(cbopaque, u2s((1U << sv), 10, s));
-			write_cb(cbopaque, "\n");
-
-			CTL_GET("opt.lg_prof_tcmax", &ssv, ssize_t);
-			write_cb(cbopaque,
-			    "Maximum per thread backtrace cache: ");
-			if (ssv >= 0) {
-				write_cb(cbopaque, u2s((1U << ssv), 10, s));
-				write_cb(cbopaque, " (2^");
-				write_cb(cbopaque, u2s(ssv, 10, s));
-				write_cb(cbopaque, ")\n");
-			} else
-				write_cb(cbopaque, "N/A\n");
-
+		if ((err = je_mallctl("opt.prof", &bv, &bsz, NULL, 0)) == 0 &&
+		    bv) {
 			CTL_GET("opt.lg_prof_sample", &sv, size_t);
-			write_cb(cbopaque, "Average profile sample interval: ");
-			write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s));
-			write_cb(cbopaque, " (2^");
-			write_cb(cbopaque, u2s(sv, 10, s));
-			write_cb(cbopaque, ")\n");
+			malloc_cprintf(write_cb, cbopaque,
+			    "Average profile sample interval: %"PRIu64
+			    " (2^%zu)\n", (((uint64_t)1U) << sv), sv);
 
 			CTL_GET("opt.lg_prof_interval", &ssv, ssize_t);
-			write_cb(cbopaque, "Average profile dump interval: ");
 			if (ssv >= 0) {
-				write_cb(cbopaque, u2s((((uint64_t)1U) << ssv),
-				    10, s));
-				write_cb(cbopaque, " (2^");
-				write_cb(cbopaque, u2s(ssv, 10, s));
-				write_cb(cbopaque, ")\n");
-			} else
-				write_cb(cbopaque, "N/A\n");
+				malloc_cprintf(write_cb, cbopaque,
+				    "Average profile dump interval: %"PRIu64
+				    " (2^%zd)\n",
+				    (((uint64_t)1U) << ssv), ssv);
+			} else {
+				malloc_cprintf(write_cb, cbopaque,
+				    "Average profile dump interval: N/A\n");
+			}
 		}
-		CTL_GET("arenas.chunksize", &sv, size_t);
-		write_cb(cbopaque, "Chunk size: ");
-		write_cb(cbopaque, u2s(sv, 10, s));
 		CTL_GET("opt.lg_chunk", &sv, size_t);
-		write_cb(cbopaque, " (2^");
-		write_cb(cbopaque, u2s(sv, 10, s));
-		write_cb(cbopaque, ")\n");
+		malloc_cprintf(write_cb, cbopaque, "Chunk size: %zu (2^%zu)\n",
+		    (ZU(1) << sv), sv);
 	}
 
-#ifdef JEMALLOC_STATS
-	{
-		int err;
-		size_t sszp, ssz;
+	if (config_stats) {
 		size_t *cactive;
 		size_t allocated, active, mapped;
-		size_t chunks_current, chunks_high, swap_avail;
+		size_t chunks_current, chunks_high;
 		uint64_t chunks_total;
 		size_t huge_allocated;
 		uint64_t huge_nmalloc, huge_ndalloc;
 
-		sszp = sizeof(size_t *);
-		ssz = sizeof(size_t);
-
 		CTL_GET("stats.cactive", &cactive, size_t *);
 		CTL_GET("stats.allocated", &allocated, size_t);
 		CTL_GET("stats.active", &active, size_t);
@@ -702,24 +470,10 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
 		CTL_GET("stats.chunks.total", &chunks_total, uint64_t);
 		CTL_GET("stats.chunks.high", &chunks_high, size_t);
 		CTL_GET("stats.chunks.current", &chunks_current, size_t);
-		if ((err = JEMALLOC_P(mallctl)("swap.avail", &swap_avail, &ssz,
-		    NULL, 0)) == 0) {
-			size_t lg_chunk;
-
-			malloc_cprintf(write_cb, cbopaque, "chunks: nchunks   "
-			    "highchunks    curchunks   swap_avail\n");
-			CTL_GET("opt.lg_chunk", &lg_chunk, size_t);
-			malloc_cprintf(write_cb, cbopaque,
-			    "  %13"PRIu64"%13zu%13zu%13zu\n",
-			    chunks_total, chunks_high, chunks_current,
-			    swap_avail << lg_chunk);
-		} else {
-			malloc_cprintf(write_cb, cbopaque, "chunks: nchunks   "
-			    "highchunks    curchunks\n");
-			malloc_cprintf(write_cb, cbopaque,
-			    "  %13"PRIu64"%13zu%13zu\n",
-			    chunks_total, chunks_high, chunks_current);
-		}
+		malloc_cprintf(write_cb, cbopaque, "chunks: nchunks   "
+		    "highchunks    curchunks\n");
+		malloc_cprintf(write_cb, cbopaque, "  %13"PRIu64"%13zu%13zu\n",
+		    chunks_total, chunks_high, chunks_current);
 
 		/* Print huge stats. */
 		CTL_GET("stats.huge.nmalloc", &huge_nmalloc, uint64_t);
@@ -736,11 +490,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
 
 			CTL_GET("arenas.narenas", &narenas, unsigned);
 			{
-				bool initialized[narenas];
+				VARIABLE_ARRAY(bool, initialized, narenas);
 				size_t isz;
 				unsigned i, ninitialized;
 
-				isz = sizeof(initialized);
+				isz = sizeof(bool) * narenas;
 				xmallctl("arenas.initialized", initialized,
 				    &isz, NULL, 0);
 				for (i = ninitialized = 0; i < narenas; i++) {
@@ -753,7 +507,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
 					malloc_cprintf(write_cb, cbopaque,
 					    "\nMerged arenas stats:\n");
 					stats_arena_print(write_cb, cbopaque,
-					    narenas);
+					    narenas, bins, large);
 				}
 			}
 		}
@@ -765,11 +519,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
 
 			CTL_GET("arenas.narenas", &narenas, unsigned);
 			{
-				bool initialized[narenas];
+				VARIABLE_ARRAY(bool, initialized, narenas);
 				size_t isz;
 				unsigned i;
 
-				isz = sizeof(initialized);
+				isz = sizeof(bool) * narenas;
 				xmallctl("arenas.initialized", initialized,
 				    &isz, NULL, 0);
 
@@ -779,12 +533,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
 						    cbopaque,
 						    "\narenas[%u]:\n", i);
 						stats_arena_print(write_cb,
-						    cbopaque, i);
+						    cbopaque, i, bins, large);
 					}
 				}
 			}
 		}
 	}
-#endif /* #ifdef JEMALLOC_STATS */
-	write_cb(cbopaque, "--- End jemalloc statistics ---\n");
+	malloc_cprintf(write_cb, cbopaque, "--- End jemalloc statistics ---\n");
 }
diff --git a/deps/jemalloc/src/tcache.c b/deps/jemalloc/src/tcache.c
index 31c329e1..60244c45 100644
--- a/deps/jemalloc/src/tcache.c
+++ b/deps/jemalloc/src/tcache.c
@@ -1,70 +1,92 @@
 #define	JEMALLOC_TCACHE_C_
 #include "jemalloc/internal/jemalloc_internal.h"
-#ifdef JEMALLOC_TCACHE
+
 /******************************************************************************/
 /* Data. */
 
+malloc_tsd_data(, tcache, tcache_t *, NULL)
+malloc_tsd_data(, tcache_enabled, tcache_enabled_t, tcache_enabled_default)
+
 bool	opt_tcache = true;
 ssize_t	opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;
-ssize_t	opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
 
 tcache_bin_info_t	*tcache_bin_info;
 static unsigned		stack_nelms; /* Total stack elms per tcache. */
 
-/* Map of thread-specific caches. */
-#ifndef NO_TLS
-__thread tcache_t	*tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
-#endif
+size_t			nhbins;
+size_t			tcache_maxclass;
 
-/*
- * Same contents as tcache, but initialized such that the TSD destructor is
- * called when a thread exits, so that the cache can be cleaned up.
- */
-pthread_key_t		tcache_tsd;
+/******************************************************************************/
 
-size_t				nhbins;
-size_t				tcache_maxclass;
-unsigned			tcache_gc_incr;
+size_t	tcache_salloc(const void *ptr)
+{
 
-/******************************************************************************/
-/* Function prototypes for non-inline static functions. */
+	return (arena_salloc(ptr, false));
+}
 
-static void	tcache_thread_cleanup(void *arg);
+void
+tcache_event_hard(tcache_t *tcache)
+{
+	size_t binind = tcache->next_gc_bin;
+	tcache_bin_t *tbin = &tcache->tbins[binind];
+	tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];
 
-/******************************************************************************/
+	if (tbin->low_water > 0) {
+		/*
+		 * Flush (ceiling) 3/4 of the objects below the low water mark.
+		 */
+		if (binind < NBINS) {
+			tcache_bin_flush_small(tbin, binind, tbin->ncached -
+			    tbin->low_water + (tbin->low_water >> 2), tcache);
+		} else {
+			tcache_bin_flush_large(tbin, binind, tbin->ncached -
+			    tbin->low_water + (tbin->low_water >> 2), tcache);
+		}
+		/*
+		 * Reduce fill count by 2X.  Limit lg_fill_div such that the
+		 * fill count is always at least 1.
+		 */
+		if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) >= 1)
+			tbin->lg_fill_div++;
+	} else if (tbin->low_water < 0) {
+		/*
+		 * Increase fill count by 2X.  Make sure lg_fill_div stays
+		 * greater than 0.
+		 */
+		if (tbin->lg_fill_div > 1)
+			tbin->lg_fill_div--;
+	}
+	tbin->low_water = tbin->ncached;
+
+	tcache->next_gc_bin++;
+	if (tcache->next_gc_bin == nhbins)
+		tcache->next_gc_bin = 0;
+	tcache->ev_cnt = 0;
+}
 
 void *
 tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
 {
 	void *ret;
 
-	arena_tcache_fill_small(tcache->arena, tbin, binind
-#ifdef JEMALLOC_PROF
-	    , tcache->prof_accumbytes
-#endif
-	    );
-#ifdef JEMALLOC_PROF
-	tcache->prof_accumbytes = 0;
-#endif
+	arena_tcache_fill_small(tcache->arena, tbin, binind,
+	    config_prof ? tcache->prof_accumbytes : 0);
+	if (config_prof)
+		tcache->prof_accumbytes = 0;
 	ret = tcache_alloc_easy(tbin);
 
 	return (ret);
 }
 
 void
-tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
-#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
-    , tcache_t *tcache
-#endif
-    )
+tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
+    tcache_t *tcache)
 {
 	void *ptr;
 	unsigned i, nflush, ndeferred;
-#ifdef JEMALLOC_STATS
 	bool merged_stats = false;
-#endif
 
-	assert(binind < nbins);
+	assert(binind < NBINS);
 	assert(rem <= tbin->ncached);
 
 	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
@@ -74,25 +96,21 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
 		arena_t *arena = chunk->arena;
 		arena_bin_t *bin = &arena->bins[binind];
 
-#ifdef JEMALLOC_PROF
-		if (arena == tcache->arena) {
+		if (config_prof && arena == tcache->arena) {
 			malloc_mutex_lock(&arena->lock);
 			arena_prof_accum(arena, tcache->prof_accumbytes);
 			malloc_mutex_unlock(&arena->lock);
 			tcache->prof_accumbytes = 0;
 		}
-#endif
 
 		malloc_mutex_lock(&bin->lock);
-#ifdef JEMALLOC_STATS
-		if (arena == tcache->arena) {
+		if (config_stats && arena == tcache->arena) {
 			assert(merged_stats == false);
 			merged_stats = true;
 			bin->stats.nflushes++;
 			bin->stats.nrequests += tbin->tstats.nrequests;
 			tbin->tstats.nrequests = 0;
 		}
-#endif
 		ndeferred = 0;
 		for (i = 0; i < nflush; i++) {
 			ptr = tbin->avail[i];
@@ -100,10 +118,15 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
 			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 			if (chunk->arena == arena) {
 				size_t pageind = ((uintptr_t)ptr -
-				    (uintptr_t)chunk) >> PAGE_SHIFT;
+				    (uintptr_t)chunk) >> LG_PAGE;
 				arena_chunk_map_t *mapelm =
-				    &chunk->map[pageind-map_bias];
-				arena_dalloc_bin(arena, chunk, ptr, mapelm);
+				    arena_mapp_get(chunk, pageind);
+				if (config_fill && opt_junk) {
+					arena_alloc_junk_small(ptr,
+					    &arena_bin_info[binind], true);
+				}
+				arena_dalloc_bin_locked(arena, chunk, ptr,
+				    mapelm);
 			} else {
 				/*
 				 * This object was allocated via a different
@@ -117,8 +140,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
 		}
 		malloc_mutex_unlock(&bin->lock);
 	}
-#ifdef JEMALLOC_STATS
-	if (merged_stats == false) {
+	if (config_stats && merged_stats == false) {
 		/*
 		 * The flush loop didn't happen to flush to this thread's
 		 * arena, so the stats didn't get merged.  Manually do so now.
@@ -130,7 +152,6 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
 		tbin->tstats.nrequests = 0;
 		malloc_mutex_unlock(&bin->lock);
 	}
-#endif
 
 	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
 	    rem * sizeof(void *));
@@ -140,17 +161,12 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
 }
 
 void
-tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
-#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
-    , tcache_t *tcache
-#endif
-    )
+tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
+    tcache_t *tcache)
 {
 	void *ptr;
 	unsigned i, nflush, ndeferred;
-#ifdef JEMALLOC_STATS
 	bool merged_stats = false;
-#endif
 
 	assert(binind < nhbins);
 	assert(rem <= tbin->ncached);
@@ -162,30 +178,28 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
 		arena_t *arena = chunk->arena;
 
 		malloc_mutex_lock(&arena->lock);
-#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
-		if (arena == tcache->arena) {
-#endif
-#ifdef JEMALLOC_PROF
-			arena_prof_accum(arena, tcache->prof_accumbytes);
-			tcache->prof_accumbytes = 0;
-#endif
-#ifdef JEMALLOC_STATS
-			merged_stats = true;
-			arena->stats.nrequests_large += tbin->tstats.nrequests;
-			arena->stats.lstats[binind - nbins].nrequests +=
-			    tbin->tstats.nrequests;
-			tbin->tstats.nrequests = 0;
-#endif
-#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+		if ((config_prof || config_stats) && arena == tcache->arena) {
+			if (config_prof) {
+				arena_prof_accum(arena,
+				    tcache->prof_accumbytes);
+				tcache->prof_accumbytes = 0;
+			}
+			if (config_stats) {
+				merged_stats = true;
+				arena->stats.nrequests_large +=
+				    tbin->tstats.nrequests;
+				arena->stats.lstats[binind - NBINS].nrequests +=
+				    tbin->tstats.nrequests;
+				tbin->tstats.nrequests = 0;
+			}
 		}
-#endif
 		ndeferred = 0;
 		for (i = 0; i < nflush; i++) {
 			ptr = tbin->avail[i];
 			assert(ptr != NULL);
 			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 			if (chunk->arena == arena)
-				arena_dalloc_large(arena, chunk, ptr);
+				arena_dalloc_large_locked(arena, chunk, ptr);
 			else {
 				/*
 				 * This object was allocated via a different
@@ -199,8 +213,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
 		}
 		malloc_mutex_unlock(&arena->lock);
 	}
-#ifdef JEMALLOC_STATS
-	if (merged_stats == false) {
+	if (config_stats && merged_stats == false) {
 		/*
 		 * The flush loop didn't happen to flush to this thread's
 		 * arena, so the stats didn't get merged.  Manually do so now.
@@ -208,12 +221,11 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
 		arena_t *arena = tcache->arena;
 		malloc_mutex_lock(&arena->lock);
 		arena->stats.nrequests_large += tbin->tstats.nrequests;
-		arena->stats.lstats[binind - nbins].nrequests +=
+		arena->stats.lstats[binind - NBINS].nrequests +=
 		    tbin->tstats.nrequests;
 		tbin->tstats.nrequests = 0;
 		malloc_mutex_unlock(&arena->lock);
 	}
-#endif
 
 	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
 	    rem * sizeof(void *));
@@ -222,6 +234,33 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
 		tbin->low_water = tbin->ncached;
 }
 
+void
+tcache_arena_associate(tcache_t *tcache, arena_t *arena)
+{
+
+	if (config_stats) {
+		/* Link into list of extant tcaches. */
+		malloc_mutex_lock(&arena->lock);
+		ql_elm_new(tcache, link);
+		ql_tail_insert(&arena->tcache_ql, tcache, link);
+		malloc_mutex_unlock(&arena->lock);
+	}
+	tcache->arena = arena;
+}
+
+void
+tcache_arena_dissociate(tcache_t *tcache)
+{
+
+	if (config_stats) {
+		/* Unlink from list of extant tcaches. */
+		malloc_mutex_lock(&tcache->arena->lock);
+		ql_remove(&tcache->arena->tcache_ql, tcache, link);
+		malloc_mutex_unlock(&tcache->arena->lock);
+		tcache_stats_merge(tcache, tcache->arena);
+	}
+}
+
 tcache_t *
 tcache_create(arena_t *arena)
 {
@@ -244,7 +283,7 @@ tcache_create(arena_t *arena)
 	 */
 	size = (size + CACHELINE_MASK) & (-CACHELINE);
 
-	if (size <= small_maxclass)
+	if (size <= SMALL_MAXCLASS)
 		tcache = (tcache_t *)arena_malloc_small(arena, size, true);
 	else if (size <= tcache_maxclass)
 		tcache = (tcache_t *)arena_malloc_large(arena, size, true);
@@ -254,15 +293,8 @@ tcache_create(arena_t *arena)
 	if (tcache == NULL)
 		return (NULL);
 
-#ifdef JEMALLOC_STATS
-	/* Link into list of extant tcaches. */
-	malloc_mutex_lock(&arena->lock);
-	ql_elm_new(tcache, link);
-	ql_tail_insert(&arena->tcache_ql, tcache, link);
-	malloc_mutex_unlock(&arena->lock);
-#endif
+	tcache_arena_associate(tcache, arena);
 
-	tcache->arena = arena;
 	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
 	for (i = 0; i < nhbins; i++) {
 		tcache->tbins[i].lg_fill_div = 1;
@@ -271,7 +303,7 @@ tcache_create(arena_t *arena)
 		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
 	}
 
-	TCACHE_SET(tcache);
+	tcache_tsd_set(&tcache);
 
 	return (tcache);
 }
@@ -282,121 +314,96 @@ tcache_destroy(tcache_t *tcache)
 	unsigned i;
 	size_t tcache_size;
 
-#ifdef JEMALLOC_STATS
-	/* Unlink from list of extant tcaches. */
-	malloc_mutex_lock(&tcache->arena->lock);
-	ql_remove(&tcache->arena->tcache_ql, tcache, link);
-	malloc_mutex_unlock(&tcache->arena->lock);
-	tcache_stats_merge(tcache, tcache->arena);
-#endif
+	tcache_arena_dissociate(tcache);
 
-	for (i = 0; i < nbins; i++) {
+	for (i = 0; i < NBINS; i++) {
 		tcache_bin_t *tbin = &tcache->tbins[i];
-		tcache_bin_flush_small(tbin, i, 0
-#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
-		    , tcache
-#endif
-		    );
-
-#ifdef JEMALLOC_STATS
-		if (tbin->tstats.nrequests != 0) {
+		tcache_bin_flush_small(tbin, i, 0, tcache);
+
+		if (config_stats && tbin->tstats.nrequests != 0) {
 			arena_t *arena = tcache->arena;
 			arena_bin_t *bin = &arena->bins[i];
 			malloc_mutex_lock(&bin->lock);
 			bin->stats.nrequests += tbin->tstats.nrequests;
 			malloc_mutex_unlock(&bin->lock);
 		}
-#endif
 	}
 
 	for (; i < nhbins; i++) {
 		tcache_bin_t *tbin = &tcache->tbins[i];
-		tcache_bin_flush_large(tbin, i, 0
-#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
-		    , tcache
-#endif
-		    );
-
-#ifdef JEMALLOC_STATS
-		if (tbin->tstats.nrequests != 0) {
+		tcache_bin_flush_large(tbin, i, 0, tcache);
+
+		if (config_stats && tbin->tstats.nrequests != 0) {
 			arena_t *arena = tcache->arena;
 			malloc_mutex_lock(&arena->lock);
 			arena->stats.nrequests_large += tbin->tstats.nrequests;
-			arena->stats.lstats[i - nbins].nrequests +=
+			arena->stats.lstats[i - NBINS].nrequests +=
 			    tbin->tstats.nrequests;
 			malloc_mutex_unlock(&arena->lock);
 		}
-#endif
 	}
 
-#ifdef JEMALLOC_PROF
-	if (tcache->prof_accumbytes > 0) {
+	if (config_prof && tcache->prof_accumbytes > 0) {
 		malloc_mutex_lock(&tcache->arena->lock);
 		arena_prof_accum(tcache->arena, tcache->prof_accumbytes);
 		malloc_mutex_unlock(&tcache->arena->lock);
 	}
-#endif
 
-	tcache_size = arena_salloc(tcache);
-	if (tcache_size <= small_maxclass) {
+	tcache_size = arena_salloc(tcache, false);
+	if (tcache_size <= SMALL_MAXCLASS) {
 		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
 		arena_t *arena = chunk->arena;
 		size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
-		    PAGE_SHIFT;
-		arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias];
-		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
-		    (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) <<
-		    PAGE_SHIFT));
-		arena_bin_t *bin = run->bin;
+		    LG_PAGE;
+		arena_chunk_map_t *mapelm = arena_mapp_get(chunk, pageind);
 
-		malloc_mutex_lock(&bin->lock);
-		arena_dalloc_bin(arena, chunk, tcache, mapelm);
-		malloc_mutex_unlock(&bin->lock);
+		arena_dalloc_bin(arena, chunk, tcache, pageind, mapelm);
 	} else if (tcache_size <= tcache_maxclass) {
 		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
 		arena_t *arena = chunk->arena;
 
-		malloc_mutex_lock(&arena->lock);
 		arena_dalloc_large(arena, chunk, tcache);
-		malloc_mutex_unlock(&arena->lock);
 	} else
 		idalloc(tcache);
 }
 
-static void
+void
 tcache_thread_cleanup(void *arg)
 {
-	tcache_t *tcache = (tcache_t *)arg;
+	tcache_t *tcache = *(tcache_t **)arg;
 
-	if (tcache == (void *)(uintptr_t)1) {
+	if (tcache == TCACHE_STATE_DISABLED) {
+		/* Do nothing. */
+	} else if (tcache == TCACHE_STATE_REINCARNATED) {
 		/*
-		 * The previous time this destructor was called, we set the key
-		 * to 1 so that other destructors wouldn't cause re-creation of
-		 * the tcache.  This time, do nothing, so that the destructor
-		 * will not be called again.
+		 * Another destructor called an allocator function after this
+		 * destructor was called.  Reset tcache to
+		 * TCACHE_STATE_PURGATORY in order to receive another callback.
 		 */
-	} else if (tcache == (void *)(uintptr_t)2) {
+		tcache = TCACHE_STATE_PURGATORY;
+		tcache_tsd_set(&tcache);
+	} else if (tcache == TCACHE_STATE_PURGATORY) {
 		/*
-		 * Another destructor called an allocator function after this
-		 * destructor was called.  Reset tcache to 1 in order to
-		 * receive another callback.
+		 * The previous time this destructor was called, we set the key
+		 * to TCACHE_STATE_PURGATORY so that other destructors wouldn't
+		 * cause re-creation of the tcache.  This time, do nothing, so
+		 * that the destructor will not be called again.
 		 */
-		TCACHE_SET((uintptr_t)1);
 	} else if (tcache != NULL) {
-		assert(tcache != (void *)(uintptr_t)1);
+		assert(tcache != TCACHE_STATE_PURGATORY);
 		tcache_destroy(tcache);
-		TCACHE_SET((uintptr_t)1);
+		tcache = TCACHE_STATE_PURGATORY;
+		tcache_tsd_set(&tcache);
 	}
 }
 
-#ifdef JEMALLOC_STATS
 void
 tcache_stats_merge(tcache_t *tcache, arena_t *arena)
 {
 	unsigned i;
 
 	/* Merge and reset tcache stats. */
-	for (i = 0; i < nbins; i++) {
+	for (i = 0; i < NBINS; i++) {
 		arena_bin_t *bin = &arena->bins[i];
 		tcache_bin_t *tbin = &tcache->tbins[i];
 		malloc_mutex_lock(&bin->lock);
@@ -406,75 +413,62 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena)
 	}
 
 	for (; i < nhbins; i++) {
-		malloc_large_stats_t *lstats = &arena->stats.lstats[i - nbins];
+		malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS];
 		tcache_bin_t *tbin = &tcache->tbins[i];
 		arena->stats.nrequests_large += tbin->tstats.nrequests;
 		lstats->nrequests += tbin->tstats.nrequests;
 		tbin->tstats.nrequests = 0;
 	}
 }
-#endif
 
 bool
-tcache_boot(void)
+tcache_boot0(void)
 {
+	unsigned i;
 
-	if (opt_tcache) {
-		unsigned i;
-
-		/*
-		 * If necessary, clamp opt_lg_tcache_max, now that
-		 * small_maxclass and arena_maxclass are known.
-		 */
-		if (opt_lg_tcache_max < 0 || (1U <<
-		    opt_lg_tcache_max) < small_maxclass)
-			tcache_maxclass = small_maxclass;
-		else if ((1U << opt_lg_tcache_max) > arena_maxclass)
-			tcache_maxclass = arena_maxclass;
-		else
-			tcache_maxclass = (1U << opt_lg_tcache_max);
-
-		nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT);
-
-		/* Initialize tcache_bin_info. */
-		tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
-		    sizeof(tcache_bin_info_t));
-		if (tcache_bin_info == NULL)
-			return (true);
-		stack_nelms = 0;
-		for (i = 0; i < nbins; i++) {
-			if ((arena_bin_info[i].nregs << 1) <=
-			    TCACHE_NSLOTS_SMALL_MAX) {
-				tcache_bin_info[i].ncached_max =
-				    (arena_bin_info[i].nregs << 1);
-			} else {
-				tcache_bin_info[i].ncached_max =
-				    TCACHE_NSLOTS_SMALL_MAX;
-			}
-			stack_nelms += tcache_bin_info[i].ncached_max;
-		}
-		for (; i < nhbins; i++) {
-			tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
-			stack_nelms += tcache_bin_info[i].ncached_max;
-		}
-
-		/* Compute incremental GC event threshold. */
-		if (opt_lg_tcache_gc_sweep >= 0) {
-			tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
-			    nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins ==
-			    0) ? 0 : 1);
-		} else
-			tcache_gc_incr = 0;
-
-		if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) !=
-		    0) {
-			malloc_write(
-			    "<jemalloc>: Error in pthread_key_create()\n");
-			abort();
+	/*
+	 * If necessary, clamp opt_lg_tcache_max, now that arena_maxclass is
+	 * known.
+	 */
+	if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS)
+		tcache_maxclass = SMALL_MAXCLASS;
+	else if ((1U << opt_lg_tcache_max) > arena_maxclass)
+		tcache_maxclass = arena_maxclass;
+	else
+		tcache_maxclass = (1U << opt_lg_tcache_max);
+
+	nhbins = NBINS + (tcache_maxclass >> LG_PAGE);
+
+	/* Initialize tcache_bin_info. */
+	tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
+	    sizeof(tcache_bin_info_t));
+	if (tcache_bin_info == NULL)
+		return (true);
+	stack_nelms = 0;
+	for (i = 0; i < NBINS; i++) {
+		if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
+			tcache_bin_info[i].ncached_max =
+			    (arena_bin_info[i].nregs << 1);
+		} else {
+			tcache_bin_info[i].ncached_max =
+			    TCACHE_NSLOTS_SMALL_MAX;
 		}
+		stack_nelms += tcache_bin_info[i].ncached_max;
+	}
+	for (; i < nhbins; i++) {
+		tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
+		stack_nelms += tcache_bin_info[i].ncached_max;
 	}
 
 	return (false);
 }
-/******************************************************************************/
-#endif /* JEMALLOC_TCACHE */
+
+bool
+tcache_boot1(void)
+{
+
+	if (tcache_tsd_boot() || tcache_enabled_tsd_boot())
+		return (true);
+
+	return (false);
+}
diff --git a/deps/jemalloc/src/tsd.c b/deps/jemalloc/src/tsd.c
new file mode 100644
index 00000000..961a5463
--- /dev/null
+++ b/deps/jemalloc/src/tsd.c
@@ -0,0 +1,107 @@
+#define	JEMALLOC_TSD_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Data. */
+
+static unsigned ncleanups;
+static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];
+
+/******************************************************************************/
+
+void *
+malloc_tsd_malloc(size_t size)
+{
+
+	/* Avoid choose_arena() in order to dodge bootstrapping issues. */
+	return (arena_malloc(arenas[0], size, false, false));
+}
+
+void
+malloc_tsd_dalloc(void *wrapper)
+{
+
+	idalloc(wrapper);
+}
+
+void
+malloc_tsd_no_cleanup(void *arg)
+{
+
+	not_reached();
+}
+
+#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
+#ifndef _WIN32
+JEMALLOC_EXPORT
+#endif
+void
+_malloc_thread_cleanup(void)
+{
+	bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
+	unsigned i;
+
+	for (i = 0; i < ncleanups; i++)
+		pending[i] = true;
+
+	do {
+		again = false;
+		for (i = 0; i < ncleanups; i++) {
+			if (pending[i]) {
+				pending[i] = cleanups[i]();
+				if (pending[i])
+					again = true;
+			}
+		}
+	} while (again);
+}
+#endif
+
+void
+malloc_tsd_cleanup_register(bool (*f)(void))
+{
+
+	assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
+	cleanups[ncleanups] = f;
+	ncleanups++;
+}
+
+void
+malloc_tsd_boot(void)
+{
+
+	ncleanups = 0;
+}
+
+#ifdef _WIN32
+static BOOL WINAPI
+_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
+{
+
+	switch (fdwReason) {
+#ifdef JEMALLOC_LAZY_LOCK
+	case DLL_THREAD_ATTACH:
+		isthreaded = true;
+		break;
+#endif
+	case DLL_THREAD_DETACH:
+		_malloc_thread_cleanup();
+		break;
+	default:
+		break;
+	}
+	return (true);
+}
+
+#ifdef _MSC_VER
+#  ifdef _M_IX86
+#    pragma comment(linker, "/INCLUDE:__tls_used")
+#  else
+#    pragma comment(linker, "/INCLUDE:_tls_used")
+#  endif
+#  pragma section(".CRT$XLY",long,read)
+#endif
+JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
+static BOOL	(WINAPI *const tls_callback)(HINSTANCE hinstDLL,
+    DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
+#endif
diff --git a/deps/jemalloc/src/util.c b/deps/jemalloc/src/util.c
new file mode 100644
index 00000000..9b73c3ec
--- /dev/null
+++ b/deps/jemalloc/src/util.c
@@ -0,0 +1,646 @@
+#define	assert(e) do {							\
+	if (config_debug && !(e)) {					\
+		malloc_write("<jemalloc>: Failed assertion\n");		\
+		abort();						\
+	}								\
+} while (0)
+
+#define	not_reached() do {						\
+	if (config_debug) {						\
+		malloc_write("<jemalloc>: Unreachable code reached\n");	\
+		abort();						\
+	}								\
+} while (0)
+
+#define	not_implemented() do {						\
+	if (config_debug) {						\
+		malloc_write("<jemalloc>: Not implemented\n");		\
+		abort();						\
+	}								\
+} while (0)
+
+#define	JEMALLOC_UTIL_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static void	wrtmessage(void *cbopaque, const char *s);
+#define	U2S_BUFSIZE	((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1)
+static char	*u2s(uintmax_t x, unsigned base, bool uppercase, char *s,
+    size_t *slen_p);
+#define	D2S_BUFSIZE	(1 + U2S_BUFSIZE)
+static char	*d2s(intmax_t x, char sign, char *s, size_t *slen_p);
+#define	O2S_BUFSIZE	(1 + U2S_BUFSIZE)
+static char	*o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p);
+#define	X2S_BUFSIZE	(2 + U2S_BUFSIZE)
+static char	*x2s(uintmax_t x, bool alt_form, bool uppercase, char *s,
+    size_t *slen_p);
+
+/******************************************************************************/
+
+/* malloc_message() setup. */
+static void
+wrtmessage(void *cbopaque, const char *s)
+{
+
+#ifdef SYS_write
+	/*
+	 * Use syscall(2) rather than write(2) when possible in order to avoid
+	 * the possibility of memory allocation within libc.  This is necessary
+	 * on FreeBSD; most operating systems do not have this problem though.
+	 */
+	UNUSED int result = syscall(SYS_write, STDERR_FILENO, s, strlen(s));
+#else
+	UNUSED int result = write(STDERR_FILENO, s, strlen(s));
+#endif
+}
+
+JEMALLOC_EXPORT void	(*je_malloc_message)(void *, const char *s);
+
+/*
+ * Wrapper around malloc_message() that avoids the need for
+ * je_malloc_message(...) throughout the code.
+ */
+void
+malloc_write(const char *s)
+{
+
+	if (je_malloc_message != NULL)
+		je_malloc_message(NULL, s);
+	else
+		wrtmessage(NULL, s);
+}
+
+/*
+ * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so
+ * provide a wrapper.
+ */
+int
+buferror(char *buf, size_t buflen)
+{
+
+#ifdef _WIN32
+	FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, GetLastError(), 0,
+	    (LPSTR)buf, buflen, NULL);
+	return (0);
+#elif defined(_GNU_SOURCE)
+	char *b = strerror_r(errno, buf, buflen);
+	if (b != buf) {
+		strncpy(buf, b, buflen);
+		buf[buflen-1] = '\0';
+	}
+	return (0);
+#else
+	return (strerror_r(errno, buf, buflen));
+#endif
+}
+
+/* Parse an unsigned integer from nptr in the style of strtoumax(3). */
+uintmax_t
+malloc_strtoumax(const char *nptr, char **endptr, int base)
+{
+	uintmax_t ret, digit;
+	int b;
+	bool neg;
+	const char *p, *ns;
+
+	if (base < 0 || base == 1 || base > 36) {
+		set_errno(EINVAL);
+		return (UINTMAX_MAX);
+	}
+	b = base;
+
+	/* Swallow leading whitespace and get sign, if any. */
+	neg = false;
+	p = nptr;
+	while (true) {
+		switch (*p) {
+		case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
+			p++;
+			break;
+		case '-':
+			neg = true;
+			/* Fall through. */
+		case '+':
+			p++;
+			/* Fall through. */
+		default:
+			goto label_prefix;
+		}
+	}
+
+	/* Get prefix, if any. */
+	label_prefix:
+	/*
+	 * Note where the first non-whitespace/sign character is so that it is
+	 * possible to tell whether any digits are consumed (e.g., "  0" vs.
+	 * "  -x").
+	 */
+	ns = p;
+	if (*p == '0') {
+		switch (p[1]) {
+		case '0': case '1': case '2': case '3': case '4': case '5':
+		case '6': case '7':
+			if (b == 0)
+				b = 8;
+			if (b == 8)
+				p++;
+			break;
+		case 'x': case 'X':
+			switch (p[2]) {
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7': case '8': case '9':
+			case 'A': case 'B': case 'C': case 'D': case 'E':
+			case 'F':
+			case 'a': case 'b': case 'c': case 'd': case 'e':
+			case 'f':
+				if (b == 0)
+					b = 16;
+				if (b == 16)
+					p += 2;
+				break;
+			default:
+				break;
+			}
+			break;
+		default:
+			break;
+		}
+	}
+	if (b == 0)
+		b = 10;
+
+	/* Convert. */
+	ret = 0;
+	while ((*p >= '0' && *p <= '9' && (digit = *p - '0') < b)
+	    || (*p >= 'A' && *p <= 'Z' && (digit = 10 + *p - 'A') < b)
+	    || (*p >= 'a' && *p <= 'z' && (digit = 10 + *p - 'a') < b)) {
+		/* Check up front; a wrapped product may still exceed ret. */
+		if (ret > (UINTMAX_MAX - digit) / (uintmax_t)b) {
+			/* Overflow. */
+			set_errno(ERANGE);
+			return (UINTMAX_MAX);
+		}
+		ret = ret * (uintmax_t)b + digit;
+		p++;
+	}
+	if (neg)
+		ret = -ret;
+
+	if (endptr != NULL) {
+		if (p == ns) {
+			/* No characters were converted. */
+			*endptr = (char *)nptr;
+		} else
+			*endptr = (char *)p;
+	}
+
+	return (ret);
+}
+
+static char *
+u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p)
+{
+	unsigned i;
+
+	i = U2S_BUFSIZE - 1;
+	s[i] = '\0';
+	switch (base) {
+	case 10:
+		do {
+			i--;
+			s[i] = "0123456789"[x % (uint64_t)10];
+			x /= (uint64_t)10;
+		} while (x > 0);
+		break;
+	case 16: {
+		const char *digits = (uppercase)
+		    ? "0123456789ABCDEF"
+		    : "0123456789abcdef";
+
+		do {
+			i--;
+			s[i] = digits[x & 0xf];
+			x >>= 4;
+		} while (x > 0);
+		break;
+	} default: {
+		const char *digits = (uppercase)
+		    ? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+		    : "0123456789abcdefghijklmnopqrstuvwxyz";
+
+		assert(base >= 2 && base <= 36);
+		do {
+			i--;
+			s[i] = digits[x % (uint64_t)base];
+			x /= (uint64_t)base;
+		} while (x > 0);
+	}}
+
+	*slen_p = U2S_BUFSIZE - 1 - i;
+	return (&s[i]);
+}
+
+static char *
+d2s(intmax_t x, char sign, char *s, size_t *slen_p)
+{
+	bool neg;
+
+	/* Negate as unsigned; -x overflows (UB) when x == INTMAX_MIN. */
+	neg = (x < 0);
+	s = u2s(neg ? -(uintmax_t)x : (uintmax_t)x, 10, false, s, slen_p);
+	if (neg)
+		sign = '-';
+	switch (sign) {
+	case '-':
+		if (neg == false)
+			break;
+		/* Fall through. */
+	case ' ':
+	case '+':
+		s--;
+		(*slen_p)++;
+		*s = sign;
+		break;
+	default: not_reached();
+	}
+	return (s);
+}
+
+static char *
+o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p)
+{
+
+	s = u2s(x, 8, false, s, slen_p);
+	if (alt_form && *s != '0') {
+		s--;
+		(*slen_p)++;
+		*s = '0';
+	}
+	return (s);
+}
+
+static char *
+x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p)
+{
+
+	s = u2s(x, 16, uppercase, s, slen_p);
+	if (alt_form) {
+		s -= 2;
+		(*slen_p) += 2;
+		memcpy(s, uppercase ? "0X" : "0x", 2);
+	}
+	return (s);
+}
+
+int
+malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap)
+{
+	size_t i;	/* Untruncated output length so far. */
+	const char *f;
+
+#define	APPEND_C(c) do {						\
+	if (i < size)							\
+		str[i] = (c);						\
+	i++;								\
+} while (0)
+#define	APPEND_S(s, slen) do {						\
+	if (i < size) {							\
+		size_t cpylen = (slen <= size - i) ? slen : size - i;	\
+		memcpy(&str[i], s, cpylen);				\
+	}								\
+	i += slen;							\
+} while (0)
+#define	APPEND_PADDED_S(s, slen, width, left_justify) do {		\
+	/* Left padding. */						\
+	size_t pad_len = (width == -1) ? 0 : ((slen < (size_t)width) ?	\
+	    (size_t)width - slen : 0);					\
+	if (left_justify == false && pad_len != 0) {			\
+		size_t j;						\
+		for (j = 0; j < pad_len; j++)				\
+			APPEND_C(' ');					\
+	}								\
+	/* Value. */							\
+	APPEND_S(s, slen);						\
+	/* Right padding. */						\
+	if (left_justify && pad_len != 0) {				\
+		size_t j;						\
+		for (j = 0; j < pad_len; j++)				\
+			APPEND_C(' ');					\
+	}								\
+} while (0)
+#define GET_ARG_NUMERIC(val, len) do {					\
+	switch (len) {							\
+	case '?':							\
+		val = va_arg(ap, int);					\
+		break;							\
+	case '?' | 0x80:						\
+		val = va_arg(ap, unsigned int);				\
+		break;							\
+	case 'l':							\
+		val = va_arg(ap, long);					\
+		break;							\
+	case 'l' | 0x80:						\
+		val = va_arg(ap, unsigned long);			\
+		break;							\
+	case 'q':							\
+		val = va_arg(ap, long long);				\
+		break;							\
+	case 'q' | 0x80:						\
+		val = va_arg(ap, unsigned long long);			\
+		break;							\
+	case 'j':							\
+		val = va_arg(ap, intmax_t);				\
+		break;							\
+	case 'j' | 0x80:						\
+		val = va_arg(ap, uintmax_t);				\
+		break;							\
+	case 't':							\
+		val = va_arg(ap, ptrdiff_t);				\
+		break;							\
+	case 'z':							\
+		val = va_arg(ap, ssize_t);				\
+		break;							\
+	case 'z' | 0x80:						\
+		val = va_arg(ap, size_t);				\
+		break;							\
+	case 'p': /* Synthetic; used for %p. */				\
+		val = va_arg(ap, uintptr_t);				\
+		break;							\
+	default: not_reached();						\
+	}								\
+} while (0)
+
+	i = 0;
+	f = format;
+	while (true) {
+		switch (*f) {
+		case '\0': goto label_out;
+		case '%': {
+			bool alt_form = false;
+			bool zero_pad = false;
+			bool left_justify = false;
+			bool plus_space = false;
+			bool plus_plus = false;
+			int prec = -1;
+			int width = -1;
+			unsigned char len = '?';
+
+			f++;
+			if (*f == '%') {
+				/* %% */
+				APPEND_C(*f);
+				break;
+			}
+			/* Flags. */
+			while (true) {
+				switch (*f) {
+				case '#':
+					assert(alt_form == false);
+					alt_form = true;
+					break;
+				case '0':
+					assert(zero_pad == false);
+					zero_pad = true;
+					break;
+				case '-':
+					assert(left_justify == false);
+					left_justify = true;
+					break;
+				case ' ':
+					assert(plus_space == false);
+					plus_space = true;
+					break;
+				case '+':
+					assert(plus_plus == false);
+					plus_plus = true;
+					break;
+				default: goto label_width;
+				}
+				f++;
+			}
+			/* Width. */
+			label_width:
+			switch (*f) {
+			case '*':
+				width = va_arg(ap, int);
+				f++;
+				break;
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7': case '8': case '9': {
+				uintmax_t uwidth;
+				set_errno(0);
+				uwidth = malloc_strtoumax(f, (char **)&f, 10);
+				assert(uwidth != UINTMAX_MAX || get_errno() !=
+				    ERANGE);
+				width = (int)uwidth;
+				if (*f == '.') {
+					f++;
+					goto label_precision;
+				} else
+					goto label_length;
+			} case '.':
+				f++;
+				goto label_precision;
+			default: goto label_length;
+			}
+			/* Precision. */
+			label_precision:
+			switch (*f) {
+			case '*':
+				prec = va_arg(ap, int);
+				f++;
+				break;
+			case '0': case '1': case '2': case '3': case '4':
+			case '5': case '6': case '7': case '8': case '9': {
+				uintmax_t uprec;
+				set_errno(0);
+				uprec = malloc_strtoumax(f, (char **)&f, 10);
+				assert(uprec != UINTMAX_MAX || get_errno() !=
+				    ERANGE);
+				prec = (int)uprec;
+				break;
+			}
+			default: break;
+			}
+			/* Length. */
+			label_length:
+			switch (*f) {
+			case 'l':
+				f++;
+				if (*f == 'l') {
+					len = 'q';
+					f++;
+				} else
+					len = 'l';
+				break;
+			case 'j':
+				len = 'j';
+				f++;
+				break;
+			case 't':
+				len = 't';
+				f++;
+				break;
+			case 'z':
+				len = 'z';
+				f++;
+				break;
+			default: break;
+			}
+			/* Conversion specifier. */
+			switch (*f) {
+				char *s;
+				size_t slen;
+			case 'd': case 'i': {
+				intmax_t val JEMALLOC_CC_SILENCE_INIT(0);
+				char buf[D2S_BUFSIZE];
+
+				GET_ARG_NUMERIC(val, len);
+				s = d2s(val, (plus_plus ? '+' : (plus_space ?
+				    ' ' : '-')), buf, &slen);
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			} case 'o': {
+				uintmax_t val JEMALLOC_CC_SILENCE_INIT(0);
+				char buf[O2S_BUFSIZE];
+
+				GET_ARG_NUMERIC(val, len | 0x80);
+				s = o2s(val, alt_form, buf, &slen);
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			} case 'u': {
+				uintmax_t val JEMALLOC_CC_SILENCE_INIT(0);
+				char buf[U2S_BUFSIZE];
+
+				GET_ARG_NUMERIC(val, len | 0x80);
+				s = u2s(val, 10, false, buf, &slen);
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			} case 'x': case 'X': {
+				uintmax_t val JEMALLOC_CC_SILENCE_INIT(0);
+				char buf[X2S_BUFSIZE];
+
+				GET_ARG_NUMERIC(val, len | 0x80);
+				s = x2s(val, alt_form, *f == 'X', buf, &slen);
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			} case 'c': {
+				unsigned char val;
+				char buf[2];
+
+				assert(len == '?' || len == 'l');
+				assert_not_implemented(len != 'l');
+				val = va_arg(ap, int);
+				buf[0] = val;
+				buf[1] = '\0';
+				APPEND_PADDED_S(buf, 1, width, left_justify);
+				f++;
+				break;
+			} case 's':
+				assert(len == '?' || len == 'l');
+				assert_not_implemented(len != 'l');
+				s = va_arg(ap, char *);
+				slen = (prec < 0) ? strlen(s) : (size_t)prec;
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			case 'p': {
+				uintmax_t val;
+				char buf[X2S_BUFSIZE];
+
+				GET_ARG_NUMERIC(val, 'p');
+				s = x2s(val, true, false, buf, &slen);
+				APPEND_PADDED_S(s, slen, width, left_justify);
+				f++;
+				break;
+			}
+			default: not_implemented();
+			}
+			break;
+		} default: {
+			APPEND_C(*f);
+			f++;
+			break;
+		}}
+	}
+	label_out:
+	if (i < size)
+		str[i] = '\0';
+	else if (size != 0)	/* size == 0: nothing may be written. */
+		str[size - 1] = '\0';
+
+#undef APPEND_C
+#undef APPEND_S
+#undef APPEND_PADDED_S
+#undef GET_ARG_NUMERIC
+	return ((int)i);
+}
+
+JEMALLOC_ATTR(format(printf, 3, 4))
+int
+malloc_snprintf(char *str, size_t size, const char *format, ...)
+{
+	int ret;
+	va_list ap;
+
+	va_start(ap, format);
+	ret = malloc_vsnprintf(str, size, format, ap);
+	va_end(ap);
+
+	return (ret);
+}
+
+void
+malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *format, va_list ap)
+{
+	char buf[MALLOC_PRINTF_BUFSIZE];
+
+	if (write_cb == NULL) {
+		/*
+		 * The caller did not provide an alternate write_cb callback
+		 * function, so fall back to the default that malloc_write()
+		 * uses: je_malloc_message if set, otherwise wrtmessage().
+		 */
+		write_cb = (je_malloc_message != NULL) ? je_malloc_message :
+		    wrtmessage;
+		cbopaque = NULL;
+	}
+
+	malloc_vsnprintf(buf, sizeof(buf), format, ap);
+	write_cb(cbopaque, buf);
+}
+
+/*
+ * Print to a callback function in such a way as to (hopefully) avoid memory
+ * allocation.
+ */
+JEMALLOC_ATTR(format(printf, 3, 4))
+void
+malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
+    const char *format, ...)
+{
+	va_list ap;
+
+	va_start(ap, format);
+	malloc_vcprintf(write_cb, cbopaque, format, ap);
+	va_end(ap);
+}
+
+/* Print to stderr in such a way as to avoid memory allocation. */
+JEMALLOC_ATTR(format(printf, 1, 2))
+void
+malloc_printf(const char *format, ...)
+{
+	va_list ap;
+
+	va_start(ap, format);
+	malloc_vcprintf(NULL, NULL, format, ap);
+	va_end(ap);
+}
diff --git a/deps/jemalloc/src/zone.c b/deps/jemalloc/src/zone.c
index 2c1b2318..cde5d49a 100644
--- a/deps/jemalloc/src/zone.c
+++ b/deps/jemalloc/src/zone.c
@@ -3,11 +3,18 @@
 #  error "This source file is for zones on Darwin (OS X)."
 #endif
 
+/*
+ * The malloc_default_purgeable_zone function is only available on >= 10.6.
+ * We need to check whether it is present at runtime, thus the weak_import.
+ */
+extern malloc_zone_t *malloc_default_purgeable_zone(void)
+JEMALLOC_ATTR(weak_import);
+
 /******************************************************************************/
 /* Data. */
 
-static malloc_zone_t zone, szone;
-static struct malloc_introspection_t zone_introspect, ozone_introspect;
+static malloc_zone_t zone;
+static struct malloc_introspection_t zone_introspect;
 
 /******************************************************************************/
 /* Function prototypes for non-inline static functions. */
@@ -18,8 +25,10 @@ static void	*zone_calloc(malloc_zone_t *zone, size_t num, size_t size);
 static void	*zone_valloc(malloc_zone_t *zone, size_t size);
 static void	zone_free(malloc_zone_t *zone, void *ptr);
 static void	*zone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
-#if (JEMALLOC_ZONE_VERSION >= 6)
+#if (JEMALLOC_ZONE_VERSION >= 5)
 static void	*zone_memalign(malloc_zone_t *zone, size_t alignment,
     size_t size);
+#endif
+#if (JEMALLOC_ZONE_VERSION >= 6)
 static void	zone_free_definite_size(malloc_zone_t *zone, void *ptr,
     size_t size);
@@ -28,19 +37,6 @@ static void	*zone_destroy(malloc_zone_t *zone);
 static size_t	zone_good_size(malloc_zone_t *zone, size_t size);
 static void	zone_force_lock(malloc_zone_t *zone);
 static void	zone_force_unlock(malloc_zone_t *zone);
-static size_t	ozone_size(malloc_zone_t *zone, void *ptr);
-static void	ozone_free(malloc_zone_t *zone, void *ptr);
-static void	*ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
-static unsigned	ozone_batch_malloc(malloc_zone_t *zone, size_t size,
-    void **results, unsigned num_requested);
-static void	ozone_batch_free(malloc_zone_t *zone, void **to_be_freed,
-    unsigned num);
-#if (JEMALLOC_ZONE_VERSION >= 6)
-static void	ozone_free_definite_size(malloc_zone_t *zone, void *ptr,
-    size_t size);
-#endif
-static void	ozone_force_lock(malloc_zone_t *zone);
-static void	ozone_force_unlock(malloc_zone_t *zone);
 
 /******************************************************************************/
 /*
@@ -60,21 +56,21 @@ zone_size(malloc_zone_t *zone, void *ptr)
 	 * not work in practice, we must check all pointers to assure that they
 	 * reside within a mapped chunk before determining size.
 	 */
-	return (ivsalloc(ptr));
+	return (ivsalloc(ptr, config_prof));
 }
 
 static void *
 zone_malloc(malloc_zone_t *zone, size_t size)
 {
 
-	return (JEMALLOC_P(malloc)(size));
+	return (je_malloc(size));
 }
 
 static void *
 zone_calloc(malloc_zone_t *zone, size_t num, size_t size)
 {
 
-	return (JEMALLOC_P(calloc)(num, size));
+	return (je_calloc(num, size));
 }
 
 static void *
@@ -82,7 +78,7 @@ zone_valloc(malloc_zone_t *zone, size_t size)
 {
 	void *ret = NULL; /* Assignment avoids useless compiler warning. */
 
-	JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size);
+	je_posix_memalign(&ret, PAGE, size);
 
 	return (ret);
 }
@@ -91,33 +87,48 @@ static void
 zone_free(malloc_zone_t *zone, void *ptr)
 {
 
-	JEMALLOC_P(free)(ptr);
+	if (ivsalloc(ptr, config_prof) != 0) {
+		je_free(ptr);
+		return;
+	}
+
+	free(ptr);
 }
 
 static void *
 zone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
 {
 
-	return (JEMALLOC_P(realloc)(ptr, size));
+	if (ivsalloc(ptr, config_prof) != 0)
+		return (je_realloc(ptr, size));
+
+	return (realloc(ptr, size));
 }
 
-#if (JEMALLOC_ZONE_VERSION >= 6)
+#if (JEMALLOC_ZONE_VERSION >= 5)
 static void *
 zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size)
 {
 	void *ret = NULL; /* Assignment avoids useless compiler warning. */
 
-	JEMALLOC_P(posix_memalign)(&ret, alignment, size);
+	je_posix_memalign(&ret, alignment, size);
 
 	return (ret);
 }
+#endif
 
+#if (JEMALLOC_ZONE_VERSION >= 6)
 static void
 zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
 {
 
-	assert(ivsalloc(ptr) == size);
-	JEMALLOC_P(free)(ptr);
+	if (ivsalloc(ptr, config_prof) != 0) {
+		assert(ivsalloc(ptr, config_prof) == size);
+		je_free(ptr);
+		return;
+	}
+
+	free(ptr);
 }
 #endif
 
@@ -133,22 +144,10 @@ zone_destroy(malloc_zone_t *zone)
 static size_t
 zone_good_size(malloc_zone_t *zone, size_t size)
 {
-	size_t ret;
-	void *p;
 
-	/*
-	 * Actually create an object of the appropriate size, then find out
-	 * how large it could have been without moving up to the next size
-	 * class.
-	 */
-	p = JEMALLOC_P(malloc)(size);
-	if (p != NULL) {
-		ret = isalloc(p);
-		JEMALLOC_P(free)(p);
-	} else
-		ret = size;
-
-	return (ret);
+	if (size == 0)
+		size = 1;
+	return (s2u(size));
 }
 
 static void
@@ -164,11 +163,12 @@ zone_force_unlock(malloc_zone_t *zone)
 {
 
 	if (isthreaded)
-		jemalloc_postfork();
+		jemalloc_postfork_parent();
 }
 
-malloc_zone_t *
-create_zone(void)
+JEMALLOC_ATTR(constructor)
+void
+register_zone(void)
 {
 
 	zone.size = (void *)zone_size;
@@ -183,10 +183,15 @@ create_zone(void)
 	zone.batch_free = NULL;
 	zone.introspect = &zone_introspect;
 	zone.version = JEMALLOC_ZONE_VERSION;
-#if (JEMALLOC_ZONE_VERSION >= 6)
+#if (JEMALLOC_ZONE_VERSION >= 5)
 	zone.memalign = zone_memalign;
+#endif
+#if (JEMALLOC_ZONE_VERSION >= 6)
 	zone.free_definite_size = zone_free_definite_size;
 #endif
+#if (JEMALLOC_ZONE_VERSION >= 8)
+	zone.pressure_relief = NULL;
+#endif
 
 	zone_introspect.enumerator = NULL;
 	zone_introspect.good_size = (void *)zone_good_size;
@@ -199,156 +204,45 @@ create_zone(void)
 #if (JEMALLOC_ZONE_VERSION >= 6)
 	zone_introspect.zone_locked = NULL;
 #endif
-
-	return (&zone);
-}
-
-static size_t
-ozone_size(malloc_zone_t *zone, void *ptr)
-{
-	size_t ret;
-
-	ret = ivsalloc(ptr);
-	if (ret == 0)
-		ret = szone.size(zone, ptr);
-
-	return (ret);
-}
-
-static void
-ozone_free(malloc_zone_t *zone, void *ptr)
-{
-
-	if (ivsalloc(ptr) != 0)
-		JEMALLOC_P(free)(ptr);
-	else {
-		size_t size = szone.size(zone, ptr);
-		if (size != 0)
-			(szone.free)(zone, ptr);
-	}
-}
-
-static void *
-ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
-{
-	size_t oldsize;
-
-	if (ptr == NULL)
-		return (JEMALLOC_P(malloc)(size));
-
-	oldsize = ivsalloc(ptr);
-	if (oldsize != 0)
-		return (JEMALLOC_P(realloc)(ptr, size));
-	else {
-		oldsize = szone.size(zone, ptr);
-		if (oldsize == 0)
-			return (JEMALLOC_P(malloc)(size));
-		else {
-			void *ret = JEMALLOC_P(malloc)(size);
-			if (ret != NULL) {
-				memcpy(ret, ptr, (oldsize < size) ? oldsize :
-				    size);
-				(szone.free)(zone, ptr);
-			}
-			return (ret);
-		}
-	}
-}
-
-static unsigned
-ozone_batch_malloc(malloc_zone_t *zone, size_t size, void **results,
-    unsigned num_requested)
-{
-
-	/* Don't bother implementing this interface, since it isn't required. */
-	return (0);
-}
-
-static void
-ozone_batch_free(malloc_zone_t *zone, void **to_be_freed, unsigned num)
-{
-	unsigned i;
-
-	for (i = 0; i < num; i++)
-		ozone_free(zone, to_be_freed[i]);
-}
-
-#if (JEMALLOC_ZONE_VERSION >= 6)
-static void
-ozone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
-{
-
-	if (ivsalloc(ptr) != 0) {
-		assert(ivsalloc(ptr) == size);
-		JEMALLOC_P(free)(ptr);
-	} else {
-		assert(size == szone.size(zone, ptr));
-		szone.free_definite_size(zone, ptr, size);
-	}
-}
+#if (JEMALLOC_ZONE_VERSION >= 7)
+	zone_introspect.enable_discharge_checking = NULL;
+	zone_introspect.disable_discharge_checking = NULL;
+	zone_introspect.discharge = NULL;
+#ifdef __BLOCKS__
+	zone_introspect.enumerate_discharged_pointers = NULL;
+#else
+	zone_introspect.enumerate_unavailable_without_blocks = NULL;
+#endif
 #endif
 
-static void
-ozone_force_lock(malloc_zone_t *zone)
-{
-
-	/* jemalloc locking is taken care of by the normal jemalloc zone. */
-	szone.introspect->force_lock(zone);
-}
-
-static void
-ozone_force_unlock(malloc_zone_t *zone)
-{
-
-	/* jemalloc locking is taken care of by the normal jemalloc zone. */
-	szone.introspect->force_unlock(zone);
-}
+	/*
+	 * The default purgeable zone is created lazily by OSX's libc.  It uses
+	 * the default zone when it is created for "small" allocations
+	 * (< 15 KiB), but assumes the default zone is a scalable_zone.  This
+	 * obviously fails when the default zone is the jemalloc zone, so
+	 * malloc_default_purgeable_zone is called beforehand so that the
+	 * default purgeable zone is created when the default zone is still
+	 * a scalable_zone.  As purgeable zones only exist on >= 10.6, we need
+	 * to check for the existence of malloc_default_purgeable_zone() at
+	 * run time.
+	 */
+	if (malloc_default_purgeable_zone != NULL)
+		malloc_default_purgeable_zone();
 
-/*
- * Overlay the default scalable zone (szone) such that existing allocations are
- * drained, and further allocations come from jemalloc.  This is necessary
- * because Core Foundation directly accesses and uses the szone before the
- * jemalloc library is even loaded.
- */
-void
-szone2ozone(malloc_zone_t *zone)
-{
+	/* Register the custom zone.  At this point it won't be the default. */
+	malloc_zone_register(&zone);
 
 	/*
-	 * Stash a copy of the original szone so that we can call its
-	 * functions as needed.  Note that the internally, the szone stores its
-	 * bookkeeping data structures immediately following the malloc_zone_t
-	 * header, so when calling szone functions, we need to pass a pointer
-	 * to the original zone structure.
+	 * Unregister and reregister the default zone.  On OSX >= 10.6,
+	 * unregistering takes the last registered zone and places it at the
+	 * location of the specified zone.  Unregistering the default zone thus
+	 * makes the last registered one the default.  On OSX < 10.6,
+	 * unregistering shifts all registered zones.  The first registered zone
+	 * then becomes the default.
 	 */
-	memcpy(&szone, zone, sizeof(malloc_zone_t));
-
-	zone->size = (void *)ozone_size;
-	zone->malloc = (void *)zone_malloc;
-	zone->calloc = (void *)zone_calloc;
-	zone->valloc = (void *)zone_valloc;
-	zone->free = (void *)ozone_free;
-	zone->realloc = (void *)ozone_realloc;
-	zone->destroy = (void *)zone_destroy;
-	zone->zone_name = "jemalloc_ozone";
-	zone->batch_malloc = ozone_batch_malloc;
-	zone->batch_free = ozone_batch_free;
-	zone->introspect = &ozone_introspect;
-	zone->version = JEMALLOC_ZONE_VERSION;
-#if (JEMALLOC_ZONE_VERSION >= 6)
-	zone->memalign = zone_memalign;
-	zone->free_definite_size = ozone_free_definite_size;
-#endif
-
-	ozone_introspect.enumerator = NULL;
-	ozone_introspect.good_size = (void *)zone_good_size;
-	ozone_introspect.check = NULL;
-	ozone_introspect.print = NULL;
-	ozone_introspect.log = NULL;
-	ozone_introspect.force_lock = (void *)ozone_force_lock;
-	ozone_introspect.force_unlock = (void *)ozone_force_unlock;
-	ozone_introspect.statistics = NULL;
-#if (JEMALLOC_ZONE_VERSION >= 6)
-	ozone_introspect.zone_locked = NULL;
-#endif
+	do {
+		malloc_zone_t *default_zone = malloc_default_zone();
+		malloc_zone_unregister(default_zone);
+		malloc_zone_register(default_zone);
+	} while (malloc_default_zone() != &zone);
 }
diff --git a/deps/jemalloc/test/aligned_alloc.c b/deps/jemalloc/test/aligned_alloc.c
new file mode 100644
index 00000000..5a9b0cae
--- /dev/null
+++ b/deps/jemalloc/test/aligned_alloc.c
@@ -0,0 +1,124 @@
+#define	JEMALLOC_MANGLE
+#include "jemalloc_test.h"
+
+#define CHUNK 0x400000
+/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */
+#define MAXALIGN ((size_t)0x2000000LU)
+#define NITER 4
+
+/*
+ * Exercise aligned_alloc(): invalid alignments must fail with EINVAL,
+ * oversized alignment/size requests must fail with ENOMEM, and requests
+ * for each power-of-two alignment from 8 through MAXALIGN must succeed.
+ */
+int
+main(void)
+{
+	size_t alignment, size, total;
+	unsigned i;
+	void *p, *ps[NITER];
+
+	malloc_printf("Test begin\n");
+
+	/* Test error conditions. */
+	alignment = 0;
+	set_errno(0);
+	p = aligned_alloc(alignment, 1);
+	if (p != NULL || get_errno() != EINVAL) {
+		malloc_printf(
+		    "Expected error for invalid alignment %zu\n", alignment);
+	}
+
+	for (alignment = sizeof(size_t); alignment < MAXALIGN;
+	    alignment <<= 1) {
+		set_errno(0);
+		p = aligned_alloc(alignment + 1, 1);
+		if (p != NULL || get_errno() != EINVAL) {
+			malloc_printf(
+			    "Expected error for invalid alignment %zu\n",
+			    alignment + 1);
+		}
+	}
+
+#if LG_SIZEOF_PTR == 3
+	alignment = UINT64_C(0x8000000000000000);
+	size      = UINT64_C(0x8000000000000000);
+#else
+	alignment = 0x80000000LU;
+	size      = 0x80000000LU;
+#endif
+	set_errno(0);
+	p = aligned_alloc(alignment, size);
+	if (p != NULL || get_errno() != ENOMEM) {
+		malloc_printf(
+		    "Expected error for aligned_alloc(%zu, %zu)\n",
+		    alignment, size);
+	}
+
+#if LG_SIZEOF_PTR == 3
+	alignment = UINT64_C(0x4000000000000000);
+	size      = UINT64_C(0x8400000000000001);
+#else
+	alignment = 0x40000000LU;
+	size      = 0x84000001LU;
+#endif
+	set_errno(0);
+	p = aligned_alloc(alignment, size);
+	if (p != NULL || get_errno() != ENOMEM) {
+		malloc_printf(
+		    "Expected error for aligned_alloc(%zu, %zu)\n",
+		    alignment, size);
+	}
+
+	alignment = 0x10LU;
+#if LG_SIZEOF_PTR == 3
+	size = UINT64_C(0xfffffffffffffff0);
+#else
+	size = 0xfffffff0LU;
+#endif
+	set_errno(0);
+	p = aligned_alloc(alignment, size);
+	if (p != NULL || get_errno() != ENOMEM) {
+		malloc_printf(
+		    "Expected error for aligned_alloc(%zu, %zu)\n",
+		    alignment, size);
+	}
+
+	for (i = 0; i < NITER; i++)
+		ps[i] = NULL;
+
+	for (alignment = 8;
+	    alignment <= MAXALIGN;
+	    alignment <<= 1) {
+		total = 0;
+		malloc_printf("Alignment: %zu\n", alignment);
+		for (size = 1;
+		    size < 3 * alignment && size < (1U << 31);
+		    size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
+			for (i = 0; i < NITER; i++) {
+				ps[i] = aligned_alloc(alignment, size);
+				if (ps[i] == NULL) {
+					char buf[BUFERROR_BUF];
+
+					buferror(buf, sizeof(buf));
+					malloc_printf(
+					    "Error for size %zu (%#zx): %s\n",
+					    size, size, buf);
+					exit(1);
+				}
+				total += malloc_usable_size(ps[i]);
+				if (total >= (MAXALIGN << 1))
+					break;
+			}
+			for (i = 0; i < NITER; i++) {
+				if (ps[i] != NULL) {
+					free(ps[i]);
+					ps[i] = NULL;
+				}
+			}
+		}
+	}
+
+	malloc_printf("Test end\n");
+	return (0);
+}
diff --git a/deps/jemalloc/test/aligned_alloc.exp b/deps/jemalloc/test/aligned_alloc.exp
new file mode 100644
index 00000000..b5061c72
--- /dev/null
+++ b/deps/jemalloc/test/aligned_alloc.exp
@@ -0,0 +1,25 @@
+Test begin
+Alignment: 8
+Alignment: 16
+Alignment: 32
+Alignment: 64
+Alignment: 128
+Alignment: 256
+Alignment: 512
+Alignment: 1024
+Alignment: 2048
+Alignment: 4096
+Alignment: 8192
+Alignment: 16384
+Alignment: 32768
+Alignment: 65536
+Alignment: 131072
+Alignment: 262144
+Alignment: 524288
+Alignment: 1048576
+Alignment: 2097152
+Alignment: 4194304
+Alignment: 8388608
+Alignment: 16777216
+Alignment: 33554432
+Test end
diff --git a/deps/jemalloc/test/allocated.c b/deps/jemalloc/test/allocated.c
index b1e40e47..9884905d 100644
--- a/deps/jemalloc/test/allocated.c
+++ b/deps/jemalloc/test/allocated.c
@@ -1,17 +1,8 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <pthread.h>
-#include <assert.h>
-#include <errno.h>
-#include <string.h>
-
 #define	JEMALLOC_MANGLE
 #include "jemalloc_test.h"
 
 void *
-thread_start(void *arg)
+je_thread_start(void *arg)
 {
 	int err;
 	void *p;
@@ -20,89 +11,85 @@ thread_start(void *arg)
 	size_t sz, usize;
 
 	sz = sizeof(a0);
-	if ((err = JEMALLOC_P(mallctl)("thread.allocated", &a0, &sz, NULL,
-	    0))) {
+	if ((err = mallctl("thread.allocated", &a0, &sz, NULL, 0))) {
 		if (err == ENOENT) {
 #ifdef JEMALLOC_STATS
 			assert(false);
 #endif
-			goto RETURN;
+			goto label_return;
 		}
-		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		malloc_printf("%s(): Error in mallctl(): %s\n", __func__,
 		    strerror(err));
 		exit(1);
 	}
 	sz = sizeof(ap0);
-	if ((err = JEMALLOC_P(mallctl)("thread.allocatedp", &ap0, &sz, NULL,
-	    0))) {
+	if ((err = mallctl("thread.allocatedp", &ap0, &sz, NULL, 0))) {
 		if (err == ENOENT) {
 #ifdef JEMALLOC_STATS
 			assert(false);
 #endif
-			goto RETURN;
+			goto label_return;
 		}
-		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		malloc_printf("%s(): Error in mallctl(): %s\n", __func__,
 		    strerror(err));
 		exit(1);
 	}
 	assert(*ap0 == a0);
 
 	sz = sizeof(d0);
-	if ((err = JEMALLOC_P(mallctl)("thread.deallocated", &d0, &sz, NULL,
-	    0))) {
+	if ((err = mallctl("thread.deallocated", &d0, &sz, NULL, 0))) {
 		if (err == ENOENT) {
 #ifdef JEMALLOC_STATS
 			assert(false);
 #endif
-			goto RETURN;
+			goto label_return;
 		}
-		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		malloc_printf("%s(): Error in mallctl(): %s\n", __func__,
 		    strerror(err));
 		exit(1);
 	}
 	sz = sizeof(dp0);
-	if ((err = JEMALLOC_P(mallctl)("thread.deallocatedp", &dp0, &sz, NULL,
-	    0))) {
+	if ((err = mallctl("thread.deallocatedp", &dp0, &sz, NULL, 0))) {
 		if (err == ENOENT) {
 #ifdef JEMALLOC_STATS
 			assert(false);
 #endif
-			goto RETURN;
+			goto label_return;
 		}
-		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		malloc_printf("%s(): Error in mallctl(): %s\n", __func__,
 		    strerror(err));
 		exit(1);
 	}
 	assert(*dp0 == d0);
 
-	p = JEMALLOC_P(malloc)(1);
+	p = malloc(1);
 	if (p == NULL) {
-		fprintf(stderr, "%s(): Error in malloc()\n", __func__);
+		malloc_printf("%s(): Error in malloc()\n", __func__);
 		exit(1);
 	}
 
 	sz = sizeof(a1);
-	JEMALLOC_P(mallctl)("thread.allocated", &a1, &sz, NULL, 0);
+	mallctl("thread.allocated", &a1, &sz, NULL, 0);
 	sz = sizeof(ap1);
-	JEMALLOC_P(mallctl)("thread.allocatedp", &ap1, &sz, NULL, 0);
+	mallctl("thread.allocatedp", &ap1, &sz, NULL, 0);
 	assert(*ap1 == a1);
 	assert(ap0 == ap1);
 
-	usize = JEMALLOC_P(malloc_usable_size)(p);
+	usize = malloc_usable_size(p);
 	assert(a0 + usize <= a1);
 
-	JEMALLOC_P(free)(p);
+	free(p);
 
 	sz = sizeof(d1);
-	JEMALLOC_P(mallctl)("thread.deallocated", &d1, &sz, NULL, 0);
+	mallctl("thread.deallocated", &d1, &sz, NULL, 0);
 	sz = sizeof(dp1);
-	JEMALLOC_P(mallctl)("thread.deallocatedp", &dp1, &sz, NULL, 0);
+	mallctl("thread.deallocatedp", &dp1, &sz, NULL, 0);
 	assert(*dp1 == d1);
 	assert(dp0 == dp1);
 
 	assert(d0 + usize <= d1);
 
-RETURN:
+label_return:
 	return (NULL);
 }
 
@@ -110,33 +97,23 @@ int
 main(void)
 {
 	int ret = 0;
-	pthread_t thread;
+	je_thread_t thread;
 
-	fprintf(stderr, "Test begin\n");
+	malloc_printf("Test begin\n");
 
-	thread_start(NULL);
+	je_thread_start(NULL);
 
-	if (pthread_create(&thread, NULL, thread_start, NULL)
-	    != 0) {
-		fprintf(stderr, "%s(): Error in pthread_create()\n", __func__);
-		ret = 1;
-		goto RETURN;
-	}
-	pthread_join(thread, (void *)&ret);
+	je_thread_create(&thread, je_thread_start, NULL);
+	/* Exit value unused; a void * must not be stored through &ret. */
+	je_thread_join(thread, NULL);
 
-	thread_start(NULL);
+	je_thread_start(NULL);
 
-	if (pthread_create(&thread, NULL, thread_start, NULL)
-	    != 0) {
-		fprintf(stderr, "%s(): Error in pthread_create()\n", __func__);
-		ret = 1;
-		goto RETURN;
-	}
-	pthread_join(thread, (void *)&ret);
+	je_thread_create(&thread, je_thread_start, NULL);
+	je_thread_join(thread, NULL);
 
-	thread_start(NULL);
+	je_thread_start(NULL);
 
-RETURN:
-	fprintf(stderr, "Test end\n");
+	malloc_printf("Test end\n");
 	return (ret);
 }
diff --git a/deps/jemalloc/test/allocm.c b/deps/jemalloc/test/allocm.c
index 59d0002e..80be673b 100644
--- a/deps/jemalloc/test/allocm.c
+++ b/deps/jemalloc/test/allocm.c
@@ -1,13 +1,9 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-
 #define	JEMALLOC_MANGLE
 #include "jemalloc_test.h"
 
 #define CHUNK 0x400000
-/* #define MAXALIGN ((size_t)0x80000000000LLU) */
-#define MAXALIGN ((size_t)0x2000000LLU)
+/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */
+#define MAXALIGN ((size_t)0x2000000LU)
 #define NITER 4
 
 int
@@ -15,79 +11,122 @@ main(void)
 {
 	int r;
 	void *p;
-	size_t sz, alignment, total, tsz;
+	size_t nsz, rsz, sz, alignment, total;
 	unsigned i;
 	void *ps[NITER];
 
-	fprintf(stderr, "Test begin\n");
+	malloc_printf("Test begin\n");
 
-	sz = 0;
-	r = JEMALLOC_P(allocm)(&p, &sz, 42, 0);
+	sz = 42;
+	nsz = 0;
+	r = nallocm(&nsz, sz, 0);
 	if (r != ALLOCM_SUCCESS) {
-		fprintf(stderr, "Unexpected allocm() error\n");
+		malloc_printf("Unexpected nallocm() error\n");
 		abort();
 	}
-	if (sz < 42)
-		fprintf(stderr, "Real size smaller than expected\n");
-	if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS)
-		fprintf(stderr, "Unexpected dallocm() error\n");
+	rsz = 0;
+	r = allocm(&p, &rsz, sz, 0);
+	if (r != ALLOCM_SUCCESS) {
+		malloc_printf("Unexpected allocm() error\n");
+		abort();
+	}
+	if (rsz < sz)
+		malloc_printf("Real size smaller than expected\n");
+	if (nsz != rsz)
+		malloc_printf("nallocm()/allocm() rsize mismatch\n");
+	if (dallocm(p, 0) != ALLOCM_SUCCESS)
+		malloc_printf("Unexpected dallocm() error\n");
 
-	r = JEMALLOC_P(allocm)(&p, NULL, 42, 0);
+	r = allocm(&p, NULL, sz, 0);
 	if (r != ALLOCM_SUCCESS) {
-		fprintf(stderr, "Unexpected allocm() error\n");
+		malloc_printf("Unexpected allocm() error\n");
 		abort();
 	}
-	if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS)
-		fprintf(stderr, "Unexpected dallocm() error\n");
+	if (dallocm(p, 0) != ALLOCM_SUCCESS)
+		malloc_printf("Unexpected dallocm() error\n");
 
-	r = JEMALLOC_P(allocm)(&p, NULL, 42, ALLOCM_ZERO);
+	nsz = 0;
+	r = nallocm(&nsz, sz, ALLOCM_ZERO);
+	if (r != ALLOCM_SUCCESS) {
+		malloc_printf("Unexpected nallocm() error\n");
+		abort();
+	}
+	rsz = 0;
+	r = allocm(&p, &rsz, sz, ALLOCM_ZERO);
 	if (r != ALLOCM_SUCCESS) {
-		fprintf(stderr, "Unexpected allocm() error\n");
+		malloc_printf("Unexpected allocm() error\n");
 		abort();
 	}
-	if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS)
-		fprintf(stderr, "Unexpected dallocm() error\n");
+	if (nsz != rsz)
+		malloc_printf("nallocm()/allocm() rsize mismatch\n");
+	if (dallocm(p, 0) != ALLOCM_SUCCESS)
+		malloc_printf("Unexpected dallocm() error\n");
 
 #if LG_SIZEOF_PTR == 3
-	alignment = 0x8000000000000000LLU;
-	sz        = 0x8000000000000000LLU;
+	alignment = UINT64_C(0x8000000000000000);
+	sz        = UINT64_C(0x8000000000000000);
 #else
 	alignment = 0x80000000LU;
 	sz        = 0x80000000LU;
 #endif
-	r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment));
+	nsz = 0;
+	r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment));
+	if (r == ALLOCM_SUCCESS) {
+		malloc_printf(
+		    "Expected error for nallocm(&nsz, %zu, %#x)\n",
+		    sz, ALLOCM_ALIGN(alignment));
+	}
+	rsz = 0;
+	r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment));
 	if (r == ALLOCM_SUCCESS) {
-		fprintf(stderr,
-		    "Expected error for allocm(&p, %zu, 0x%x)\n",
+		malloc_printf(
+		    "Expected error for allocm(&p, %zu, %#x)\n",
 		    sz, ALLOCM_ALIGN(alignment));
 	}
+	if (nsz != rsz)
+		malloc_printf("nallocm()/allocm() rsize mismatch\n");
 
 #if LG_SIZEOF_PTR == 3
-	alignment = 0x4000000000000000LLU;
-	sz        = 0x8400000000000001LLU;
+	alignment = UINT64_C(0x4000000000000000);
+	sz        = UINT64_C(0x8400000000000001);
 #else
 	alignment = 0x40000000LU;
 	sz        = 0x84000001LU;
 #endif
-	r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment));
+	nsz = 0;
+	r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment));
+	if (r != ALLOCM_SUCCESS)
+		malloc_printf("Unexpected nallocm() error\n");
+	rsz = 0;
+	r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment));
 	if (r == ALLOCM_SUCCESS) {
-		fprintf(stderr,
-		    "Expected error for allocm(&p, %zu, 0x%x)\n",
+		malloc_printf(
+		    "Expected error for allocm(&p, %zu, %#x)\n",
 		    sz, ALLOCM_ALIGN(alignment));
 	}
 
-	alignment = 0x10LLU;
+	alignment = 0x10LU;
 #if LG_SIZEOF_PTR == 3
-	sz   = 0xfffffffffffffff0LLU;
+	sz = UINT64_C(0xfffffffffffffff0);
 #else
-	sz   = 0xfffffff0LU;
+	sz = 0xfffffff0LU;
 #endif
-	r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment));
+	nsz = 0;
+	r = nallocm(&nsz, sz, ALLOCM_ALIGN(alignment));
 	if (r == ALLOCM_SUCCESS) {
-		fprintf(stderr,
-		    "Expected error for allocm(&p, %zu, 0x%x)\n",
+		malloc_printf(
+		    "Expected error for nallocm(&nsz, %zu, %#x)\n",
 		    sz, ALLOCM_ALIGN(alignment));
 	}
+	rsz = 0;
+	r = allocm(&p, &rsz, sz, ALLOCM_ALIGN(alignment));
+	if (r == ALLOCM_SUCCESS) {
+		malloc_printf(
+		    "Expected error for allocm(&p, %zu, %#x)\n",
+		    sz, ALLOCM_ALIGN(alignment));
+	}
+	if (nsz != rsz)
+		malloc_printf("nallocm()/allocm() rsize mismatch\n");
 
 	for (i = 0; i < NITER; i++)
 		ps[i] = NULL;
@@ -96,38 +135,62 @@ main(void)
 	    alignment <= MAXALIGN;
 	    alignment <<= 1) {
 		total = 0;
-		fprintf(stderr, "Alignment: %zu\n", alignment);
+		malloc_printf("Alignment: %zu\n", alignment);
 		for (sz = 1;
 		    sz < 3 * alignment && sz < (1U << 31);
 		    sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
 			for (i = 0; i < NITER; i++) {
-				r = JEMALLOC_P(allocm)(&ps[i], NULL, sz,
+				nsz = 0;
+				r = nallocm(&nsz, sz,
+				    ALLOCM_ALIGN(alignment) | ALLOCM_ZERO);
+				if (r != ALLOCM_SUCCESS) {
+					malloc_printf(
+					    "nallocm() error for size %zu"
+					    " (%#zx): %d\n",
+					    sz, sz, r);
+					exit(1);
+				}
+				rsz = 0;
+				r = allocm(&ps[i], &rsz, sz,
 				    ALLOCM_ALIGN(alignment) | ALLOCM_ZERO);
 				if (r != ALLOCM_SUCCESS) {
-					fprintf(stderr,
-					    "Error for size %zu (0x%zx): %d\n",
+					malloc_printf(
+					    "allocm() error for size %zu"
+					    " (%#zx): %d\n",
 					    sz, sz, r);
 					exit(1);
 				}
+				if (rsz < sz) {
+					malloc_printf(
+					    "Real size smaller than"
+					    " expected\n");
+				}
+				if (nsz != rsz) {
+					malloc_printf(
+					    "nallocm()/allocm() rsize"
+					    " mismatch\n");
+				}
-				if ((uintptr_t)p & (alignment-1)) {
-					fprintf(stderr,
+				/* Verify the object just allocated. */
+				if ((uintptr_t)ps[i] & (alignment-1)) {
+					malloc_printf(
 					    "%p inadequately aligned for"
-					    " alignment: %zu\n", p, alignment);
+					    " alignment: %zu\n", ps[i],
+					    alignment);
 				}
-				JEMALLOC_P(sallocm)(ps[i], &tsz, 0);
-				total += tsz;
+				sallocm(ps[i], &rsz, 0);
+				total += rsz;
 				if (total >= (MAXALIGN << 1))
 					break;
 			}
 			for (i = 0; i < NITER; i++) {
 				if (ps[i] != NULL) {
-					JEMALLOC_P(dallocm)(ps[i], 0);
+					dallocm(ps[i], 0);
 					ps[i] = NULL;
 				}
 			}
 		}
 	}
 
-	fprintf(stderr, "Test end\n");
+	malloc_printf("Test end\n");
 	return (0);
 }
diff --git a/deps/jemalloc/test/bitmap.c b/deps/jemalloc/test/bitmap.c
index adfaacfe..b2cb6300 100644
--- a/deps/jemalloc/test/bitmap.c
+++ b/deps/jemalloc/test/bitmap.c
@@ -1,18 +1,6 @@
 #define	JEMALLOC_MANGLE
 #include "jemalloc_test.h"
 
-/*
- * Avoid using the assert() from jemalloc_internal.h, since it requires
- * internal libjemalloc functionality.
- * */
-#include <assert.h>
-
-/*
- * Directly include the bitmap code, since it isn't exposed outside
- * libjemalloc.
- */
-#include "../src/bitmap.c"
-
 #if (LG_BITMAP_MAXBITS > 12)
 #  define MAXBITS	4500
 #else
@@ -42,11 +30,13 @@ test_bitmap_init(void)
 		bitmap_info_init(&binfo, i);
 		{
 			size_t j;
-			bitmap_t bitmap[bitmap_info_ngroups(&binfo)];
+			bitmap_t *bitmap = malloc(sizeof(bitmap_t) *
+				bitmap_info_ngroups(&binfo));
 			bitmap_init(bitmap, &binfo);
 
 			for (j = 0; j < i; j++)
 				assert(bitmap_get(bitmap, &binfo, j) == false);
+			free(bitmap);
 
 		}
 	}
@@ -62,12 +52,14 @@ test_bitmap_set(void)
 		bitmap_info_init(&binfo, i);
 		{
 			size_t j;
-			bitmap_t bitmap[bitmap_info_ngroups(&binfo)];
+			bitmap_t *bitmap = malloc(sizeof(bitmap_t) *
+				bitmap_info_ngroups(&binfo));
 			bitmap_init(bitmap, &binfo);
 
 			for (j = 0; j < i; j++)
 				bitmap_set(bitmap, &binfo, j);
 			assert(bitmap_full(bitmap, &binfo));
+			free(bitmap);
 		}
 	}
 }
@@ -82,7 +74,8 @@ test_bitmap_unset(void)
 		bitmap_info_init(&binfo, i);
 		{
 			size_t j;
-			bitmap_t bitmap[bitmap_info_ngroups(&binfo)];
+			bitmap_t *bitmap = malloc(sizeof(bitmap_t) *
+				bitmap_info_ngroups(&binfo));
 			bitmap_init(bitmap, &binfo);
 
 			for (j = 0; j < i; j++)
@@ -93,6 +86,7 @@ test_bitmap_unset(void)
 			for (j = 0; j < i; j++)
 				bitmap_set(bitmap, &binfo, j);
 			assert(bitmap_full(bitmap, &binfo));
+			free(bitmap);
 		}
 	}
 }
@@ -107,7 +101,8 @@ test_bitmap_sfu(void)
 		bitmap_info_init(&binfo, i);
 		{
 			ssize_t j;
-			bitmap_t bitmap[bitmap_info_ngroups(&binfo)];
+			bitmap_t *bitmap = malloc(sizeof(bitmap_t) *
+				bitmap_info_ngroups(&binfo));
 			bitmap_init(bitmap, &binfo);
 
 			/* Iteratively set bits starting at the beginning. */
@@ -137,6 +132,7 @@ test_bitmap_sfu(void)
 			}
 			assert(bitmap_sfu(bitmap, &binfo) == i - 1);
 			assert(bitmap_full(bitmap, &binfo));
+			free(bitmap);
 		}
 	}
 }
@@ -144,7 +140,7 @@ test_bitmap_sfu(void)
 int
 main(void)
 {
-	fprintf(stderr, "Test begin\n");
+	malloc_printf("Test begin\n");
 
 	test_bitmap_size();
 	test_bitmap_init();
@@ -152,6 +148,6 @@ main(void)
 	test_bitmap_unset();
 	test_bitmap_sfu();
 
-	fprintf(stderr, "Test end\n");
+	malloc_printf("Test end\n");
 	return (0);
 }
diff --git a/deps/jemalloc/test/jemalloc_test.h.in b/deps/jemalloc/test/jemalloc_test.h.in
index 0c48895e..e38b48ef 100644
--- a/deps/jemalloc/test/jemalloc_test.h.in
+++ b/deps/jemalloc/test/jemalloc_test.h.in
@@ -4,3 +4,50 @@
  * have a different name.
  */
 #include "jemalloc/jemalloc@install_suffix@.h"
+#include "jemalloc/internal/jemalloc_internal.h"
+
+/* Abstraction layer for threading in tests */
+#ifdef _WIN32
+#include <windows.h>
+
+typedef HANDLE je_thread_t;
+
+void
+je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg)
+{
+	LPTHREAD_START_ROUTINE routine = (LPTHREAD_START_ROUTINE)proc;
+	*thread = CreateThread(NULL, 0, routine, arg, 0, NULL);
+	if (*thread == NULL) {
+		malloc_printf("Error in CreateThread()\n");
+		exit(1);
+	}
+}
+
+void
+je_thread_join(je_thread_t thread, void **ret)
+{
+	WaitForSingleObject(thread, INFINITE);
+}
+
+#else
+#include <pthread.h>
+
+typedef pthread_t je_thread_t;
+
+void
+je_thread_create(je_thread_t *thread, void *(*proc)(void *), void *arg)
+{
+
+	if (pthread_create(thread, NULL, proc, arg) != 0) {
+		malloc_printf("Error in pthread_create()\n");
+		exit(1);
+	}
+}
+
+void
+je_thread_join(je_thread_t thread, void **ret)
+{
+
+	pthread_join(thread, ret);
+}
+#endif
diff --git a/deps/jemalloc/test/mremap.c b/deps/jemalloc/test/mremap.c
index 146c66f4..47efa7c4 100644
--- a/deps/jemalloc/test/mremap.c
+++ b/deps/jemalloc/test/mremap.c
@@ -1,9 +1,3 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <errno.h>
-#include <string.h>
-
 #define	JEMALLOC_MANGLE
 #include "jemalloc_test.h"
 
@@ -14,33 +8,32 @@ main(void)
 	size_t sz, lg_chunk, chunksize, i;
 	char *p, *q;
 
-	fprintf(stderr, "Test begin\n");
+	malloc_printf("Test begin\n");
 
 	sz = sizeof(lg_chunk);
-	if ((err = JEMALLOC_P(mallctl)("opt.lg_chunk", &lg_chunk, &sz, NULL,
-	    0))) {
+	if ((err = mallctl("opt.lg_chunk", &lg_chunk, &sz, NULL, 0))) {
 		assert(err != ENOENT);
-		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		malloc_printf("%s(): Error in mallctl(): %s\n", __func__,
 		    strerror(err));
 		ret = 1;
-		goto RETURN;
+		goto label_return;
 	}
 	chunksize = ((size_t)1U) << lg_chunk;
 
 	p = (char *)malloc(chunksize);
 	if (p == NULL) {
-		fprintf(stderr, "malloc(%zu) --> %p\n", chunksize, p);
+		malloc_printf("malloc(%zu) --> %p\n", chunksize, p);
 		ret = 1;
-		goto RETURN;
+		goto label_return;
 	}
 	memset(p, 'a', chunksize);
 
 	q = (char *)realloc(p, chunksize * 2);
 	if (q == NULL) {
-		fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize * 2,
+		malloc_printf("realloc(%p, %zu) --> %p\n", p, chunksize * 2,
 		    q);
 		ret = 1;
-		goto RETURN;
+		goto label_return;
 	}
 	for (i = 0; i < chunksize; i++) {
 		assert(q[i] == 'a');
@@ -50,9 +43,9 @@ main(void)
 
 	q = (char *)realloc(p, chunksize);
 	if (q == NULL) {
-		fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize, q);
+		malloc_printf("realloc(%p, %zu) --> %p\n", p, chunksize, q);
 		ret = 1;
-		goto RETURN;
+		goto label_return;
 	}
 	for (i = 0; i < chunksize; i++) {
 		assert(q[i] == 'a');
@@ -61,7 +54,7 @@ main(void)
 	free(q);
 
 	ret = 0;
-RETURN:
-	fprintf(stderr, "Test end\n");
+label_return:
+	malloc_printf("Test end\n");
 	return (ret);
 }
diff --git a/deps/jemalloc/test/posix_memalign.c b/deps/jemalloc/test/posix_memalign.c
index 3e306c01..2185bcf7 100644
--- a/deps/jemalloc/test/posix_memalign.c
+++ b/deps/jemalloc/test/posix_memalign.c
@@ -1,15 +1,9 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-
 #define	JEMALLOC_MANGLE
 #include "jemalloc_test.h"
 
 #define CHUNK 0x400000
-/* #define MAXALIGN ((size_t)0x80000000000LLU) */
-#define MAXALIGN ((size_t)0x2000000LLU)
+/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */
+#define MAXALIGN ((size_t)0x2000000LU)
 #define NITER 4
 
 int
@@ -20,13 +14,13 @@ main(void)
 	int err;
 	void *p, *ps[NITER];
 
-	fprintf(stderr, "Test begin\n");
+	malloc_printf("Test begin\n");
 
 	/* Test error conditions. */
 	for (alignment = 0; alignment < sizeof(void *); alignment++) {
-		err = JEMALLOC_P(posix_memalign)(&p, alignment, 1);
+		err = posix_memalign(&p, alignment, 1);
 		if (err != EINVAL) {
-			fprintf(stderr,
+			malloc_printf(
 			    "Expected error for invalid alignment %zu\n",
 			    alignment);
 		}
@@ -34,51 +28,51 @@ main(void)
 
 	for (alignment = sizeof(size_t); alignment < MAXALIGN;
 	    alignment <<= 1) {
-		err = JEMALLOC_P(posix_memalign)(&p, alignment + 1, 1);
+		err = posix_memalign(&p, alignment + 1, 1);
 		if (err == 0) {
-			fprintf(stderr,
+			malloc_printf(
 			    "Expected error for invalid alignment %zu\n",
 			    alignment + 1);
 		}
 	}
 
 #if LG_SIZEOF_PTR == 3
-	alignment = 0x8000000000000000LLU;
-	size      = 0x8000000000000000LLU;
+	alignment = UINT64_C(0x8000000000000000);
+	size      = UINT64_C(0x8000000000000000);
 #else
 	alignment = 0x80000000LU;
 	size      = 0x80000000LU;
 #endif
-	err = JEMALLOC_P(posix_memalign)(&p, alignment, size);
+	err = posix_memalign(&p, alignment, size);
 	if (err == 0) {
-		fprintf(stderr,
+		malloc_printf(
 		    "Expected error for posix_memalign(&p, %zu, %zu)\n",
 		    alignment, size);
 	}
 
 #if LG_SIZEOF_PTR == 3
-	alignment = 0x4000000000000000LLU;
-	size      = 0x8400000000000001LLU;
+	alignment = UINT64_C(0x4000000000000000);
+	size      = UINT64_C(0x8400000000000001);
 #else
 	alignment = 0x40000000LU;
 	size      = 0x84000001LU;
 #endif
-	err = JEMALLOC_P(posix_memalign)(&p, alignment, size);
+	err = posix_memalign(&p, alignment, size);
 	if (err == 0) {
-		fprintf(stderr,
+		malloc_printf(
 		    "Expected error for posix_memalign(&p, %zu, %zu)\n",
 		    alignment, size);
 	}
 
-	alignment = 0x10LLU;
+	alignment = 0x10LU;
 #if LG_SIZEOF_PTR == 3
-	size = 0xfffffffffffffff0LLU;
+	size = UINT64_C(0xfffffffffffffff0);
 #else
 	size = 0xfffffff0LU;
 #endif
-	err = JEMALLOC_P(posix_memalign)(&p, alignment, size);
+	err = posix_memalign(&p, alignment, size);
 	if (err == 0) {
-		fprintf(stderr,
+		malloc_printf(
 		    "Expected error for posix_memalign(&p, %zu, %zu)\n",
 		    alignment, size);
 	}
@@ -90,32 +84,32 @@ main(void)
 	    alignment <= MAXALIGN;
 	    alignment <<= 1) {
 		total = 0;
-		fprintf(stderr, "Alignment: %zu\n", alignment);
+		malloc_printf("Alignment: %zu\n", alignment);
 		for (size = 1;
 		    size < 3 * alignment && size < (1U << 31);
 		    size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) {
 			for (i = 0; i < NITER; i++) {
-				err = JEMALLOC_P(posix_memalign)(&ps[i],
+				err = posix_memalign(&ps[i],
 				    alignment, size);
 				if (err) {
-					fprintf(stderr,
-					    "Error for size %zu (0x%zx): %s\n",
+					malloc_printf(
+					    "Error for size %zu (%#zx): %s\n",
 					    size, size, strerror(err));
 					exit(1);
 				}
-				total += JEMALLOC_P(malloc_usable_size)(ps[i]);
+				total += malloc_usable_size(ps[i]);
 				if (total >= (MAXALIGN << 1))
 					break;
 			}
 			for (i = 0; i < NITER; i++) {
 				if (ps[i] != NULL) {
-					JEMALLOC_P(free)(ps[i]);
+					free(ps[i]);
 					ps[i] = NULL;
 				}
 			}
 		}
 	}
 
-	fprintf(stderr, "Test end\n");
+	malloc_printf("Test end\n");
 	return (0);
 }
diff --git a/deps/jemalloc/test/rallocm.c b/deps/jemalloc/test/rallocm.c
index ccf326bb..c5dedf48 100644
--- a/deps/jemalloc/test/rallocm.c
+++ b/deps/jemalloc/test/rallocm.c
@@ -1,9 +1,3 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include <assert.h>
-
 #define	JEMALLOC_MANGLE
 #include "jemalloc_test.h"
 
@@ -15,113 +9,119 @@ main(void)
 	size_t sz, tsz;
 	int r;
 
-	fprintf(stderr, "Test begin\n");
+	malloc_printf("Test begin\n");
 
 	/* Get page size. */
 	{
+#ifdef _WIN32
+		SYSTEM_INFO si;
+		GetSystemInfo(&si);
+		pagesize = (size_t)si.dwPageSize;
+#else
 		long result = sysconf(_SC_PAGESIZE);
 		assert(result != -1);
 		pagesize = (size_t)result;
+#endif
 	}
 
-	r = JEMALLOC_P(allocm)(&p, &sz, 42, 0);
+	r = allocm(&p, &sz, 42, 0);
 	if (r != ALLOCM_SUCCESS) {
-		fprintf(stderr, "Unexpected allocm() error\n");
+		malloc_printf("Unexpected allocm() error\n");
 		abort();
 	}
 
 	q = p;
-	r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 0, ALLOCM_NO_MOVE);
+	r = rallocm(&q, &tsz, sz, 0, ALLOCM_NO_MOVE);
 	if (r != ALLOCM_SUCCESS)
-		fprintf(stderr, "Unexpected rallocm() error\n");
+		malloc_printf("Unexpected rallocm() error\n");
 	if (q != p)
-		fprintf(stderr, "Unexpected object move\n");
+		malloc_printf("Unexpected object move\n");
 	if (tsz != sz) {
-		fprintf(stderr, "Unexpected size change: %zu --> %zu\n",
+		malloc_printf("Unexpected size change: %zu --> %zu\n",
 		    sz, tsz);
 	}
 
 	q = p;
-	r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 5, ALLOCM_NO_MOVE);
+	r = rallocm(&q, &tsz, sz, 5, ALLOCM_NO_MOVE);
 	if (r != ALLOCM_SUCCESS)
-		fprintf(stderr, "Unexpected rallocm() error\n");
+		malloc_printf("Unexpected rallocm() error\n");
 	if (q != p)
-		fprintf(stderr, "Unexpected object move\n");
+		malloc_printf("Unexpected object move\n");
 	if (tsz != sz) {
-		fprintf(stderr, "Unexpected size change: %zu --> %zu\n",
+		malloc_printf("Unexpected size change: %zu --> %zu\n",
 		    sz, tsz);
 	}
 
 	q = p;
-	r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE);
+	r = rallocm(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE);
 	if (r != ALLOCM_ERR_NOT_MOVED)
-		fprintf(stderr, "Unexpected rallocm() result\n");
+		malloc_printf("Unexpected rallocm() result\n");
 	if (q != p)
-		fprintf(stderr, "Unexpected object move\n");
+		malloc_printf("Unexpected object move\n");
 	if (tsz != sz) {
-		fprintf(stderr, "Unexpected size change: %zu --> %zu\n",
+		malloc_printf("Unexpected size change: %zu --> %zu\n",
 		    sz, tsz);
 	}
 
 	q = p;
-	r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, 0);
+	r = rallocm(&q, &tsz, sz + 5, 0, 0);
 	if (r != ALLOCM_SUCCESS)
-		fprintf(stderr, "Unexpected rallocm() error\n");
+		malloc_printf("Unexpected rallocm() error\n");
 	if (q == p)
-		fprintf(stderr, "Expected object move\n");
+		malloc_printf("Expected object move\n");
 	if (tsz == sz) {
-		fprintf(stderr, "Expected size change: %zu --> %zu\n",
+		malloc_printf("Expected size change: %zu --> %zu\n",
 		    sz, tsz);
 	}
 	p = q;
 	sz = tsz;
 
-	r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*2, 0, 0);
+	r = rallocm(&q, &tsz, pagesize*2, 0, 0);
 	if (r != ALLOCM_SUCCESS)
-		fprintf(stderr, "Unexpected rallocm() error\n");
+		malloc_printf("Unexpected rallocm() error\n");
 	if (q == p)
-		fprintf(stderr, "Expected object move\n");
+		malloc_printf("Expected object move\n");
 	if (tsz == sz) {
-		fprintf(stderr, "Expected size change: %zu --> %zu\n",
+		malloc_printf("Expected size change: %zu --> %zu\n",
 		    sz, tsz);
 	}
 	p = q;
 	sz = tsz;
 
-	r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*4, 0, 0);
+	r = rallocm(&q, &tsz, pagesize*4, 0, 0);
 	if (r != ALLOCM_SUCCESS)
-		fprintf(stderr, "Unexpected rallocm() error\n");
+		malloc_printf("Unexpected rallocm() error\n");
 	if (tsz == sz) {
-		fprintf(stderr, "Expected size change: %zu --> %zu\n",
+		malloc_printf("Expected size change: %zu --> %zu\n",
 		    sz, tsz);
 	}
 	p = q;
 	sz = tsz;
 
-	r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*2, 0, ALLOCM_NO_MOVE);
+	r = rallocm(&q, &tsz, pagesize*2, 0, ALLOCM_NO_MOVE);
 	if (r != ALLOCM_SUCCESS)
-		fprintf(stderr, "Unexpected rallocm() error\n");
+		malloc_printf("Unexpected rallocm() error\n");
 	if (q != p)
-		fprintf(stderr, "Unexpected object move\n");
+		malloc_printf("Unexpected object move\n");
 	if (tsz == sz) {
-		fprintf(stderr, "Expected size change: %zu --> %zu\n",
+		malloc_printf("Expected size change: %zu --> %zu\n",
 		    sz, tsz);
 	}
 	sz = tsz;
 
-	r = JEMALLOC_P(rallocm)(&q, &tsz, pagesize*4, 0, ALLOCM_NO_MOVE);
+	r = rallocm(&q, &tsz, pagesize*4, 0, ALLOCM_NO_MOVE);
 	if (r != ALLOCM_SUCCESS)
-		fprintf(stderr, "Unexpected rallocm() error\n");
+		malloc_printf("Unexpected rallocm() error\n");
 	if (q != p)
-		fprintf(stderr, "Unexpected object move\n");
+		malloc_printf("Unexpected object move\n");
 	if (tsz == sz) {
-		fprintf(stderr, "Expected size change: %zu --> %zu\n",
+		malloc_printf("Expected size change: %zu --> %zu\n",
 		    sz, tsz);
 	}
 	sz = tsz;
 
-	JEMALLOC_P(dallocm)(p, 0);
+	dallocm(p, 0);
 
-	fprintf(stderr, "Test end\n");
+	malloc_printf("Test end\n");
 	return (0);
 }
diff --git a/deps/jemalloc/test/thread_arena.c b/deps/jemalloc/test/thread_arena.c
index ef8d6817..2020d994 100644
--- a/deps/jemalloc/test/thread_arena.c
+++ b/deps/jemalloc/test/thread_arena.c
@@ -1,16 +1,10 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <pthread.h>
-#include <string.h>
-#include <assert.h>
-
 #define	JEMALLOC_MANGLE
 #include "jemalloc_test.h"
 
 #define NTHREADS 10
 
 void *
-thread_start(void *arg)
+je_thread_start(void *arg)
 {
 	unsigned main_arena_ind = *(unsigned *)arg;
 	void *p;
@@ -18,24 +12,24 @@ thread_start(void *arg)
 	size_t size;
 	int err;
 
-	p = JEMALLOC_P(malloc)(1);
+	p = malloc(1);
 	if (p == NULL) {
-		fprintf(stderr, "%s(): Error in malloc()\n", __func__);
+		malloc_printf("%s(): Error in malloc()\n", __func__);
 		return (void *)1;
 	}
 
 	size = sizeof(arena_ind);
-	if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size,
-	    &main_arena_ind, sizeof(main_arena_ind)))) {
-		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+	if ((err = mallctl("thread.arena", &arena_ind, &size, &main_arena_ind,
+	    sizeof(main_arena_ind)))) {
+		malloc_printf("%s(): Error in mallctl(): %s\n", __func__,
 		    strerror(err));
 		return (void *)1;
 	}
 
 	size = sizeof(arena_ind);
-	if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, NULL,
+	if ((err = mallctl("thread.arena", &arena_ind, &size, NULL,
 	    0))) {
-		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+		malloc_printf("%s(): Error in mallctl(): %s\n", __func__,
 		    strerror(err));
 		return (void *)1;
 	}
@@ -52,41 +46,40 @@ main(void)
 	unsigned arena_ind;
 	size_t size;
 	int err;
-	pthread_t threads[NTHREADS];
+	je_thread_t threads[NTHREADS];
 	unsigned i;
 
-	fprintf(stderr, "Test begin\n");
+	malloc_printf("Test begin\n");
 
-	p = JEMALLOC_P(malloc)(1);
+	p = malloc(1);
 	if (p == NULL) {
-		fprintf(stderr, "%s(): Error in malloc()\n", __func__);
+		malloc_printf("%s(): Error in malloc()\n", __func__);
 		ret = 1;
-		goto RETURN;
+		goto label_return;
 	}
 
 	size = sizeof(arena_ind);
-	if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, NULL,
-	    0))) {
-		fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+	if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) {
+		malloc_printf("%s(): Error in mallctl(): %s\n", __func__,
 		    strerror(err));
 		ret = 1;
-		goto RETURN;
+		goto label_return;
 	}
 
-	for (i = 0; i < NTHREADS; i++) {
-		if (pthread_create(&threads[i], NULL, thread_start,
-		    (void *)&arena_ind) != 0) {
-			fprintf(stderr, "%s(): Error in pthread_create()\n",
-			    __func__);
-			ret = 1;
-			goto RETURN;
-		}
-	}
+	for (i = 0; i < NTHREADS; i++)
+		je_thread_create(&threads[i], je_thread_start,
+		    (void *)&arena_ind);
 
-	for (i = 0; i < NTHREADS; i++)
-		pthread_join(threads[i], (void *)&ret);
+	for (i = 0; i < NTHREADS; i++) {
+		/* The exit value is pointer-sized; never join into an int. */
+		void *join_ret = NULL;
+
+		je_thread_join(threads[i], (void *)&join_ret);
+		if (join_ret != NULL)
+			ret = 1;
+	}
 
-RETURN:
-	fprintf(stderr, "Test end\n");
+label_return:
+	malloc_printf("Test end\n");
 	return (ret);
 }
diff --git a/deps/jemalloc/test/thread_tcache_enabled.c b/deps/jemalloc/test/thread_tcache_enabled.c
new file mode 100644
index 00000000..2061b7bb
--- /dev/null
+++ b/deps/jemalloc/test/thread_tcache_enabled.c
@@ -0,0 +1,115 @@
+#define	JEMALLOC_MANGLE
+#include "jemalloc_test.h"
+
+/*
+ * Write "enable" to the "thread.tcache.enabled" mallctl and check that the
+ * value it replaced equals "expected_old".  The mallctl() call is deliberately
+ * kept out of assert() so that it still runs when assertions are compiled
+ * out.  Prints a diagnostic and aborts on a mallctl() error or a mismatch.
+ */
+static void
+tcache_enabled_swap(bool enable, bool expected_old)
+{
+	int err;
+	size_t sz;
+	bool old;
+
+	sz = sizeof(bool);
+	err = mallctl("thread.tcache.enabled", &old, &sz, &enable,
+	    sizeof(bool));
+	if (err != 0) {
+		malloc_printf("%s(): Error in mallctl(): %s\n", __func__,
+		    strerror(err));
+		abort();
+	}
+	if (old != expected_old) {
+		malloc_printf("%s(): Unexpected previous tcache state\n",
+		    __func__);
+		abort();
+	}
+}
+
+/*
+ * Drive "thread.tcache.enabled" through every old/new state pair, first back
+ * to back and then with an allocation between writes.  Always returns NULL;
+ * returns early without testing anything if the initial read fails.
+ */
+void *
+je_thread_start(void *arg)
+{
+	int err;
+	size_t sz;
+	bool e0;
+
+	sz = sizeof(bool);
+	if ((err = mallctl("thread.tcache.enabled", &e0, &sz, NULL, 0))) {
+		if (err == ENOENT) {
+#ifdef JEMALLOC_TCACHE
+			assert(false);
+#endif
+		}
+		goto label_return;
+	}
+
+	/* Start from a known state: tcache disabled. */
+	if (e0)
+		tcache_enabled_swap(false, true);
+
+	tcache_enabled_swap(true, false);
+	tcache_enabled_swap(true, true);
+	tcache_enabled_swap(false, true);
+	tcache_enabled_swap(false, false);
+
+	free(malloc(1));
+	tcache_enabled_swap(true, false);
+
+	free(malloc(1));
+	tcache_enabled_swap(true, true);
+
+	free(malloc(1));
+	tcache_enabled_swap(false, true);
+
+	free(malloc(1));
+	tcache_enabled_swap(false, false);
+
+	free(malloc(1));
+label_return:
+	return (NULL);
+}
+
+/*
+ * Run the toggle sequence on the main thread and on two short-lived threads,
+ * interleaved.  Returns nonzero if a joined thread reports a non-NULL exit
+ * value.
+ */
+int
+main(void)
+{
+	int ret = 0;
+	je_thread_t thread;
+	void *join_ret;
+
+	malloc_printf("Test begin\n");
+
+	je_thread_start(NULL);
+
+	/* Join into a pointer-sized object; an int is too small on LP64. */
+	je_thread_create(&thread, je_thread_start, NULL);
+	join_ret = NULL;
+	je_thread_join(thread, (void *)&join_ret);
+	if (join_ret != NULL)
+		ret = 1;
+
+	je_thread_start(NULL);
+
+	je_thread_create(&thread, je_thread_start, NULL);
+	join_ret = NULL;
+	je_thread_join(thread, (void *)&join_ret);
+	if (join_ret != NULL)
+		ret = 1;
+
+	je_thread_start(NULL);
+
+	malloc_printf("Test end\n");
+	return (ret);
+}
diff --git a/deps/jemalloc/test/thread_tcache_enabled.exp b/deps/jemalloc/test/thread_tcache_enabled.exp
new file mode 100644
index 00000000..369a88dd
--- /dev/null
+++ b/deps/jemalloc/test/thread_tcache_enabled.exp
@@ -0,0 +1,2 @@
+Test begin
+Test end
diff --git a/src/zmalloc.h b/src/zmalloc.h
index ff555619..89f5b6ee 100644
--- a/src/zmalloc.h
+++ b/src/zmalloc.h
@@ -47,11 +47,10 @@
 
 #elif defined(USE_JEMALLOC)
 #define ZMALLOC_LIB ("jemalloc-" __xstr(JEMALLOC_VERSION_MAJOR) "." __xstr(JEMALLOC_VERSION_MINOR) "." __xstr(JEMALLOC_VERSION_BUGFIX))
-#define JEMALLOC_MANGLE
 #include <jemalloc/jemalloc.h>
 #if (JEMALLOC_VERSION_MAJOR == 2 && JEMALLOC_VERSION_MINOR >= 1) || (JEMALLOC_VERSION_MAJOR > 2)
 #define HAVE_MALLOC_SIZE 1
-#define zmalloc_size(p) JEMALLOC_P(malloc_usable_size)(p)
+#define zmalloc_size(p) je_malloc_usable_size(p)
 #else
 #error "Newer version of jemalloc required"
 #endif