graphql-engine / Commits / 9c2ea26c
Showing only the first 13 changed files; the commit contains too many to display in full.
  • preload-mimalloc/mimalloc/LICENSE
     1 +MIT License
     2 + 
     3 +Copyright (c) 2018-2021 Microsoft Corporation, Daan Leijen
     4 + 
     5 +Permission is hereby granted, free of charge, to any person obtaining a copy
     6 +of this software and associated documentation files (the "Software"), to deal
     7 +in the Software without restriction, including without limitation the rights
     8 +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     9 +copies of the Software, and to permit persons to whom the Software is
     10 +furnished to do so, subject to the following conditions:
     11 + 
     12 +The above copyright notice and this permission notice shall be included in all
     13 +copies or substantial portions of the Software.
     14 + 
     15 +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     18 +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19 +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     20 +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     21 +SOFTWARE.
     22 + 
  • preload-mimalloc/mimalloc/include/mimalloc/atomic.h
     1 +/* ----------------------------------------------------------------------------
     2 +Copyright (c) 2018-2023 Microsoft Research, Daan Leijen
     3 +This is free software; you can redistribute it and/or modify it under the
     4 +terms of the MIT license. A copy of the license can be found in the file
     5 +"LICENSE" at the root of this distribution.
     6 +-----------------------------------------------------------------------------*/
     7 +#pragma once
     8 +#ifndef MIMALLOC_ATOMIC_H
     9 +#define MIMALLOC_ATOMIC_H
     10 + 
     11 +// --------------------------------------------------------------------------------------------
     12 +// Atomics
     13 +// We need to be portable between C, C++, and MSVC.
     14 +// We base the primitives on the C/C++ atomics and create a minimal wrapper for MSVC in C compilation mode.
     15 +// This is why we try to use only `uintptr_t` and `<type>*` as atomic types.
     16 +// To gain better insight into the range of atomics used, we use explicitly named memory order operations
     17 +// instead of passing the memory order as a parameter.
     18 +// -----------------------------------------------------------------------------------------------
     19 + 
     20 +#if defined(__cplusplus)
     21 +// Use C++ atomics
     22 +#include <atomic>
     23 +#define _Atomic(tp) std::atomic<tp>
     24 +#define mi_atomic(name) std::atomic_##name
     25 +#define mi_memory_order(name) std::memory_order_##name
     26 +#if !defined(ATOMIC_VAR_INIT) || (__cplusplus >= 202002L) // c++20, see issue #571
     27 + #define MI_ATOMIC_VAR_INIT(x) x
     28 +#else
     29 + #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
     30 +#endif
     31 +#elif defined(_MSC_VER)
     32 +// Use MSVC C wrapper for C11 atomics
     33 +#define _Atomic(tp) tp
     34 +#define MI_ATOMIC_VAR_INIT(x) x
     35 +#define mi_atomic(name) mi_atomic_##name
     36 +#define mi_memory_order(name) mi_memory_order_##name
     37 +#else
     38 +// Use C11 atomics
     39 +#include <stdatomic.h>
     40 +#define mi_atomic(name) atomic_##name
     41 +#define mi_memory_order(name) memory_order_##name
     42 +#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
     43 +#endif
     44 + 
     45 +// Various defines for all used memory orders in mimalloc
     46 +#define mi_atomic_cas_weak(p,expected,desired,mem_success,mem_fail) \
     47 + mi_atomic(compare_exchange_weak_explicit)(p,expected,desired,mem_success,mem_fail)
     48 + 
     49 +#define mi_atomic_cas_strong(p,expected,desired,mem_success,mem_fail) \
     50 + mi_atomic(compare_exchange_strong_explicit)(p,expected,desired,mem_success,mem_fail)
     51 + 
     52 +#define mi_atomic_load_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire))
     53 +#define mi_atomic_load_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
     54 +#define mi_atomic_store_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release))
     55 +#define mi_atomic_store_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))
     56 +#define mi_atomic_exchange_release(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(release))
     57 +#define mi_atomic_exchange_acq_rel(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel))
     58 +#define mi_atomic_cas_weak_release(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
     59 +#define mi_atomic_cas_weak_acq_rel(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
     60 +#define mi_atomic_cas_strong_release(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
     61 +#define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
     62 + 
     63 +#define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed))
     64 +#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed))
     65 +#define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel))
     66 +#define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel))
     67 +#define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel))
     68 +#define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel))
     69 + 
     70 +#define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,(uintptr_t)1)
     71 +#define mi_atomic_decrement_relaxed(p) mi_atomic_sub_relaxed(p,(uintptr_t)1)
     72 +#define mi_atomic_increment_acq_rel(p) mi_atomic_add_acq_rel(p,(uintptr_t)1)
     73 +#define mi_atomic_decrement_acq_rel(p) mi_atomic_sub_acq_rel(p,(uintptr_t)1)
     74 + 
     75 +static inline void mi_atomic_yield(void);
     76 +static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add);
     77 +static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub);
     78 + 
     79 + 
     80 +#if defined(__cplusplus) || !defined(_MSC_VER)
     81 + 
     82 +// C++/C11 atomics are polymorphic, so we can use the typed `ptr` variants directly (where `tp` is the type of the atomic value)
     83 +// We use these macros so we can provide a typed wrapper in MSVC in C compilation mode as well
     84 +#define mi_atomic_load_ptr_acquire(tp,p) mi_atomic_load_acquire(p)
     85 +#define mi_atomic_load_ptr_relaxed(tp,p) mi_atomic_load_relaxed(p)
     86 + 
     87 +// In C++ we need to add casts to help resolve templates if NULL is passed
     88 +#if defined(__cplusplus)
     89 +#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,(tp*)x)
     90 +#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,(tp*)x)
     91 +#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des)
     92 +#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des)
     93 +#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des)
     94 +#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x)
     95 +#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x)
     96 +#else
     97 +#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,x)
     98 +#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,x)
     99 +#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des)
     100 +#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des)
     101 +#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des)
     102 +#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x)
     103 +#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x)
     104 +#endif
     105 + 
     106 +// These are used by the statistics
     107 +static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) {
     108 + return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed));
     109 +}
     110 +static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) {
     111 + int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p);
     112 + while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, &current, x)) { /* nothing */ };
     113 +}
     114 + 
     115 +// Used by timers
     116 +#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire))
     117 +#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
     118 +#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release))
     119 +#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))
     120 + 
     121 + 
     122 + 
     123 +#elif defined(_MSC_VER)
     124 + 
     125 +// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics.
     126 +#define WIN32_LEAN_AND_MEAN
     127 +#include <windows.h>
     128 +#include <intrin.h>
     129 +#ifdef _WIN64
     130 +typedef LONG64 msc_intptr_t;
     131 +#define MI_64(f) f##64
     132 +#else
     133 +typedef LONG msc_intptr_t;
     134 +#define MI_64(f) f
     135 +#endif
     136 + 
     137 +typedef enum mi_memory_order_e {
     138 + mi_memory_order_relaxed,
     139 + mi_memory_order_consume,
     140 + mi_memory_order_acquire,
     141 + mi_memory_order_release,
     142 + mi_memory_order_acq_rel,
     143 + mi_memory_order_seq_cst
     144 +} mi_memory_order;
     145 + 
     146 +static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)*p, uintptr_t add, mi_memory_order mo) {
     147 + (void)(mo);
     148 + return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
     149 +}
     150 +static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintptr_t sub, mi_memory_order mo) {
     151 + (void)(mo);
     152 + return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub));
     153 +}
     154 +static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
     155 + (void)(mo);
     156 + return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
     157 +}
     158 +static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
     159 + (void)(mo);
     160 + return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
     161 +}
     162 +static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
     163 + (void)(mo1); (void)(mo2);
     164 + uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected));
     165 + if (read == *expected) {
     166 + return true;
     167 + }
     168 + else {
     169 + *expected = read;
     170 + return false;
     171 + }
     172 +}
     173 +static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
     174 + return mi_atomic_compare_exchange_strong_explicit(p, expected, desired, mo1, mo2);
     175 +}
     176 +static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)*p, uintptr_t exchange, mi_memory_order mo) {
     177 + (void)(mo);
     178 + return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
     179 +}
     180 +static inline void mi_atomic_thread_fence(mi_memory_order mo) {
     181 + (void)(mo);
     182 + _Atomic(uintptr_t) x = 0;
     183 + mi_atomic_exchange_explicit(&x, 1, mo);
     184 +}
     185 +static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) {
     186 + (void)(mo);
     187 +#if defined(_M_IX86) || defined(_M_X64)
     188 + return *p;
     189 +#else
     190 + uintptr_t x = *p;
     191 + if (mo > mi_memory_order_relaxed) {
     192 + while (!mi_atomic_compare_exchange_weak_explicit(p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ };
     193 + }
     194 + return x;
     195 +#endif
     196 +}
     197 +static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
     198 + (void)(mo);
     199 +#if defined(_M_IX86) || defined(_M_X64)
     200 + *p = x;
     201 +#else
     202 + mi_atomic_exchange_explicit(p, x, mo);
     203 +#endif
     204 +}
     205 +static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)*p, mi_memory_order mo) {
     206 + (void)(mo);
     207 +#if defined(_M_X64)
     208 + return *p;
     209 +#else
     210 + int64_t old = *p;
     211 + int64_t x = old;
     212 + while ((old = InterlockedCompareExchange64(p, x, old)) != x) {
     213 + x = old;
     214 + }
     215 + return x;
     216 +#endif
     217 +}
     218 +static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)*p, int64_t x, mi_memory_order mo) {
     219 + (void)(mo);
     220 +#if defined(x_M_IX86) || defined(_M_X64)
     221 + *p = x;
     222 +#else
     223 + InterlockedExchange64(p, x);
     224 +#endif
     225 +}
     226 + 
     227 +// These are used by the statistics
     228 +static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int64_t add) {
     229 +#ifdef _WIN64
     230 + return (int64_t)mi_atomic_addi((int64_t*)p, add);
     231 +#else
     232 + int64_t current;
     233 + int64_t sum;
     234 + do {
     235 + current = *p;
     236 + sum = current + add;
     237 + } while (_InterlockedCompareExchange64(p, sum, current) != current);
     238 + return current;
     239 +#endif
     240 +}
     241 +static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) {
     242 + int64_t current;
     243 + do {
     244 + current = *p;
     245 + } while (current < x && _InterlockedCompareExchange64(p, x, current) != current);
     246 +}
     247 + 
     248 +// The pointer macros cast to `uintptr_t`.
     249 +#define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p))
     250 +#define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p))
     251 +#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release((_Atomic(uintptr_t)*)(p),(uintptr_t)(x))
     252 +#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)(x))
     253 +#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
     254 +#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
     255 +#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
     256 +#define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
     257 +#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
     258 + 
     259 +#define mi_atomic_loadi64_acquire(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(acquire))
     260 +#define mi_atomic_loadi64_relaxed(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(relaxed))
     261 +#define mi_atomic_storei64_release(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(release))
     262 +#define mi_atomic_storei64_relaxed(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(relaxed))
     263 + 
     264 + 
     265 +#endif
     266 + 
     267 + 
     268 +// Atomically add a signed value; returns the previous value.
     269 +static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add) {
     270 + return (intptr_t)mi_atomic_add_acq_rel((_Atomic(uintptr_t)*)p, (uintptr_t)add);
     271 +}
     272 + 
     273 +// Atomically subtract a signed value; returns the previous value.
     274 +static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) {
     275 + return (intptr_t)mi_atomic_addi(p, -sub);
     276 +}
     277 + 
     278 +typedef _Atomic(uintptr_t) mi_atomic_once_t;
     279 + 
     280 +// Returns true only on the first invocation
     281 +static inline bool mi_atomic_once( mi_atomic_once_t* once ) {
     282 + if (mi_atomic_load_relaxed(once) != 0) return false; // quick test
     283 + uintptr_t expected = 0;
     284 + return mi_atomic_cas_strong_acq_rel(once, &expected, 1); // try to set to 1
     285 +}
     286 + 
     287 +// Yield
     288 +#if defined(__cplusplus)
     289 +#include <thread>
     290 +static inline void mi_atomic_yield(void) {
     291 + std::this_thread::yield();
     292 +}
     293 +#elif defined(_WIN32)
     294 +#define WIN32_LEAN_AND_MEAN
     295 +#include <windows.h>
     296 +static inline void mi_atomic_yield(void) {
     297 + YieldProcessor();
     298 +}
     299 +#elif defined(__SSE2__)
     300 +#include <emmintrin.h>
     301 +static inline void mi_atomic_yield(void) {
     302 + _mm_pause();
     303 +}
     304 +#elif (defined(__GNUC__) || defined(__clang__)) && \
     305 + (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \
     306 + defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
     307 +#if defined(__x86_64__) || defined(__i386__)
     308 +static inline void mi_atomic_yield(void) {
     309 + __asm__ volatile ("pause" ::: "memory");
     310 +}
     311 +#elif defined(__aarch64__)
     312 +static inline void mi_atomic_yield(void) {
     313 + __asm__ volatile("wfe");
     314 +}
     315 +#elif (defined(__arm__) && __ARM_ARCH__ >= 7)
     316 +static inline void mi_atomic_yield(void) {
     317 + __asm__ volatile("yield" ::: "memory");
     318 +}
     319 +#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
     320 +static inline void mi_atomic_yield(void) {
     321 + __asm__ __volatile__ ("or 27,27,27" ::: "memory");
     322 +}
     323 +#elif defined(__armel__) || defined(__ARMEL__)
     324 +static inline void mi_atomic_yield(void) {
     325 + __asm__ volatile ("nop" ::: "memory");
     326 +}
     327 +#endif
     328 +#elif defined(__sun)
     329 +// Fallback for other archs
     330 +#include <synch.h>
     331 +static inline void mi_atomic_yield(void) {
     332 + smt_pause();
     333 +}
     334 +#elif defined(__wasi__)
     335 +#include <sched.h>
     336 +static inline void mi_atomic_yield(void) {
     337 + sched_yield();
     338 +}
     339 +#else
     340 +#include <unistd.h>
     341 +static inline void mi_atomic_yield(void) {
     342 + sleep(0);
     343 +}
     344 +#endif
     345 + 
     346 + 
     347 +#endif // MIMALLOC_ATOMIC_H
     348 + 
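For orientation only, not part of this commit: a minimal sketch of how the typed atomic macros defined above might be used from C11 (or C++) code, here a lock-free push and a once-guard. The `example_*` names and the include path are invented for the example; only the `mi_atomic_*` macros and `mi_atomic_once_t` come from the vendored header.

    // Illustrative sketch; assumes the vendored include directory is on the path.
    #include <stdbool.h>
    #include <stdint.h>
    #include "mimalloc/atomic.h"   // preload-mimalloc/mimalloc/include/mimalloc/atomic.h

    typedef struct example_node_s {
      struct example_node_s* next;
    } example_node_t;

    static _Atomic(example_node_t*) example_top;
    static mi_atomic_once_t         example_once;

    // Treiber-style push using the typed pointer CAS macro: on failure the CAS
    // writes the current head back into `expected`, so the loop simply retries.
    static void example_push(example_node_t* n) {
      example_node_t* expected = mi_atomic_load_ptr_relaxed(example_node_t, &example_top);
      do {
        n->next = expected;   // link the new node in front of the current head
      } while (!mi_atomic_cas_ptr_weak_release(example_node_t, &example_top, &expected, n));
    }

    // Runs its body exactly once across threads: mi_atomic_once returns true
    // only for the first caller that moves the flag from 0 to 1.
    static void example_init(void) {
      if (mi_atomic_once(&example_once)) {
        /* one-time initialization goes here */
      }
    }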
  • preload-mimalloc/mimalloc/include/mimalloc/internal.h
     1 +/* ----------------------------------------------------------------------------
     2 +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
     3 +This is free software; you can redistribute it and/or modify it under the
     4 +terms of the MIT license. A copy of the license can be found in the file
     5 +"LICENSE" at the root of this distribution.
     6 +-----------------------------------------------------------------------------*/
     7 +#pragma once
     8 +#ifndef MIMALLOC_INTERNAL_H
     9 +#define MIMALLOC_INTERNAL_H
     10 + 
     11 + 
     12 +// --------------------------------------------------------------------------
     13 +// This file contains the internal APIs of mimalloc and various utility
     14 +// functions and macros.
     15 +// --------------------------------------------------------------------------
     16 + 
     17 +#include "mimalloc/types.h"
     18 +#include "mimalloc/track.h"
     19 + 
     20 +#if (MI_DEBUG>0)
     21 +#define mi_trace_message(...) _mi_trace_message(__VA_ARGS__)
     22 +#else
     23 +#define mi_trace_message(...)
     24 +#endif
     25 + 
     26 +#define MI_CACHE_LINE 64
     27 +#if defined(_MSC_VER)
     28 +#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths)
     29 +#pragma warning(disable:26812) // unscoped enum warning
     30 +#define mi_decl_noinline __declspec(noinline)
     31 +#define mi_decl_thread __declspec(thread)
     32 +#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE))
     33 +#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc
     34 +#define mi_decl_noinline __attribute__((noinline))
     35 +#define mi_decl_thread __thread
     36 +#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE)))
     37 +#else
     38 +#define mi_decl_noinline
     39 +#define mi_decl_thread __thread // hope for the best :-)
     40 +#define mi_decl_cache_align
     41 +#endif
     42 + 
     43 +#if defined(__EMSCRIPTEN__) && !defined(__wasi__)
     44 +#define __wasi__
     45 +#endif
     46 + 
     47 +#if defined(__cplusplus)
     48 +#define mi_decl_externc extern "C"
     49 +#else
     50 +#define mi_decl_externc
     51 +#endif
     52 + 
     53 +// pthreads
     54 +#if !defined(_WIN32) && !defined(__wasi__)
     55 +#define MI_USE_PTHREADS
     56 +#include <pthread.h>
     57 +#endif
     58 + 
     59 +// "options.c"
     60 +void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message);
     61 +void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...);
     62 +void _mi_warning_message(const char* fmt, ...);
     63 +void _mi_verbose_message(const char* fmt, ...);
     64 +void _mi_trace_message(const char* fmt, ...);
     65 +void _mi_options_init(void);
     66 +void _mi_error_message(int err, const char* fmt, ...);
     67 + 
     68 +// random.c
     69 +void _mi_random_init(mi_random_ctx_t* ctx);
     70 +void _mi_random_init_weak(mi_random_ctx_t* ctx);
     71 +void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx);
     72 +void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
     73 +uintptr_t _mi_random_next(mi_random_ctx_t* ctx);
     74 +uintptr_t _mi_heap_random_next(mi_heap_t* heap);
     75 +uintptr_t _mi_os_random_weak(uintptr_t extra_seed);
     76 +static inline uintptr_t _mi_random_shuffle(uintptr_t x);
     77 + 
     78 +// init.c
     79 +extern mi_decl_cache_align mi_stats_t _mi_stats_main;
     80 +extern mi_decl_cache_align const mi_page_t _mi_page_empty;
     81 +bool _mi_is_main_thread(void);
     82 +size_t _mi_current_thread_count(void);
     83 +bool _mi_preloading(void); // true while the C runtime is not ready
     84 +mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
     85 +mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
     86 +void _mi_thread_done(mi_heap_t* heap);
     87 + 
     88 +// os.c
     89 +void _mi_os_init(void); // called from process init
     90 +void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data
     91 +void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data
     92 +size_t _mi_os_page_size(void);
     93 +size_t _mi_os_good_alloc_size(size_t size);
     94 +bool _mi_os_has_overcommit(void);
     95 + 
     96 +bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats);
     97 +bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
     98 +bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
     99 +bool _mi_os_protect(void* addr, size_t size);
     100 +bool _mi_os_unprotect(void* addr, size_t size);
     101 + 
     102 +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats);
     103 +void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats);
     104 +void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats);
     105 +void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size);
     106 +bool _mi_os_use_large_page(size_t size, size_t alignment);
     107 +size_t _mi_os_large_page_size(void);
     108 + 
     109 +void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);
     110 +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize);
     111 +void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats);
     112 + 
     113 +// arena.c
     114 +mi_arena_id_t _mi_arena_id_none(void);
     115 +void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats);
     116 +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
     117 +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
     118 +bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id);
     119 +bool _mi_arena_is_os_allocated(size_t arena_memid);
     120 + 
     121 +// "segment-cache.c"
     122 +void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
     123 +bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld);
     124 +void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld);
     125 +void _mi_segment_cache_free_all(mi_os_tld_t* tld);
     126 +void _mi_segment_map_allocated_at(const mi_segment_t* segment);
     127 +void _mi_segment_map_freed_at(const mi_segment_t* segment);
     128 + 
     129 +// "segment.c"
     130 +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
     131 +void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
     132 +void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
     133 +bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
     134 +void _mi_segment_thread_collect(mi_segments_tld_t* tld);
     135 + 
     136 +#if MI_HUGE_PAGE_ABANDON
     137 +void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
     138 +#else
     139 +void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
     140 +#endif
     141 + 
     142 +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page
     143 +void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
     144 +void _mi_abandoned_await_readers(void);
     145 +void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld);
     146 + 
     147 +// "page.c"
     148 +void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc;
     149 + 
     150 +void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks
     151 +void _mi_page_unfull(mi_page_t* page);
     152 +void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page
     153 +void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread...
     154 +void _mi_heap_delayed_free_all(mi_heap_t* heap);
     155 +bool _mi_heap_delayed_free_partial(mi_heap_t* heap);
     156 +void _mi_heap_collect_retired(mi_heap_t* heap, bool force);
     157 + 
     158 +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
     159 +bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
     160 +size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append);
     161 +void _mi_deferred_free(mi_heap_t* heap, bool force);
     162 + 
     163 +void _mi_page_free_collect(mi_page_t* page,bool force);
     164 +void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments
     165 + 
     166 +size_t _mi_bin_size(uint8_t bin); // for stats
     167 +uint8_t _mi_bin(size_t size); // for stats
     168 + 
     169 +// "heap.c"
     170 +void _mi_heap_destroy_pages(mi_heap_t* heap);
     171 +void _mi_heap_collect_abandon(mi_heap_t* heap);
     172 +void _mi_heap_set_default_direct(mi_heap_t* heap);
     173 +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid);
     174 +void _mi_heap_destroy_all(void);
     175 + 
     176 +// "stats.c"
     177 +void _mi_stats_done(mi_stats_t* stats);
     178 +mi_msecs_t _mi_clock_now(void);
     179 +mi_msecs_t _mi_clock_end(mi_msecs_t start);
     180 +mi_msecs_t _mi_clock_start(void);
     181 + 
     182 +// "alloc.c"
     183 +void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic`
     184 +void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
     185 +void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned`
     186 +void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
     187 +mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
     188 +bool _mi_free_delayed_block(mi_block_t* block);
     189 +void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration
     190 +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size);
     191 + 
     192 +// option.c, c primitives
     193 +char _mi_toupper(char c);
     194 +int _mi_strnicmp(const char* s, const char* t, size_t n);
     195 +void _mi_strlcpy(char* dest, const char* src, size_t dest_size);
     196 +void _mi_strlcat(char* dest, const char* src, size_t dest_size);
     197 +size_t _mi_strlen(const char* s);
     198 +size_t _mi_strnlen(const char* s, size_t max_len);
     199 + 
     200 + 
     201 +#if MI_DEBUG>1
     202 +bool _mi_page_is_valid(mi_page_t* page);
     203 +#endif
     204 + 
     205 + 
     206 +// ------------------------------------------------------
     207 +// Branches
     208 +// ------------------------------------------------------
     209 + 
     210 +#if defined(__GNUC__) || defined(__clang__)
     211 +#define mi_unlikely(x) (__builtin_expect(!!(x),false))
     212 +#define mi_likely(x) (__builtin_expect(!!(x),true))
     213 +#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
     214 +#define mi_unlikely(x) (x) [[unlikely]]
     215 +#define mi_likely(x) (x) [[likely]]
     216 +#else
     217 +#define mi_unlikely(x) (x)
     218 +#define mi_likely(x) (x)
     219 +#endif
     220 + 
     221 +#ifndef __has_builtin
     222 +#define __has_builtin(x) 0
     223 +#endif
     224 + 
     225 + 
     226 +/* -----------------------------------------------------------
     227 + Error codes passed to `_mi_fatal_error`
     228 + All are recoverable but EFAULT is a serious error and aborts by default in secure mode.
     229 + For portability define undefined error codes using common Unix codes:
     230 + <https://www-numi.fnal.gov/offline_software/srt_public_context/WebDocs/Errors/unix_system_errors.html>
     231 +----------------------------------------------------------- */
     232 +#include <errno.h>
     233 +#ifndef EAGAIN // double free
     234 +#define EAGAIN (11)
     235 +#endif
     236 +#ifndef ENOMEM // out of memory
     237 +#define ENOMEM (12)
     238 +#endif
     239 +#ifndef EFAULT // corrupted free-list or meta-data
     240 +#define EFAULT (14)
     241 +#endif
     242 +#ifndef EINVAL // trying to free an invalid pointer
     243 +#define EINVAL (22)
     244 +#endif
     245 +#ifndef EOVERFLOW // count*size overflow
     246 +#define EOVERFLOW (75)
     247 +#endif
     248 + 
     249 + 
     250 +/* -----------------------------------------------------------
     251 + Inlined definitions
     252 +----------------------------------------------------------- */
     253 +#define MI_UNUSED(x) (void)(x)
     254 +#if (MI_DEBUG>0)
     255 +#define MI_UNUSED_RELEASE(x)
     256 +#else
     257 +#define MI_UNUSED_RELEASE(x) MI_UNUSED(x)
     258 +#endif
     259 + 
     260 +#define MI_INIT4(x) x(),x(),x(),x()
     261 +#define MI_INIT8(x) MI_INIT4(x),MI_INIT4(x)
     262 +#define MI_INIT16(x) MI_INIT8(x),MI_INIT8(x)
     263 +#define MI_INIT32(x) MI_INIT16(x),MI_INIT16(x)
     264 +#define MI_INIT64(x) MI_INIT32(x),MI_INIT32(x)
     265 +#define MI_INIT128(x) MI_INIT64(x),MI_INIT64(x)
     266 +#define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x)
     267 + 
     268 + 
     269 +// Is `x` a power of two? (0 is considered a power of two)
     270 +static inline bool _mi_is_power_of_two(uintptr_t x) {
     271 + return ((x & (x - 1)) == 0);
     272 +}
     273 + 
     274 +// Is a pointer aligned?
     275 +static inline bool _mi_is_aligned(void* p, size_t alignment) {
     276 + mi_assert_internal(alignment != 0);
     277 + return (((uintptr_t)p % alignment) == 0);
     278 +}
     279 + 
     280 +// Align upwards
     281 +static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
     282 + mi_assert_internal(alignment != 0);
     283 + uintptr_t mask = alignment - 1;
     284 + if ((alignment & mask) == 0) { // power of two?
     285 + return ((sz + mask) & ~mask);
     286 + }
     287 + else {
     288 + return (((sz + mask)/alignment)*alignment);
     289 + }
     290 +}
     291 + 
     292 +// Align downwards
     293 +static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) {
     294 + mi_assert_internal(alignment != 0);
     295 + uintptr_t mask = alignment - 1;
     296 + if ((alignment & mask) == 0) { // power of two?
     297 + return (sz & ~mask);
     298 + }
     299 + else {
     300 + return ((sz / alignment) * alignment);
     301 + }
     302 +}
     303 + 
     304 +// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`.
     305 +static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
     306 + mi_assert_internal(divider != 0);
     307 + return (divider == 0 ? size : ((size + divider - 1) / divider));
     308 +}
     309 + 
     310 +// Is memory zero initialized?
     311 +static inline bool mi_mem_is_zero(void* p, size_t size) {
     312 + for (size_t i = 0; i < size; i++) {
     313 + if (((uint8_t*)p)[i] != 0) return false;
     314 + }
     315 + return true;
     316 +}
     317 + 
     318 + 
     319 +// Align a byte size to a size in _machine words_,
     320 +// i.e. byte size == `wsize*sizeof(void*)`.
     321 +static inline size_t _mi_wsize_from_size(size_t size) {
     322 + mi_assert_internal(size <= SIZE_MAX - sizeof(uintptr_t));
     323 + return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
     324 +}
     325 + 
     326 +// Overflow detecting multiply
     327 +#if __has_builtin(__builtin_umul_overflow) || (defined(__GNUC__) && (__GNUC__ >= 5))
     328 +#include <limits.h> // UINT_MAX, ULONG_MAX
     329 +#if defined(_CLOCK_T) // for Illumos
     330 +#undef _CLOCK_T
     331 +#endif
     332 +static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
     333 + #if (SIZE_MAX == ULONG_MAX)
     334 + return __builtin_umull_overflow(count, size, (unsigned long *)total);
     335 + #elif (SIZE_MAX == UINT_MAX)
     336 + return __builtin_umul_overflow(count, size, (unsigned int *)total);
     337 + #else
     338 + return __builtin_umulll_overflow(count, size, (unsigned long long *)total);
     339 + #endif
     340 +}
     341 +#else /* __builtin_umul_overflow is unavailable */
     342 +static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
     343 + #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX)
     344 + *total = count * size;
     345 + // note: gcc/clang optimize this to directly check the overflow flag
     346 + return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count);
     347 +}
     348 +#endif
     349 + 
     350 +// Safe multiply `count*size` into `total`; return `true` on overflow.
     351 +static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* total) {
     352 + if (count==1) { // quick check for the case where count is one (common for C++ allocators)
     353 + *total = size;
     354 + return false;
     355 + }
     356 + else if mi_unlikely(mi_mul_overflow(count, size, total)) {
     357 + #if MI_DEBUG > 0
     358 + _mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size);
     359 + #endif
     360 + *total = SIZE_MAX;
     361 + return true;
     362 + }
     363 + else return false;
     364 +}
     365 + 
     366 + 
     367 +/*----------------------------------------------------------------------------------------
     368 + Heap functions
     369 +------------------------------------------------------------------------------------------- */
     370 + 
     371 +extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap
     372 + 
     373 +static inline bool mi_heap_is_backing(const mi_heap_t* heap) {
     374 + return (heap->tld->heap_backing == heap);
     375 +}
     376 + 
     377 +static inline bool mi_heap_is_initialized(mi_heap_t* heap) {
     378 + mi_assert_internal(heap != NULL);
     379 + return (heap != &_mi_heap_empty);
     380 +}
     381 + 
     382 +static inline uintptr_t _mi_ptr_cookie(const void* p) {
     383 + extern mi_heap_t _mi_heap_main;
     384 + mi_assert_internal(_mi_heap_main.cookie != 0);
     385 + return ((uintptr_t)p ^ _mi_heap_main.cookie);
     386 +}
     387 + 
     388 +/* -----------------------------------------------------------
     389 + Pages
     390 +----------------------------------------------------------- */
     391 + 
     392 +static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) {
     393 + mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE));
     394 + const size_t idx = _mi_wsize_from_size(size);
     395 + mi_assert_internal(idx < MI_PAGES_DIRECT);
     396 + return heap->pages_free_direct[idx];
     397 +}
     398 + 
     399 +// Segment that contains the pointer
     400 +// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
     401 +// and we need to align "down" to the segment info, which is `MI_SEGMENT_SIZE` bytes before it;
     402 +// therefore we align one byte before `p`.
     403 +static inline mi_segment_t* _mi_ptr_segment(const void* p) {
     404 + mi_assert_internal(p != NULL);
     405 + return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK);
     406 +}
     407 + 
     408 +static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) {
     409 + mi_assert_internal(s->slice_offset== 0 && s->slice_count > 0);
     410 + return (mi_page_t*)(s);
     411 +}
     412 + 
     413 +static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) {
     414 + mi_assert_internal(p->slice_offset== 0 && p->slice_count > 0);
     415 + return (mi_slice_t*)(p);
     416 +}
     417 + 
     418 +// Segment belonging to a page
     419 +static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
     420 + mi_segment_t* segment = _mi_ptr_segment(page);
     421 + mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_entries));
     422 + return segment;
     423 +}
     424 + 
     425 +static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) {
     426 + mi_slice_t* start = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset);
     427 + mi_assert_internal(start >= _mi_ptr_segment(slice)->slices);
     428 + mi_assert_internal(start->slice_offset == 0);
     429 + mi_assert_internal(start + start->slice_count > slice);
     430 + return start;
     431 +}
     432 + 
     433 +// Get the page containing the pointer (performance critical as it is called in mi_free)
     434 +static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
     435 + mi_assert_internal(p > (void*)segment);
     436 + ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
     437 + mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE);
     438 + size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT;
     439 + mi_assert_internal(idx <= segment->slice_entries);
     440 + mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx];
     441 + mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data
     442 + mi_assert_internal(slice->slice_offset == 0);
     443 + mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_entries);
     444 + return mi_slice_to_page(slice);
     445 +}
     446 + 
     447 +// Quick page start for initialized pages
     448 +static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) {
     449 + return _mi_segment_page_start(segment, page, page_size);
     450 +}
     451 + 
     452 +// Get the page containing the pointer
     453 +static inline mi_page_t* _mi_ptr_page(void* p) {
     454 + return _mi_segment_page_of(_mi_ptr_segment(p), p);
     455 +}
     456 + 
     457 +// Get the block size of a page (special case for huge objects)
     458 +static inline size_t mi_page_block_size(const mi_page_t* page) {
     459 + const size_t bsize = page->xblock_size;
     460 + mi_assert_internal(bsize > 0);
     461 + if mi_likely(bsize < MI_HUGE_BLOCK_SIZE) {
     462 + return bsize;
     463 + }
     464 + else {
     465 + size_t psize;
     466 + _mi_segment_page_start(_mi_page_segment(page), page, &psize);
     467 + return psize;
     468 + }
     469 +}
     470 + 
     471 +static inline bool mi_page_is_huge(const mi_page_t* page) {
     472 + return (_mi_page_segment(page)->kind == MI_SEGMENT_HUGE);
     473 +}
     474 + 
     475 +// Get the usable block size of a page without fixed padding.
     476 +// This may still include internal padding due to alignment and rounding up size classes.
     477 +static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
     478 + return mi_page_block_size(page) - MI_PADDING_SIZE;
     479 +}
     480 + 
     481 +// size of a segment
     482 +static inline size_t mi_segment_size(mi_segment_t* segment) {
     483 + return segment->segment_slices * MI_SEGMENT_SLICE_SIZE;
     484 +}
     485 + 
     486 +static inline uint8_t* mi_segment_end(mi_segment_t* segment) {
     487 + return (uint8_t*)segment + mi_segment_size(segment);
     488 +}
     489 + 
     490 +// Thread free access
     491 +static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
     492 + return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3);
     493 +}
     494 + 
     495 +static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) {
     496 + return (mi_delayed_t)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & 3);
     497 +}
     498 + 
     499 +// Heap access
     500 +static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
     501 + return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap));
     502 +}
     503 + 
     504 +static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
     505 + mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
     506 + mi_atomic_store_release(&page->xheap,(uintptr_t)heap);
     507 +}
     508 + 
     509 +// Thread free flag helpers
     510 +static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
     511 + return (mi_block_t*)(tf & ~0x03);
     512 +}
     513 +static inline mi_delayed_t mi_tf_delayed(mi_thread_free_t tf) {
     514 + return (mi_delayed_t)(tf & 0x03);
     515 +}
     516 +static inline mi_thread_free_t mi_tf_make(mi_block_t* block, mi_delayed_t delayed) {
     517 + return (mi_thread_free_t)((uintptr_t)block | (uintptr_t)delayed);
     518 +}
     519 +static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) {
     520 + return mi_tf_make(mi_tf_block(tf),delayed);
     521 +}
     522 +static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) {
     523 + return mi_tf_make(block, mi_tf_delayed(tf));
     524 +}
     525 + 
     526 +// are all blocks in a page freed?
     527 +// note: needs an up-to-date `used` count (as the `xthread_free` list may not be empty); see `_mi_page_collect_free`.
     528 +static inline bool mi_page_all_free(const mi_page_t* page) {
     529 + mi_assert_internal(page != NULL);
     530 + return (page->used == 0);
     531 +}
     532 + 
     533 +// are there any available blocks?
     534 +static inline bool mi_page_has_any_available(const mi_page_t* page) {
     535 + mi_assert_internal(page != NULL && page->reserved > 0);
     536 + return (page->used < page->reserved || (mi_page_thread_free(page) != NULL));
     537 +}
     538 + 
     539 +// are there immediately available blocks, i.e. blocks available on the free list.
     540 +static inline bool mi_page_immediate_available(const mi_page_t* page) {
     541 + mi_assert_internal(page != NULL);
     542 + return (page->free != NULL);
     543 +}
     544 + 
     545 +// is more than 7/8th of a page in use?
     546 +static inline bool mi_page_mostly_used(const mi_page_t* page) {
     547 + if (page==NULL) return true;
     548 + uint16_t frac = page->reserved / 8U;
     549 + return (page->reserved - page->used <= frac);
     550 +}
     551 + 
     552 +static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) {
     553 + return &((mi_heap_t*)heap)->pages[_mi_bin(size)];
     554 +}
     555 + 
     556 + 
     557 + 
     558 +//-----------------------------------------------------------
     559 +// Page flags
     560 +//-----------------------------------------------------------
     561 +static inline bool mi_page_is_in_full(const mi_page_t* page) {
     562 + return page->flags.x.in_full;
     563 +}
     564 + 
     565 +static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
     566 + page->flags.x.in_full = in_full;
     567 +}
     568 + 
     569 +static inline bool mi_page_has_aligned(const mi_page_t* page) {
     570 + return page->flags.x.has_aligned;
     571 +}
     572 + 
     573 +static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
     574 + page->flags.x.has_aligned = has_aligned;
     575 +}
     576 + 
     577 + 
     578 +/* -------------------------------------------------------------------
     579 +Encoding/Decoding the free list next pointers
     580 + 
     581 +This is to protect against buffer overflow exploits where the
     582 +free list is mutated. Many hardened allocators xor the next pointer `p`
     583 +with a secret key `k1`, as `p^k1`. This prevents overwriting with known
     584 +values but might still be too weak: if the attacker can guess
     585 +the pointer `p`, this can reveal `k1` (since `p^k1^p == k1`).
     586 +Moreover, if multiple blocks can be read as well, the attacker can
     587 +xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot
     588 +about the pointers (and subsequently `k1`).
     589 + 
     590 +Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<<k1)+k1`.
     591 +Since these operations are not associative, the above approaches do not
     592 +work so well any more even if the `p` can be guesstimated. For example,
     593 +for the read case we can subtract two entries to discard the `+k1` term,
     594 +but that leads to `((p1^k2)<<<k1) - ((p2^k2)<<<k1)` at best.
     595 +We include the left-rotation since xor and addition are otherwise linear
     596 +in the lowest bit. Finally, both keys are unique per page which reduces
     597 +the re-use of keys by a large factor.
     598 + 
     599 +We also pass a separate `null` value to be used as `NULL` or otherwise
     600 +`(k2<<<k1)+k1` would appear (too) often as a sentinel value.
     601 +------------------------------------------------------------------- */
     602 + 
     603 +static inline bool mi_is_in_same_segment(const void* p, const void* q) {
     604 + return (_mi_ptr_segment(p) == _mi_ptr_segment(q));
     605 +}
     606 + 
     607 +static inline bool mi_is_in_same_page(const void* p, const void* q) {
     608 + mi_segment_t* segment = _mi_ptr_segment(p);
     609 + if (_mi_ptr_segment(q) != segment) return false;
     610 + // assume q may be invalid // return (_mi_segment_page_of(segment, p) == _mi_segment_page_of(segment, q));
     611 + mi_page_t* page = _mi_segment_page_of(segment, p);
     612 + size_t psize;
     613 + uint8_t* start = _mi_segment_page_start(segment, page, &psize);
     614 + return (start <= (uint8_t*)q && (uint8_t*)q < start + psize);
     615 +}
     616 + 
     617 +static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) {
     618 + shift %= MI_INTPTR_BITS;
     619 + return (shift==0 ? x : ((x << shift) | (x >> (MI_INTPTR_BITS - shift))));
     620 +}
     621 +static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) {
     622 + shift %= MI_INTPTR_BITS;
     623 + return (shift==0 ? x : ((x >> shift) | (x << (MI_INTPTR_BITS - shift))));
     624 +}
     625 + 
     626 +static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) {
     627 + void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]);
     628 + return (p==null ? NULL : p);
     629 +}
     630 + 
     631 +static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const uintptr_t* keys) {
     632 + uintptr_t x = (uintptr_t)(p==NULL ? null : p);
     633 + return mi_rotl(x ^ keys[1], keys[0]) + keys[0];
     634 +}
     635 + 
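// Illustrative sketch, not part of this commit: a round-trip check of the
// encoding described above, ((p ^ k2) <<< k1) + k1. The key values and the
// function name are invented; only mi_ptr_encode/mi_ptr_decode (and the
// mi_assert_internal/MI_UNUSED helpers) come from this header.
static inline void mi_ptr_encode_roundtrip_example(void) {
  const uintptr_t keys[2] = { 17, (uintptr_t)0x9e3779b97f4a7c15ULL };  // k1, k2 (arbitrary)
  uintptr_t dummy = 0;
  void* p = &dummy;                                  // any non-NULL block pointer
  mi_encoded_t enc = mi_ptr_encode(NULL, p, keys);   // rotl(p ^ k2, k1) + k1
  void* back = mi_ptr_decode(NULL, enc, keys);       // rotr(enc - k1, k1) ^ k2
  mi_assert_internal(back == p);                     // decoding inverts encoding
  MI_UNUSED(back);                                   // keep unused warnings quiet when asserts are off
}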
     636 +static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) {
     637 + mi_track_mem_defined(block,sizeof(mi_block_t));
     638 + mi_block_t* next;
     639 + #ifdef MI_ENCODE_FREELIST
     640 + next = (mi_block_t*)mi_ptr_decode(null, block->next, keys);
     641 + #else
     642 + MI_UNUSED(keys); MI_UNUSED(null);
     643 + next = (mi_block_t*)block->next;
     644 + #endif
     645 + mi_track_mem_noaccess(block,sizeof(mi_block_t));
     646 + return next;
     647 +}
     648 + 
     649 +static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) {
     650 + mi_track_mem_undefined(block,sizeof(mi_block_t));
     651 + #ifdef MI_ENCODE_FREELIST
     652 + block->next = mi_ptr_encode(null, next, keys);
     653 + #else
     654 + MI_UNUSED(keys); MI_UNUSED(null);
     655 + block->next = (mi_encoded_t)next;
     656 + #endif
     657 + mi_track_mem_noaccess(block,sizeof(mi_block_t));
     658 +}
     659 + 
     660 +static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) {
     661 + #ifdef MI_ENCODE_FREELIST
     662 + mi_block_t* next = mi_block_nextx(page,block,page->keys);
     663 + // check for free list corruption: is `next` at least in the same page?
     664 + // TODO: check if `next` is `page->block_size` aligned?
     665 + if mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next)) {
     666 + _mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next);
     667 + next = NULL;
     668 + }
     669 + return next;
     670 + #else
     671 + MI_UNUSED(page);
     672 + return mi_block_nextx(page,block,NULL);
     673 + #endif
     674 +}
     675 + 
     676 +static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) {
     677 + #ifdef MI_ENCODE_FREELIST
     678 + mi_block_set_nextx(page,block,next, page->keys);
     679 + #else
     680 + MI_UNUSED(page);
     681 + mi_block_set_nextx(page,block,next,NULL);
     682 + #endif
     683 +}
     684 + 
     685 + 
     686 +// -------------------------------------------------------------------
     687 +// commit mask
     688 +// -------------------------------------------------------------------
     689 + 
     690 +static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) {
     691 + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
     692 + cm->mask[i] = 0;
     693 + }
     694 +}
     695 + 
     696 +static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) {
     697 + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
     698 + cm->mask[i] = ~((size_t)0);
     699 + }
     700 +}
     701 + 
     702 +static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) {
     703 + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
     704 + if (cm->mask[i] != 0) return false;
     705 + }
     706 + return true;
     707 +}
     708 + 
     709 +static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) {
     710 + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
     711 + if (cm->mask[i] != ~((size_t)0)) return false;
     712 + }
     713 + return true;
     714 +}
     715 + 
     716 +// defined in `segment.c`:
     717 +size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total);
     718 +size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx);
     719 + 
     720 +#define mi_commit_mask_foreach(cm,idx,count) \
     721 + idx = 0; \
     722 + while ((count = _mi_commit_mask_next_run(cm,&idx)) > 0) {
     723 +
     724 +#define mi_commit_mask_foreach_end() \
     725 + idx += count; \
     726 + }
     727 +
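// Illustrative sketch, not part of this commit: the foreach macro pair above
// expands to an ordinary while loop over runs of committed slices. The
// function name is invented for the example.
static inline size_t mi_commit_mask_count_example(const mi_commit_mask_t* cm) {
  size_t idx;
  size_t count;
  size_t total = 0;
  mi_commit_mask_foreach(cm, idx, count)  // expands to: idx = 0; while ((count = _mi_commit_mask_next_run(cm,&idx)) > 0) {
    total += count;                       // `count` committed slices starting at slice index `idx`
  mi_commit_mask_foreach_end()            // expands to: idx += count; }
  return total;
}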
     728 + 
     729 + 
     730 + 
     731 +// -------------------------------------------------------------------
     732 +// Fast "random" shuffle
     733 +// -------------------------------------------------------------------
     734 + 
     735 +static inline uintptr_t _mi_random_shuffle(uintptr_t x) {
     736 + if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros
     737 +#if (MI_INTPTR_SIZE==8)
     738 + // by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
     739 + x ^= x >> 30;
     740 + x *= 0xbf58476d1ce4e5b9UL;
     741 + x ^= x >> 27;
     742 + x *= 0x94d049bb133111ebUL;
     743 + x ^= x >> 31;
     744 +#elif (MI_INTPTR_SIZE==4)
     745 + // by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
     746 + x ^= x >> 16;
     747 + x *= 0x7feb352dUL;
     748 + x ^= x >> 15;
     749 + x *= 0x846ca68bUL;
     750 + x ^= x >> 16;
     751 +#endif
     752 + return x;
     753 +}
     754 + 
     755 +// -------------------------------------------------------------------
     756 +// Optimize numa node access for the common case (= one node)
     757 +// -------------------------------------------------------------------
     758 + 
     759 +int _mi_os_numa_node_get(mi_os_tld_t* tld);
     760 +size_t _mi_os_numa_node_count_get(void);
     761 + 
     762 +extern _Atomic(size_t) _mi_numa_node_count;
     763 +static inline int _mi_os_numa_node(mi_os_tld_t* tld) {
     764 + if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; }
     765 + else return _mi_os_numa_node_get(tld);
     766 +}
     767 +static inline size_t _mi_os_numa_node_count(void) {
     768 + const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count);
     769 + if mi_likely(count > 0) { return count; }
     770 + else return _mi_os_numa_node_count_get();
     771 +}
     772 + 
     773 + 
     774 + 
     775 +// -----------------------------------------------------------------------
     776 +// Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero)
     777 +// -----------------------------------------------------------------------
     778 + 
     779 +#if defined(__GNUC__)
     780 + 
     781 +#include <limits.h> // LONG_MAX
     782 +#define MI_HAVE_FAST_BITSCAN
     783 +static inline size_t mi_clz(uintptr_t x) {
     784 + if (x==0) return MI_INTPTR_BITS;
     785 +#if (INTPTR_MAX == LONG_MAX)
     786 + return __builtin_clzl(x);
     787 +#else
     788 + return __builtin_clzll(x);
     789 +#endif
     790 +}
     791 +static inline size_t mi_ctz(uintptr_t x) {
     792 + if (x==0) return MI_INTPTR_BITS;
     793 +#if (INTPTR_MAX == LONG_MAX)
     794 + return __builtin_ctzl(x);
     795 +#else
     796 + return __builtin_ctzll(x);
     797 +#endif
     798 +}
     799 + 
     800 +#elif defined(_MSC_VER)
     801 + 
     802 +#include <limits.h> // LONG_MAX
     803 +#include <intrin.h> // BitScanReverse64
     804 +#define MI_HAVE_FAST_BITSCAN
     805 +static inline size_t mi_clz(uintptr_t x) {
     806 + if (x==0) return MI_INTPTR_BITS;
     807 + unsigned long idx;
     808 +#if (INTPTR_MAX == LONG_MAX)
     809 + _BitScanReverse(&idx, x);
     810 +#else
     811 + _BitScanReverse64(&idx, x);
     812 +#endif
     813 + return ((MI_INTPTR_BITS - 1) - idx);
     814 +}
     815 +static inline size_t mi_ctz(uintptr_t x) {
     816 + if (x==0) return MI_INTPTR_BITS;
     817 + unsigned long idx;
     818 +#if (INTPTR_MAX == LONG_MAX)
     819 + _BitScanForward(&idx, x);
     820 +#else
     821 + _BitScanForward64(&idx, x);
     822 +#endif
     823 + return idx;
     824 +}
     825 + 
     826 +#else
     827 +static inline size_t mi_ctz32(uint32_t x) {
     828 + // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
     829 + static const unsigned char debruijn[32] = {
     830 + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
     831 + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
     832 + };
     833 + if (x==0) return 32;
     834 + return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27];
     835 +}
     836 +static inline size_t mi_clz32(uint32_t x) {
     837 + // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
     838 + static const uint8_t debruijn[32] = {
     839 + 31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
     840 + 23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0
     841 + };
     842 + if (x==0) return 32;
     843 + x |= x >> 1;
     844 + x |= x >> 2;
     845 + x |= x >> 4;
     846 + x |= x >> 8;
     847 + x |= x >> 16;
     848 + return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27];
     849 +}
     850 + 
     851 +static inline size_t mi_clz(uintptr_t x) {
     852 + if (x==0) return MI_INTPTR_BITS;
     853 +#if (MI_INTPTR_BITS <= 32)
     854 + return mi_clz32((uint32_t)x);
     855 +#else
     856 + size_t count = mi_clz32((uint32_t)(x >> 32));
     857 + if (count < 32) return count;
     858 + return (32 + mi_clz32((uint32_t)x));
     859 +#endif
     860 +}
     861 +static inline size_t mi_ctz(uintptr_t x) {
     862 + if (x==0) return MI_INTPTR_BITS;
     863 +#if (MI_INTPTR_BITS <= 32)
     864 + return mi_ctz32((uint32_t)x);
     865 +#else
     866 + size_t count = mi_ctz32((uint32_t)x);
     867 + if (count < 32) return count;
     868 + return (32 + mi_ctz32((uint32_t)(x>>32)));
     869 +#endif
     870 +}
     871 + 
     872 +#endif
     873 + 
     874 +// "bit scan reverse": Return index of the highest bit (or MI_INTPTR_BITS if `x` is zero)
     875 +static inline size_t mi_bsr(uintptr_t x) {
     876 + return (x==0 ? MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x));
     877 +}
     878 + 
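A few concrete values on a 64-bit build (MI_INTPTR_BITS == 64) illustrate the conventions above, in particular the MI_INTPTR_BITS result for zero (the function name is only for illustration):

static inline void my_bitscan_examples(void) {
  mi_assert_internal(mi_ctz(0x08) == 3);                   // lowest set bit of 0b1000
  mi_assert_internal(mi_clz(0x01) == MI_INTPTR_BITS - 1);  // 63 leading zeros on 64-bit
  mi_assert_internal(mi_bsr(0x100) == 8);                  // highest set bit index of 256
  mi_assert_internal(mi_bsr(0) == MI_INTPTR_BITS);         // the "all zero" convention
}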
     879 + 
     880 +// ---------------------------------------------------------------------------------
     881 +// Provide our own `_mi_memcpy` for potential performance optimizations.
     882 +//
     883 +// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if
      884 +// we happen to run on x86/x64 CPUs that have "fast short rep movsb" (FSRM) support
      885 +// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017)). See also issue #201 and pr #253.
     886 +// ---------------------------------------------------------------------------------
     887 + 
     888 +#if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
     889 +#include <intrin.h>
     890 +#include <string.h>
     891 +extern bool _mi_cpu_has_fsrm;
     892 +static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
     893 + if (_mi_cpu_has_fsrm) {
     894 + __movsb((unsigned char*)dst, (const unsigned char*)src, n);
     895 + }
     896 + else {
     897 + memcpy(dst, src, n);
     898 + }
     899 +}
     900 +static inline void _mi_memzero(void* dst, size_t n) {
     901 + if (_mi_cpu_has_fsrm) {
     902 + __stosb((unsigned char*)dst, 0, n);
     903 + }
     904 + else {
     905 + memset(dst, 0, n);
     906 + }
     907 +}
     908 +#else
     909 +#include <string.h>
     910 +static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
     911 + memcpy(dst, src, n);
     912 +}
     913 +static inline void _mi_memzero(void* dst, size_t n) {
     914 + memset(dst, 0, n);
     915 +}
     916 +#endif
     917 + 
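The `_mi_cpu_has_fsrm` flag itself is initialized elsewhere at startup. As a hedged sketch of how such a check can be done, assuming FSRM is reported in CPUID leaf 7, sub-leaf 0, EDX bit 4 (the function name is illustrative and this is not the code from `src/prim`):

#include <intrin.h>    // __cpuid, __cpuidex
#include <stdbool.h>

static bool my_detect_fsrm(void) {
  int info[4];                          // EAX, EBX, ECX, EDX
  __cpuid(info, 0);
  if (info[0] < 7) return false;        // leaf 7 not supported
  __cpuidex(info, 7, 0);                // structured extended feature flags
  return ((info[3] & (1 << 4)) != 0);   // EDX bit 4 = FSRM
}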
     918 + 
     919 +// -------------------------------------------------------------------------------
     920 +// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
     921 +// This is used for example in `mi_realloc`.
     922 +// -------------------------------------------------------------------------------
     923 + 
     924 +#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__)
     925 +// On GCC/CLang we provide a hint that the pointers are word aligned.
     926 +#include <string.h>
     927 +static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
     928 + mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
     929 + void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);
     930 + const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE);
     931 + _mi_memcpy(adst, asrc, n);
     932 +}
     933 + 
     934 +static inline void _mi_memzero_aligned(void* dst, size_t n) {
     935 + mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0);
     936 + void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);
     937 + _mi_memzero(adst, n);
     938 +}
     939 +#else
     940 +// Default fallback on `_mi_memcpy`
     941 +static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
     942 + mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
     943 + _mi_memcpy(dst, src, n);
     944 +}
     945 + 
     946 +static inline void _mi_memzero_aligned(void* dst, size_t n) {
     947 + mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0);
     948 + _mi_memzero(dst, n);
     949 +}
     950 +#endif
     951 + 
     952 + 
     953 +#endif
     954 + 
  • ■ ■ ■ ■ ■ ■
    preload-mimalloc/mimalloc/include/mimalloc/prim.h
     1 +/* ----------------------------------------------------------------------------
     2 +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
     3 +This is free software; you can redistribute it and/or modify it under the
     4 +terms of the MIT license. A copy of the license can be found in the file
     5 +"LICENSE" at the root of this distribution.
     6 +-----------------------------------------------------------------------------*/
     7 +#pragma once
     8 +#ifndef MIMALLOC_PRIM_H
     9 +#define MIMALLOC_PRIM_H
     10 + 
     11 + 
     12 +// --------------------------------------------------------------------------
     13 +// This file specifies the primitive portability API.
     14 +// Each OS/host needs to implement these primitives, see `src/prim`
      15 +// for implementations on Windows, macOS, WASI, and Linux/Unix.
     16 +//
     17 +// note: on all primitive functions, we always get:
     18 +// addr != NULL and page aligned
     19 +// size > 0 and page aligned
      20 +// return value is an error code: an int where 0 is success.
     21 +// --------------------------------------------------------------------------
     22 + 
     23 +// OS memory configuration
     24 +typedef struct mi_os_mem_config_s {
     25 + size_t page_size; // 4KiB
     26 + size_t large_page_size; // 2MiB
     27 + size_t alloc_granularity; // smallest allocation size (on Windows 64KiB)
     28 + bool has_overcommit; // can we reserve more memory than can be actually committed?
      29 + bool must_free_whole; // must allocated blocks be freed as a whole (false for mmap, true for VirtualAlloc)
     30 +} mi_os_mem_config_t;
     31 + 
     32 +// Initialize
     33 +void _mi_prim_mem_init( mi_os_mem_config_t* config );
     34 + 
     35 +// Free OS memory
     36 +int _mi_prim_free(void* addr, size_t size );
     37 +
     38 +// Allocate OS memory. Return NULL on error.
     39 +// The `try_alignment` is just a hint and the returned pointer does not have to be aligned.
     40 +// pre: !commit => !allow_large
     41 +// try_alignment >= _mi_os_page_size() and a power of 2
     42 +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr);
     43 + 
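As an illustration of this contract, a minimal POSIX-style sketch using plain `mmap` (hypothetical name; it ignores the alignment hint and large-page support, unlike the real implementations under `src/prim`):

#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <sys/mman.h>

static int my_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large,
                         bool* is_large, void** addr) {
  (void)try_alignment; (void)allow_large;   // alignment is only a hint; no large pages in this sketch
  *is_large = false;
  const int prot = (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE);
  void* p = mmap(NULL, size, prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED) { *addr = NULL; return errno; }    // error code on failure
  *addr = p;
  return 0;                                               // 0 on success
}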
     44 +// Commit memory. Returns error code or 0 on success.
     45 +int _mi_prim_commit(void* addr, size_t size, bool commit);
     46 + 
     47 +// Reset memory. The range keeps being accessible but the content might be reset.
     48 +// Returns error code or 0 on success.
     49 +int _mi_prim_reset(void* addr, size_t size);
     50 + 
     51 +// Protect memory. Returns error code or 0 on success.
     52 +int _mi_prim_protect(void* addr, size_t size, bool protect);
     53 + 
     54 +// Allocate huge (1GiB) pages possibly associated with a NUMA node.
     55 +// pre: size > 0 and a multiple of 1GiB.
     56 +// addr is either NULL or an address hint.
     57 +// numa_node is either negative (don't care), or a numa node number.
     58 +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr);
     59 + 
     60 +// Return the current NUMA node
     61 +size_t _mi_prim_numa_node(void);
     62 + 
     63 +// Return the number of logical NUMA nodes
     64 +size_t _mi_prim_numa_node_count(void);
     65 + 
     66 +// Clock ticks
     67 +mi_msecs_t _mi_prim_clock_now(void);
     68 + 
     69 +// Return process information (only for statistics)
     70 +typedef struct mi_process_info_s {
     71 + mi_msecs_t elapsed;
     72 + mi_msecs_t utime;
     73 + mi_msecs_t stime;
     74 + size_t current_rss;
     75 + size_t peak_rss;
     76 + size_t current_commit;
     77 + size_t peak_commit;
     78 + size_t page_faults;
     79 +} mi_process_info_t;
     80 + 
     81 +void _mi_prim_process_info(mi_process_info_t* pinfo);
     82 + 
     83 +// Default stderr output. (only for warnings etc. with verbose enabled)
     84 +// msg != NULL && _mi_strlen(msg) > 0
     85 +void _mi_prim_out_stderr( const char* msg );
     86 + 
     87 +// Get an environment variable. (only for options)
     88 +// name != NULL, result != NULL, result_size >= 64
     89 +bool _mi_prim_getenv(const char* name, char* result, size_t result_size);
     90 + 
     91 + 
     92 +// Fill a buffer with strong randomness; return `false` on error or if
     93 +// there is no strong randomization available.
     94 +bool _mi_prim_random_buf(void* buf, size_t buf_len);
     95 + 
     96 +// Called on the first thread start, and should ensure `_mi_thread_done` is called on thread termination.
     97 +void _mi_prim_thread_init_auto_done(void);
     98 + 
     99 +// Called on process exit and may take action to clean up resources associated with the thread auto done.
     100 +void _mi_prim_thread_done_auto_done(void);
     101 + 
     102 +// Called when the default heap for a thread changes
     103 +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
     104 + 
     105 + 
     106 +//-------------------------------------------------------------------
     107 +// Thread id: `_mi_prim_thread_id()`
     108 +//
     109 +// Getting the thread id should be performant as it is called in the
     110 +// fast path of `_mi_free` and we specialize for various platforms as
     111 +// inlined definitions. Regular code should call `init.c:_mi_thread_id()`.
     112 +// We only require _mi_prim_thread_id() to return a unique id
     113 +// for each thread (unequal to zero).
     114 +//-------------------------------------------------------------------
     115 + 
     116 +// defined in `init.c`; do not use these directly
     117 +extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
     118 +extern bool _mi_process_is_initialized; // has mi_process_init been called?
     119 + 
     120 +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept;
     121 + 
     122 +#if defined(_WIN32)
     123 + 
     124 +#define WIN32_LEAN_AND_MEAN
     125 +#include <windows.h>
     126 +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
     127 + // Windows: works on Intel and ARM in both 32- and 64-bit
     128 + return (uintptr_t)NtCurrentTeb();
     129 +}
     130 + 
     131 +// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
     132 +// both the OS and libc implementation so we use specific tests for each main platform.
     133 +// If you test on another platform and it works please send a PR :-)
     134 +// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
     135 +#elif defined(__GNUC__) && ( \
     136 + (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
     137 + || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \
     138 + || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
     139 + || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
     140 + || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
     141 + )
     142 + 
     143 +static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept {
     144 + void* res;
     145 + const size_t ofs = (slot*sizeof(void*));
     146 + #if defined(__i386__)
     147 + __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS
     148 + #elif defined(__APPLE__) && defined(__x86_64__)
     149 + __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS
     150 + #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
     151 + __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI
     152 + #elif defined(__x86_64__)
     153 + __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS
     154 + #elif defined(__arm__)
     155 + void** tcb; MI_UNUSED(ofs);
     156 + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
     157 + res = tcb[slot];
     158 + #elif defined(__aarch64__)
     159 + void** tcb; MI_UNUSED(ofs);
     160 + #if defined(__APPLE__) // M1, issue #343
     161 + __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb));
     162 + #else
     163 + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
     164 + #endif
     165 + res = tcb[slot];
     166 + #endif
     167 + return res;
     168 +}
     169 + 
     170 +// setting a tls slot is only used on macOS for now
     171 +static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
     172 + const size_t ofs = (slot*sizeof(void*));
     173 + #if defined(__i386__)
     174 + __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS
     175 + #elif defined(__APPLE__) && defined(__x86_64__)
     176 + __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOS uses GS
     177 + #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
     178 + __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI
     179 + #elif defined(__x86_64__)
     180 + __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS
     181 + #elif defined(__arm__)
     182 + void** tcb; MI_UNUSED(ofs);
     183 + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
     184 + tcb[slot] = value;
     185 + #elif defined(__aarch64__)
     186 + void** tcb; MI_UNUSED(ofs);
     187 + #if defined(__APPLE__) // M1, issue #343
     188 + __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb));
     189 + #else
     190 + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
     191 + #endif
     192 + tcb[slot] = value;
     193 + #endif
     194 +}
     195 + 
     196 +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
     197 + #if defined(__BIONIC__)
     198 + // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id
     199 + // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86
     200 + return (uintptr_t)mi_prim_tls_slot(1);
     201 + #else
     202 + // in all our other targets, slot 0 is the thread id
     203 + // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h
     204 + // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36
     205 + return (uintptr_t)mi_prim_tls_slot(0);
     206 + #endif
     207 +}
     208 + 
     209 +#else
     210 + 
     211 +// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms).
     212 +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
     213 + return (uintptr_t)&_mi_heap_default;
     214 +}
     215 + 
     216 +#endif
     217 + 
     218 + 
     219 + 
     220 +/* ----------------------------------------------------------------------------------------
     221 +The thread local default heap: `_mi_prim_get_default_heap()`
     222 +This is inlined here as it is on the fast path for allocation functions.
     223 + 
     224 +On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a
     225 +__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures
     226 +that the storage will always be available (allocated on the thread stacks).
     227 + 
     228 +On some platforms though we cannot use that when overriding `malloc` since the underlying
      229 +TLS implementation (or the loader) will itself call `malloc` on first access and recurse.
      230 +We try to circumvent this in an efficient way:
      231 +- macOS : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On macOS, the
      232 + loader itself calls `malloc` even before the modules are initialized.
      233 +- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS).
      234 +- DragonFly: defaults are working but seem slow compared to FreeBSD (see PR #323)
     235 +------------------------------------------------------------------------------------------- */
     236 + 
     237 +static inline mi_heap_t* mi_prim_get_default_heap(void);
     238 + 
     239 +#if defined(MI_MALLOC_OVERRIDE)
     240 +#if defined(__APPLE__) // macOS
     241 + #define MI_TLS_SLOT 89 // seems unused?
     242 + // #define MI_TLS_RECURSE_GUARD 1
     243 + // other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
     244 + // see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
     245 +#elif defined(__OpenBSD__)
      246 + // use end bytes of a name; goes wrong if anyone uses names > 23 characters (pthread specifies 16)
     247 + // see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
     248 + #define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24)
     249 + // #elif defined(__DragonFly__)
     250 + // #warning "mimalloc is not working correctly on DragonFly yet."
     251 + // #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
     252 +#elif defined(__ANDROID__)
     253 + // See issue #381
     254 + #define MI_TLS_PTHREAD
     255 +#endif
     256 +#endif
     257 + 
     258 + 
     259 +#if defined(MI_TLS_SLOT)
     260 + 
     261 +static inline mi_heap_t* mi_prim_get_default_heap(void) {
     262 + mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT);
     263 + if mi_unlikely(heap == NULL) {
     264 + #ifdef __GNUC__
     265 + __asm(""); // prevent conditional load of the address of _mi_heap_empty
     266 + #endif
     267 + heap = (mi_heap_t*)&_mi_heap_empty;
     268 + }
     269 + return heap;
     270 +}
     271 + 
     272 +#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
     273 + 
     274 +static inline mi_heap_t** mi_prim_tls_pthread_heap_slot(void) {
     275 + pthread_t self = pthread_self();
     276 + #if defined(__DragonFly__)
     277 + if (self==NULL) return NULL;
     278 + #endif
     279 + return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS);
     280 +}
     281 + 
     282 +static inline mi_heap_t* mi_prim_get_default_heap(void) {
     283 + mi_heap_t** pheap = mi_prim_tls_pthread_heap_slot();
     284 + if mi_unlikely(pheap == NULL) return _mi_heap_main_get();
     285 + mi_heap_t* heap = *pheap;
     286 + if mi_unlikely(heap == NULL) return (mi_heap_t*)&_mi_heap_empty;
     287 + return heap;
     288 +}
     289 + 
     290 +#elif defined(MI_TLS_PTHREAD)
     291 + 
     292 +extern pthread_key_t _mi_heap_default_key;
     293 +static inline mi_heap_t* mi_prim_get_default_heap(void) {
     294 + mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
     295 + return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
     296 +}
     297 + 
     298 +#else // default using a thread local variable; used on most platforms.
     299 + 
     300 +static inline mi_heap_t* mi_prim_get_default_heap(void) {
     301 + #if defined(MI_TLS_RECURSE_GUARD)
     302 + if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get();
     303 + #endif
     304 + return _mi_heap_default;
     305 +}
     306 + 
     307 +#endif // mi_prim_get_default_heap()
     308 + 
     309 + 
     310 + 
     311 +#endif // MIMALLOC_PRIM_H
     312 + 
  • ■ ■ ■ ■ ■ ■
    preload-mimalloc/mimalloc/include/mimalloc/track.h
     1 +/* ----------------------------------------------------------------------------
     2 +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
     3 +This is free software; you can redistribute it and/or modify it under the
     4 +terms of the MIT license. A copy of the license can be found in the file
     5 +"LICENSE" at the root of this distribution.
     6 +-----------------------------------------------------------------------------*/
     7 +#pragma once
     8 +#ifndef MIMALLOC_TRACK_H
     9 +#define MIMALLOC_TRACK_H
     10 + 
     11 +/* ------------------------------------------------------------------------------------------------------
      12 +Track memory ranges with macros for tools like Valgrind, address sanitizer, or other memory checkers.
     13 +These can be defined for tracking allocation:
     14 + 
     15 + #define mi_track_malloc_size(p,reqsize,size,zero)
     16 + #define mi_track_free_size(p,_size)
     17 + 
     18 +The macros are set up such that the size passed to `mi_track_free_size`
     19 +always matches the size of `mi_track_malloc_size`. (currently, `size == mi_usable_size(p)`).
     20 +The `reqsize` is what the user requested, and `size >= reqsize`.
     21 +The `size` is either byte precise (and `size==reqsize`) if `MI_PADDING` is enabled,
     22 +or otherwise it is the usable block size which may be larger than the original request.
     23 +Use `_mi_block_size_of(void* p)` to get the full block size that was allocated (including padding etc).
     24 +The `zero` parameter is `true` if the allocated block is zero initialized.
     25 + 
     26 +Optional:
     27 + 
     28 + #define mi_track_align(p,alignedp,offset,size)
     29 + #define mi_track_resize(p,oldsize,newsize)
     30 + #define mi_track_init()
     31 + 
     32 +The `mi_track_align` is called right after a `mi_track_malloc` for aligned pointers in a block.
     33 +The corresponding `mi_track_free` still uses the block start pointer and original size (corresponding to the `mi_track_malloc`).
     34 +The `mi_track_resize` is currently unused but could be called on reallocations within a block.
     35 +`mi_track_init` is called at program start.
     36 + 
     37 +The following macros are for tools like asan and valgrind to track whether memory is
     38 +defined, undefined, or not accessible at all:
     39 + 
     40 + #define mi_track_mem_defined(p,size)
     41 + #define mi_track_mem_undefined(p,size)
     42 + #define mi_track_mem_noaccess(p,size)
     43 + 
     44 +-------------------------------------------------------------------------------------------------------*/
     45 + 
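As an example of the macro protocol described above, a hypothetical logging "tool" could provide the required pair of definitions as follows (purely illustrative; the real backends follow below):

#include <stdio.h>   // fprintf

#define MI_TRACK_ENABLED      1
#define MI_TRACK_HEAP_DESTROY 0
#define MI_TRACK_TOOL         "log"

#define mi_track_malloc_size(p,reqsize,size,zero) \
  fprintf(stderr, "alloc %p: %zu bytes (requested %zu, zeroed: %d)\n", (p), (size_t)(size), (size_t)(reqsize), (int)(zero))
#define mi_track_free_size(p,_size) \
  fprintf(stderr, "free  %p: %zu bytes\n", (p), (size_t)(_size))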
     46 +#if MI_TRACK_VALGRIND
     47 +// valgrind tool
     48 + 
     49 +#define MI_TRACK_ENABLED 1
     50 +#define MI_TRACK_HEAP_DESTROY 1 // track free of individual blocks on heap_destroy
     51 +#define MI_TRACK_TOOL "valgrind"
     52 + 
     53 +#include <valgrind/valgrind.h>
     54 +#include <valgrind/memcheck.h>
     55 + 
     56 +#define mi_track_malloc_size(p,reqsize,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero)
     57 +#define mi_track_free_size(p,_size) VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/)
     58 +#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/)
     59 +#define mi_track_mem_defined(p,size) VALGRIND_MAKE_MEM_DEFINED(p,size)
     60 +#define mi_track_mem_undefined(p,size) VALGRIND_MAKE_MEM_UNDEFINED(p,size)
     61 +#define mi_track_mem_noaccess(p,size) VALGRIND_MAKE_MEM_NOACCESS(p,size)
     62 + 
     63 +#elif MI_TRACK_ASAN
     64 +// address sanitizer
     65 + 
     66 +#define MI_TRACK_ENABLED 1
     67 +#define MI_TRACK_HEAP_DESTROY 0
     68 +#define MI_TRACK_TOOL "asan"
     69 + 
     70 +#include <sanitizer/asan_interface.h>
     71 + 
     72 +#define mi_track_malloc_size(p,reqsize,size,zero) ASAN_UNPOISON_MEMORY_REGION(p,size)
     73 +#define mi_track_free_size(p,size) ASAN_POISON_MEMORY_REGION(p,size)
     74 +#define mi_track_mem_defined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size)
     75 +#define mi_track_mem_undefined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size)
     76 +#define mi_track_mem_noaccess(p,size) ASAN_POISON_MEMORY_REGION(p,size)
     77 + 
     78 +#elif MI_TRACK_ETW
     79 +// windows event tracing
     80 + 
     81 +#define MI_TRACK_ENABLED 1
     82 +#define MI_TRACK_HEAP_DESTROY 0
     83 +#define MI_TRACK_TOOL "ETW"
     84 + 
     85 +#define WIN32_LEAN_AND_MEAN
     86 +#include <windows.h>
     87 +#include "../src/prim/windows/etw.h"
     88 + 
     89 +#define mi_track_init() EventRegistermicrosoft_windows_mimalloc();
     90 +#define mi_track_malloc_size(p,reqsize,size,zero) EventWriteETW_MI_ALLOC((UINT64)(p), size)
     91 +#define mi_track_free_size(p,size) EventWriteETW_MI_FREE((UINT64)(p), size)
     92 + 
     93 +#else
     94 +// no tracking
     95 + 
     96 +#define MI_TRACK_ENABLED 0
     97 +#define MI_TRACK_HEAP_DESTROY 0
     98 +#define MI_TRACK_TOOL "none"
     99 + 
     100 +#define mi_track_malloc_size(p,reqsize,size,zero)
     101 +#define mi_track_free_size(p,_size)
     102 + 
     103 +#endif
     104 + 
     105 +// -------------------
     106 +// Utility definitions
     107 + 
     108 +#ifndef mi_track_resize
     109 +#define mi_track_resize(p,oldsize,newsize) mi_track_free_size(p,oldsize); mi_track_malloc(p,newsize,false)
     110 +#endif
     111 + 
     112 +#ifndef mi_track_align
     113 +#define mi_track_align(p,alignedp,offset,size) mi_track_mem_noaccess(p,offset)
     114 +#endif
     115 + 
     116 +#ifndef mi_track_init
     117 +#define mi_track_init()
     118 +#endif
     119 + 
     120 +#ifndef mi_track_mem_defined
     121 +#define mi_track_mem_defined(p,size)
     122 +#endif
     123 + 
     124 +#ifndef mi_track_mem_undefined
     125 +#define mi_track_mem_undefined(p,size)
     126 +#endif
     127 + 
     128 +#ifndef mi_track_mem_noaccess
     129 +#define mi_track_mem_noaccess(p,size)
     130 +#endif
     131 + 
     132 + 
     133 +#if MI_PADDING
     134 +#define mi_track_malloc(p,reqsize,zero) \
     135 + if ((p)!=NULL) { \
     136 + mi_assert_internal(mi_usable_size(p)==(reqsize)); \
     137 + mi_track_malloc_size(p,reqsize,reqsize,zero); \
     138 + }
     139 +#else
     140 +#define mi_track_malloc(p,reqsize,zero) \
     141 + if ((p)!=NULL) { \
     142 + mi_assert_internal(mi_usable_size(p)>=(reqsize)); \
     143 + mi_track_malloc_size(p,reqsize,mi_usable_size(p),zero); \
     144 + }
     145 +#endif
     146 + 
     147 +#endif
     148 + 
  • ■ ■ ■ ■ ■ ■
    preload-mimalloc/mimalloc/include/mimalloc/types.h
     1 +/* ----------------------------------------------------------------------------
     2 +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
     3 +This is free software; you can redistribute it and/or modify it under the
     4 +terms of the MIT license. A copy of the license can be found in the file
     5 +"LICENSE" at the root of this distribution.
     6 +-----------------------------------------------------------------------------*/
     7 +#pragma once
     8 +#ifndef MIMALLOC_TYPES_H
     9 +#define MIMALLOC_TYPES_H
     10 + 
     11 +// --------------------------------------------------------------------------
     12 +// This file contains the main type definitions for mimalloc:
     13 +// mi_heap_t : all data for a thread-local heap, contains
     14 +// lists of all managed heap pages.
      15 +// mi_segment_t : a larger chunk of memory (32MiB) from where pages
     16 +// are allocated.
     17 +// mi_page_t : a mimalloc page (usually 64KiB or 512KiB) from
     18 +// where objects are allocated.
     19 +// --------------------------------------------------------------------------
     20 + 
     21 + 
     22 +#include <stddef.h> // ptrdiff_t
     23 +#include <stdint.h> // uintptr_t, uint16_t, etc
     24 +#include "mimalloc/atomic.h" // _Atomic
     25 + 
     26 +#ifdef _MSC_VER
     27 +#pragma warning(disable:4214) // bitfield is not int
     28 +#endif
     29 + 
     30 +// Minimal alignment necessary. On most platforms 16 bytes are needed
     31 +// due to SSE registers for example. This must be at least `sizeof(void*)`
     32 +#ifndef MI_MAX_ALIGN_SIZE
     33 +#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t)
     34 +#endif
     35 + 
     36 +// ------------------------------------------------------
     37 +// Variants
     38 +// ------------------------------------------------------
     39 + 
     40 +// Define NDEBUG in the release version to disable assertions.
     41 +// #define NDEBUG
     42 + 
     43 +// Define MI_TRACK_<tool> to enable tracking support
     44 +// #define MI_TRACK_VALGRIND 1
     45 +// #define MI_TRACK_ASAN 1
     46 +// #define MI_TRACK_ETW 1
     47 + 
     48 +// Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance).
     49 +// #define MI_STAT 1
     50 + 
     51 +// Define MI_SECURE to enable security mitigations
     52 +// #define MI_SECURE 1 // guard page around metadata
     53 +// #define MI_SECURE 2 // guard page around each mimalloc page
     54 +// #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free)
     55 +// #define MI_SECURE 4 // checks for double free. (may be more expensive)
     56 + 
     57 +#if !defined(MI_SECURE)
     58 +#define MI_SECURE 0
     59 +#endif
     60 + 
     61 +// Define MI_DEBUG for debug mode
     62 +// #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free.
     63 +// #define MI_DEBUG 2 // + internal assertion checks
     64 +// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON)
     65 +#if !defined(MI_DEBUG)
     66 +#if !defined(NDEBUG) || defined(_DEBUG)
     67 +#define MI_DEBUG 2
     68 +#else
     69 +#define MI_DEBUG 0
     70 +#endif
     71 +#endif
     72 + 
     73 +// Reserve extra padding at the end of each block to be more resilient against heap block overflows.
     74 +// The padding can detect buffer overflow on free.
     75 +#if !defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1 || (MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_TRACK_ETW))
     76 +#define MI_PADDING 1
     77 +#endif
     78 + 
     79 +// Check padding bytes; allows byte-precise buffer overflow detection
     80 +#if !defined(MI_PADDING_CHECK) && MI_PADDING && (MI_SECURE>=3 || MI_DEBUG>=1)
     81 +#define MI_PADDING_CHECK 1
     82 +#endif
     83 + 
     84 + 
     85 +// Encoded free lists allow detection of corrupted free lists
     86 +// and can detect buffer overflows, modify after free, and double `free`s.
     87 +#if (MI_SECURE>=3 || MI_DEBUG>=1)
     88 +#define MI_ENCODE_FREELIST 1
     89 +#endif
     90 + 
     91 + 
      92 +// We used to abandon huge pages and eagerly deallocate them if freed from another thread,
      93 +// but that made it not possible to visit them during a heap walk or include them in a
     94 +// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks if freed from
     95 +// another thread so most memory is available until it gets properly freed by the owning thread.
     96 +// #define MI_HUGE_PAGE_ABANDON 1
     97 + 
     98 + 
     99 +// ------------------------------------------------------
     100 +// Platform specific values
     101 +// ------------------------------------------------------
     102 + 
     103 +// ------------------------------------------------------
     104 +// Size of a pointer.
     105 +// We assume that `sizeof(void*)==sizeof(intptr_t)`
     106 +// and it holds for all platforms we know of.
     107 +//
     108 +// However, the C standard only requires that:
     109 +// p == (void*)((intptr_t)p))
     110 +// but we also need:
     111 +// i == (intptr_t)((void*)i)
     112 +// or otherwise one might define an intptr_t type that is larger than a pointer...
     113 +// ------------------------------------------------------
     114 + 
     115 +#if INTPTR_MAX > INT64_MAX
     116 +# define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example)
     117 +#elif INTPTR_MAX == INT64_MAX
     118 +# define MI_INTPTR_SHIFT (3)
     119 +#elif INTPTR_MAX == INT32_MAX
     120 +# define MI_INTPTR_SHIFT (2)
     121 +#else
     122 +#error platform pointers must be 32, 64, or 128 bits
     123 +#endif
     124 + 
     125 +#if SIZE_MAX == UINT64_MAX
     126 +# define MI_SIZE_SHIFT (3)
     127 +typedef int64_t mi_ssize_t;
     128 +#elif SIZE_MAX == UINT32_MAX
     129 +# define MI_SIZE_SHIFT (2)
     130 +typedef int32_t mi_ssize_t;
     131 +#else
     132 +#error platform objects must be 32 or 64 bits
     133 +#endif
     134 + 
     135 +#if (SIZE_MAX/2) > LONG_MAX
     136 +# define MI_ZU(x) x##ULL
     137 +# define MI_ZI(x) x##LL
     138 +#else
     139 +# define MI_ZU(x) x##UL
     140 +# define MI_ZI(x) x##L
     141 +#endif
     142 + 
     143 +#define MI_INTPTR_SIZE (1<<MI_INTPTR_SHIFT)
     144 +#define MI_INTPTR_BITS (MI_INTPTR_SIZE*8)
     145 + 
     146 +#define MI_SIZE_SIZE (1<<MI_SIZE_SHIFT)
     147 +#define MI_SIZE_BITS (MI_SIZE_SIZE*8)
     148 + 
     149 +#define MI_KiB (MI_ZU(1024))
     150 +#define MI_MiB (MI_KiB*MI_KiB)
     151 +#define MI_GiB (MI_MiB*MI_KiB)
     152 + 
     153 + 
     154 +// ------------------------------------------------------
     155 +// Main internal data-structures
     156 +// ------------------------------------------------------
     157 + 
     158 +// Main tuning parameters for segment and page sizes
     159 +// Sizes for 64-bit (usually divide by two for 32-bit)
     160 +#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit)
     161 + 
     162 +#if MI_INTPTR_SIZE > 4
     163 +#define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 32MiB
     164 +#else
     165 +#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit
     166 +#endif
     167 + 
     168 +#define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB
     169 +#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB
     170 + 
     171 + 
     172 +// Derived constants
     173 +#define MI_SEGMENT_SIZE (MI_ZU(1)<<MI_SEGMENT_SHIFT)
     174 +#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
     175 +#define MI_SEGMENT_MASK (MI_SEGMENT_ALIGN - 1)
     176 +#define MI_SEGMENT_SLICE_SIZE (MI_ZU(1)<< MI_SEGMENT_SLICE_SHIFT)
      177 +#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 512 (on 64-bit)
     178 + 
     179 +#define MI_SMALL_PAGE_SIZE (MI_ZU(1)<<MI_SMALL_PAGE_SHIFT)
     180 +#define MI_MEDIUM_PAGE_SIZE (MI_ZU(1)<<MI_MEDIUM_PAGE_SHIFT)
     181 + 
      182 +#define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 16KiB on 64-bit
     183 +#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128KiB on 64-bit
     184 +#define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
      185 +#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 16MiB on 64-bit
     186 +#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
     187 + 
     188 +// Maximum number of size classes. (spaced exponentially in 12.5% increments)
     189 +#define MI_BIN_HUGE (73U)
     190 + 
     191 +#if (MI_MEDIUM_OBJ_WSIZE_MAX >= 655360)
     192 +#error "mimalloc internal: define more bins"
     193 +#endif
     194 + 
      195 +// Maximum slice offset (255 on 64-bit)
     196 +#define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1)
     197 + 
     198 +// Used as a special value to encode block sizes in 32 bits.
     199 +#define MI_HUGE_BLOCK_SIZE ((uint32_t)(2*MI_GiB))
     200 + 
     201 +// blocks up to this size are always allocated aligned
     202 +#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE)
     203 + 
     204 +// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments
     205 +#define MI_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1)
     206 + 
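Recomputing the 64-bit values by hand (MI_INTPTR_SHIFT == 3) gives the sizes noted in the comments above; a small sanity-check sketch in plain arithmetic (assumes a 64-bit target; the function name is illustrative):

#include <assert.h>
#include <stddef.h>

static void my_check_sizes_64bit(void) {
  const size_t slice   = (size_t)1 << (13 + 3);        // MI_SEGMENT_SLICE_SIZE = 64 KiB
  const size_t segment = (size_t)1 << (9 + 13 + 3);    // MI_SEGMENT_SIZE       = 32 MiB
  assert(segment / slice == 512);                      // MI_SLICES_PER_SEGMENT
  assert(slice / 4 == 16 * 1024);                      // MI_SMALL_OBJ_SIZE_MAX  = 16 KiB
  assert((slice << 3) / 4 == 128 * 1024);              // MI_MEDIUM_OBJ_SIZE_MAX = 128 KiB
  assert(segment / 2 == 16 * 1024 * 1024);             // MI_LARGE_OBJ_SIZE_MAX  = 16 MiB
}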
     207 + 
     208 +// ------------------------------------------------------
     209 +// Mimalloc pages contain allocated blocks
     210 +// ------------------------------------------------------
     211 + 
     212 +// The free lists use encoded next fields
      213 +// (Only actually encodes when MI_ENCODE_FREELIST is defined.)
     214 +typedef uintptr_t mi_encoded_t;
     215 + 
     216 +// thread id's
     217 +typedef size_t mi_threadid_t;
     218 + 
     219 +// free lists contain blocks
     220 +typedef struct mi_block_s {
     221 + mi_encoded_t next;
     222 +} mi_block_t;
     223 + 
     224 + 
     225 +// The delayed flags are used for efficient multi-threaded free-ing
     226 +typedef enum mi_delayed_e {
     227 + MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list
     228 + MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap
     229 + MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list
     230 + MI_NEVER_DELAYED_FREE = 3 // sticky, only resets on page reclaim
     231 +} mi_delayed_t;
     232 + 
     233 + 
     234 +// The `in_full` and `has_aligned` page flags are put in a union to efficiently
     235 +// test if both are false (`full_aligned == 0`) in the `mi_free` routine.
     236 +#if !MI_TSAN
     237 +typedef union mi_page_flags_s {
     238 + uint8_t full_aligned;
     239 + struct {
     240 + uint8_t in_full : 1;
     241 + uint8_t has_aligned : 1;
     242 + } x;
     243 +} mi_page_flags_t;
     244 +#else
     245 +// under thread sanitizer, use a byte for each flag to suppress warning, issue #130
     246 +typedef union mi_page_flags_s {
     247 + uint16_t full_aligned;
     248 + struct {
     249 + uint8_t in_full;
     250 + uint8_t has_aligned;
     251 + } x;
     252 +} mi_page_flags_t;
     253 +#endif
     254 + 
     255 +// Thread free list.
     256 +// We use the bottom 2 bits of the pointer for mi_delayed_t flags
     257 +typedef uintptr_t mi_thread_free_t;
     258 + 
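The encoding just described (flag in the bottom two bits, block pointer in the remaining bits) can be decoded as sketched below; the helper names are illustrative, the allocator's own accessors live in the internal headers:

static inline mi_delayed_t my_tf_delayed(mi_thread_free_t tf) {
  return (mi_delayed_t)(tf & 0x03);                     // bottom 2 bits hold the mi_delayed_t flag
}
static inline mi_block_t* my_tf_block(mi_thread_free_t tf) {
  return (mi_block_t*)(tf & ~(mi_thread_free_t)0x03);   // remaining bits are the block pointer
}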
     259 +// A page contains blocks of one specific size (`block_size`).
      260 +// Each page has three lists of free blocks:
      261 +// `free` for blocks that can be allocated,
      262 +// `local_free` for freed blocks that are not yet available to `mi_malloc`
      263 +// `thread_free` for blocks freed by other threads
     264 +// The `local_free` and `thread_free` lists are migrated to the `free` list
     265 +// when it is exhausted. The separate `local_free` list is necessary to
     266 +// implement a monotonic heartbeat. The `thread_free` list is needed for
     267 +// avoiding atomic operations in the common case.
     268 +//
     269 +//
     270 +// `used - |thread_free|` == actual blocks that are in use (alive)
     271 +// `used - |thread_free| + |free| + |local_free| == capacity`
     272 +//
     273 +// We don't count `freed` (as |free|) but use `used` to reduce
     274 +// the number of memory accesses in the `mi_page_all_free` function(s).
     275 +//
     276 +// Notes:
     277 +// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`)
     278 +// - Using `uint16_t` does not seem to slow things down
     279 +// - The size is 8 words on 64-bit which helps the page index calculations
     280 +// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10
     281 +// and 12 are still good for address calculation)
     282 +// - To limit the structure size, the `xblock_size` is 32-bits only; for
     283 +// blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size
     284 +// - `thread_free` uses the bottom bits as a delayed-free flags to optimize
     285 +// concurrent frees where only the first concurrent free adds to the owning
     286 +// heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`).
     287 +// The invariant is that no-delayed-free is only set if there is
      288 + // at least one block that will be added, or has already been added, to
     289 +// the owning heap `thread_delayed_free` list. This guarantees that pages
     290 +// will be freed correctly even if only other threads free blocks.
     291 +typedef struct mi_page_s {
     292 + // "owned" by the segment
     293 + uint32_t slice_count; // slices in this page (0 if not a page)
     294 + uint32_t slice_offset; // distance from the actual page data slice (0 if a page)
     295 + uint8_t is_reset : 1; // `true` if the page memory was reset
     296 + uint8_t is_committed : 1; // `true` if the page virtual memory is committed
     297 + uint8_t is_zero_init : 1; // `true` if the page was zero initialized
     298 + 
     299 + // layout like this to optimize access in `mi_malloc` and `mi_free`
     300 + uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
     301 + uint16_t reserved; // number of blocks reserved in memory
     302 + mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
     303 + uint8_t is_zero : 1; // `true` if the blocks in the free list are zero initialized
     304 + uint8_t retire_expire : 7; // expiration count for retired blocks
     305 + 
     306 + mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
     307 + uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
     308 + uint32_t xblock_size; // size available in each block (always `>0`)
     309 + mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
     310 + 
     311 + #if (MI_ENCODE_FREELIST || MI_PADDING)
     312 + uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary
     313 + #endif
     314 + 
     315 + _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
     316 + _Atomic(uintptr_t) xheap;
     317 + 
     318 + struct mi_page_s* next; // next page owned by this thread with the same `block_size`
     319 + struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
     320 + 
     321 + // 64-bit 9 words, 32-bit 12 words, (+2 for secure)
     322 + #if MI_INTPTR_SIZE==8
     323 + uintptr_t padding[1];
     324 + #endif
     325 +} mi_page_t;
     326 + 
     327 + 
     328 + 
     329 +typedef enum mi_page_kind_e {
     330 + MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment
     331 + MI_PAGE_MEDIUM, // medium blocks go into medium pages inside a segment
     332 + MI_PAGE_LARGE, // larger blocks go into a page of just one block
     333 + MI_PAGE_HUGE, // huge blocks (> 16 MiB) are put into a single page in a single segment.
     334 +} mi_page_kind_t;
     335 + 
     336 +typedef enum mi_segment_kind_e {
     337 + MI_SEGMENT_NORMAL, // MI_SEGMENT_SIZE size with pages inside.
     338 + MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside.
     339 +} mi_segment_kind_t;
     340 + 
     341 +// ------------------------------------------------------
     342 +// A segment holds a commit mask where a bit is set if
     343 +// the corresponding MI_COMMIT_SIZE area is committed.
     344 +// The MI_COMMIT_SIZE must be a multiple of the slice
     345 +// size. If it is equal we have the most fine grained
     346 +// decommit (but setting it higher can be more efficient).
     347 +// The MI_MINIMAL_COMMIT_SIZE is the minimal amount that will
     348 +// be committed in one go which can be set higher than
     349 +// MI_COMMIT_SIZE for efficiency (while the decommit mask
     350 +// is still tracked in fine-grained MI_COMMIT_SIZE chunks)
     351 +// ------------------------------------------------------
     352 + 
     353 +#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB
     354 +#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB
     355 +#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE)
     356 +#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS
     357 +#define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS)
     358 + 
     359 +#if (MI_COMMIT_MASK_BITS != (MI_COMMIT_MASK_FIELD_COUNT * MI_COMMIT_MASK_FIELD_BITS))
     360 +#error "the segment size must be exactly divisible by the (commit size * size_t bits)"
     361 +#endif
     362 + 
     363 +typedef struct mi_commit_mask_s {
     364 + size_t mask[MI_COMMIT_MASK_FIELD_COUNT];
     365 +} mi_commit_mask_t;
     366 + 
     367 +typedef mi_page_t mi_slice_t;
     368 +typedef int64_t mi_msecs_t;
     369 + 
     370 + 
      371 +// Segments are large allocated memory blocks (32MiB on 64-bit) from
      372 +// the OS. Inside segments we allocate fixed-size _pages_ that
     373 +// contain blocks.
     374 +typedef struct mi_segment_s {
     375 + size_t memid; // memory id for arena allocation
     376 + bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages)
     377 + bool mem_is_large; // in large/huge os pages?
     378 + bool mem_is_committed; // `true` if the whole segment is eagerly committed
     379 + size_t mem_alignment; // page alignment for huge pages (only used for alignment > MI_ALIGNMENT_MAX)
     380 + size_t mem_align_offset; // offset for huge page alignment (only used for alignment > MI_ALIGNMENT_MAX)
     381 + 
     382 + bool allow_decommit;
     383 + mi_msecs_t decommit_expire;
     384 + mi_commit_mask_t decommit_mask;
     385 + mi_commit_mask_t commit_mask;
     386 + 
     387 + _Atomic(struct mi_segment_s*) abandoned_next;
     388 + 
     389 + // from here is zero initialized
     390 + struct mi_segment_s* next; // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`)
     391 +
     392 + size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
      393 + size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim if it is too long)
     394 + size_t used; // count of pages in use
     395 + uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`
     396 + 
     397 + size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT`
      398 + size_t segment_info_slices; // initial slices used for the segment info and possible guard pages.
     399 + 
     400 + // layout like this to optimize access in `mi_free`
     401 + mi_segment_kind_t kind;
     402 + size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT`
     403 + _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment
     404 + 
     405 + mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment
     406 +} mi_segment_t;
     407 + 
     408 + 
     409 +// ------------------------------------------------------
     410 +// Heaps
     411 +// Provide first-class heaps to allocate from.
     412 +// A heap just owns a set of pages for allocation and
      413 +// can only be allocated/reallocated from the thread that created it.
     414 +// Freeing blocks can be done from any thread though.
     415 +// Per thread, the segments are shared among its heaps.
     416 +// Per thread, there is always a default heap that is
     417 +// used for allocation; it is initialized to statically
     418 +// point to an empty heap to avoid initialization checks
     419 +// in the fast path.
     420 +// ------------------------------------------------------
     421 + 
     422 +// Thread local data
     423 +typedef struct mi_tld_s mi_tld_t;
     424 + 
     425 +// Pages of a certain block size are held in a queue.
     426 +typedef struct mi_page_queue_s {
     427 + mi_page_t* first;
     428 + mi_page_t* last;
     429 + size_t block_size;
     430 +} mi_page_queue_t;
     431 + 
     432 +#define MI_BIN_FULL (MI_BIN_HUGE+1)
     433 + 
     434 +// Random context
     435 +typedef struct mi_random_cxt_s {
     436 + uint32_t input[16];
     437 + uint32_t output[16];
     438 + int output_available;
     439 + bool weak;
     440 +} mi_random_ctx_t;
     441 + 
     442 + 
     443 +// In debug mode there is a padding structure at the end of the blocks to check for buffer overflows
     444 +#if (MI_PADDING)
     445 +typedef struct mi_padding_s {
     446 + uint32_t canary; // encoded block value to check validity of the padding (in case of overflow)
     447 + uint32_t delta; // padding bytes before the block. (mi_usable_size(p) - delta == exact allocated bytes)
     448 +} mi_padding_t;
     449 +#define MI_PADDING_SIZE (sizeof(mi_padding_t))
     450 +#define MI_PADDING_WSIZE ((MI_PADDING_SIZE + MI_INTPTR_SIZE - 1) / MI_INTPTR_SIZE)
     451 +#else
     452 +#define MI_PADDING_SIZE 0
     453 +#define MI_PADDING_WSIZE 0
     454 +#endif
     455 + 
     456 +#define MI_PAGES_DIRECT (MI_SMALL_WSIZE_MAX + MI_PADDING_WSIZE + 1)
     457 + 
     458 + 
     459 +// A heap owns a set of pages.
     460 +struct mi_heap_s {
     461 + mi_tld_t* tld;
      462 + mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points to a page with possibly free blocks in the corresponding queue for that size.
     463 + mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
     464 + _Atomic(mi_block_t*) thread_delayed_free;
      465 + mi_threadid_t thread_id; // thread this heap belongs to
     466 + mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0)
     467 + uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`)
     468 + uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list
     469 + mi_random_ctx_t random; // random number context used for secure allocation
     470 + size_t page_count; // total number of pages in the `pages` queues.
     471 + size_t page_retired_min; // smallest retired index (retired pages are fully free, but still in the page queues)
     472 + size_t page_retired_max; // largest retired index into the `pages` array.
     473 + mi_heap_t* next; // list of heaps per thread
     474 + bool no_reclaim; // `true` if this heap should not reclaim abandoned pages
     475 +};
     476 + 
     477 + 
     478 + 
     479 +// ------------------------------------------------------
     480 +// Debug
     481 +// ------------------------------------------------------
     482 + 
     483 +#if !defined(MI_DEBUG_UNINIT)
     484 +#define MI_DEBUG_UNINIT (0xD0)
     485 +#endif
     486 +#if !defined(MI_DEBUG_FREED)
     487 +#define MI_DEBUG_FREED (0xDF)
     488 +#endif
     489 +#if !defined(MI_DEBUG_PADDING)
     490 +#define MI_DEBUG_PADDING (0xDE)
     491 +#endif
     492 + 
     493 +#if (MI_DEBUG)
     494 +// use our own assertion to print without memory allocation
     495 +void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func );
     496 +#define mi_assert(expr) ((expr) ? (void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__))
     497 +#else
     498 +#define mi_assert(x)
     499 +#endif
     500 + 
     501 +#if (MI_DEBUG>1)
     502 +#define mi_assert_internal mi_assert
     503 +#else
     504 +#define mi_assert_internal(x)
     505 +#endif
     506 + 
     507 +#if (MI_DEBUG>2)
     508 +#define mi_assert_expensive mi_assert
     509 +#else
     510 +#define mi_assert_expensive(x)
     511 +#endif
     512 + 
     513 +// ------------------------------------------------------
     514 +// Statistics
     515 +// ------------------------------------------------------
     516 + 
     517 +#ifndef MI_STAT
     518 +#if (MI_DEBUG>0)
     519 +#define MI_STAT 2
     520 +#else
     521 +#define MI_STAT 0
     522 +#endif
     523 +#endif
     524 + 
     525 +typedef struct mi_stat_count_s {
     526 + int64_t allocated;
     527 + int64_t freed;
     528 + int64_t peak;
     529 + int64_t current;
     530 +} mi_stat_count_t;
     531 + 
     532 +typedef struct mi_stat_counter_s {
     533 + int64_t total;
     534 + int64_t count;
     535 +} mi_stat_counter_t;
     536 + 
     537 +typedef struct mi_stats_s {
     538 + mi_stat_count_t segments;
     539 + mi_stat_count_t pages;
     540 + mi_stat_count_t reserved;
     541 + mi_stat_count_t committed;
     542 + mi_stat_count_t reset;
     543 + mi_stat_count_t page_committed;
     544 + mi_stat_count_t segments_abandoned;
     545 + mi_stat_count_t pages_abandoned;
     546 + mi_stat_count_t threads;
     547 + mi_stat_count_t normal;
     548 + mi_stat_count_t huge;
     549 + mi_stat_count_t large;
     550 + mi_stat_count_t malloc;
     551 + mi_stat_count_t segments_cache;
     552 + mi_stat_counter_t pages_extended;
     553 + mi_stat_counter_t mmap_calls;
     554 + mi_stat_counter_t commit_calls;
     555 + mi_stat_counter_t page_no_retire;
     556 + mi_stat_counter_t searches;
     557 + mi_stat_counter_t normal_count;
     558 + mi_stat_counter_t huge_count;
     559 + mi_stat_counter_t large_count;
     560 +#if MI_STAT>1
     561 + mi_stat_count_t normal_bins[MI_BIN_HUGE+1];
     562 +#endif
     563 +} mi_stats_t;
     564 + 
     565 + 
     566 +void _mi_stat_increase(mi_stat_count_t* stat, size_t amount);
     567 +void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount);
     568 +void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
     569 + 
     570 +#if (MI_STAT)
     571 +#define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount)
     572 +#define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount)
     573 +#define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount)
     574 +#else
     575 +#define mi_stat_increase(stat,amount) (void)0
     576 +#define mi_stat_decrease(stat,amount) (void)0
     577 +#define mi_stat_counter_increase(stat,amount) (void)0
     578 +#endif
     579 + 
     580 +#define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount)
     581 +#define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount)
     582 +#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount)
     583 + 
     584 +// ------------------------------------------------------
     585 +// Thread Local data
     586 +// ------------------------------------------------------
     587 + 
     588 +// A "span" is an available range of slices. The span queues keep
     589 +// track of slice spans of at most the given `slice_count` (but more than the previous size class).
     590 +typedef struct mi_span_queue_s {
     591 + mi_slice_t* first;
     592 + mi_slice_t* last;
     593 + size_t slice_count;
     594 +} mi_span_queue_t;
     595 + 
     596 +#define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SLICES_PER_SEGMENT)
     597 + 
     598 +// OS thread local data
     599 +typedef struct mi_os_tld_s {
     600 + size_t region_idx; // start point for next allocation
     601 + mi_stats_t* stats; // points to tld stats
     602 +} mi_os_tld_t;
     603 + 
     604 + 
     605 +// Segments thread local data
     606 +typedef struct mi_segments_tld_s {
     607 + mi_span_queue_t spans[MI_SEGMENT_BIN_MAX+1]; // free slice spans inside segments
     608 + size_t count; // current number of segments;
     609 + size_t peak_count; // peak number of segments
     610 + size_t current_size; // current size of all segments
     611 + size_t peak_size; // peak size of all segments
     612 + mi_stats_t* stats; // points to tld stats
     613 + mi_os_tld_t* os; // points to os stats
     614 +} mi_segments_tld_t;
     615 + 
     616 +// Thread local data
     617 +struct mi_tld_s {
     618 + unsigned long long heartbeat; // monotonic heartbeat count
     619 + bool recurse; // true if deferred was called; used to prevent infinite recursion.
     620 + mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted)
     621 + mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates)
     622 + mi_segments_tld_t segments; // segment tld
     623 + mi_os_tld_t os; // os tld
     624 + mi_stats_t stats; // statistics
     625 +};
     626 + 
     627 +#endif
     628 + 
  • ■ ■ ■ ■ ■ ■
    preload-mimalloc/mimalloc/include/mimalloc-new-delete.h
     1 +/* ----------------------------------------------------------------------------
     2 +Copyright (c) 2018-2020 Microsoft Research, Daan Leijen
     3 +This is free software; you can redistribute it and/or modify it under the
     4 +terms of the MIT license. A copy of the license can be found in the file
     5 +"LICENSE" at the root of this distribution.
     6 +-----------------------------------------------------------------------------*/
     7 +#pragma once
     8 +#ifndef MIMALLOC_NEW_DELETE_H
     9 +#define MIMALLOC_NEW_DELETE_H
     10 + 
     11 +// ----------------------------------------------------------------------------
     12 +// This header provides convenient overrides for the new and
     13 +// delete operations in C++.
     14 +//
     15 +// This header should be included in only one source file!
     16 +//
     17 +// On Windows, or when linking dynamically with mimalloc, these
     18 +// can be more performant than the standard new-delete operations.
     19 +// See <https://en.cppreference.com/w/cpp/memory/new/operator_new>
     20 +// ---------------------------------------------------------------------------
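
// A minimal usage sketch (file name and program are hypothetical): include this header
// in exactly one translation unit of the final program, e.g.
//
//   // main.cpp
//   #include <mimalloc-new-delete.h>   // overrides the global new/delete operators
//
//   int main() {
//     int* xs = new int[16];           // now served by mi_new and freed by mi_free
//     delete[] xs;
//     return 0;
//   }
//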
     21 +#if defined(__cplusplus)
     22 + #include <new>
     23 + #include <mimalloc.h>
     24 + 
     25 + #if defined(_MSC_VER) && defined(_Ret_notnull_) && defined(_Post_writable_byte_size_)
     26 + // stay consistent with VCRT definitions
     27 + #define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict _Ret_notnull_ _Post_writable_byte_size_(n)
     28 + #define mi_decl_new_nothrow(n) mi_decl_nodiscard mi_decl_restrict _Ret_maybenull_ _Success_(return != NULL) _Post_writable_byte_size_(n)
     29 + #else
     30 + #define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict
     31 + #define mi_decl_new_nothrow(n) mi_decl_nodiscard mi_decl_restrict
     32 + #endif
     33 + 
     34 + void operator delete(void* p) noexcept { mi_free(p); };
     35 + void operator delete[](void* p) noexcept { mi_free(p); };
     36 + 
     37 + void operator delete (void* p, const std::nothrow_t&) noexcept { mi_free(p); }
     38 + void operator delete[](void* p, const std::nothrow_t&) noexcept { mi_free(p); }
     39 + 
     40 + mi_decl_new(n) void* operator new(std::size_t n) noexcept(false) { return mi_new(n); }
     41 + mi_decl_new(n) void* operator new[](std::size_t n) noexcept(false) { return mi_new(n); }
     42 + 
     43 + mi_decl_new_nothrow(n) void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); }
     44 + mi_decl_new_nothrow(n) void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); }
     45 + 
     46 + #if (__cplusplus >= 201402L || _MSC_VER >= 1916)
     47 + void operator delete (void* p, std::size_t n) noexcept { mi_free_size(p,n); };
     48 + void operator delete[](void* p, std::size_t n) noexcept { mi_free_size(p,n); };
     49 + #endif
     50 + 
     51 + #if (__cplusplus > 201402L || defined(__cpp_aligned_new))
     52 + void operator delete (void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
     53 + void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
     54 + void operator delete (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); };
     55 + void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); };
     56 + void operator delete (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
     57 + void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
     58 + 
     59 + void* operator new (std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
     60 + void* operator new[](std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
     61 + void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
     62 + void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
     63 + #endif
     64 +#endif
     65 + 
     66 +#endif // MIMALLOC_NEW_DELETE_H
     67 + 
  • ■ ■ ■ ■ ■ ■
    preload-mimalloc/mimalloc/include/mimalloc-override.h
     1 +/* ----------------------------------------------------------------------------
     2 +Copyright (c) 2018-2020 Microsoft Research, Daan Leijen
     3 +This is free software; you can redistribute it and/or modify it under the
     4 +terms of the MIT license. A copy of the license can be found in the file
     5 +"LICENSE" at the root of this distribution.
     6 +-----------------------------------------------------------------------------*/
     7 +#pragma once
     8 +#ifndef MIMALLOC_OVERRIDE_H
     9 +#define MIMALLOC_OVERRIDE_H
     10 + 
     11 +/* ----------------------------------------------------------------------------
     12 +This header can be used to statically redirect malloc/free and new/delete
     13 +to the mimalloc variants. This can be useful if one includes this file in
     14 +each source file of a project (but be careful when using external code not
     15 +to accidentally mix pointers from different allocators).
     16 +-----------------------------------------------------------------------------*/
     17 + 
     18 +#include <mimalloc.h>
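
// A minimal usage sketch (file name and function are hypothetical): include this header
// in every source file of the project and link against mimalloc; plain calls then expand
// to the mi_ variants defined below.
//
//   // buffer.c
//   #include <mimalloc-override.h>
//
//   void* make_buffer(size_t n) { return malloc(n); }   // expands to mi_malloc(n)
//   void  drop_buffer(void* p)  { free(p); }            // expands to mi_free(p)
//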
     19 + 
     20 +// Standard C allocation
     21 +#define malloc(n) mi_malloc(n)
     22 +#define calloc(n,c) mi_calloc(n,c)
     23 +#define realloc(p,n) mi_realloc(p,n)
     24 +#define free(p) mi_free(p)
     25 + 
     26 +#define strdup(s) mi_strdup(s)
     27 +#define strndup(s,n) mi_strndup(s,n)
     28 +#define realpath(f,n) mi_realpath(f,n)
     29 + 
     30 +// Microsoft extensions
     31 +#define _expand(p,n) mi_expand(p,n)
     32 +#define _msize(p) mi_usable_size(p)
     33 +#define _recalloc(p,n,c) mi_recalloc(p,n,c)
     34 + 
     35 +#define _strdup(s) mi_strdup(s)
     36 +#define _strndup(s,n) mi_strndup(s,n)
     37 +#define _wcsdup(s) (wchar_t*)mi_wcsdup((const unsigned short*)(s))
     38 +#define _mbsdup(s) mi_mbsdup(s)
     39 +#define _dupenv_s(b,n,v) mi_dupenv_s(b,n,v)
     40 +#define _wdupenv_s(b,n,v) mi_wdupenv_s((unsigned short*)(b),n,(const unsigned short*)(v))
     41 + 
     42 +// Various Posix and Unix variants
     43 +#define reallocf(p,n) mi_reallocf(p,n)
     44 +#define malloc_size(p) mi_usable_size(p)
     45 +#define malloc_usable_size(p) mi_usable_size(p)
     46 +#define cfree(p) mi_free(p)
     47 + 
     48 +#define valloc(n) mi_valloc(n)
     49 +#define pvalloc(n) mi_pvalloc(n)
     50 +#define reallocarray(p,s,n) mi_reallocarray(p,s,n)
     51 +#define reallocarr(p,s,n) mi_reallocarr(p,s,n)
     52 +#define memalign(a,n) mi_memalign(a,n)
     53 +#define aligned_alloc(a,n) mi_aligned_alloc(a,n)
     54 +#define posix_memalign(p,a,n) mi_posix_memalign(p,a,n)
     55 +#define _posix_memalign(p,a,n) mi_posix_memalign(p,a,n)
     56 + 
     57 +// Microsoft aligned variants
     58 +#define _aligned_malloc(n,a) mi_malloc_aligned(n,a)
     59 +#define _aligned_realloc(p,n,a) mi_realloc_aligned(p,n,a)
     60 +#define _aligned_recalloc(p,s,n,a) mi_aligned_recalloc(p,s,n,a)
     61 +#define _aligned_msize(p,a,o) mi_usable_size(p)
     62 +#define _aligned_free(p) mi_free(p)
     63 +#define _aligned_offset_malloc(n,a,o) mi_malloc_aligned_at(n,a,o)
     64 +#define _aligned_offset_realloc(p,n,a,o) mi_realloc_aligned_at(p,n,a,o)
     65 +#define _aligned_offset_recalloc(p,s,n,a,o) mi_recalloc_aligned_at(p,s,n,a,o)
     66 + 
     67 +#endif // MIMALLOC_OVERRIDE_H
     68 + 
  • ■ ■ ■ ■ ■ ■
    preload-mimalloc/mimalloc/include/mimalloc.h
     1 +/* ----------------------------------------------------------------------------
     2 +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
     3 +This is free software; you can redistribute it and/or modify it under the
     4 +terms of the MIT license. A copy of the license can be found in the file
     5 +"LICENSE" at the root of this distribution.
     6 +-----------------------------------------------------------------------------*/
     7 +#pragma once
     8 +#ifndef MIMALLOC_H
     9 +#define MIMALLOC_H
     10 + 
     11 +#define MI_MALLOC_VERSION 211 // major + 2 digits minor
     12 + 
     13 +// ------------------------------------------------------
     14 +// Compiler specific attributes
     15 +// ------------------------------------------------------
     16 + 
     17 +#ifdef __cplusplus
     18 + #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11
     19 + #define mi_attr_noexcept noexcept
     20 + #else
     21 + #define mi_attr_noexcept throw()
     22 + #endif
     23 +#else
     24 + #define mi_attr_noexcept
     25 +#endif
     26 + 
     27 +#if defined(__cplusplus) && (__cplusplus >= 201703)
     28 + #define mi_decl_nodiscard [[nodiscard]]
     29 +#elif (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // includes clang, icc, and clang-cl
     30 + #define mi_decl_nodiscard __attribute__((warn_unused_result))
     31 +#elif defined(_HAS_NODISCARD)
     32 + #define mi_decl_nodiscard _NODISCARD
     33 +#elif (_MSC_VER >= 1700)
     34 + #define mi_decl_nodiscard _Check_return_
     35 +#else
     36 + #define mi_decl_nodiscard
     37 +#endif
     38 + 
     39 +#if defined(_MSC_VER) || defined(__MINGW32__)
     40 + #if !defined(MI_SHARED_LIB)
     41 + #define mi_decl_export
     42 + #elif defined(MI_SHARED_LIB_EXPORT)
     43 + #define mi_decl_export __declspec(dllexport)
     44 + #else
     45 + #define mi_decl_export __declspec(dllimport)
     46 + #endif
     47 + #if defined(__MINGW32__)
     48 + #define mi_decl_restrict
     49 + #define mi_attr_malloc __attribute__((malloc))
     50 + #else
     51 + #if (_MSC_VER >= 1900) && !defined(__EDG__)
     52 + #define mi_decl_restrict __declspec(allocator) __declspec(restrict)
     53 + #else
     54 + #define mi_decl_restrict __declspec(restrict)
     55 + #endif
     56 + #define mi_attr_malloc
     57 + #endif
     58 + #define mi_cdecl __cdecl
     59 + #define mi_attr_alloc_size(s)
     60 + #define mi_attr_alloc_size2(s1,s2)
     61 + #define mi_attr_alloc_align(p)
     62 +#elif defined(__GNUC__) // includes clang and icc
     63 + #if defined(MI_SHARED_LIB) && defined(MI_SHARED_LIB_EXPORT)
     64 + #define mi_decl_export __attribute__((visibility("default")))
     65 + #else
     66 + #define mi_decl_export
     67 + #endif
     68 + #define mi_cdecl // leads to warnings... __attribute__((cdecl))
     69 + #define mi_decl_restrict
     70 + #define mi_attr_malloc __attribute__((malloc))
     71 + #if (defined(__clang_major__) && (__clang_major__ < 4)) || (__GNUC__ < 5)
     72 + #define mi_attr_alloc_size(s)
     73 + #define mi_attr_alloc_size2(s1,s2)
     74 + #define mi_attr_alloc_align(p)
     75 + #elif defined(__INTEL_COMPILER)
     76 + #define mi_attr_alloc_size(s) __attribute__((alloc_size(s)))
     77 + #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2)))
     78 + #define mi_attr_alloc_align(p)
     79 + #else
     80 + #define mi_attr_alloc_size(s) __attribute__((alloc_size(s)))
     81 + #define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2)))
     82 + #define mi_attr_alloc_align(p) __attribute__((alloc_align(p)))
     83 + #endif
     84 +#else
     85 + #define mi_cdecl
     86 + #define mi_decl_export
     87 + #define mi_decl_restrict
     88 + #define mi_attr_malloc
     89 + #define mi_attr_alloc_size(s)
     90 + #define mi_attr_alloc_size2(s1,s2)
     91 + #define mi_attr_alloc_align(p)
     92 +#endif
     93 + 
     94 +// ------------------------------------------------------
     95 +// Includes
     96 +// ------------------------------------------------------
     97 + 
     98 +#include <stddef.h> // size_t
     99 +#include <stdbool.h> // bool
     100 +#include <stdint.h> // INTPTR_MAX
     101 + 
     102 +#ifdef __cplusplus
     103 +extern "C" {
     104 +#endif
     105 + 
     106 +// ------------------------------------------------------
     107 +// Standard malloc interface
     108 +// ------------------------------------------------------
     109 + 
     110 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
     111 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2);
     112 +mi_decl_nodiscard mi_decl_export void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2);
     113 +mi_decl_export void* mi_expand(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2);
     114 + 
     115 +mi_decl_export void mi_free(void* p) mi_attr_noexcept;
     116 +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept mi_attr_malloc;
     117 +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept mi_attr_malloc;
     118 +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept mi_attr_malloc;
     119 + 
     120 +// ------------------------------------------------------
     121 +// Extended functionality
     122 +// ------------------------------------------------------
     123 +#define MI_SMALL_WSIZE_MAX (128)
     124 +#define MI_SMALL_SIZE_MAX (MI_SMALL_WSIZE_MAX*sizeof(void*))
     125 + 
     126 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
     127 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
     128 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
     129 + 
     130 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2);
     131 +mi_decl_nodiscard mi_decl_export void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3);
     132 +mi_decl_nodiscard mi_decl_export void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2);
     133 + 
     134 +mi_decl_nodiscard mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept;
     135 +mi_decl_nodiscard mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept;
     136 + 
     137 + 
     138 +// ------------------------------------------------------
     139 +// Internals
     140 +// ------------------------------------------------------
     141 + 
     142 +typedef void (mi_cdecl mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
     143 +mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg) mi_attr_noexcept;
     144 + 
     145 +typedef void (mi_cdecl mi_output_fun)(const char* msg, void* arg);
     146 +mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept;
     147 + 
     148 +typedef void (mi_cdecl mi_error_fun)(int err, void* arg);
     149 +mi_decl_export void mi_register_error(mi_error_fun* fun, void* arg);
     150 + 
     151 +mi_decl_export void mi_collect(bool force) mi_attr_noexcept;
     152 +mi_decl_export int mi_version(void) mi_attr_noexcept;
     153 +mi_decl_export void mi_stats_reset(void) mi_attr_noexcept;
     154 +mi_decl_export void mi_stats_merge(void) mi_attr_noexcept;
     155 +mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept; // backward compatibility: `out` is ignored and should be NULL
     156 +mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept;
     157 + 
     158 +mi_decl_export void mi_process_init(void) mi_attr_noexcept;
     159 +mi_decl_export void mi_thread_init(void) mi_attr_noexcept;
     160 +mi_decl_export void mi_thread_done(void) mi_attr_noexcept;
     161 +mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept;
     162 + 
     163 +mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs,
     164 + size_t* current_rss, size_t* peak_rss,
     165 + size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept;
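
// A minimal sketch of registering the callbacks above (the callback bodies are hypothetical;
// the signatures follow the typedefs in this section, and `<stdio.h>` is assumed for `fputs`):
//
//   static void my_deferred_free(bool force, unsigned long long heartbeat, void* arg) {
//     // release application-managed objects here; `force` asks to free as much as possible
//   }
//   static void my_output(const char* msg, void* arg) {
//     fputs(msg, stderr);                       // route mimalloc messages to stderr
//   }
//
//   mi_register_deferred_free(&my_deferred_free, NULL);
//   mi_register_output(&my_output, NULL);
//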
     166 + 
     167 +// -------------------------------------------------------------------------------------
     168 +// Aligned allocation
     169 +// Note that `alignment` always follows `size` for consistency with unaligned
     170 +// allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`.
     171 +// -------------------------------------------------------------------------------------
     172 + 
     173 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
     174 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
     175 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
     176 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
     177 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2) mi_attr_alloc_align(3);
     178 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2);
     179 +mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3);
     180 +mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2);
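
// A minimal sketch of the argument order noted above: `size` comes first and `alignment`
// second, which is the reverse of the C11 `aligned_alloc(alignment, size)` convention.
//
//   void* p = mi_malloc_aligned(1024, 64);   // 1024 bytes at a 64-byte alignment
//   mi_free(p);
//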
     181 + 
     182 + 
     183 +// -------------------------------------------------------------------------------------
     184 +// Heaps: first-class, but a heap can only allocate from the same thread that created it.
     185 +// -------------------------------------------------------------------------------------
     186 + 
     187 +struct mi_heap_s;
     188 +typedef struct mi_heap_s mi_heap_t;
     189 + 
     190 +mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new(void);
     191 +mi_decl_export void mi_heap_delete(mi_heap_t* heap);
     192 +mi_decl_export void mi_heap_destroy(mi_heap_t* heap);
     193 +mi_decl_export mi_heap_t* mi_heap_set_default(mi_heap_t* heap);
     194 +mi_decl_export mi_heap_t* mi_heap_get_default(void);
     195 +mi_decl_export mi_heap_t* mi_heap_get_backing(void);
     196 +mi_decl_export void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept;
     197 + 
     198 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
     199 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
     200 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3);
     201 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3);
     202 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
     203 + 
     204 +mi_decl_nodiscard mi_decl_export void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3);
     205 +mi_decl_nodiscard mi_decl_export void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4);
     206 +mi_decl_nodiscard mi_decl_export void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3);
     207 + 
     208 +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept mi_attr_malloc;
     209 +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept mi_attr_malloc;
     210 +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept mi_attr_malloc;
     211 + 
     212 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3);
     213 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
     214 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3);
     215 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
     216 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3) mi_attr_alloc_align(4);
     217 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3);
     218 +mi_decl_nodiscard mi_decl_export void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(3) mi_attr_alloc_align(4);
     219 +mi_decl_nodiscard mi_decl_export void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(3);
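
// A minimal sketch of the heap API above (variable names are hypothetical). All allocations
// must come from the thread that created the heap; `mi_heap_destroy` releases every block
// of the heap at once.
//
//   mi_heap_t* heap = mi_heap_new();
//   char* s = mi_heap_strdup(heap, "hello");
//   void* p = mi_heap_malloc(heap, 256);
//   mi_free(p);                                // individual frees still work as usual
//   mi_heap_destroy(heap);                     // releases `s` and anything else still alive
//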
     220 + 
     221 + 
     222 +// --------------------------------------------------------------------------------
     223 +// Zero initialized re-allocation.
     224 +// Only valid on memory that was originally allocated with zero initialization too.
     225 +// e.g. `mi_calloc`, `mi_zalloc`, `mi_zalloc_aligned` etc.
     226 +// see <https://github.com/microsoft/mimalloc/issues/63#issuecomment-508272992>
     227 +// --------------------------------------------------------------------------------
     228 + 
     229 +mi_decl_nodiscard mi_decl_export void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2);
     230 +mi_decl_nodiscard mi_decl_export void* mi_recalloc(void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3);
     231 + 
     232 +mi_decl_nodiscard mi_decl_export void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3);
     233 +mi_decl_nodiscard mi_decl_export void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2);
     234 +mi_decl_nodiscard mi_decl_export void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_alloc_size2(2,3) mi_attr_alloc_align(4);
     235 +mi_decl_nodiscard mi_decl_export void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(2,3);
     236 + 
     237 +mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3);
     238 +mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4);
     239 + 
     240 +mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(3) mi_attr_alloc_align(4);
     241 +mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(3);
     242 +mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_alloc_size2(3,4) mi_attr_alloc_align(5);
     243 +mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(3,4);
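
// A minimal sketch: `mi_rezalloc` preserves the zero-initialization guarantee, so it should
// only be used on blocks that were zero-initialized to begin with (see the note above).
//
//   int* xs = (int*)mi_zalloc(16 * sizeof(int));     // 16 zeroed ints
//   xs = (int*)mi_rezalloc(xs, 32 * sizeof(int));    // grown or moved; the new tail is zeroed too
//   mi_free(xs);
//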
     244 + 
     245 + 
     246 +// ------------------------------------------------------
     247 +// Analysis
     248 +// ------------------------------------------------------
     249 + 
     250 +mi_decl_export bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
     251 +mi_decl_export bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
     252 +mi_decl_export bool mi_check_owned(const void* p);
     253 + 
     254 +// An area of heap space contains blocks of a single size.
     255 +typedef struct mi_heap_area_s {
     256 + void* blocks; // start of the area containing heap blocks
     257 + size_t reserved; // bytes reserved for this area (virtual)
     258 + size_t committed; // current available bytes for this area
     259 + size_t used; // number of allocated blocks
     260 + size_t block_size; // size in bytes of each block
     261 + size_t full_block_size; // size in bytes of a full block including padding and metadata.
     262 +} mi_heap_area_t;
     263 + 
     264 +typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
     265 + 
     266 +mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
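
// A minimal sketch of a visitor matching `mi_block_visit_fun` (the counting logic is
// hypothetical; `block` may be NULL when an area itself is visited rather than a block).
//
//   static bool count_blocks(const mi_heap_t* heap, const mi_heap_area_t* area,
//                            void* block, size_t block_size, void* arg) {
//     if (block != NULL) { (*(size_t*)arg)++; }
//     return true;                              // returning false stops the visit early
//   }
//
//   size_t count = 0;
//   mi_heap_visit_blocks(mi_heap_get_default(), true /* visit_all_blocks */, &count_blocks, &count);
//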
     267 + 
     268 +// Experimental
     269 +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;
     270 +mi_decl_nodiscard mi_decl_export bool mi_is_redirected(void) mi_attr_noexcept;
     271 + 
     272 +mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept;
     273 +mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept;
     274 + 
     275 +mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept;
     276 +mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept;
     277 + 
     278 +mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept;
     279 + 
     280 +// Experimental: heaps associated with specific memory arenas
     281 +typedef int mi_arena_id_t;
     282 +mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size);
     283 +mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
     284 +mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
     285 +mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
     286 + 
     287 +#if MI_MALLOC_VERSION >= 200
     288 +// Create a heap that only allocates in the specified arena
     289 +mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id);
     290 +#endif
     291 + 
     292 +// deprecated
     293 +mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
     294 + 
     295 + 
     296 +// ------------------------------------------------------
     297 +// Convenience
     298 +// ------------------------------------------------------
     299 + 
     300 +#define mi_malloc_tp(tp) ((tp*)mi_malloc(sizeof(tp)))
     301 +#define mi_zalloc_tp(tp) ((tp*)mi_zalloc(sizeof(tp)))
     302 +#define mi_calloc_tp(tp,n) ((tp*)mi_calloc(n,sizeof(tp)))
     303 +#define mi_mallocn_tp(tp,n) ((tp*)mi_mallocn(n,sizeof(tp)))
     304 +#define mi_reallocn_tp(p,tp,n) ((tp*)mi_reallocn(p,n,sizeof(tp)))
     305 +#define mi_recalloc_tp(p,tp,n) ((tp*)mi_recalloc(p,n,sizeof(tp)))
     306 + 
     307 +#define mi_heap_malloc_tp(hp,tp) ((tp*)mi_heap_malloc(hp,sizeof(tp)))
     308 +#define mi_heap_zalloc_tp(hp,tp) ((tp*)mi_heap_zalloc(hp,sizeof(tp)))
     309 +#define mi_heap_calloc_tp(hp,tp,n) ((tp*)mi_heap_calloc(hp,n,sizeof(tp)))
     310 +#define mi_heap_mallocn_tp(hp,tp,n) ((tp*)mi_heap_mallocn(hp,n,sizeof(tp)))
     311 +#define mi_heap_reallocn_tp(hp,p,tp,n) ((tp*)mi_heap_reallocn(hp,p,n,sizeof(tp)))
     312 +#define mi_heap_recalloc_tp(hp,p,tp,n) ((tp*)mi_heap_recalloc(hp,p,n,sizeof(tp)))
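
// A minimal sketch of the typed convenience macros above (the `point_t` struct is hypothetical).
//
//   typedef struct point_s { double x, y; } point_t;
//   point_t* pt  = mi_malloc_tp(point_t);        // one uninitialized point_t
//   point_t* pts = mi_calloc_tp(point_t, 8);     // eight zero-initialized point_t's
//   mi_free(pt);
//   mi_free(pts);
//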
     313 + 
     314 + 
     315 +// ------------------------------------------------------
     316 +// Options
     317 +// ------------------------------------------------------
     318 + 
     319 +typedef enum mi_option_e {
     320 + // stable options
     321 + mi_option_show_errors,
     322 + mi_option_show_stats,
     323 + mi_option_verbose,
     324 + // some of the following options are experimental
     325 + // (deprecated options are kept for binary backward compatibility with v1.x versions)
     326 + mi_option_eager_commit,
     327 + mi_option_deprecated_eager_region_commit,
     328 + mi_option_deprecated_reset_decommits,
     329 + mi_option_large_os_pages, // use large (2MiB) OS pages, implies eager commit
     330 + mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup
     331 + mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node
     332 + mi_option_reserve_os_memory, // reserve specified amount of OS memory at startup
     333 + mi_option_deprecated_segment_cache,
     334 + mi_option_page_reset,
     335 + mi_option_abandoned_page_decommit,
     336 + mi_option_deprecated_segment_reset,
     337 + mi_option_eager_commit_delay,
     338 + mi_option_decommit_delay,
     339 + mi_option_use_numa_nodes, // 0 = use available numa nodes, otherwise use at most N nodes.
     340 + mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only reserved arenas)
     341 + mi_option_os_tag,
     342 + mi_option_max_errors,
     343 + mi_option_max_warnings,
     344 + mi_option_max_segment_reclaim,
     345 + mi_option_allow_decommit,
     346 + mi_option_segment_decommit_delay,
     347 + mi_option_decommit_extend_delay,
     348 + mi_option_destroy_on_exit,
     349 + _mi_option_last
     350 +} mi_option_t;
     351 + 
     352 + 
     353 +mi_decl_nodiscard mi_decl_export bool mi_option_is_enabled(mi_option_t option);
     354 +mi_decl_export void mi_option_enable(mi_option_t option);
     355 +mi_decl_export void mi_option_disable(mi_option_t option);
     356 +mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable);
     357 +mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable);
     358 + 
     359 +mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option);
     360 +mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max);
     361 +mi_decl_export void mi_option_set(mi_option_t option, long value);
     362 +mi_decl_export void mi_option_set_default(mi_option_t option, long value);
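
// A minimal sketch of setting options programmatically before the first allocation
// (the same options can also be set through `MIMALLOC_` environment variables,
// e.g. `MIMALLOC_VERBOSE=1`).
//
//   mi_option_set(mi_option_reserve_huge_os_pages, 4);   // reserve 4 x 1GiB huge OS pages at startup
//   mi_option_enable(mi_option_show_stats);              // print statistics when the process exits
//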
     363 + 
     364 + 
     365 +// -------------------------------------------------------------------------------------------------------
     366 +// "mi" prefixed implementations of various posix, Unix, Windows, and C++ allocation functions.
     367 +// (This can be convenient when providing overrides of these functions as done in `mimalloc-override.h`.)
     368 +// note: we use `mi_cfree` as "checked free" and it checks if the pointer is in our heap before free-ing.
     369 +// -------------------------------------------------------------------------------------------------------
     370 + 
     371 +mi_decl_export void mi_cfree(void* p) mi_attr_noexcept;
     372 +mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept;
     373 +mi_decl_nodiscard mi_decl_export size_t mi_malloc_size(const void* p) mi_attr_noexcept;
     374 +mi_decl_nodiscard mi_decl_export size_t mi_malloc_good_size(size_t size) mi_attr_noexcept;
     375 +mi_decl_nodiscard mi_decl_export size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept;
     376 + 
     377 +mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept;
     378 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1);
     379 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
     380 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
     381 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1);
     382 + 
     383 +mi_decl_nodiscard mi_decl_export void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3);
     384 +mi_decl_nodiscard mi_decl_export int mi_reallocarr(void* p, size_t count, size_t size) mi_attr_noexcept;
     385 +mi_decl_nodiscard mi_decl_export void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept;
     386 +mi_decl_nodiscard mi_decl_export void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept;
     387 + 
     388 +mi_decl_nodiscard mi_decl_export mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept mi_attr_malloc;
     389 +mi_decl_nodiscard mi_decl_export mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept mi_attr_malloc;
     390 +mi_decl_export int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept;
     391 +mi_decl_export int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) mi_attr_noexcept;
     392 + 
     393 +mi_decl_export void mi_free_size(void* p, size_t size) mi_attr_noexcept;
     394 +mi_decl_export void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept;
     395 +mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept;
     396 + 
     397 +// The `mi_new` wrappers implement C++ semantics on out-of-memory instead of directly returning `NULL`:
     398 +// they call `std::get_new_handler` and potentially raise a `std::bad_alloc` exception.
     399 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1);
     400 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
     401 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
     402 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
     403 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1, 2);
     404 +mi_decl_nodiscard mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_alloc_size(2);
     405 +mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_alloc_size2(2, 3);
     406 + 
     407 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) mi_attr_malloc mi_attr_alloc_size(2);
     408 +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3);
     409 + 
     410 +#ifdef __cplusplus
     411 +}
     412 +#endif
     413 + 
     414 +// ---------------------------------------------------------------------------------------------
     415 +// Implement the C++ std::allocator interface for use in STL containers.
     416 +// (note: see `mimalloc-new-delete.h` for overriding the new/delete operators globally)
     417 +// ---------------------------------------------------------------------------------------------
     418 +#ifdef __cplusplus
     419 + 
     420 +#include <cstddef> // std::size_t
     421 +#include <cstdint> // PTRDIFF_MAX
     422 +#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11
     423 +#include <type_traits> // std::true_type
     424 +#include <utility> // std::forward
     425 +#endif
     426 + 
     427 +template<class T> struct _mi_stl_allocator_common {
     428 + typedef T value_type;
     429 + typedef std::size_t size_type;
     430 + typedef std::ptrdiff_t difference_type;
     431 + typedef value_type& reference;
     432 + typedef value_type const& const_reference;
     433 + typedef value_type* pointer;
     434 + typedef value_type const* const_pointer;
     435 + 
     436 + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11
     437 + using propagate_on_container_copy_assignment = std::true_type;
     438 + using propagate_on_container_move_assignment = std::true_type;
     439 + using propagate_on_container_swap = std::true_type;
     440 + template <class U, class ...Args> void construct(U* p, Args&& ...args) { ::new(p) U(std::forward<Args>(args)...); }
     441 + template <class U> void destroy(U* p) mi_attr_noexcept { p->~U(); }
     442 + #else
     443 + void construct(pointer p, value_type const& val) { ::new(p) value_type(val); }
     444 + void destroy(pointer p) { p->~value_type(); }
     445 + #endif
     446 + 
     447 + size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); }
     448 + pointer address(reference x) const { return &x; }
     449 + const_pointer address(const_reference x) const { return &x; }
     450 +};
     451 + 
     452 +template<class T> struct mi_stl_allocator : public _mi_stl_allocator_common<T> {
     453 + using typename _mi_stl_allocator_common<T>::size_type;
     454 + using typename _mi_stl_allocator_common<T>::value_type;
     455 + using typename _mi_stl_allocator_common<T>::pointer;
     456 + template <class U> struct rebind { typedef mi_stl_allocator<U> other; };
     457 + 
     458 + mi_stl_allocator() mi_attr_noexcept = default;
     459 + mi_stl_allocator(const mi_stl_allocator&) mi_attr_noexcept = default;
     460 + template<class U> mi_stl_allocator(const mi_stl_allocator<U>&) mi_attr_noexcept { }
     461 + mi_stl_allocator select_on_container_copy_construction() const { return *this; }
     462 + void deallocate(T* p, size_type) { mi_free(p); }
     463 + 
     464 + #if (__cplusplus >= 201703L) // C++17
     465 + mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_new_n(count, sizeof(T))); }
     466 + mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); }
     467 + #else
     468 + mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast<pointer>(mi_new_n(count, sizeof(value_type))); }
     469 + #endif
     470 + 
     471 + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11
     472 + using is_always_equal = std::true_type;
     473 + #endif
     474 +};
     475 + 
     476 +template<class T1,class T2> bool operator==(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return true; }
     477 +template<class T1,class T2> bool operator!=(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return false; }
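
// A minimal sketch of plugging `mi_stl_allocator` into an STL container; elements are
// allocated through `mi_new_n` and released with `mi_free`.
//
//   #include <vector>
//   std::vector<int, mi_stl_allocator<int>> v;
//   v.push_back(42);
//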
     478 + 
     479 + 
     480 +#if (__cplusplus >= 201103L) || (_MSC_VER >= 1900) // C++11
     481 +#define MI_HAS_HEAP_STL_ALLOCATOR 1
     482 + 
     483 +#include <memory> // std::shared_ptr
     484 + 
     485 +// Common base class for STL allocators in a specific heap
     486 +template<class T, bool _mi_destroy> struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common<T> {
     487 + using typename _mi_stl_allocator_common<T>::size_type;
     488 + using typename _mi_stl_allocator_common<T>::value_type;
     489 + using typename _mi_stl_allocator_common<T>::pointer;
     490 + 
     491 + _mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp) { } /* will not delete nor destroy the passed in heap */
     492 + 
     493 + #if (__cplusplus >= 201703L) // C++17
     494 + mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); }
     495 + mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); }
     496 + #else
     497 + mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast<pointer>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(value_type))); }
     498 + #endif
     499 + 
     500 + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11
     501 + using is_always_equal = std::false_type;
     502 + #endif
     503 + 
     504 + void collect(bool force) { mi_heap_collect(this->heap.get(), force); }
     505 + template<class U> bool is_equal(const _mi_heap_stl_allocator_common<U, _mi_destroy>& x) const { return (this->heap == x.heap); }
     506 + 
     507 +protected:
     508 + std::shared_ptr<mi_heap_t> heap;
     509 + template<class U, bool D> friend struct _mi_heap_stl_allocator_common;
     510 +
     511 + _mi_heap_stl_allocator_common() {
     512 + mi_heap_t* hp = mi_heap_new();
     513 + this->heap.reset(hp, (_mi_destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */
     514 + }
     515 + _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { }
     516 + template<class U> _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common<U, _mi_destroy>& x) mi_attr_noexcept : heap(x.heap) { }
     517 + 
     518 +private:
     519 + static void heap_delete(mi_heap_t* hp) { if (hp != NULL) { mi_heap_delete(hp); } }
     520 + static void heap_destroy(mi_heap_t* hp) { if (hp != NULL) { mi_heap_destroy(hp); } }
     521 +};
     522 + 
     523 +// STL allocator allocation in a specific heap
     524 +template<class T> struct mi_heap_stl_allocator : public _mi_heap_stl_allocator_common<T, false> {
     525 + using typename _mi_heap_stl_allocator_common<T, false>::size_type;
     526 + mi_heap_stl_allocator() : _mi_heap_stl_allocator_common<T, false>() { } // creates fresh heap that is deleted when the destructor is called
     527 + mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, false>(hp) { } // no delete nor destroy on the passed in heap
     528 + template<class U> mi_heap_stl_allocator(const mi_heap_stl_allocator<U>& x) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, false>(x) { }
     529 + 
     530 + mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; }
     531 + void deallocate(T* p, size_type) { mi_free(p); }
     532 + template<class U> struct rebind { typedef mi_heap_stl_allocator<U> other; };
     533 +};
     534 + 
     535 +template<class T1, class T2> bool operator==(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (x.is_equal(y)); }
     536 +template<class T1, class T2> bool operator!=(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (!x.is_equal(y)); }
     537 + 
     538 + 
     539 +// STL allocator allocation in a specific heap, where `free` does nothing and
     540 +// the heap is destroyed in one go on destruction -- use with care!
     541 +template<class T> struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common<T, true> {
     542 + using typename _mi_heap_stl_allocator_common<T, true>::size_type;
     543 + mi_heap_destroy_stl_allocator() : _mi_heap_stl_allocator_common<T, true>() { } // creates fresh heap that is destroyed when the destructor is called
     544 + mi_heap_destroy_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, true>(hp) { } // no delete nor destroy on the passed in heap
     545 + template<class U> mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator<U>& x) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, true>(x) { }
     546 + 
     547 + mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; }
     548 + void deallocate(T*, size_type) { /* do nothing as we destroy the heap on destruct. */ }
     549 + template<class U> struct rebind { typedef mi_heap_destroy_stl_allocator<U> other; };
     550 +};
     551 + 
     552 +template<class T1, class T2> bool operator==(const mi_heap_destroy_stl_allocator<T1>& x, const mi_heap_destroy_stl_allocator<T2>& y) mi_attr_noexcept { return (x.is_equal(y)); }
     553 +template<class T1, class T2> bool operator!=(const mi_heap_destroy_stl_allocator<T1>& x, const mi_heap_destroy_stl_allocator<T2>& y) mi_attr_noexcept { return (!x.is_equal(y)); }
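
// A minimal sketch: a container whose memory lives in its own heap and is released in one
// go when the last copy of the allocator goes out of scope (use with care, see above).
//
//   #include <vector>
//   {
//     mi_heap_destroy_stl_allocator<int> alloc;              // fresh heap, destroyed with the allocator
//     std::vector<int, mi_heap_destroy_stl_allocator<int>> v(alloc);
//     v.resize(1000);
//   }                                                        // heap and all of its blocks are freed here
//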
     554 + 
     555 +#endif // C++11
     556 + 
     557 +#endif // __cplusplus
     558 + 
     559 +#endif
     560 + 
  • ■ ■ ■ ■ ■ ■
    preload-mimalloc/mimalloc/readme.md
     1 + 
     2 +<img align="left" width="100" height="100" src="doc/mimalloc-logo.png"/>
     3 + 
     4 +[<img align="right" src="https://dev.azure.com/Daan0324/mimalloc/_apis/build/status/microsoft.mimalloc?branchName=dev"/>](https://dev.azure.com/Daan0324/mimalloc/_build?definitionId=1&_a=summary)
     5 + 
     6 +# mimalloc
     7 + 
     8 +&nbsp;
     9 + 
     10 +mimalloc (pronounced "me-malloc")
     11 +is a general purpose allocator with excellent [performance](#performance) characteristics.
     12 +It was initially developed by Daan Leijen for the runtime systems of the
     13 +[Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.
     14 + 
     15 +Latest release tag: `v2.1.1` (2023-04-03).
     16 +Latest stable tag: `v1.8.1` (2023-04-03).
     17 + 
     18 +mimalloc is a drop-in replacement for `malloc` and can be used in other programs
     19 +without code changes. For example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
     20 +```
     21 +> LD_PRELOAD=/usr/lib/libmimalloc.so myprogram
     22 +```
     23 +It also includes a robust way to override the default allocator in [Windows](#override_on_windows). Notable aspects of the design include:
     24 + 
     25 +- __small and consistent__: the library is about 8k LOC using simple and
     26 + consistent data structures. This makes it very suitable
     27 + to integrate and adapt in other projects. For runtime systems it
     28 + provides hooks for a monotonic _heartbeat_ and deferred freeing (for
     29 + bounded worst-case times with reference counting).
     30 + Partly due to its simplicity, mimalloc has been ported to many systems (Windows, macOS,
     31 + Linux, WASM, various BSD's, Haiku, MUSL, etc) and has excellent support for dynamic overriding.
     32 +- __free list sharding__: instead of one big free list (per size class) we have
     33 + many smaller lists per "mimalloc page" which reduces fragmentation and
     34 + increases locality --
     35 + things that are allocated close in time get allocated close in memory.
     36 + (A mimalloc page contains blocks of one size class and is usually 64KiB on a 64-bit system).
     37 +- __free list multi-sharding__: the big idea! Not only do we shard the free list
     38 + per mimalloc page, but for each page we have multiple free lists. In particular, there
     39 + is one list for thread-local `free` operations, and another one for concurrent `free`
     40 + operations (see the sketch after this list). Freeing from another thread can now be a single CAS
     41 + without needing sophisticated coordination between threads. Since there will be
     42 + thousands of separate free lists, contention is naturally distributed over the heap,
     43 + and the chance of contending on a single location will be low -- this is quite
     44 + similar to randomized algorithms like skip lists where adding
     45 + a random oracle removes the need for a more complex algorithm.
     46 +- __eager page reset__: when a "page" becomes empty (with increased chance
     47 + due to free list sharding) the memory is marked to the OS as unused (reset or decommitted)
     48 + reducing (real) memory pressure and fragmentation, especially in long running
     49 + programs.
     50 +- __secure__: _mimalloc_ can be built in secure mode, adding guard pages,
     51 + randomized allocation, encrypted free lists, etc. to protect against various
     52 + heap vulnerabilities. The performance penalty is usually around 10% on average
     53 + over our benchmarks.
     54 +- __first-class heaps__: efficiently create and use multiple heaps to allocate across different regions.
     55 + A heap can be destroyed at once instead of deallocating each object separately.
     56 +- __bounded__: it does not suffer from _blowup_ \[1\], has bounded worst-case allocation
     57 + times (_wcat_) (up to the OS primitives), bounded space overhead (~0.2% meta-data, with low
     58 + internal fragmentation), and has no internal points of contention using only atomic operations.
     59 +- __fast__: In our benchmarks (see [below](#performance)),
     60 + _mimalloc_ outperforms other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc),
     61 + and often uses less memory. A nice property is that it does consistently well over a wide range
     62 + of benchmarks. There is also good huge OS page support for larger server programs.
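
A simplified sketch (illustrative only, not mimalloc's actual data structures) of the
multi-sharded free list described above: the owning thread frees onto a plain local list,
while other threads push onto a separate atomic list with a single CAS:
```
#include <atomic>

struct block_t { block_t* next; };

struct page_t {
  block_t*              local_free;   // only touched by the owning thread
  std::atomic<block_t*> thread_free;  // pushed onto by other threads
};

// Free by the owning thread: no atomics needed.
void page_local_free(page_t* page, block_t* b) {
  b->next = page->local_free;
  page->local_free = b;
}

// Free from another thread: one compare-and-swap pushes onto the shared list.
void page_thread_free(page_t* page, block_t* b) {
  block_t* head = page->thread_free.load(std::memory_order_relaxed);
  do {
    b->next = head;
  } while (!page->thread_free.compare_exchange_weak(head, b,
             std::memory_order_release, std::memory_order_relaxed));
}
```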
     63 + 
     64 +The [documentation](https://microsoft.github.io/mimalloc) gives a full overview of the API.
     65 +You can read more on the design of _mimalloc_ in the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action) which also has detailed benchmark results.
     66 + 
     67 +Enjoy!
     68 + 
     69 +### Branches
     70 + 
     71 +* `master`: latest stable release (based on `dev-slice`).
     72 +* `dev`: development branch for mimalloc v1. Use this branch for submitting PR's.
     73 +* `dev-slice`: development branch for mimalloc v2. This branch is downstream of `dev`.
     74 + 
     75 +### Releases
     76 + 
     77 +Note: the `v2.x` version has a new algorithm for managing internal mimalloc pages that tends to reduce memory usage
     78 + and fragmentation compared to mimalloc `v1.x` (especially for large workloads). It should otherwise have similar performance
     79 + (see [below](#performance)); please report if you observe any significant performance regression.
     80 + 
     81 +* 2023-04-03, `v1.8.1`, `v2.1.1`: Fixes build issues on some platforms.
     82 + 
     83 +* 2023-03-29, `v1.8.0`, `v2.1.0`: Improved support for dynamic overriding on Windows 11. Improved tracing precision
     84 + with [asan](#asan) and [Valgrind](#valgrind), and added Windows event tracing [ETW](#ETW) (contributed by Xinglong He). Created an OS
     85 + abstraction layer to make it easier to port and separate platform dependent code (in `src/prim`). Fixed C++ STL compilation on older Microsoft C++ compilers, and various small bug fixes.
     86 + 
     87 +* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with [asan](#asan) and improved [Valgrind](#valgrind) support.
88 + Support arbitrarily large alignments (in particular for `std::pmr` pools).
     89 + Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev).
90 + Heap walks now visit all objects (including huge objects). Support Windows nano server containers (by Johannes Schindelin, @dscho).
     91 + Various small bug fixes.
     92 + 
     93 +* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind](#valgrind) for leak testing and heap block overflow
     94 + detection. Initial
95 + support for attaching heaps to a specific memory area (only in v2). Fix `realloc` behavior for zero-size blocks, remove the restriction to an integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`.
     96 + 
     97 +* 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation
     98 + even when compiling with older SDK's, fix dynamic overriding on macOS Monterey, fix MSVC C++ dynamic overriding, fix
     99 + warnings under Clang 14, improve performance if many OS threads are created and destroyed, fix statistics for large object
100 + allocations, `MIMALLOC_VERBOSE=1` no longer has a maximum on the number of error messages, various small fixes.
     101 + 
     102 +* 2022-02-14, `v1.7.5`, `v2.0.5` (alpha): fix malloc override on
     103 + Windows 11, fix compilation with musl, potentially reduced
     104 + committed memory, add `bin/minject` for Windows,
     105 + improved wasm support, faster aligned allocation,
     106 + various small fixes.
     107 + 
     108 +* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including
     109 + M1), improved performance for v2 for large objects, Python integration improvements, more standard
     110 + installation directories, various small fixes.
     111 + 
     112 +* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix
113 + thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-Windows 8, various small fixes.
     114 + 
     115 +* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental).
     116 + 
     117 +* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages.
     118 + 
     119 +* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics,
     120 + improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes.
     121 + 
     122 +* [Older release notes](#older-release-notes)
     123 + 
     124 +Special thanks to:
     125 + 
     126 +* [David Carlier](https://devnexen.blogspot.com/) (@devnexen) for his many contributions, and making
     127 + mimalloc work better on many less common operating systems, like Haiku, Dragonfly, etc.
     128 +* Mary Feofanova (@mary3000), Evgeniy Moiseenko, and Manuel Pöter (@mpoeter) for making mimalloc TSAN checkable, and finding
     129 + memory model bugs using the [genMC] model checker.
     130 +* Weipeng Liu (@pongba), Zhuowei Li, Junhua Wang, and Jakub Szymanski, for their early support of mimalloc and deployment
     131 + at large scale services, leading to many improvements in the mimalloc algorithms for large workloads.
     132 +* Jason Gibson (@jasongibson) for exhaustive testing on large scale workloads and server environments, and finding complex bugs
     133 + in (early versions of) `mimalloc`.
134 +* Manuel Pöter (@mpoeter) and Sam Gross (@colesbury) for finding an ABA concurrency issue in abandoned segment reclamation. Sam also created the [no GIL](https://github.com/colesbury/nogil) Python fork which
     135 + uses mimalloc internally.
     136 + 
     137 + 
     138 +[genMC]: https://plv.mpi-sws.org/genmc/
     139 + 
     140 +### Usage
     141 + 
142 +mimalloc is used in various large-scale, low-latency services and programs, for example:
     143 + 
     144 +<a href="https://www.bing.com"><img height="50" align="left" src="https://upload.wikimedia.org/wikipedia/commons/e/e9/Bing_logo.svg"></a>
     145 +<a href="https://azure.microsoft.com/"><img height="50" align="left" src="https://upload.wikimedia.org/wikipedia/commons/a/a8/Microsoft_Azure_Logo.svg"></a>
     146 +<a href="https://deathstrandingpc.505games.com"><img height="100" src="doc/ds-logo.png"></a>
     147 +<a href="https://docs.unrealengine.com/4.26/en-US/WhatsNew/Builds/ReleaseNotes/4_25/"><img height="100" src="doc/unreal-logo.svg"></a>
     148 +<a href="https://cab.spbu.ru/software/spades/"><img height="100" src="doc/spades-logo.png"></a>
     149 + 
     150 + 
     151 +# Building
     152 + 
     153 +## Windows
     154 + 
     155 +Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build.
     156 +The `mimalloc` project builds a static library (in `out/msvc-x64`), while the
     157 +`mimalloc-override` project builds a DLL for overriding malloc
     158 +in the entire program.
     159 + 
     160 +## macOS, Linux, BSD, etc.
     161 + 
     162 +We use [`cmake`](https://cmake.org)<sup>1</sup> as the build system:
     163 + 
     164 +```
     165 +> mkdir -p out/release
     166 +> cd out/release
     167 +> cmake ../..
     168 +> make
     169 +```
     170 +This builds the library as a shared (dynamic)
     171 +library (`.so` or `.dylib`), a static library (`.a`), and
     172 +as a single object file (`.o`).
     173 + 
174 +`> sudo make install` (installs the library and header files in `/usr/local/lib` and `/usr/local/include`)
     175 + 
     176 +You can build the debug version which does many internal checks and
     177 +maintains detailed statistics as:
     178 + 
     179 +```
     180 +> mkdir -p out/debug
     181 +> cd out/debug
     182 +> cmake -DCMAKE_BUILD_TYPE=Debug ../..
     183 +> make
     184 +```
185 +This will name the shared library `libmimalloc-debug.so`.
     186 + 
     187 +Finally, you can build a _secure_ version that uses guard pages, encrypted
     188 +free lists, etc., as:
     189 +```
     190 +> mkdir -p out/secure
     191 +> cd out/secure
     192 +> cmake -DMI_SECURE=ON ../..
     193 +> make
     194 +```
195 +This will name the shared library `libmimalloc-secure.so`.
     196 +Use `ccmake`<sup>2</sup> instead of `cmake`
     197 +to see and customize all the available build options.
     198 + 
     199 +Notes:
     200 +1. Install CMake: `sudo apt-get install cmake`
     201 +2. Install CCMake: `sudo apt-get install cmake-curses-gui`
     202 + 
     203 + 
     204 +## Single source
     205 + 
     206 +You can also directly build the single `src/static.c` file as part of your project without
     207 +needing `cmake` at all. Make sure to also add the mimalloc `include` directory to the include path.
     208 + 
     209 + 
     210 +# Using the library
     211 + 
212 +The preferred usage is including `<mimalloc.h>`, linking with
213 +the shared or static library, and using the `mi_malloc` API exclusively for allocation. For example,
     214 +```
     215 +> gcc -o myprogram -lmimalloc myfile.c
     216 +```
     217 + 
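As a small, hedged illustration of the `mi_` prefixed API (the calls shown are part of the public `mimalloc.h` interface; error handling is omitted):

```
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  printf("mimalloc version: %d\n", mi_version());
  void* p = mi_malloc(64);        // allocate 64 bytes from the mimalloc heap
  p = mi_realloc(p, 128);         // grow the allocation
  mi_free(p);                     // release it back to mimalloc
  return 0;
}
```
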
     218 +mimalloc uses only safe OS calls (`mmap` and `VirtualAlloc`) and can co-exist
     219 +with other allocators linked to the same program.
     220 +If you use `cmake`, you can simply use:
     221 +```
     222 +find_package(mimalloc 1.4 REQUIRED)
     223 +```
     224 +in your `CMakeLists.txt` to find a locally installed mimalloc. Then use either:
     225 +```
     226 +target_link_libraries(myapp PUBLIC mimalloc)
     227 +```
     228 +to link with the shared (dynamic) library, or:
     229 +```
     230 +target_link_libraries(myapp PUBLIC mimalloc-static)
     231 +```
232 +to link with the static library. See `test/CMakeLists.txt` for an example.
     233 + 
     234 +For best performance in C++ programs, it is also recommended to override the
235 +global `new` and `delete` operators. For convenience, mimalloc provides
     236 +[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project.
     237 +In C++, mimalloc also provides the `mi_stl_allocator` struct which implements the `std::allocator`
     238 +interface.
     239 + 
     240 +You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`)
     241 +and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version):
     242 +```
     243 +> env MIMALLOC_SHOW_STATS=1 ./cfrac 175451865205073170563711388363
     244 + 
     245 +175451865205073170563711388363 = 374456281610909315237213 * 468551
     246 + 
247 +heap stats:      peak       total       freed      unit
248 +normal   2:    16.4 kb     17.5 mb     17.5 mb      16 b   ok
249 +normal   3:    16.3 kb     15.2 mb     15.2 mb      24 b   ok
250 +normal   4:      64 b       4.6 kb      4.6 kb      32 b   ok
251 +normal   5:      80 b     118.4 kb    118.4 kb      40 b   ok
252 +normal   6:      48 b        48 b        48 b       48 b   ok
253 +normal  17:     960 b       960 b       960 b      320 b   ok
254 + 
255 +heap stats:      peak       total       freed      unit
256 +     normal:   33.9 kb     32.8 mb     32.8 mb       1 b   ok
257 +       huge:      0 b         0 b         0 b        1 b   ok
258 +      total:   33.9 kb     32.8 mb     32.8 mb       1 b   ok
259 +malloc requested:          32.8 mb
260 + 
261 +  committed:   58.2 kb     58.2 kb     58.2 kb       1 b   ok
262 +   reserved:    2.0 mb      2.0 mb      2.0 mb       1 b   ok
263 +      reset:      0 b         0 b         0 b        1 b   ok
264 +   segments:         1           1           1
265 +-abandoned:          0
266 +      pages:         6           6           6
267 +-abandoned:          0
268 +      mmaps:         3
269 +  mmap fast:         0
270 +  mmap slow:         1
271 +    threads:         0
272 +    elapsed:    2.022s
273 +    process: user: 1.781s, system: 0.016s, faults: 756, reclaims: 0, rss: 2.7 mb
     274 +```
     275 + 
276 +The above model of using the `mi_` prefixed API is not always possible
277 +in existing programs that already use the standard malloc interface;
278 +another option is to override the standard malloc interface
279 +completely and redirect all calls to the _mimalloc_ library instead.
     280 + 
     281 +## Environment Options
     282 + 
     283 +You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)),
     284 +or via environment variables:
     285 + 
     286 +- `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates.
     287 +- `MIMALLOC_VERBOSE=1`: show verbose messages.
     288 +- `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages.
     289 +- `MIMALLOC_PAGE_RESET=0`: by default, mimalloc will reset (or purge) OS pages that are not in use, to signal to the OS
     290 + that the underlying physical memory can be reused. This can reduce memory fragmentation in long running (server)
291 + programs. Setting it to `0` disables this, which can improve performance for batch-like programs.
292 + As an alternative, `MIMALLOC_RESET_DELAY=<msecs>` can be set higher (100ms by default) to make the page
293 + resets occur less frequently instead of turning them off completely.
     294 +- `MIMALLOC_USE_NUMA_NODES=N`: pretend there are at most `N` NUMA nodes. If not set, the actual NUMA nodes are detected
     295 + at runtime. Setting `N` to 1 may avoid problems in some virtual environments. Also, setting it to a lower number than
     296 + the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA
     297 + nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed).
     298 +- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly
     299 + improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs
     300 + to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes
     301 + the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that
302 + can have fragmented memory (for that reason, we generally recommend using `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible).
     303 + <!--
     304 + - `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions
305 + show in the working set even though usually just a small part is committed to physical memory. This is why it is
306 + turned off by default on Windows, as it does not look good in the task manager. However, turning it on has no
307 + real drawbacks and may improve performance a little.
     308 + -->
     309 +- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB _huge_ OS pages. This reserves the huge pages at
     310 + startup and sometimes this can give a large (latency) performance improvement on big workloads.
     311 + Usually it is better to not use
     312 + `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving
     313 + contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at
     314 + startup only once).
315 + Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]).
316 + With huge OS pages, it may be beneficial to set
317 + `MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) so that the initial `N` segments (of 4MiB)
318 + of a thread do not allocate in the huge OS pages; this prevents short-lived threads
319 + that allocate just a little from taking up space in the huge OS page area (which cannot be reset).
320 + The huge pages are usually allocated evenly among NUMA nodes.
321 + We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N`, where `N` is the NUMA node (starting at 0), to allocate all
322 + the huge pages at a specific NUMA node instead.
     323 + 
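As an illustration of the programmatic route mentioned above, a minimal sketch using `mi_option_set` might look as follows (the option names correspond to the environment variables above; set them as early as possible, before any allocation takes place):

```
#include <mimalloc.h>

int main(void) {
  // equivalent to MIMALLOC_SHOW_STATS=1: print statistics when the program terminates
  mi_option_set(mi_option_show_stats, 1);
  // equivalent to MIMALLOC_RESERVE_HUGE_OS_PAGES=2: reserve two 1GiB huge OS pages at startup
  // (whether this takes effect here depends on how early mimalloc initializes)
  mi_option_set(mi_option_reserve_huge_os_pages, 2);

  // ... the rest of the program allocates as usual ...
  return 0;
}
```
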
     324 +Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write
     325 +for all pages in the original process including the huge OS pages. When any memory is now written in that area, the
     326 +OS will copy the entire 1GiB huge page (or 2MiB large page) which can cause the memory usage to grow in large increments.
     327 + 
     328 +[linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5
     329 +[windows-huge]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017
     330 + 
     331 +## Secure Mode
     332 + 
333 +_mimalloc_ can be built in secure mode by using the `-DMI_SECURE=ON` flag in `cmake`. This build enables various mitigations
     334 +to make mimalloc more robust against exploits. In particular:
     335 + 
     336 +- All internal mimalloc pages are surrounded by guard pages and the heap metadata is behind a guard page as well (so a buffer overflow
     337 + exploit cannot reach into the metadata).
338 +- All free list pointers are
339 + [encoded](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396)
340 + with per-page keys, which are used both to prevent overwrites with a known pointer and to detect heap corruption (a simplified sketch follows this list).
341 +- Double frees are detected (and ignored).
     342 +- The free lists are initialized in a random order and allocation randomly chooses between extension and reuse within a page to
343 + mitigate against attacks that rely on a predictable allocation order. Similarly, the larger heap blocks allocated by mimalloc
     344 + from the OS are also address randomized.
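
As a rough sketch of the free-list encoding idea (simplified here to a plain XOR with a hypothetical per-page key; the actual encoding in mimalloc is more involved):

```
#include <stdint.h>

// Hypothetical per-page key; mimalloc derives its keys from random seeds per page.
typedef struct secure_page_s {
  uintptr_t key;
} secure_page_t;

// Encode a free-list pointer before storing it inside a free block.
static void* ptr_encode(const secure_page_t* page, void* p) {
  return (void*)((uintptr_t)p ^ page->key);
}

// Decode it again when popping from the free list; an attacker-written plain
// pointer (or a corrupted entry) decodes to an invalid address and can be detected.
static void* ptr_decode(const secure_page_t* page, void* enc) {
  return (void*)((uintptr_t)enc ^ page->key);
}
```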
     345 + 
     346 +As always, evaluate with care as part of an overall security strategy as all of the above are mitigations but not guarantees.
     347 + 
     348 +## Debug Mode
     349 + 
     350 +When _mimalloc_ is built using debug mode, various checks are done at runtime to catch development errors.
     351 + 
     352 +- Statistics are maintained in detail for each object size. They can be shown using `MIMALLOC_SHOW_STATS=1` at runtime.
     353 +- All objects have padding at the end to detect (byte precise) heap block overflows.
354 +- Double frees and freeing of invalid heap pointers are detected.
     355 +- Corrupted free-lists and some forms of use-after-free are detected.
     356 + 
     357 + 
     358 +# Overriding Standard Malloc
     359 + 
     360 +Overriding the standard `malloc` (and `new`) can be done either _dynamically_ or _statically_.
     361 + 
     362 +## Dynamic override
     363 + 
     364 +This is the recommended way to override the standard malloc interface.
     365 + 
     366 +### Dynamic Override on Linux, BSD
     367 + 
     368 +On these ELF-based systems we preload the mimalloc shared
     369 +library so all calls to the standard `malloc` interface are
     370 +resolved to the _mimalloc_ library.
     371 +```
     372 +> env LD_PRELOAD=/usr/lib/libmimalloc.so myprogram
     373 +```
     374 + 
     375 +You can set extra environment variables to check that mimalloc is running,
     376 +like:
     377 +```
     378 +> env MIMALLOC_VERBOSE=1 LD_PRELOAD=/usr/lib/libmimalloc.so myprogram
     379 +```
     380 +or run with the debug version to get detailed statistics:
     381 +```
     382 +> env MIMALLOC_SHOW_STATS=1 LD_PRELOAD=/usr/lib/libmimalloc-debug.so myprogram
     383 +```
     384 + 
     385 +### Dynamic Override on MacOS
     386 + 
     387 +On macOS we can also preload the mimalloc shared
     388 +library so all calls to the standard `malloc` interface are
     389 +resolved to the _mimalloc_ library.
     390 +```
     391 +> env DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram
     392 +```
     393 + 
     394 +Note that certain security restrictions may apply when doing this from
     395 +the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash).
     396 + 
     397 + 
     398 +### Dynamic Override on Windows
     399 + 
400 +<span id="override_on_windows">Overriding on Windows</span> is robust and has the
401 +particular advantage of being able to redirect all malloc/free calls that go through
402 +the (dynamic) C runtime allocator, including those from other DLLs or libraries.
     403 + 
     404 +The overriding on Windows requires that you link your program explicitly with
     405 +the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).
     406 +Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be put
     407 +in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency).
     408 +The redirection DLL ensures that all calls to the C runtime malloc API get redirected to
     409 +mimalloc (in `mimalloc-override.dll`).
     410 + 
     411 +To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some
     412 +call to the mimalloc API in the `main` function, like `mi_version()`
     413 +(or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project
414 +for an example of how to use this. For best performance on Windows with C++, it
415 +is also recommended to override the `new`/`delete` operations (by including
416 +[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) in a single(!) source file in your project).
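
A minimal sketch of the `mi_version()` call mentioned above (its only purpose is to create a reference to the mimalloc DLL so the linker keeps the dependency; `mi_version` is part of the public API):

```
#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  // Referencing the mimalloc API ensures mimalloc-override.dll is actually loaded
  // (and malloc/free get redirected) before other allocations happen.
  printf("using mimalloc %d\n", mi_version());
  // ... the rest of the program uses plain malloc/free/new/delete ...
  return 0;
}
```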
     417 + 
     418 +The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic
     419 +overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected.
     420 + 
     421 +(Note: in principle, it is possible to even patch existing executables without any recompilation
     422 +if they are linked with the dynamic C runtime (`ucrtbase.dll`) -- just put the `mimalloc-override.dll`
423 +into the import table (and put `mimalloc-redirect.dll` in the same folder).
424 +Such patching can be done, for example, with [CFF Explorer](https://ntcore.com/?page_id=388)).
     425 + 
     426 + 
     427 +## Static override
     428 + 
     429 +On Unix-like systems, you can also statically link with _mimalloc_ to override the standard
     430 +malloc interface. The recommended way is to link the final program with the
     431 +_mimalloc_ single object file (`mimalloc.o`). We use
432 +an object file instead of a library file as linkers give preference to
433 +object files over archives when resolving symbols. To ensure that the standard
     434 +malloc interface resolves to the _mimalloc_ library, link it as the first
     435 +object file. For example:
     436 +```
     437 +> gcc -o myprogram mimalloc.o myfile1.c ...
     438 +```
     439 + 
     440 +Another way to override statically that works on all platforms, is to
     441 +link statically to mimalloc (as shown in the introduction) and include a
     442 +header file in each source file that re-defines `malloc` etc. to `mi_malloc`.
443 +This is provided by [`mimalloc-override.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-override.h). This only works reliably, though, if all sources are
444 +under your control; otherwise, mixing of pointers from different heaps may occur!
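
A minimal, hedged sketch of this header-based approach (assuming the program is linked against the mimalloc static library or `mimalloc.o` so the `mi_` symbols resolve):

```
#include <stdlib.h>
#include <string.h>
// include the override header last so the macro re-definitions do not
// interfere with the system headers above
#include <mimalloc-override.h>

int main(void) {
  char* s = malloc(32);            // expands to mi_malloc(32)
  strcpy(s, "hello from mimalloc");
  s = realloc(s, 64);              // expands to mi_realloc(s, 64)
  free(s);                         // expands to mi_free(s)
  return 0;
}
```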
     445 + 
     446 + 
     447 +## Tools
     448 + 
449 +Generally, we recommend using the standard allocator with memory tracking tools, but mimalloc
450 +can also be built to support the [address sanitizer][asan] or the excellent [Valgrind] tool.
451 +Moreover, it can be built to support Windows event tracing ([ETW]).
     452 +This has a small performance overhead but does allow detecting memory leaks and byte-precise
     453 +buffer overflows directly on final executables. See also the `test/test-wrong.c` file to test with various tools.
     454 + 
     455 +### Valgrind
     456 + 
     457 +To build with [valgrind] support, use the `MI_TRACK_VALGRIND=ON` cmake option:
     458 + 
     459 +```
     460 +> cmake ../.. -DMI_TRACK_VALGRIND=ON
     461 +```
     462 + 
     463 +This can also be combined with secure mode or debug mode.
     464 +You can then run your programs directly under valgrind:
     465 + 
     466 +```
     467 +> valgrind <myprogram>
     468 +```
     469 + 
     470 +If you rely on overriding `malloc`/`free` by mimalloc (instead of using the `mi_malloc`/`mi_free` API directly),
     471 +you also need to tell `valgrind` to not intercept those calls itself, and use:
     472 + 
     473 +```
     474 +> MIMALLOC_SHOW_STATS=1 valgrind --soname-synonyms=somalloc=*mimalloc* -- <myprogram>
     475 +```
     476 + 
     477 +By setting the `MIMALLOC_SHOW_STATS` environment variable you can check that mimalloc is indeed
     478 +used and not the standard allocator. Even though the [Valgrind option][valgrind-soname]
     479 +is called `--soname-synonyms`, this also
     480 +works when overriding with a static library or object file. Unfortunately, it is not possible to
     481 +dynamically override mimalloc using `LD_PRELOAD` together with `valgrind`.
     482 +See also the `test/test-wrong.c` file to test with `valgrind`.
     483 + 
     484 +Valgrind support is in its initial development -- please report any issues.
     485 + 
     486 +[Valgrind]: https://valgrind.org/
     487 +[valgrind-soname]: https://valgrind.org/docs/manual/manual-core.html#opt.soname-synonyms
     488 + 
     489 +### ASAN
     490 + 
     491 +To build with the address sanitizer, use the `-DMI_TRACK_ASAN=ON` cmake option:
     492 + 
     493 +```
     494 +> cmake ../.. -DMI_TRACK_ASAN=ON
     495 +```
     496 + 
     497 +This can also be combined with secure mode or debug mode.
498 +You can then run your programs as:
     499 + 
     500 +```
     501 +> ASAN_OPTIONS=verbosity=1 <myprogram>
     502 +```
     503 + 
     504 +When you link a program with an address sanitizer build of mimalloc, you should
     505 +generally compile that program too with the address sanitizer enabled.
     506 +For example, assuming you build mimalloc in `out/debug`:
     507 + 
     508 +```
     509 +clang -g -o test-wrong -Iinclude test/test-wrong.c out/debug/libmimalloc-asan-debug.a -lpthread -fsanitize=address -fsanitize-recover=address
     510 +```
     511 + 
512 +Since the address sanitizer redirects the standard allocation functions, on some platforms (macOS, for example)
513 +it is required to compile mimalloc with `-DMI_OVERRIDE=OFF`.
514 +Address sanitizer support is in its initial development -- please report any issues.
     515 + 
     516 +[asan]: https://github.com/google/sanitizers/wiki/AddressSanitizer
     517 + 
     518 +### ETW
     519 + 
520 +Event tracing for Windows ([ETW]) provides a high-performance way to capture all allocations through
     521 +mimalloc and analyze them later. To build with ETW support, use the `-DMI_TRACE_ETW=ON` cmake option.
     522 + 
     523 +You can then capture an allocation trace using the Windows performance recorder (WPR), using the
     524 +`src/prim/windows/etw-mimalloc.wprp` profile. In an admin prompt, you can use:
     525 +```
     526 +> wpr -start src\prim\windows\etw-mimalloc.wprp -filemode
     527 +> <my_mimalloc_program>
     528 +> wpr -stop <my_mimalloc_program>.etl
     529 +```
     530 +and then open `<my_mimalloc_program>.etl` in the Windows Performance Analyzer (WPA), or
     531 +use a tool like [TraceControl] that is specialized for analyzing mimalloc traces.
     532 + 
     533 +[ETW]: https://learn.microsoft.com/en-us/windows-hardware/test/wpt/event-tracing-for-windows
     534 +[TraceControl]: https://github.com/xinglonghe/TraceControl
     535 + 
     536 + 
     537 +# Performance
     538 + 
     539 +Last update: 2021-01-30
     540 + 
     541 +We tested _mimalloc_ against many other top allocators over a wide
     542 +range of benchmarks, ranging from various real world programs to
     543 +synthetic benchmarks that see how the allocator behaves under more
     544 +extreme circumstances. In our benchmark suite, _mimalloc_ outperforms other leading
     545 +allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), and has a similar memory footprint. A nice property is that it
     546 +does consistently well over the wide range of benchmarks.
     547 + 
     548 +General memory allocators are interesting as there exists no algorithm that is
     549 +optimal -- for a given allocator one can usually construct a workload
     550 +where it does not do so well. The goal is thus to find an allocation
     551 +strategy that performs well over a wide range of benchmarks without
     552 +suffering from (too much) underperformance in less common situations.
     553 + 
     554 +As always, interpret these results with care since some benchmarks test synthetic
     555 +or uncommon situations that may never apply to your workloads. For example, most
     556 +allocators do not do well on `xmalloc-testN` but that includes even the best
     557 +industrial allocators like _jemalloc_ and _tcmalloc_ that are used in some of
     558 +the world's largest systems (like Chrome or FreeBSD).
     559 + 
     560 +Also, the benchmarks here do not measure the behaviour on very large and long-running server workloads,
     561 +or worst-case latencies of allocation. Much work has gone into `mimalloc` to work well on such
     562 +workloads (for example, to reduce virtual memory fragmentation on long-running services)
     563 +but such optimizations are not always reflected in the current benchmark suite.
     564 + 
     565 +We show here only an overview -- for
     566 +more specific details and further benchmarks we refer to the
     567 +[technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action).
     568 +The benchmark suite is automated and available separately
     569 +as [mimalloc-bench](https://github.com/daanx/mimalloc-bench).
     570 + 
     571 + 
     572 +## Benchmark Results on a 16-core AMD 5950x (Zen3)
     573 + 
574 +Testing on the 16-core AMD 5950x processor at 3.4GHz (4.9GHz boost),
575 +with 32GiB memory at 3600MHz, running Ubuntu 20.04 with glibc 2.31 and GCC 9.3.0.
     576 + 
     577 +We measure three versions of _mimalloc_: the main version `mi` (tag:v1.7.0),
     578 +the new v2.0 beta version as `xmi` (tag:v2.0.0), and the main version in secure mode as `smi` (tag:v1.7.0).
     579 + 
     580 +The other allocators are
     581 +Google's [_tcmalloc_](https://github.com/gperftools/gperftools) (`tc`, tag:gperftools-2.8.1) used in Chrome,
     582 +Facebook's [_jemalloc_](https://github.com/jemalloc/jemalloc) (`je`, tag:5.2.1) by Jason Evans used in Firefox and FreeBSD,
     583 +the Intel thread building blocks [allocator](https://github.com/intel/tbb) (`tbb`, tag:v2020.3),
     584 +[rpmalloc](https://github.com/mjansson/rpmalloc) (`rp`,tag:1.4.1) by Mattias Jansson,
     585 +the original scalable [_Hoard_](https://github.com/emeryberger/Hoard) (git:d880f72) allocator by Emery Berger \[1],
     586 +the memory compacting [_Mesh_](https://github.com/plasma-umass/Mesh) (git:67ff31a) allocator by
     587 +Bobby Powers _et al_ \[8],
     588 +and finally the default system allocator (`glibc`, 2.31) (based on _PtMalloc2_).
     589 + 
     590 +<img width="90%" src="doc/bench-2021/bench-amd5950x-2021-01-30-a.svg"/>
     591 +<img width="90%" src="doc/bench-2021/bench-amd5950x-2021-01-30-b.svg"/>
     592 + 
     593 +Any benchmarks ending in `N` run on all 32 logical cores in parallel.
     594 +Results are averaged over 10 runs and reported relative
     595 +to mimalloc (where 1.2 means it took 1.2&times; longer to run).
     596 +The legend also contains the _overall relative score_ between the
     597 +allocators where 100 points is the maximum if an allocator is fastest on
     598 +all benchmarks.
     599 + 
     600 +The single threaded _cfrac_ benchmark by Dave Barrett is an implementation of
     601 +continued fraction factorization which uses many small short-lived allocations.
     602 +All allocators do well on such common usage, where _mimalloc_ is just a tad
     603 +faster than _tcmalloc_ and
     604 +_jemalloc_.
     605 + 
     606 +The _leanN_ program is interesting as a large realistic and
     607 +concurrent workload of the [Lean](https://github.com/leanprover/lean)
     608 +theorem prover compiling its own standard library, and there is a 13%
     609 +speedup over _tcmalloc_. This is
     610 +quite significant: if Lean spends 20% of its time in the
     611 +allocator that means that _mimalloc_ is 1.6&times; faster than _tcmalloc_
     612 +here. (This is surprising as that is not measured in a pure
     613 +allocation benchmark like _alloc-test_. We conjecture that we see this
     614 +outsized improvement here because _mimalloc_ has better locality in
     615 +the allocation which improves performance for the *other* computations
     616 +in a program as well).
     617 + 
618 +The single threaded _redis_ benchmark again shows that most allocators do well on such workloads.
     619 + 
     620 +The _larsonN_ server benchmark by Larson and Krishnan \[2] allocates and frees between threads. They observed this
     621 +behavior (which they call _bleeding_) in actual server applications, and the benchmark simulates this.
     622 +Here, _mimalloc_ is quite a bit faster than _tcmalloc_ and _jemalloc_ probably due to the object migration between different threads.
     623 + 
     624 +The _mstressN_ workload performs many allocations and re-allocations,
     625 +and migrates objects between threads (as in _larsonN_). However, it also
     626 +creates and destroys the _N_ worker threads a few times keeping some objects
627 + alive beyond the lifetime of the allocating thread. We observed this
     628 +behavior in many larger server applications.
     629 + 
     630 +The [_rptestN_](https://github.com/mjansson/rpmalloc-benchmark) benchmark
631 +by Mattias Jansson is an allocator test originally designed
     632 +for _rpmalloc_, and tries to simulate realistic allocation patterns over
     633 +multiple threads. Here the differences between allocators become more apparent.
     634 + 
     635 +The second benchmark set tests specific aspects of the allocators and
     636 +shows even more extreme differences between them.
     637 + 
     638 +The _alloc-test_, by
     639 +[OLogN Technologies AG](http://ithare.com/testing-memory-allocators-ptmalloc2-tcmalloc-hoard-jemalloc-while-trying-to-simulate-real-world-loads/), is a very allocation intensive benchmark doing millions of
     640 +allocations in various size classes. The test is scaled such that when an
     641 +allocator performs almost identically on _alloc-test1_ as _alloc-testN_ it
     642 +means that it scales linearly.
     643 + 
644 +The _sh6bench_ and _sh8bench_ benchmarks were
     645 +developed by [MicroQuill](http://www.microquill.com/) as part of SmartHeap.
     646 +In _sh6bench_ _mimalloc_ does much
     647 +better than the others (more than 2.5&times; faster than _jemalloc_).
     648 +We cannot explain this well but believe it is
     649 +caused in part by the "reverse" free-ing pattern in _sh6bench_.
     650 +The _sh8bench_ is a variation with object migration
     651 +between threads; whereas _tcmalloc_ did well on _sh6bench_, the addition of object migration causes it to be 10&times; slower than before.
     652 + 
653 +The _xmalloc-testN_ benchmark by Lever and Boreham \[5] and Christian Eder simulates an asymmetric workload where
     654 +some threads only allocate, and others only free -- they observed this pattern in
     655 +larger server applications. Here we see that
     656 +the _mimalloc_ technique of having non-contended sharded thread free
     657 +lists pays off as it outperforms others by a very large margin. Only _rpmalloc_, _tbb_, and _glibc_ also scale well on this benchmark.
     658 + 
659 +The _cache-scratch_ benchmark by Emery Berger \[1] was introduced with
660 + the Hoard allocator to test for _passive-false_ sharing of cache lines.
     661 +With a single thread they all
     662 +perform the same, but when running with multiple threads the potential allocator
     663 +induced false sharing of the cache lines can cause large run-time differences.
     664 +Crundal \[6] describes in detail why the false cache line sharing occurs in the _tcmalloc_ design, and also discusses how this
     665 +can be avoided with some small implementation changes.
     666 +Only the _tbb_, _rpmalloc_ and _mesh_ allocators also avoid the
     667 +cache line sharing completely, while _Hoard_ and _glibc_ seem to mitigate
     668 +the effects. Kukanov and Voss \[7] describe in detail
     669 +how the design of _tbb_ avoids the false cache line sharing.
     670 + 
     671 + 
     672 +## On a 36-core Intel Xeon
     673 + 
     674 +For completeness, here are the results on a big Amazon
     675 +[c5.18xlarge](https://aws.amazon.com/ec2/instance-types/#Compute_Optimized) instance
     676 +consisting of a 2&times;18-core Intel Xeon (Cascade Lake) at 3.4GHz (boost 3.5GHz)
     677 +with 144GiB ECC memory, running Ubuntu 20.04 with glibc 2.31, GCC 9.3.0, and
     678 +Clang 10.0.0. This time, the mimalloc allocators (mi, xmi, and smi) were
     679 +compiled with the Clang compiler instead of GCC.
     680 +The results are similar to the AMD results but it is interesting to
     681 +see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks.
     682 + 
     683 +<img width="90%" src="doc/bench-2021/bench-c5-18xlarge-2021-01-30-a.svg"/>
     684 +<img width="90%" src="doc/bench-2021/bench-c5-18xlarge-2021-01-30-b.svg"/>
     685 + 
     686 + 
     687 +## Peak Working Set
     688 + 
     689 +The following figure shows the peak working set (rss) of the allocators
     690 +on the benchmarks (on the c5.18xlarge instance).
     691 + 
     692 +<img width="90%" src="doc/bench-2021/bench-c5-18xlarge-2021-01-30-rss-a.svg"/>
     693 +<img width="90%" src="doc/bench-2021/bench-c5-18xlarge-2021-01-30-rss-b.svg"/>
     694 + 
     695 +Note that the _xmalloc-testN_ memory usage should be disregarded as it
     696 +allocates more the faster the program runs. Similarly, memory usage of
     697 +_larsonN_, _mstressN_, _rptestN_ and _sh8bench_ can vary depending on scheduling and
     698 +speed. Nevertheless, we hope to improve the memory usage on _mstressN_
     699 +and _rptestN_ (just as _cfrac_, _larsonN_ and _sh8bench_ have a small working set which skews the results).
     700 + 
     701 +<!--
     702 +# Previous Benchmarks
     703 + 
     704 +Todo: should we create a separate page for this?
     705 + 
     706 +## Benchmark Results on 36-core Intel: 2020-01-20
     707 + 
     708 +Testing on a big Amazon EC2 compute instance
     709 +([c5.18xlarge](https://aws.amazon.com/ec2/instance-types/#Compute_Optimized))
     710 +consisting of a 72 processor Intel Xeon at 3GHz
     711 +with 144GiB ECC memory, running Ubuntu 18.04.1 with glibc 2.27 and GCC 7.4.0.
     712 +The measured allocators are _mimalloc_ (xmi, tag:v1.4.0, page reset enabled)
     713 +and its secure build as _smi_,
     714 +Google's [_tcmalloc_](https://github.com/gperftools/gperftools) (tc, tag:gperftools-2.7) used in Chrome,
     715 +Facebook's [_jemalloc_](https://github.com/jemalloc/jemalloc) (je, tag:5.2.1) by Jason Evans used in Firefox and FreeBSD,
     716 +the Intel thread building blocks [allocator](https://github.com/intel/tbb) (tbb, tag:2020),
     717 +[rpmalloc](https://github.com/mjansson/rpmalloc) (rp,tag:1.4.0) by Mattias Jansson,
     718 +the original scalable [_Hoard_](https://github.com/emeryberger/Hoard) (tag:3.13) allocator by Emery Berger \[1],
     719 +the memory compacting [_Mesh_](https://github.com/plasma-umass/Mesh) (git:51222e7) allocator by
     720 +Bobby Powers _et al_ \[8],
     721 +and finally the default system allocator (glibc, 2.27) (based on _PtMalloc2_).
     722 + 
     723 +<img width="90%" src="doc/bench-2020/bench-c5-18xlarge-2020-01-20-a.svg"/>
     724 +<img width="90%" src="doc/bench-2020/bench-c5-18xlarge-2020-01-20-b.svg"/>
     725 + 
     726 +The following figure shows the peak working set (rss) of the allocators
     727 +on the benchmarks (on the c5.18xlarge instance).
     728 + 
     729 +<img width="90%" src="doc/bench-2020/bench-c5-18xlarge-2020-01-20-rss-a.svg"/>
     730 +<img width="90%" src="doc/bench-2020/bench-c5-18xlarge-2020-01-20-rss-b.svg"/>
     731 + 
     732 + 
     733 +## On 24-core AMD Epyc, 2020-01-16
     734 + 
     735 +For completeness, here are the results on a
     736 +[r5a.12xlarge](https://aws.amazon.com/ec2/instance-types/#Memory_Optimized) instance
     737 +having a 48 processor AMD Epyc 7000 at 2.5GHz with 384GiB of memory.
     738 +The results are similar to the Intel results but it is interesting to
     739 +see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks.
     740 + 
     741 +<img width="90%" src="doc/bench-2020/bench-r5a-12xlarge-2020-01-16-a.svg"/>
     742 +<img width="90%" src="doc/bench-2020/bench-r5a-12xlarge-2020-01-16-b.svg"/>
     743 + 
     744 +-->
     745 + 
     746 + 
     747 +# References
     748 + 
     749 +- \[1] Emery D. Berger, Kathryn S. McKinley, Robert D. Blumofe, and Paul R. Wilson.
     750 + _Hoard: A Scalable Memory Allocator for Multithreaded Applications_
     751 + the Ninth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS-IX). Cambridge, MA, November 2000.
     752 + [pdf](http://www.cs.utexas.edu/users/mckinley/papers/asplos-2000.pdf)
     753 + 
     754 +- \[2] P. Larson and M. Krishnan. _Memory allocation for long-running server applications_.
     755 + In ISMM, Vancouver, B.C., Canada, 1998. [pdf](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.45.1947&rep=rep1&type=pdf)
     756 + 
     757 +- \[3] D. Grunwald, B. Zorn, and R. Henderson.
     758 + _Improving the cache locality of memory allocation_. In R. Cartwright, editor,
     759 + Proceedings of the Conference on Programming Language Design and Implementation, pages 177–186, New York, NY, USA, June 1993. [pdf](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.43.6621&rep=rep1&type=pdf)
     760 + 
     761 +- \[4] J. Barnes and P. Hut. _A hierarchical O(n*log(n)) force-calculation algorithm_. Nature, 324:446-449, 1986.
     762 + 
     763 +- \[5] C. Lever, and D. Boreham. _Malloc() Performance in a Multithreaded Linux Environment._
     764 + In USENIX Annual Technical Conference, Freenix Session. San Diego, CA. Jun. 2000.
     765 + Available at <https://github.com/kuszmaul/SuperMalloc/tree/master/tests>
     766 + 
     767 +- \[6] Timothy Crundal. _Reducing Active-False Sharing in TCMalloc_. 2016. CS16S1 project at the Australian National University. [pdf](http://courses.cecs.anu.edu.au/courses/CSPROJECTS/16S1/Reports/Timothy_Crundal_Report.pdf)
     768 + 
     769 +- \[7] Alexey Kukanov, and Michael J Voss.
     770 + _The Foundations for Scalable Multi-Core Software in Intel Threading Building Blocks._
     771 + Intel Technology Journal 11 (4). 2007
     772 + 
     773 +- \[8] Bobby Powers, David Tench, Emery D. Berger, and Andrew McGregor.
     774 + _Mesh: Compacting Memory Management for C/C++_
775 + In Proceedings of the 40th ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI'19), June 2019, pages 333–346.
     776 + 
     777 +<!--
     778 +- \[9] Paul Liétar, Theodore Butler, Sylvan Clebsch, Sophia Drossopoulou, Juliana Franco, Matthew J Parkinson,
     779 + Alex Shamis, Christoph M Wintersteiger, and David Chisnall.
     780 + _Snmalloc: A Message Passing Allocator._
     781 + In Proceedings of the 2019 ACM SIGPLAN International Symposium on Memory Management, 122–135. ACM. 2019.
     782 +-->
     783 + 
     784 +# Contributing
     785 + 
     786 +This project welcomes contributions and suggestions. Most contributions require you to agree to a
     787 +Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
     788 +the rights to use your contribution. For details, visit https://cla.microsoft.com.
     789 + 
     790 +When you submit a pull request, a CLA-bot will automatically determine whether you need to provide
     791 +a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions
     792 +provided by the bot. You will only need to do this once across all repos using our CLA.
     793 + 
     794 + 
     795 +# Older Release Notes
     796 + 
     797 +* 2020-09-24, `v1.6.7`: stable release 1.6: using standard C atomics, passing tsan testing, improved
     798 + handling of failing to commit on Windows, add [`mi_process_info`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc.h#L156) api call.
     799 +* 2020-08-06, `v1.6.4`: stable release 1.6: improved error recovery in low-memory situations,
     800 + support for IllumOS and Haiku, NUMA support for Vista/XP, improved NUMA detection for AMD Ryzen, ubsan support.
     801 +* 2020-05-05, `v1.6.3`: stable release 1.6: improved behavior in out-of-memory situations, improved malloc zones on macOS,
     802 + build PIC static libraries by default, add option to abort on out-of-memory, line buffered statistics.
     803 +* 2020-04-20, `v1.6.2`: stable release 1.6: fix compilation on Android, MingW, Raspberry, and Conda,
     804 + stability fix for Windows 7, fix multiple mimalloc instances in one executable, fix `strnlen` overload,
     805 + fix aligned debug padding.
     806 +* 2020-02-17, `v1.6.1`: stable release 1.6: minor updates (build with clang-cl, fix alignment issue for small objects).
     807 +* 2020-02-09, `v1.6.0`: stable release 1.6: fixed potential memory leak, improved overriding
808 + and thread local support on FreeBSD, NetBSD, DragonFly, and macOS. New byte-precise
     809 + heap block overflow detection in debug mode (besides the double-free detection and free-list
     810 + corruption detection). Add `nodiscard` attribute to most allocation functions.
     811 + Enable `MIMALLOC_PAGE_RESET` by default. New reclamation strategy for abandoned heap pages
     812 + for better memory footprint.
     813 +* 2020-02-09, `v1.5.0`: stable release 1.5: improved free performance, small bug fixes.
     814 +* 2020-01-22, `v1.4.0`: stable release 1.4: improved performance for delayed OS page reset,
     815 +more eager concurrent free, addition of STL allocator, fixed potential memory leak.
     816 +* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger
     817 +free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode.
     818 +* 2019-12-22, `v1.2.2`: stable release 1.2: minor updates.
     819 +* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows.
     820 +* 2019-10-07, `v1.1.0`: stable release 1.1.
     821 +* 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support.
     822 +* 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements.
     823 + 
  • ■ ■ ■ ■ ■ ■
    preload-mimalloc/mimalloc/src/alloc-aligned.c
     1 +/* ----------------------------------------------------------------------------
     2 +Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
     3 +This is free software; you can redistribute it and/or modify it under the
     4 +terms of the MIT license. A copy of the license can be found in the file
     5 +"LICENSE" at the root of this distribution.
     6 +-----------------------------------------------------------------------------*/
     7 + 
     8 +#include "mimalloc.h"
     9 +#include "mimalloc/internal.h"
     10 +#include "mimalloc/prim.h" // mi_prim_get_default_heap
     11 + 
     12 +#include <string.h> // memset
     13 + 
     14 +// ------------------------------------------------------
     15 +// Aligned Allocation
     16 +// ------------------------------------------------------
     17 + 
     18 +// Fallback primitive aligned allocation -- split out for better codegen
     19 +static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
     20 +{
     21 + mi_assert_internal(size <= PTRDIFF_MAX);
     22 + mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment));
     23 + 
     24 + const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)`
     25 + const size_t padsize = size + MI_PADDING_SIZE;
     26 + 
     27 + // use regular allocation if it is guaranteed to fit the alignment constraints
     28 + if (offset==0 && alignment<=padsize && padsize<=MI_MAX_ALIGN_GUARANTEE && (padsize&align_mask)==0) {
     29 + void* p = _mi_heap_malloc_zero(heap, size, zero);
     30 + mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0);
     31 + return p;
     32 + }
     33 + 
     34 + void* p;
     35 + size_t oversize;
     36 + if mi_unlikely(alignment > MI_ALIGNMENT_MAX) {
     37 + // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page)
     38 + // This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the
     39 + // first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down)
     40 + if mi_unlikely(offset != 0) {
     41 + // todo: cannot support offset alignment for very large alignments yet
     42 + #if MI_DEBUG > 0
     43 + _mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset);
     44 + #endif
     45 + return NULL;
     46 + }
     47 + oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size);
     48 + p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment); // the page block size should be large enough to align in the single huge page block
     49 + // zero afterwards as only the area from the aligned_p may be committed!
     50 + if (p == NULL) return NULL;
     51 + }
     52 + else {
     53 + // otherwise over-allocate
     54 + oversize = size + alignment - 1;
     55 + p = _mi_heap_malloc_zero(heap, oversize, zero);
     56 + if (p == NULL) return NULL;
     57 + }
     58 + 
     59 + // .. and align within the allocation
     60 + const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask;
     61 + const uintptr_t adjust = (poffset == 0 ? 0 : alignment - poffset);
     62 + mi_assert_internal(adjust < alignment);
     63 + void* aligned_p = (void*)((uintptr_t)p + adjust);
     64 + if (aligned_p != p) {
     65 + mi_page_t* page = _mi_ptr_page(p);
     66 + mi_page_set_has_aligned(page, true);
     67 + _mi_padding_shrink(page, (mi_block_t*)p, adjust + size);
     68 + }
     69 + // todo: expand padding if overallocated ?
     70 + 
     71 + mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size);
     72 + mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p));
     73 + mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0);
     74 + mi_assert_internal(mi_usable_size(aligned_p)>=size);
     75 + mi_assert_internal(mi_usable_size(p) == mi_usable_size(aligned_p)+adjust);
     76 +
     77 + // now zero the block if needed
     78 + if (alignment > MI_ALIGNMENT_MAX) {
     79 + // for the tracker, on huge aligned allocations only from the start of the large block is defined
     80 + mi_track_mem_undefined(aligned_p, size);
     81 + if (zero) {
     82 + _mi_memzero(aligned_p, mi_usable_size(aligned_p));
     83 + }
     84 + }
     85 + 
     86 + if (p != aligned_p) {
     87 + mi_track_align(p,aligned_p,adjust,mi_usable_size(aligned_p));
     88 + }
     89 + return aligned_p;
     90 +}
     91 + 
     92 +// Primitive aligned allocation
     93 +static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
     94 +{
     95 + // note: we don't require `size > offset`, we just guarantee that the address at offset is aligned regardless of the allocated size.
     96 + mi_assert(alignment > 0);
     97 + if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see <https://en.cppreference.com/w/c/memory/aligned_alloc>)
     98 + #if MI_DEBUG > 0
     99 + _mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment);
     100 + #endif
     101 + return NULL;
     102 + }
     103 + /*
     104 + if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { // we cannot align at a boundary larger than this (or otherwise we cannot find segment headers)
     105 + #if MI_DEBUG > 0
     106 + _mi_error_message(EOVERFLOW, "aligned allocation has a maximum alignment of %zu (size %zu, alignment %zu)\n", MI_ALIGNMENT_MAX, size, alignment);
     107 + #endif
     108 + return NULL;
     109 + }
     110 + */
     111 + if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
     112 + #if MI_DEBUG > 0
     113 + _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment);
     114 + #endif
     115 + return NULL;
     116 + }
     117 + const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)`
     118 + const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size > PTRDIFF_MAX check
     119 + 
     120 + // try first if there happens to be a small block available with just the right alignment
     121 + if mi_likely(padsize <= MI_SMALL_SIZE_MAX && alignment <= padsize) {
     122 + mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize);
     123 + const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0;
     124 + if mi_likely(page->free != NULL && is_aligned)
     125 + {
     126 + #if MI_STAT>1
     127 + mi_heap_stat_increase(heap, malloc, size);
     128 + #endif
     129 + void* p = _mi_page_malloc(heap, page, padsize, zero); // TODO: inline _mi_page_malloc
     130 + mi_assert_internal(p != NULL);
     131 + mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
     132 + mi_track_malloc(p,size,zero);
     133 + return p;
     134 + }
     135 + }
     136 + // fallback
     137 + return mi_heap_malloc_zero_aligned_at_fallback(heap, size, alignment, offset, zero);
     138 +}
     139 + 
     140 + 
     141 +// ------------------------------------------------------
     142 +// Optimized mi_heap_malloc_aligned / mi_malloc_aligned
     143 +// ------------------------------------------------------
     144 + 
     145 +mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
     146 + return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, false);
     147 +}
     148 + 
     149 +mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept {
     150 + #if !MI_PADDING
     151 + // without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`)
     152 + if (!_mi_is_power_of_two(alignment)) return NULL;
     153 + if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX)
     154 + #else
     155 + // with padding, we can only guarantee this for fixed alignments
     156 + if mi_likely((alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2)))
     157 + && size <= MI_SMALL_SIZE_MAX)
     158 + #endif
     159 + {
     160 + // fast path for common alignment and size
     161 + return mi_heap_malloc_small(heap, size);
     162 + }
     163 + else {
     164 + return mi_heap_malloc_aligned_at(heap, size, alignment, 0);
     165 + }
     166 +}
     167 + 
     168 +// ------------------------------------------------------
     169 +// Aligned Allocation
     170 +// ------------------------------------------------------
     171 + 
     172 +mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
     173 + return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, true);
     174 +}
     175 + 
     176 +mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept {
     177 + return mi_heap_zalloc_aligned_at(heap, size, alignment, 0);
     178 +}
     179 + 
     180 +mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
     181 + size_t total;
     182 + if (mi_count_size_overflow(count, size, &total)) return NULL;
     183 + return mi_heap_zalloc_aligned_at(heap, total, alignment, offset);
     184 +}
     185 + 
     186 +mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept {
     187 + return mi_heap_calloc_aligned_at(heap,count,size,alignment,0);
     188 +}
     189 + 
     190 +mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
     191 + return mi_heap_malloc_aligned_at(mi_prim_get_default_heap(), size, alignment, offset);
     192 +}
     193 + 
     194 +mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept {
     195 + return mi_heap_malloc_aligned(mi_prim_get_default_heap(), size, alignment);
     196 +}
     197 + 
     198 +mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
     199 + return mi_heap_zalloc_aligned_at(mi_prim_get_default_heap(), size, alignment, offset);
     200 +}
     201 + 
     202 +mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept {
     203 + return mi_heap_zalloc_aligned(mi_prim_get_default_heap(), size, alignment);
     204 +}
     205 + 
     206 +mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
     207 + return mi_heap_calloc_aligned_at(mi_prim_get_default_heap(), count, size, alignment, offset);
     208 +}
     209 + 
     210 +mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept {
     211 + return mi_heap_calloc_aligned(mi_prim_get_default_heap(), count, size, alignment);
     212 +}
     213 + 
     214 + 
     215 +// ------------------------------------------------------
     216 +// Aligned re-allocation
     217 +// ------------------------------------------------------
     218 + 
     219 +static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset, bool zero) mi_attr_noexcept {
     220 + mi_assert(alignment > 0);
     221 + if (alignment <= sizeof(uintptr_t)) return _mi_heap_realloc_zero(heap,p,newsize,zero);
     222 + if (p == NULL) return mi_heap_malloc_zero_aligned_at(heap,newsize,alignment,offset,zero);
     223 + size_t size = mi_usable_size(p);
     224 + if (newsize <= size && newsize >= (size - (size / 2))
     225 + && (((uintptr_t)p + offset) % alignment) == 0) {
     226 + return p; // reallocation still fits, is aligned and not more than 50% waste
     227 + }
     228 + else {
     229 + void* newp = mi_heap_malloc_aligned_at(heap,newsize,alignment,offset);
     230 + if (newp != NULL) {
     231 + if (zero && newsize > size) {
     232 + const mi_page_t* page = _mi_ptr_page(newp);
     233 + if (page->is_zero) {
     234 + // already zero initialized
     235 + mi_assert_expensive(mi_mem_is_zero(newp,newsize));
     236 + }
     237 + else {
     238 + // also set last word in the previous allocation to zero to ensure any padding is zero-initialized
     239 + size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
     240 + memset((uint8_t*)newp + start, 0, newsize - start);
     241 + }
     242 + }
     243 + _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize));
     244 + mi_free(p); // only free if successful
     245 + }
     246 + return newp;
     247 + }
     248 +}
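
A hedged worked example of the in-place rule above: a block can be reused when the new size still fits, wastes no more than half of the usable size, and the alignment (plus offset) still holds; otherwise a fresh aligned block is allocated and the contents copied. The sketch below uses only the public API and does not assume which branch is taken:

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  void* p = mi_malloc_aligned(96, 32);
  size_t usable = mi_usable_size(p);        // at least 96
  // shrinking to 64 keeps more than half of the block in use, so the
  // in-place branch above may simply return the same pointer
  void* q = mi_realloc_aligned(p, 64, 32);
  printf("usable=%zu reused=%d\n", usable, p == q);
  mi_free(q);
  return 0;
}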
     249 + 
     250 +static void* mi_heap_realloc_zero_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, bool zero) mi_attr_noexcept {
     251 + mi_assert(alignment > 0);
     252 + if (alignment <= sizeof(uintptr_t)) return _mi_heap_realloc_zero(heap,p,newsize,zero);
     253 + size_t offset = ((uintptr_t)p % alignment); // use offset of previous allocation (p can be NULL)
     254 + return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,zero);
     255 +}
     256 + 
     257 +mi_decl_nodiscard void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
     258 + return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,false);
     259 +}
     260 + 
     261 +mi_decl_nodiscard void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
     262 + return mi_heap_realloc_zero_aligned(heap,p,newsize,alignment,false);
     263 +}
     264 + 
     265 +mi_decl_nodiscard void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
     266 + return mi_heap_realloc_zero_aligned_at(heap, p, newsize, alignment, offset, true);
     267 +}
     268 + 
     269 +mi_decl_nodiscard void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
     270 + return mi_heap_realloc_zero_aligned(heap, p, newsize, alignment, true);
     271 +}
     272 + 
     273 +mi_decl_nodiscard void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
     274 + size_t total;
     275 + if (mi_count_size_overflow(newcount, size, &total)) return NULL;
     276 + return mi_heap_rezalloc_aligned_at(heap, p, total, alignment, offset);
     277 +}
     278 + 
     279 +mi_decl_nodiscard void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept {
     280 + size_t total;
     281 + if (mi_count_size_overflow(newcount, size, &total)) return NULL;
     282 + return mi_heap_rezalloc_aligned(heap, p, total, alignment);
     283 +}
     284 + 
     285 +mi_decl_nodiscard void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
     286 + return mi_heap_realloc_aligned_at(mi_prim_get_default_heap(), p, newsize, alignment, offset);
     287 +}
     288 + 
     289 +mi_decl_nodiscard void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
     290 + return mi_heap_realloc_aligned(mi_prim_get_default_heap(), p, newsize, alignment);
     291 +}
     292 + 
     293 +mi_decl_nodiscard void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
     294 + return mi_heap_rezalloc_aligned_at(mi_prim_get_default_heap(), p, newsize, alignment, offset);
     295 +}
     296 + 
     297 +mi_decl_nodiscard void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
     298 + return mi_heap_rezalloc_aligned(mi_prim_get_default_heap(), p, newsize, alignment);
     299 +}
     300 + 
     301 +mi_decl_nodiscard void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
     302 + return mi_heap_recalloc_aligned_at(mi_prim_get_default_heap(), p, newcount, size, alignment, offset);
     303 +}
     304 + 
     305 +mi_decl_nodiscard void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept {
     306 + return mi_heap_recalloc_aligned(mi_prim_get_default_heap(), p, newcount, size, alignment);
     307 +}
     308 + 
  • ■ ■ ■ ■ ■ ■
    preload-mimalloc/mimalloc/src/alloc-override.c
     1 +/* ----------------------------------------------------------------------------
     2 +Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
     3 +This is free software; you can redistribute it and/or modify it under the
     4 +terms of the MIT license. A copy of the license can be found in the file
     5 +"LICENSE" at the root of this distribution.
     6 +-----------------------------------------------------------------------------*/
     7 + 
     8 +#if !defined(MI_IN_ALLOC_C)
     9 +#error "this file should be included from 'alloc.c' (so aliases can work)"
     10 +#endif
     11 + 
     12 +#if defined(MI_MALLOC_OVERRIDE) && defined(_WIN32) && !(defined(MI_SHARED_LIB) && defined(_DLL))
     13 +#error "It is only possible to override "malloc" on Windows when building as a DLL (and linking the C runtime as a DLL)"
     14 +#endif
     15 + 
     16 +#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32))
     17 + 
     18 +#if defined(__APPLE__)
     19 +#include <AvailabilityMacros.h>
     20 +mi_decl_externc void vfree(void* p);
     21 +mi_decl_externc size_t malloc_size(const void* p);
     22 +mi_decl_externc size_t malloc_good_size(size_t size);
     23 +#endif
     24 + 
     25 +// helper definition for C override of C++ new
     26 +typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
     27 + 
     28 +// ------------------------------------------------------
     29 +// Override system malloc
     30 +// ------------------------------------------------------
     31 + 
     32 +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__) && !MI_TRACK_ENABLED
     33 + // gcc, clang: use aliasing to alias the exported function to one of our `mi_` functions
     34 + #if (defined(__GNUC__) && __GNUC__ >= 9)
     35 + #pragma GCC diagnostic ignored "-Wattributes" // or we get warnings that nodiscard is ignored on a forward
     36 + #define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default"), copy(fun)));
     37 + #else
     38 + #define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default")));
     39 + #endif
     40 + #define MI_FORWARD1(fun,x) MI_FORWARD(fun)
     41 + #define MI_FORWARD2(fun,x,y) MI_FORWARD(fun)
     42 + #define MI_FORWARD3(fun,x,y,z) MI_FORWARD(fun)
     43 + #define MI_FORWARD0(fun,x) MI_FORWARD(fun)
     44 + #define MI_FORWARD02(fun,x,y) MI_FORWARD(fun)
     45 +#else
     46 + // otherwise use forwarding by calling our `mi_` function
     47 + #define MI_FORWARD1(fun,x) { return fun(x); }
     48 + #define MI_FORWARD2(fun,x,y) { return fun(x,y); }
     49 + #define MI_FORWARD3(fun,x,y,z) { return fun(x,y,z); }
     50 + #define MI_FORWARD0(fun,x) { fun(x); }
     51 + #define MI_FORWARD02(fun,x,y) { fun(x,y); }
     52 +#endif
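
The aliasing branch above makes the standard name another symbol for the `mi_` implementation, so no forwarding call is paid; a minimal standalone sketch of the same attribute (assuming GCC/Clang on an ELF target, with throwaway names unrelated to mimalloc):

#include <stdio.h>

void impl_hello(void) { printf("hello\n"); }

// `hello` is emitted as an alias for impl_hello: both names resolve to the
// same code, so callers of hello() go straight to the implementation.
void hello(void) __attribute__((alias("impl_hello"), used, visibility("default")));

int main(void) { hello(); return 0; }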
     53 + 
     54 + 
     55 +#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_OSX_INTERPOSE)
     56 + // define MI_OSX_IS_INTERPOSED as we should not provide forwarding definitions for
     57 + // functions that are interposed (or the interposing does not work)
     58 + #define MI_OSX_IS_INTERPOSED
     59 + 
     60 + mi_decl_externc size_t mi_malloc_size_checked(void *p) {
     61 + if (!mi_is_in_heap_region(p)) return 0;
     62 + return mi_usable_size(p);
     63 + }
     64 + 
     65 + // use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1`
     66 + // See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73>
     67 + struct mi_interpose_s {
     68 + const void* replacement;
     69 + const void* target;
     70 + };
     71 + #define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun }
     72 + #define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun)
     73 + 
     74 + __attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) =
     75 + {
     76 + MI_INTERPOSE_MI(malloc),
     77 + MI_INTERPOSE_MI(calloc),
     78 + MI_INTERPOSE_MI(realloc),
     79 + MI_INTERPOSE_MI(strdup),
     80 + MI_INTERPOSE_MI(strndup),
     81 + MI_INTERPOSE_MI(realpath),
     82 + MI_INTERPOSE_MI(posix_memalign),
     83 + MI_INTERPOSE_MI(reallocf),
     84 + MI_INTERPOSE_MI(valloc),
     85 + MI_INTERPOSE_FUN(malloc_size,mi_malloc_size_checked),
     86 + MI_INTERPOSE_MI(malloc_good_size),
     87 + #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15
     88 + MI_INTERPOSE_MI(aligned_alloc),
     89 + #endif
     90 + #ifdef MI_OSX_ZONE
     91 + // we interpose malloc_default_zone in alloc-override-osx.c so we can use mi_free safely
     92 + MI_INTERPOSE_MI(free),
     93 + MI_INTERPOSE_FUN(vfree,mi_free),
     94 + #else
     95 + // sometimes code allocates from default zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>)
     96 + MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us
     97 + MI_INTERPOSE_FUN(vfree,mi_cfree),
     98 + #endif
     99 + };
     100 + 
     101 + #ifdef __cplusplus
     102 + extern "C" {
     103 + #endif
     104 + void _ZdlPv(void* p); // delete
     105 + void _ZdaPv(void* p); // delete[]
     106 + void _ZdlPvm(void* p, size_t n); // delete
     107 + void _ZdaPvm(void* p, size_t n); // delete[]
     108 + void* _Znwm(size_t n); // new
     109 + void* _Znam(size_t n); // new[]
     110 + void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag); // new nothrow
     111 + void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag); // new[] nothrow
     112 + #ifdef __cplusplus
     113 + }
     114 + #endif
     115 + __attribute__((used)) static struct mi_interpose_s _mi_cxx_interposes[] __attribute__((section("__DATA, __interpose"))) =
     116 + {
     117 + MI_INTERPOSE_FUN(_ZdlPv,mi_free),
     118 + MI_INTERPOSE_FUN(_ZdaPv,mi_free),
     119 + MI_INTERPOSE_FUN(_ZdlPvm,mi_free_size),
     120 + MI_INTERPOSE_FUN(_ZdaPvm,mi_free_size),
     121 + MI_INTERPOSE_FUN(_Znwm,mi_new),
     122 + MI_INTERPOSE_FUN(_Znam,mi_new),
     123 + MI_INTERPOSE_FUN(_ZnwmRKSt9nothrow_t,mi_new_nothrow),
     124 + MI_INTERPOSE_FUN(_ZnamRKSt9nothrow_t,mi_new_nothrow),
     125 + };
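
For context, a minimal standalone sketch of the `__interpose` mechanism used above (throwaway names, not part of this file): compiled into a dylib and loaded via `DYLD_INSERT_LIBRARIES`, dyld redirects other images' calls to the target symbol to the replacement, while the interposing library can still reach the original directly:

#include <malloc/malloc.h>
#include <stdio.h>

static size_t traced_malloc_size(const void* p) {
  size_t n = malloc_size(p);   // the interposing image still calls the original
  fprintf(stderr, "malloc_size(%p) -> %zu\n", p, n);
  return n;
}

__attribute__((used)) static struct { const void* replacement; const void* target; }
  my_interpose[] __attribute__((section("__DATA, __interpose"))) = {
  { (const void*)&traced_malloc_size, (const void*)&malloc_size },
};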
     126 + 
     127 +#elif defined(_MSC_VER)
     128 + // cannot override malloc unless using a dll.
     129 + // we just override new/delete which does work in a static library.
     130 +#else
     131 + // On all other systems forward to our API
     132 + mi_decl_export void* malloc(size_t size) MI_FORWARD1(mi_malloc, size)
     133 + mi_decl_export void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n)
     134 + mi_decl_export void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize)
     135 + mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p)
     136 +#endif
     137 + 
     138 +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__)
     139 +#pragma GCC visibility push(default)
     140 +#endif
     141 + 
     142 +// ------------------------------------------------------
     143 +// Override new/delete
     144 +// This is not really necessary as they usually call
     145 +// malloc/free anyway, but it improves performance.
     146 +// ------------------------------------------------------
     147 +#ifdef __cplusplus
     148 + // ------------------------------------------------------
     149 + // With a C++ compiler we override the new/delete operators.
     150 + // see <https://en.cppreference.com/w/cpp/memory/new/operator_new>
     151 + // ------------------------------------------------------
     152 + #include <new>
     153 + 
     154 + #ifndef MI_OSX_IS_INTERPOSED
     155 + void operator delete(void* p) noexcept MI_FORWARD0(mi_free,p)
     156 + void operator delete[](void* p) noexcept MI_FORWARD0(mi_free,p)
     157 + 
     158 + void* operator new(std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n)
     159 + void* operator new[](std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n)
     160 + 
     161 + void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { MI_UNUSED(tag); return mi_new_nothrow(n); }
     162 + void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { MI_UNUSED(tag); return mi_new_nothrow(n); }
     163 + 
     164 + #if (__cplusplus >= 201402L || _MSC_VER >= 1916)
     165 + void operator delete (void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n)
     166 + void operator delete[](void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n)
     167 + #endif
     168 + #endif
     169 + 
     170 + #if (__cplusplus > 201402L && defined(__cpp_aligned_new)) && (!defined(__GNUC__) || (__GNUC__ > 5))
     171 + void operator delete (void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
     172 + void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
     173 + void operator delete (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); };
     174 + void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); };
     175 + void operator delete (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
     176 + void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
     177 + 
     178 + void* operator new( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
     179 + void* operator new[]( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
     180 + void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
     181 + void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
     182 + #endif
     183 + 
     184 +#elif (defined(__GNUC__) || defined(__clang__))
     185 + // ------------------------------------------------------
     186 + // Override by defining the mangled C++ names of the operators (as
      187 + // used by GCC and Clang).
     188 + // See <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling>
     189 + // ------------------------------------------------------
     190 + 
     191 + void _ZdlPv(void* p) MI_FORWARD0(mi_free,p) // delete
     192 + void _ZdaPv(void* p) MI_FORWARD0(mi_free,p) // delete[]
     193 + void _ZdlPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n)
     194 + void _ZdaPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n)
     195 + void _ZdlPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); }
     196 + void _ZdaPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); }
     197 + void _ZdlPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); }
     198 + void _ZdaPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); }
     199 + 
     200 + #if (MI_INTPTR_SIZE==8)
     201 + void* _Znwm(size_t n) MI_FORWARD1(mi_new,n) // new 64-bit
     202 + void* _Znam(size_t n) MI_FORWARD1(mi_new,n) // new[] 64-bit
     203 + void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
     204 + void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
     205 + void* _ZnwmSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al)
     206 + void* _ZnamSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al)
     207 + void* _ZnwmSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
     208 + void* _ZnamSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
     209 + #elif (MI_INTPTR_SIZE==4)
      210 + void* _Znwj(size_t n) MI_FORWARD1(mi_new,n) // new 32-bit
      211 + void* _Znaj(size_t n) MI_FORWARD1(mi_new,n) // new[] 32-bit
     212 + void* _ZnwjRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
     213 + void* _ZnajRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
     214 + void* _ZnwjSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al)
     215 + void* _ZnajSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al)
     216 + void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
     217 + void* _ZnajSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
     218 + #else
     219 + #error "define overloads for new/delete for this platform (just for performance, can be skipped)"
     220 + #endif
     221 +#endif // __cplusplus
     222 + 
     223 +// ------------------------------------------------------
     224 +// Further Posix & Unix functions definitions
     225 +// ------------------------------------------------------
     226 + 
     227 +#ifdef __cplusplus
     228 +extern "C" {
     229 +#endif
     230 + 
     231 +#ifndef MI_OSX_IS_INTERPOSED
     232 + // Forward Posix/Unix calls as well
     233 + void* reallocf(void* p, size_t newsize) MI_FORWARD2(mi_reallocf,p,newsize)
     234 + size_t malloc_size(const void* p) MI_FORWARD1(mi_usable_size,p)
     235 + #if !defined(__ANDROID__) && !defined(__FreeBSD__)
     236 + size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p)
     237 + #else
     238 + size_t malloc_usable_size(const void *p) MI_FORWARD1(mi_usable_size,p)
     239 + #endif
     240 + 
     241 + // No forwarding here due to aliasing/name mangling issues
     242 + void* valloc(size_t size) { return mi_valloc(size); }
     243 + void vfree(void* p) { mi_free(p); }
     244 + size_t malloc_good_size(size_t size) { return mi_malloc_good_size(size); }
     245 + int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); }
     246 + 
     247 + // `aligned_alloc` is only available when __USE_ISOC11 is defined.
     248 + // Note: Conda has a custom glibc where `aligned_alloc` is declared `static inline` and we cannot
     249 + // override it, but both _ISOC11_SOURCE and __USE_ISOC11 are undefined in Conda GCC7 or GCC9.
     250 + // Fortunately, in the case where `aligned_alloc` is declared as `static inline` it
     251 + // uses internally `memalign`, `posix_memalign`, or `_aligned_malloc` so we can avoid overriding it ourselves.
     252 + #if __USE_ISOC11
     253 + void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }
     254 + #endif
     255 +#endif
     256 + 
     257 +// no forwarding here due to aliasing/name mangling issues
     258 +void cfree(void* p) { mi_free(p); }
     259 +void* pvalloc(size_t size) { return mi_pvalloc(size); }
     260 +void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); }
     261 +int reallocarr(void* p, size_t count, size_t size) { return mi_reallocarr(p, count, size); }
     262 +void* memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); }
     263 +void* _aligned_malloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }
     264 + 
     265 +#if defined(__wasi__)
     266 + // forward __libc interface (see PR #667)
     267 + void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc, size)
     268 + void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc, count, size)
     269 + void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc, p, size)
     270 + void __libc_free(void* p) MI_FORWARD0(mi_free, p)
     271 + void* __libc_memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); }
     272 + 
     273 +#elif defined(__GLIBC__) && defined(__linux__)
     274 + // forward __libc interface (needed for glibc-based Linux distributions)
     275 + void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc,size)
     276 + void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc,count,size)
     277 + void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc,p,size)
     278 + void __libc_free(void* p) MI_FORWARD0(mi_free,p)
     279 + void __libc_cfree(void* p) MI_FORWARD0(mi_free,p)
     280 + 
     281 + void* __libc_valloc(size_t size) { return mi_valloc(size); }
     282 + void* __libc_pvalloc(size_t size) { return mi_pvalloc(size); }
     283 + void* __libc_memalign(size_t alignment, size_t size) { return mi_memalign(alignment,size); }
     284 + int __posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p,alignment,size); }
     285 +#endif
     286 + 
     287 +#ifdef __cplusplus
     288 +}
     289 +#endif
     290 + 
     291 +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__)
     292 +#pragma GCC visibility pop
     293 +#endif
     294 + 
     295 +#endif // MI_MALLOC_OVERRIDE && !_WIN32
     296 + 
  • ■ ■ ■ ■ ■ ■
    preload-mimalloc/mimalloc/src/alloc-posix.c
     1 +/* ----------------------------------------------------------------------------
     2 +Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
     3 +This is free software; you can redistribute it and/or modify it under the
     4 +terms of the MIT license. A copy of the license can be found in the file
     5 +"LICENSE" at the root of this distribution.
     6 +-----------------------------------------------------------------------------*/
     7 + 
     8 +// ------------------------------------------------------------------------
      9 + // mi-prefixed public definitions of various Posix, Unix, and C++ functions,
      10 + // for convenience and for use when overriding these functions.
     11 +// ------------------------------------------------------------------------
     12 +#include "mimalloc.h"
     13 +#include "mimalloc/internal.h"
     14 + 
     15 +// ------------------------------------------------------
     16 +// Posix & Unix functions definitions
     17 +// ------------------------------------------------------
     18 + 
     19 +#include <errno.h>
     20 +#include <string.h> // memset
     21 +#include <stdlib.h> // getenv
     22 + 
     23 +#ifdef _MSC_VER
     24 +#pragma warning(disable:4996) // getenv _wgetenv
     25 +#endif
     26 + 
     27 +#ifndef EINVAL
     28 +#define EINVAL 22
     29 +#endif
     30 +#ifndef ENOMEM
     31 +#define ENOMEM 12
     32 +#endif
     33 + 
     34 + 
     35 +mi_decl_nodiscard size_t mi_malloc_size(const void* p) mi_attr_noexcept {
     36 + // if (!mi_is_in_heap_region(p)) return 0;
     37 + return mi_usable_size(p);
     38 +}
     39 + 
     40 +mi_decl_nodiscard size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept {
     41 + // if (!mi_is_in_heap_region(p)) return 0;
     42 + return mi_usable_size(p);
     43 +}
     44 + 
     45 +mi_decl_nodiscard size_t mi_malloc_good_size(size_t size) mi_attr_noexcept {
     46 + return mi_good_size(size);
     47 +}
     48 + 
     49 +void mi_cfree(void* p) mi_attr_noexcept {
     50 + if (mi_is_in_heap_region(p)) {
     51 + mi_free(p);
     52 + }
     53 +}
     54 + 
     55 +int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept {
     56 + // Note: The spec dictates we should not modify `*p` on an error. (issue#27)
     57 + // <http://man7.org/linux/man-pages/man3/posix_memalign.3.html>
     58 + if (p == NULL) return EINVAL;
     59 + if (alignment % sizeof(void*) != 0) return EINVAL; // natural alignment
     60 + if (alignment==0 || !_mi_is_power_of_two(alignment)) return EINVAL; // not a power of 2
     61 + void* q = mi_malloc_aligned(size, alignment);
     62 + if (q==NULL && size != 0) return ENOMEM;
     63 + mi_assert_internal(((uintptr_t)q % alignment) == 0);
     64 + *p = q;
     65 + return 0;
     66 +}
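
A hedged usage sketch of `mi_posix_memalign` as implemented above: a zero return indicates success, `EINVAL`/`ENOMEM` indicate failure, and on failure `*p` is left untouched, matching the man page referenced in the comment:

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  void* p = NULL;
  // alignment must be a power of two and a multiple of sizeof(void*)
  int rc = mi_posix_memalign(&p, 64, 1000);
  if (rc == 0) {
    printf("p=%p (64-byte aligned)\n", p);
    mi_free(p);
  }
  void* q = (void*)0x1;                     // sentinel to show q is not modified
  rc = mi_posix_memalign(&q, 3, 16);        // 3 is not a power of two: EINVAL
  printf("rc=%d, q unchanged: %d\n", rc, q == (void*)0x1);
  return 0;
}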
     67 + 
     68 +mi_decl_nodiscard mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept {
     69 + void* p = mi_malloc_aligned(size, alignment);
     70 + mi_assert_internal(((uintptr_t)p % alignment) == 0);
     71 + return p;
     72 +}
     73 + 
     74 +mi_decl_nodiscard mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept {
     75 + return mi_memalign( _mi_os_page_size(), size );
     76 +}
     77 + 
     78 +mi_decl_nodiscard mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept {
     79 + size_t psize = _mi_os_page_size();
     80 + if (size >= SIZE_MAX - psize) return NULL; // overflow
     81 + size_t asize = _mi_align_up(size, psize);
     82 + return mi_malloc_aligned(asize, psize);
     83 +}
     84 + 
     85 +mi_decl_nodiscard mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept {
     86 + // C11 requires the size to be an integral multiple of the alignment, see <https://en.cppreference.com/w/c/memory/aligned_alloc>.
      87 + // unfortunately, quite a few programs pass a size that is not an integral multiple of the alignment, so we skip this check..
     88 + /* if mi_unlikely((size & (alignment - 1)) != 0) { // C11 requires alignment>0 && integral multiple, see <https://en.cppreference.com/w/c/memory/aligned_alloc>
     89 + #if MI_DEBUG > 0
     90 + _mi_error_message(EOVERFLOW, "(mi_)aligned_alloc requires the size to be an integral multiple of the alignment (size %zu, alignment %zu)\n", size, alignment);
     91 + #endif
     92 + return NULL;
     93 + }
     94 + */
     95 + // C11 also requires alignment to be a power-of-two (and > 0) which is checked in mi_malloc_aligned
     96 + void* p = mi_malloc_aligned(size, alignment);
     97 + mi_assert_internal(((uintptr_t)p % alignment) == 0);
     98 + return p;
     99 +}
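
As the comment notes, strict C11 requires the size to be an integral multiple of the alignment, but that check is deliberately skipped here; a small hedged sketch of the practical consequence:

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  void* a = mi_aligned_alloc(64, 128);  // conforming: 128 is a multiple of 64
  void* b = mi_aligned_alloc(64, 100);  // non-multiple size: rejected by strict C11, accepted here
  printf("a=%p b=%p\n", a, b);
  mi_free(b);
  mi_free(a);
  return 0;
}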
     100 + 
     101 +mi_decl_nodiscard void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept { // BSD
     102 + void* newp = mi_reallocn(p,count,size);
     103 + if (newp==NULL) { errno = ENOMEM; }
     104 + return newp;
     105 +}
     106 + 
     107 +mi_decl_nodiscard int mi_reallocarr( void* p, size_t count, size_t size ) mi_attr_noexcept { // NetBSD
     108 + mi_assert(p != NULL);
     109 + if (p == NULL) {
     110 + errno = EINVAL;
     111 + return EINVAL;
     112 + }
     113 + void** op = (void**)p;
     114 + void* newp = mi_reallocarray(*op, count, size);
     115 + if mi_unlikely(newp == NULL) { return errno; }
     116 + *op = newp;
     117 + return 0;
     118 +}
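
A hedged usage sketch of the NetBSD-style `mi_reallocarr` above: the array pointer is passed by address and is only overwritten on success; the return value is 0 on success or an errno value otherwise:

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  int* a = (int*)mi_calloc(4, sizeof(int));
  // grow from 4 to 8 elements; `a` is updated only if the reallocation succeeds
  int rc = mi_reallocarr(&a, 8, sizeof(int));
  printf("rc=%d a=%p\n", rc, (void*)a);
  mi_free(a);
  return 0;
}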
     119 + 
     120 +void* mi__expand(void* p, size_t newsize) mi_attr_noexcept { // Microsoft
     121 + void* res = mi_expand(p, newsize);
     122 + if (res == NULL) { errno = ENOMEM; }
     123 + return res;
     124 +}
     125 + 
     126 +mi_decl_nodiscard mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept {
     127 + if (s==NULL) return NULL;
     128 + size_t len;
     129 + for(len = 0; s[len] != 0; len++) { }
     130 + size_t size = (len+1)*sizeof(unsigned short);
     131 + unsigned short* p = (unsigned short*)mi_malloc(size);
     132 + if (p != NULL) {
     133 + _mi_memcpy(p,s,size);
     134 + }
     135 + return p;
     136 +}
     137 + 
     138 +mi_decl_nodiscard mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept {
     139 + return (unsigned char*)mi_strdup((const char*)s);
     140 +}
     141 + 
     142 +int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept {
     143 + if (buf==NULL || name==NULL) return EINVAL;
     144 + if (size != NULL) *size = 0;
      145 + char* p = getenv(name); // msvc warning 4996
     146 + if (p==NULL) {
     147 + *buf = NULL;
     148 + }
     149 + else {
     150 + *buf = mi_strdup(p);
     151 + if (*buf==NULL) return ENOMEM;
     152 + if (size != NULL) *size = _mi_strlen(p);
     153 + }
     154 + return 0;
     155 +}
     156 + 
     157 +int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) mi_attr_noexcept {
     158 + if (buf==NULL || name==NULL) return EINVAL;
     159 + if (size != NULL) *size = 0;
     160 +#if !defined(_WIN32) || (defined(WINAPI_FAMILY) && (WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP))
     161 + // not supported
     162 + *buf = NULL;
     163 + return EINVAL;
     164 +#else
     165 + unsigned short* p = (unsigned short*)_wgetenv((const wchar_t*)name); // msvc warning 4996
     166 + if (p==NULL) {
     167 + *buf = NULL;
     168 + }
     169 + else {
     170 + *buf = mi_wcsdup(p);
     171 + if (*buf==NULL) return ENOMEM;
     172 + if (size != NULL) *size = wcslen((const wchar_t*)p);
     173 + }
     174 + return 0;
     175 +#endif
     176 +}
     177 + 
     178 +mi_decl_nodiscard void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { // Microsoft
     179 + return mi_recalloc_aligned_at(p, newcount, size, alignment, offset);
     180 +}
     181 + 
     182 +mi_decl_nodiscard void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { // Microsoft
     183 + return mi_recalloc_aligned(p, newcount, size, alignment);
     184 +}
     185 + 