STRLCPY/graphql-engine

server: Vendored mimalloc v2.1.1

GitOrigin-RevId: 99d281549e073edf35f4df9332e44507300a7e40

Brandon Simmons committed with hasura-bot 2 months ago

9c2ea26c

1 parent 956a698f

Total 40 files Show one by one

Showing first 13 files as there are too many

■ ■ ■ ■ ■ ■

preload-mimalloc/mimalloc/LICENSE

1	+	MIT License
2	+
3	+	Copyright (c) 2018-2021 Microsoft Corporation, Daan Leijen
4	+
5	+	Permission is hereby granted, free of charge, to any person obtaining a copy
6	+	of this software and associated documentation files (the "Software"), to deal
7	+	in the Software without restriction, including without limitation the rights
8	+	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9	+	copies of the Software, and to permit persons to whom the Software is
10	+	furnished to do so, subject to the following conditions:
11	+
12	+	The above copyright notice and this permission notice shall be included in all
13	+	copies or substantial portions of the Software.
14	+
15	+	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16	+	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17	+	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18	+	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19	+	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20	+	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21	+	SOFTWARE.
22	+

■ ■ ■ ■ ■ ■

preload-mimalloc/mimalloc/include/mimalloc/atomic.h

1	+	/* ----------------------------------------------------------------------------
2	+	Copyright (c) 2018-2023 Microsoft Research, Daan Leijen
3	+	This is free software; you can redistribute it and/or modify it under the
4	+	terms of the MIT license. A copy of the license can be found in the file
5	+	"LICENSE" at the root of this distribution.
6	+	-----------------------------------------------------------------------------*/
7	+	#pragma once
8	+	#ifndef MIMALLOC_ATOMIC_H
9	+	#define MIMALLOC_ATOMIC_H
10	+
11	+	// --------------------------------------------------------------------------------------------
12	+	// Atomics
13	+	// We need to be portable between C, C++, and MSVC.
14	+	// We base the primitives on the C/C++ atomics and create a minimal wrapper for MSVC in C compilation mode.
15	+	// This is why we try to use only `uintptr_t` and `<type>*` as atomic types.
16	+	// To gain better insight in the range of used atomics, we use explicitly named memory order operations
17	+	// instead of passing the memory order as a parameter.
18	+	// -----------------------------------------------------------------------------------------------
19	+
20	+	#if defined(__cplusplus)
21	+	// Use C++ atomics
22	+	#include <atomic>
23	+	#define _Atomic(tp) std::atomic<tp>
24	+	#define mi_atomic(name) std::atomic_##name
25	+	#define mi_memory_order(name) std::memory_order_##name
26	+	#if !defined(ATOMIC_VAR_INIT) || (__cplusplus >= 202002L) // c++20, see issue #571
27	+	#define MI_ATOMIC_VAR_INIT(x) x
28	+	#else
29	+	#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
30	+	#endif
31	+	#elif defined(_MSC_VER)
32	+	// Use MSVC C wrapper for C11 atomics
33	+	#define _Atomic(tp) tp
34	+	#define MI_ATOMIC_VAR_INIT(x) x
35	+	#define mi_atomic(name) mi_atomic_##name
36	+	#define mi_memory_order(name) mi_memory_order_##name
37	+	#else
38	+	// Use C11 atomics
39	+	#include <stdatomic.h>
40	+	#define mi_atomic(name) atomic_##name
41	+	#define mi_memory_order(name) memory_order_##name
42	+	#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
43	+	#endif
44	+
45	+	// Various defines for all used memory orders in mimalloc
46	+	#define mi_atomic_cas_weak(p,expected,desired,mem_success,mem_fail) \
47	+	mi_atomic(compare_exchange_weak_explicit)(p,expected,desired,mem_success,mem_fail)
48	+
49	+	#define mi_atomic_cas_strong(p,expected,desired,mem_success,mem_fail) \
50	+	mi_atomic(compare_exchange_strong_explicit)(p,expected,desired,mem_success,mem_fail)
51	+
52	+	#define mi_atomic_load_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire))
53	+	#define mi_atomic_load_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
54	+	#define mi_atomic_store_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release))
55	+	#define mi_atomic_store_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))
56	+	#define mi_atomic_exchange_release(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(release))
57	+	#define mi_atomic_exchange_acq_rel(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel))
58	+	#define mi_atomic_cas_weak_release(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
59	+	#define mi_atomic_cas_weak_acq_rel(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
60	+	#define mi_atomic_cas_strong_release(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
61	+	#define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
62	+
63	+	#define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed))
64	+	#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed))
65	+	#define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel))
66	+	#define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel))
67	+	#define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel))
68	+	#define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel))
69	+
70	+	#define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,(uintptr_t)1)
71	+	#define mi_atomic_decrement_relaxed(p) mi_atomic_sub_relaxed(p,(uintptr_t)1)
72	+	#define mi_atomic_increment_acq_rel(p) mi_atomic_add_acq_rel(p,(uintptr_t)1)
73	+	#define mi_atomic_decrement_acq_rel(p) mi_atomic_sub_acq_rel(p,(uintptr_t)1)
74	+
75	+	static inline void mi_atomic_yield(void);
76	+	static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add);
77	+	static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub);
78	+
79	+
80	+	#if defined(__cplusplus) || !defined(_MSC_VER)
81	+
82	+	// In C++/C11 atomics we have polymorphic atomics so can use the typed `ptr` variants (where `tp` is the type of atomic value)
83	+	// We use these macros so we can provide a typed wrapper in MSVC in C compilation mode as well
84	+	#define mi_atomic_load_ptr_acquire(tp,p) mi_atomic_load_acquire(p)
85	+	#define mi_atomic_load_ptr_relaxed(tp,p) mi_atomic_load_relaxed(p)
86	+
87	+	// In C++ we need to add casts to help resolve templates if NULL is passed
88	+	#if defined(__cplusplus)
89	+	#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,(tp*)x)
90	+	#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,(tp*)x)
91	+	#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des)
92	+	#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des)
93	+	#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des)
94	+	#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x)
95	+	#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x)
96	+	#else
97	+	#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,x)
98	+	#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,x)
99	+	#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des)
100	+	#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des)
101	+	#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des)
102	+	#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x)
103	+	#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x)
104	+	#endif
105	+
106	+	// These are used by the statistics
107	+	static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) {
108	+	return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed));
109	+	}
110	+	static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { // atomically raise `*p` to `x` when `x` is larger
111	+	int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p);
112	+	while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, &current, x)) { /* nothing */ }; // a failed CAS refreshes `current`, so the bound is re-tested
113	+	}
114	+
115	+	// Used by timers
116	+	#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire))
117	+	#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
118	+	#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release))
119	+	#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))
120	+
121	+
122	+
123	+	#elif defined(_MSC_VER)
124	+
125	+	// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics.
126	+	#define WIN32_LEAN_AND_MEAN
127	+	#include <windows.h>
128	+	#include <intrin.h>
129	+	#ifdef _WIN64
130	+	typedef LONG64 msc_intptr_t;
131	+	#define MI_64(f) f##64
132	+	#else
133	+	typedef LONG msc_intptr_t;
134	+	#define MI_64(f) f
135	+	#endif
136	+
137	+	typedef enum mi_memory_order_e {
138	+	mi_memory_order_relaxed,
139	+	mi_memory_order_consume,
140	+	mi_memory_order_acquire,
141	+	mi_memory_order_release,
142	+	mi_memory_order_acq_rel,
143	+	mi_memory_order_seq_cst
144	+	} mi_memory_order;
145	+
146	+	static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)*p, uintptr_t add, mi_memory_order mo) {
147	+	(void)(mo);
148	+	return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
149	+	}
150	+	static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintptr_t sub, mi_memory_order mo) {
151	+	(void)(mo);
152	+	return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub));
153	+	}
154	+	static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
155	+	(void)(mo);
156	+	return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
157	+	}
158	+	static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
159	+	(void)(mo);
160	+	return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
161	+	}
162	+	static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
163	+	(void)(mo1); (void)(mo2); // the requested memory orders are ignored by this wrapper
164	+	uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected));
165	+	if (read == *expected) {
166	+	return true;
167	+	}
168	+	else {
169	+	*expected = read; // on failure hand back the value actually found in `*p`
170	+	return false;
171	+	}
172	+	}
173	+	static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
174	+	return mi_atomic_compare_exchange_strong_explicit(p, expected, desired, mo1, mo2); // the weak form simply forwards to the strong one
175	+	}
176	+	static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)*p, uintptr_t exchange, mi_memory_order mo) {
177	+	(void)(mo);
178	+	return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
179	+	}
180	+	static inline void mi_atomic_thread_fence(mi_memory_order mo) {
181	+	(void)(mo);
182	+	_Atomic(uintptr_t) x = 0;
183	+	mi_atomic_exchange_explicit(&x, 1, mo);
184	+	}
185	+	static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) {
186	+	(void)(mo);
187	+	#if defined(_M_IX86) || defined(_M_X64)
188	+	return *p; // x86/x64 targets: a plain read is used
189	+	#else
190	+	uintptr_t x = *p;
191	+	if (mo > mi_memory_order_relaxed) {
192	+	while (!mi_atomic_compare_exchange_weak_explicit(p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ }; // CAS of `x` with itself until it succeeds
193	+	}
194	+	return x;
195	+	#endif
196	+	}
197	+	static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
198	+	(void)(mo);
199	+	#if defined(_M_IX86) || defined(_M_X64)
200	+	*p = x; // x86/x64 targets: a plain write is used
201	+	#else
202	+	mi_atomic_exchange_explicit(p, x, mo); // other targets: store via an interlocked exchange, discarding the old value
203	+	#endif
204	+	}
205	+	static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)*p, mi_memory_order mo) {
206	+	(void)(mo);
207	+	#if defined(_M_X64)
208	+	return *p;
209	+	#else
210	+	int64_t old = *p;
211	+	int64_t x = old;
212	+	while ((old = InterlockedCompareExchange64(p, x, old)) != x) {
213	+	x = old;
214	+	}
215	+	return x;
216	+	#endif
217	+	}
218	+	static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)*p, int64_t x, mi_memory_order mo) {
219	+	(void)(mo);
220	+	#if defined(x_M_IX86) || defined(_M_X64) // NOTE(review): `x_M_IX86` is not the MSVC `_M_IX86` macro, so 32-bit x86 takes the interlocked path; presumably deliberate, confirm
221	+	*p = x;
222	+	#else
223	+	InterlockedExchange64(p, x);
224	+	#endif
225	+	}
226	+
227	+	// These are used by the statistics
228	+	static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int64_t add) {
229	+	#ifdef _WIN64
230	+	return (int64_t)mi_atomic_addi((int64_t*)p, add);
231	+	#else
232	+	int64_t current;
233	+	int64_t sum;
234	+	do {
235	+	current = *p;
236	+	sum = current + add;
237	+	} while (_InterlockedCompareExchange64(p, sum, current) != current);
238	+	return current;
239	+	#endif
240	+	}
241	+	static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) {
242	+	int64_t current;
243	+	do {
244	+	current = *p;
245	+	} while (current < x && _InterlockedCompareExchange64(p, x, current) != current);
246	+	}
247	+
248	+	// The pointer macros cast to `uintptr_t`; results are cast back to `tp*`.
249	+	#define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p))
250	+	#define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p))
251	+	#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release((_Atomic(uintptr_t)*)(p),(uintptr_t)(x))
252	+	#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)(x))
253	+	#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
254	+	#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
255	+	#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
256	+	#define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
257	+	#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
258	+
259	+	#define mi_atomic_loadi64_acquire(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(acquire))
260	+	#define mi_atomic_loadi64_relaxed(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(relaxed))
261	+	#define mi_atomic_storei64_release(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(release))
262	+	#define mi_atomic_storei64_relaxed(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(relaxed))
263	+
264	+
265	+	#endif
266	+
267	+
268	+	// Atomically add a signed value; returns the previous value.
269	+	static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add) {
270	+	return (intptr_t)mi_atomic_add_acq_rel((_Atomic(uintptr_t)*)p, (uintptr_t)add);
271	+	}
272	+
273	+	// Atomically subtract a signed value; returns the previous value.
274	+	static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) {
275	+	return (intptr_t)mi_atomic_addi(p, -sub);
276	+	}
277	+
278	+	typedef _Atomic(uintptr_t) mi_atomic_once_t;
279	+
280	+	// Returns true only on the first invocation
281	+	static inline bool mi_atomic_once( mi_atomic_once_t* once ) {
282	+	if (mi_atomic_load_relaxed(once) != 0) return false; // quick test
283	+	uintptr_t expected = 0;
284	+	return mi_atomic_cas_strong_acq_rel(once, &expected, 1); // try to set to 1
285	+	}
286	+
287	+	// Yield
288	+	#if defined(__cplusplus)
289	+	#include <thread>
290	+	static inline void mi_atomic_yield(void) {
291	+	std::this_thread::yield();
292	+	}
293	+	#elif defined(_WIN32)
294	+	#define WIN32_LEAN_AND_MEAN
295	+	#include <windows.h>
296	+	static inline void mi_atomic_yield(void) {
297	+	YieldProcessor();
298	+	}
299	+	#elif defined(__SSE2__)
300	+	#include <emmintrin.h>
301	+	static inline void mi_atomic_yield(void) {
302	+	_mm_pause();
303	+	}
304	+	#elif (defined(__GNUC__) || defined(__clang__)) && \
305	+	(defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \
306	+	defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
307	+	#if defined(__x86_64__) || defined(__i386__)
308	+	static inline void mi_atomic_yield(void) {
309	+	__asm__ volatile ("pause" ::: "memory");
310	+	}
311	+	#elif defined(__aarch64__)
312	+	static inline void mi_atomic_yield(void) {
313	+	__asm__ volatile("wfe");
314	+	}
315	+	#elif (defined(__arm__) && __ARM_ARCH__ >= 7)
316	+	static inline void mi_atomic_yield(void) {
317	+	__asm__ volatile("yield" ::: "memory");
318	+	}
319	+	#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
320	+	static inline void mi_atomic_yield(void) {
321	+	__asm__ __volatile__ ("or 27,27,27" ::: "memory");
322	+	}
323	+	#elif defined(__armel__) || defined(__ARMEL__)
324	+	static inline void mi_atomic_yield(void) {
325	+	__asm__ volatile ("nop" ::: "memory");
326	+	}
327	+	#endif
328	+	#elif defined(__sun)
329	+	// Fallback for other archs
330	+	#include <synch.h>
331	+	static inline void mi_atomic_yield(void) {
332	+	smt_pause();
333	+	}
334	+	#elif defined(__wasi__)
335	+	#include <sched.h>
336	+	static inline void mi_atomic_yield(void) {
337	+	sched_yield();
338	+	}
339	+	#else
340	+	#include <unistd.h>
341	+	static inline void mi_atomic_yield(void) {
342	+	sleep(0);
343	+	}
344	+	#endif
345	+
346	+
347	+	#endif // __MIMALLOC_ATOMIC_H
348	+

■ ■ ■ ■ ■ ■

preload-mimalloc/mimalloc/include/mimalloc/internal.h

1	+	/* ----------------------------------------------------------------------------
2	+	Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
3	+	This is free software; you can redistribute it and/or modify it under the
4	+	terms of the MIT license. A copy of the license can be found in the file
5	+	"LICENSE" at the root of this distribution.
6	+	-----------------------------------------------------------------------------*/
7	+	#pragma once
8	+	#ifndef MIMALLOC_INTERNAL_H
9	+	#define MIMALLOC_INTERNAL_H
10	+
11	+
12	+	// --------------------------------------------------------------------------
13	+	// This file contains the internal APIs of mimalloc and various utility
14	+	// functions and macros.
15	+	// --------------------------------------------------------------------------
16	+
17	+	#include "mimalloc/types.h"
18	+	#include "mimalloc/track.h"
19	+
20	+	#if (MI_DEBUG>0)
21	+	#define mi_trace_message(...) _mi_trace_message(__VA_ARGS__)
22	+	#else
23	+	#define mi_trace_message(...)
24	+	#endif
25	+
26	+	#define MI_CACHE_LINE 64
27	+	#if defined(_MSC_VER)
28	+	#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths)
29	+	#pragma warning(disable:26812) // unscoped enum warning
30	+	#define mi_decl_noinline __declspec(noinline)
31	+	#define mi_decl_thread __declspec(thread)
32	+	#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE))
33	+	#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc
34	+	#define mi_decl_noinline __attribute__((noinline))
35	+	#define mi_decl_thread __thread
36	+	#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE)))
37	+	#else
38	+	#define mi_decl_noinline
39	+	#define mi_decl_thread __thread // hope for the best :-)
40	+	#define mi_decl_cache_align
41	+	#endif
42	+
43	+	#if defined(__EMSCRIPTEN__) && !defined(__wasi__)
44	+	#define __wasi__
45	+	#endif
46	+
47	+	#if defined(__cplusplus)
48	+	#define mi_decl_externc extern "C"
49	+	#else
50	+	#define mi_decl_externc
51	+	#endif
52	+
53	+	// pthreads
54	+	#if !defined(_WIN32) && !defined(__wasi__)
55	+	#define MI_USE_PTHREADS
56	+	#include <pthread.h>
57	+	#endif
58	+
59	+	// "options.c"
60	+	void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message);
61	+	void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...);
62	+	void _mi_warning_message(const char* fmt, ...);
63	+	void _mi_verbose_message(const char* fmt, ...);
64	+	void _mi_trace_message(const char* fmt, ...);
65	+	void _mi_options_init(void);
66	+	void _mi_error_message(int err, const char* fmt, ...);
67	+
68	+	// random.c
69	+	void _mi_random_init(mi_random_ctx_t* ctx);
70	+	void _mi_random_init_weak(mi_random_ctx_t* ctx);
71	+	void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx);
72	+	void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
73	+	uintptr_t _mi_random_next(mi_random_ctx_t* ctx);
74	+	uintptr_t _mi_heap_random_next(mi_heap_t* heap);
75	+	uintptr_t _mi_os_random_weak(uintptr_t extra_seed);
76	+	static inline uintptr_t _mi_random_shuffle(uintptr_t x);
77	+
78	+	// init.c
79	+	extern mi_decl_cache_align mi_stats_t _mi_stats_main;
80	+	extern mi_decl_cache_align const mi_page_t _mi_page_empty;
81	+	bool _mi_is_main_thread(void);
82	+	size_t _mi_current_thread_count(void);
83	+	bool _mi_preloading(void); // true while the C runtime is not ready
84	+	mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
85	+	mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
86	+	void _mi_thread_done(mi_heap_t* heap);
87	+
88	+	// os.c
89	+	void _mi_os_init(void); // called from process init
90	+	void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data
91	+	void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data
92	+	size_t _mi_os_page_size(void);
93	+	size_t _mi_os_good_alloc_size(size_t size);
94	+	bool _mi_os_has_overcommit(void);
95	+
96	+	bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats);
97	+	bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
98	+	bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
99	+	bool _mi_os_protect(void* addr, size_t size);
100	+	bool _mi_os_unprotect(void* addr, size_t size);
101	+
102	+	void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats);
103	+	void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats);
104	+	void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats);
105	+	void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size);
106	+	bool _mi_os_use_large_page(size_t size, size_t alignment);
107	+	size_t _mi_os_large_page_size(void);
108	+
109	+	void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);
110	+	void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize);
111	+	void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats);
112	+
113	+	// arena.c
114	+	mi_arena_id_t _mi_arena_id_none(void);
115	+	void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats);
116	+	void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
117	+	void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
118	+	bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id);
119	+	bool _mi_arena_is_os_allocated(size_t arena_memid);
120	+
121	+	// "segment-cache.c"
122	+	void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
123	+	bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld);
124	+	void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld);
125	+	void _mi_segment_cache_free_all(mi_os_tld_t* tld);
126	+	void _mi_segment_map_allocated_at(const mi_segment_t* segment);
127	+	void _mi_segment_map_freed_at(const mi_segment_t* segment);
128	+
129	+	// "segment.c"
130	+	mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
131	+	void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
132	+	void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
133	+	bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
134	+	void _mi_segment_thread_collect(mi_segments_tld_t* tld);
135	+
136	+	#if MI_HUGE_PAGE_ABANDON
137	+	void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
138	+	#else
139	+	void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
140	+	#endif
141	+
142	+	uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page
143	+	void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
144	+	void _mi_abandoned_await_readers(void);
145	+	void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld);
146	+
147	+	// "page.c"
148	+	void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc;
149	+
150	+	void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks
151	+	void _mi_page_unfull(mi_page_t* page);
152	+	void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page
153	+	void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread...
154	+	void _mi_heap_delayed_free_all(mi_heap_t* heap);
155	+	bool _mi_heap_delayed_free_partial(mi_heap_t* heap);
156	+	void _mi_heap_collect_retired(mi_heap_t* heap, bool force);
157	+
158	+	void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
159	+	bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
160	+	size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append);
161	+	void _mi_deferred_free(mi_heap_t* heap, bool force);
162	+
163	+	void _mi_page_free_collect(mi_page_t* page,bool force);
164	+	void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments
165	+
166	+	size_t _mi_bin_size(uint8_t bin); // for stats
167	+	uint8_t _mi_bin(size_t size); // for stats
168	+
169	+	// "heap.c"
170	+	void _mi_heap_destroy_pages(mi_heap_t* heap);
171	+	void _mi_heap_collect_abandon(mi_heap_t* heap);
172	+	void _mi_heap_set_default_direct(mi_heap_t* heap);
173	+	bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid);
174	+	void _mi_heap_destroy_all(void);
175	+
176	+	// "stats.c"
177	+	void _mi_stats_done(mi_stats_t* stats);
178	+	mi_msecs_t _mi_clock_now(void);
179	+	mi_msecs_t _mi_clock_end(mi_msecs_t start);
180	+	mi_msecs_t _mi_clock_start(void);
181	+
182	+	// "alloc.c"
183	+	void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic`
184	+	void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
185	+	void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned`
186	+	void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
187	+	mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
188	+	bool _mi_free_delayed_block(mi_block_t* block);
189	+	void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration
190	+	void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size);
191	+
192	+	// option.c, c primitives
193	+	char _mi_toupper(char c);
194	+	int _mi_strnicmp(const char* s, const char* t, size_t n);
195	+	void _mi_strlcpy(char* dest, const char* src, size_t dest_size);
196	+	void _mi_strlcat(char* dest, const char* src, size_t dest_size);
197	+	size_t _mi_strlen(const char* s);
198	+	size_t _mi_strnlen(const char* s, size_t max_len);
199	+
200	+
201	+	#if MI_DEBUG>1
202	+	bool _mi_page_is_valid(mi_page_t* page);
203	+	#endif
204	+
205	+
206	+	// ------------------------------------------------------
207	+	// Branches
208	+	// ------------------------------------------------------
209	+
210	+	#if defined(__GNUC__) || defined(__clang__)
211	+	#define mi_unlikely(x) (__builtin_expect(!!(x),false))
212	+	#define mi_likely(x) (__builtin_expect(!!(x),true))
213	+	#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L)
214	+	#define mi_unlikely(x) (x) [[unlikely]]
215	+	#define mi_likely(x) (x) [[likely]]
216	+	#else
217	+	#define mi_unlikely(x) (x)
218	+	#define mi_likely(x) (x)
219	+	#endif
220	+
221	+	#ifndef __has_builtin
222	+	#define __has_builtin(x) 0
223	+	#endif
224	+
225	+
226	+	/* -----------------------------------------------------------
227	+	Error codes passed to `_mi_fatal_error`
228	+	All are recoverable but EFAULT is a serious error and aborts by default in secure mode.
229	+	For portability define undefined error codes using common Unix codes:
230	+	<https://www-numi.fnal.gov/offline_software/srt_public_context/WebDocs/Errors/unix_system_errors.html>
231	+	----------------------------------------------------------- */
232	+	#include <errno.h>
233	+	#ifndef EAGAIN // double free
234	+	#define EAGAIN (11)
235	+	#endif
236	+	#ifndef ENOMEM // out of memory
237	+	#define ENOMEM (12)
238	+	#endif
239	+	#ifndef EFAULT // corrupted free-list or meta-data
240	+	#define EFAULT (14)
241	+	#endif
242	+	#ifndef EINVAL // trying to free an invalid pointer
243	+	#define EINVAL (22)
244	+	#endif
245	+	#ifndef EOVERFLOW // count*size overflow
246	+	#define EOVERFLOW (75)
247	+	#endif
248	+
249	+
250	+	/* -----------------------------------------------------------
251	+	Inlined definitions
252	+	----------------------------------------------------------- */
253	+	#define MI_UNUSED(x) (void)(x)
254	+	#if (MI_DEBUG>0)
255	+	#define MI_UNUSED_RELEASE(x)
256	+	#else
257	+	#define MI_UNUSED_RELEASE(x) MI_UNUSED(x)
258	+	#endif
259	+
260	+	#define MI_INIT4(x) x(),x(),x(),x()
261	+	#define MI_INIT8(x) MI_INIT4(x),MI_INIT4(x)
262	+	#define MI_INIT16(x) MI_INIT8(x),MI_INIT8(x)
263	+	#define MI_INIT32(x) MI_INIT16(x),MI_INIT16(x)
264	+	#define MI_INIT64(x) MI_INIT32(x),MI_INIT32(x)
265	+	#define MI_INIT128(x) MI_INIT64(x),MI_INIT64(x)
266	+	#define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x)
267	+
268	+
269	+	// Is `x` a power of two? (0 is considered a power of two)
270	+	static inline bool _mi_is_power_of_two(uintptr_t x) {
271	+	return ((x & (x - 1)) == 0);
272	+	}
273	+
// Is the address `p` a multiple of `alignment`?
// `alignment` must be non-zero (checked with `mi_assert_internal`); it does not
// need to be a power of two since a plain modulo is used.
// The pointer is never dereferenced, so it is accepted as pointer-to-const.
static inline bool _mi_is_aligned(const void* p, size_t alignment) {
  mi_assert_internal(alignment != 0);
  return (((uintptr_t)p % alignment) == 0);
}
279	+
// Round `sz` up to the next multiple of `alignment` (which must be non-zero).
static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
  mi_assert_internal(alignment != 0);
  const uintptr_t low_bits = alignment - 1;
  const bool pow2 = ((alignment & low_bits) == 0);
  // powers of two are rounded with a bit mask; other alignments fall back to division
  return (pow2 ? ((sz + low_bits) & ~low_bits)
               : (((sz + low_bits) / alignment) * alignment));
}
291	+
// Round `sz` down to the previous multiple of `alignment` (which must be non-zero).
static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) {
  mi_assert_internal(alignment != 0);
  const uintptr_t low_bits = alignment - 1;
  const bool pow2 = ((alignment & low_bits) == 0);
  // powers of two just clear the low bits; other alignments fall back to division
  return (pow2 ? (sz & ~low_bits)
               : ((sz / alignment) * alignment));
}
303	+
// Ceiling division: the result `r` satisfies `size <= r*divider < size + divider`.
// A zero `divider` trips the internal assertion; if execution continues anyway,
// `size` is returned unchanged instead of dividing by zero.
static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
  mi_assert_internal(divider != 0);
  if (divider == 0) { return size; }
  return ((size + divider - 1) / divider);
}
309	+
// Is memory zero initialized?
// Returns `true` when each of the `size` bytes starting at `p` is zero
// (vacuously `true` for `size == 0`). The buffer is only read, so it is
// taken as pointer-to-const; callers holding a `void*` are unaffected.
static inline bool mi_mem_is_zero(const void* p, size_t size) {
  const uint8_t* const bytes = (const uint8_t*)p;
  for (size_t i = 0; i < size; i++) {
    if (bytes[i] != 0) return false;
  }
  return true;
}
317	+
318	+
// Align a byte size to a size in _machine words_ (rounding up),
// i.e. byte size == `wsize*sizeof(void*)`.
// `size` must leave room for the rounding term (asserted), otherwise the addition would wrap.
static inline size_t _mi_wsize_from_size(size_t size) {
  mi_assert_internal(size <= SIZE_MAX - sizeof(uintptr_t));
  return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
}
325	+
// Overflow detecting multiply: stores `count*size` (wrapped on overflow) in `*total`
// and returns `true` if the mathematical product does not fit in a `size_t`.
#if __has_builtin(__builtin_umul_overflow) || (defined(__GNUC__) && (__GNUC__ >= 5))
#include <limits.h>      // UINT_MAX, ULONG_MAX
#if defined(_CLOCK_T)    // for Illumos
#undef _CLOCK_T
#endif
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
  // pick the builtin whose operand type has the same width as `size_t`
  #if (SIZE_MAX == ULONG_MAX)
    return __builtin_umull_overflow(count, size, (unsigned long *)total);
  #elif (SIZE_MAX == UINT_MAX)
    return __builtin_umul_overflow(count, size, (unsigned int *)total);
  #else
    return __builtin_umulll_overflow(count, size, (unsigned long long *)total);
  #endif
}
#else /* __builtin_umul_overflow is unavailable */
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
  #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t)))  // sqrt(SIZE_MAX)
  *total = count * size;
  // If both factors are below sqrt(SIZE_MAX) the product cannot overflow, which
  // skips the division in the common case.
  // note: gcc/clang optimize this to directly check the overflow flag
  return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count);
}
#endif
349	+
350	+	// Safe multiply `count*size` into `total`; return `true` on overflow.
351	+	static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* total) {
352	+	if (count==1) { // quick check for the case where count is one (common for C++ allocators)
353	+	*total = size;
354	+	return false;
355	+	}
356	+	else if mi_unlikely(mi_mul_overflow(count, size, total)) {
357	+	#if MI_DEBUG > 0
358	+	_mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size);
359	+	#endif
360	+	*total = SIZE_MAX;
361	+	return true;
362	+	}
363	+	else return false;
364	+	}
365	+
366	+
367	+	/*----------------------------------------------------------------------------------------
368	+	Heap functions
369	+	------------------------------------------------------------------------------------------- */
370	+
371	+	extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap
372	+
373	+	static inline bool mi_heap_is_backing(const mi_heap_t* heap) {
374	+	return (heap->tld->heap_backing == heap);
375	+	}
376	+
377	+	static inline bool mi_heap_is_initialized(mi_heap_t* heap) {
378	+	mi_assert_internal(heap != NULL);
379	+	return (heap != &_mi_heap_empty);
380	+	}
381	+
382	+	static inline uintptr_t _mi_ptr_cookie(const void* p) {
383	+	extern mi_heap_t _mi_heap_main;
384	+	mi_assert_internal(_mi_heap_main.cookie != 0);
385	+	return ((uintptr_t)p ^ _mi_heap_main.cookie);
386	+	}
387	+
388	+	/* -----------------------------------------------------------
389	+	Pages
390	+	----------------------------------------------------------- */
391	+
392	+	static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) {
393	+	mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE));
394	+	const size_t idx = _mi_wsize_from_size(size);
395	+	mi_assert_internal(idx < MI_PAGES_DIRECT);
396	+	return heap->pages_free_direct[idx];
397	+	}
398	+
399	+	// Segment that contains the pointer
400	+	// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
401	+	// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
402	+	// therefore we align one byte before `p`.
403	+	static inline mi_segment_t* _mi_ptr_segment(const void* p) {
404	+	mi_assert_internal(p != NULL);
405	+	return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK);
406	+	}
407	+
408	+	static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) {
409	+	mi_assert_internal(s->slice_offset== 0 && s->slice_count > 0);
410	+	return (mi_page_t*)(s);
411	+	}
412	+
413	+	static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) {
414	+	mi_assert_internal(p->slice_offset== 0 && p->slice_count > 0);
415	+	return (mi_slice_t*)(p);
416	+	}
417	+
418	+	// Segment belonging to a page
419	+	static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
420	+	mi_segment_t* segment = _mi_ptr_segment(page);
421	+	mi_assert_internal(segment == NULL \|\| ((mi_slice_t)page >= segment->slices && (mi_slice_t)page < segment->slices + segment->slice_entries));
422	+	return segment;
423	+	}
424	+
425	+	static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) {
426	+	mi_slice_t* start = (mi_slice_t)((uint8_t)slice - slice->slice_offset);
427	+	mi_assert_internal(start >= _mi_ptr_segment(slice)->slices);
428	+	mi_assert_internal(start->slice_offset == 0);
429	+	mi_assert_internal(start + start->slice_count > slice);
430	+	return start;
431	+	}
432	+
433	+	// Get the page containing the pointer (performance critical as it is called in mi_free)
434	+	static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
435	+	mi_assert_internal(p > (void*)segment);
436	+	ptrdiff_t diff = (uint8_t)p - (uint8_t)segment;
437	+	mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE);
438	+	size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT;
439	+	mi_assert_internal(idx <= segment->slice_entries);
440	+	mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx];
441	+	mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data
442	+	mi_assert_internal(slice->slice_offset == 0);
443	+	mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_entries);
444	+	return mi_slice_to_page(slice);
445	+	}
446	+
447	+	// Quick page start for initialized pages
448	+	static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) {
449	+	return _mi_segment_page_start(segment, page, page_size);
450	+	}
451	+
452	+	// Get the page containing the pointer
453	+	static inline mi_page_t* _mi_ptr_page(void* p) {
454	+	return _mi_segment_page_of(_mi_ptr_segment(p), p);
455	+	}
456	+
457	+	// Get the block size of a page (special case for huge objects)
458	+	static inline size_t mi_page_block_size(const mi_page_t* page) {
459	+	const size_t bsize = page->xblock_size;
460	+	mi_assert_internal(bsize > 0);
461	+	if mi_likely(bsize < MI_HUGE_BLOCK_SIZE) {
462	+	return bsize;
463	+	}
464	+	else {
465	+	size_t psize;
466	+	_mi_segment_page_start(_mi_page_segment(page), page, &psize);
467	+	return psize;
468	+	}
469	+	}
470	+
471	+	static inline bool mi_page_is_huge(const mi_page_t* page) {
472	+	return (_mi_page_segment(page)->kind == MI_SEGMENT_HUGE);
473	+	}
474	+
475	+	// Get the usable block size of a page without fixed padding.
476	+	// This may still include internal padding due to alignment and rounding up size classes.
477	+	static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
478	+	return mi_page_block_size(page) - MI_PADDING_SIZE;
479	+	}
480	+
481	+	// size of a segment
482	+	static inline size_t mi_segment_size(mi_segment_t* segment) {
483	+	return segment->segment_slices * MI_SEGMENT_SLICE_SIZE;
484	+	}
485	+
486	+	static inline uint8_t* mi_segment_end(mi_segment_t* segment) {
487	+	return (uint8_t*)segment + mi_segment_size(segment);
488	+	}
489	+
490	+	// Thread free access
491	+	static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
492	+	return (mi_block_t)(mi_atomic_load_relaxed(&((mi_page_t)page)->xthread_free) & ~3);
493	+	}
494	+
495	+	static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) {
496	+	return (mi_delayed_t)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & 3);
497	+	}
498	+
499	+	// Heap access
500	+	static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
501	+	return (mi_heap_t)(mi_atomic_load_relaxed(&((mi_page_t)page)->xheap));
502	+	}
503	+
504	+	static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
505	+	mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
506	+	mi_atomic_store_release(&page->xheap,(uintptr_t)heap);
507	+	}
508	+
509	+	// Thread free flag helpers
510	+	static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
511	+	return (mi_block_t*)(tf & ~0x03);
512	+	}
513	+	static inline mi_delayed_t mi_tf_delayed(mi_thread_free_t tf) {
514	+	return (mi_delayed_t)(tf & 0x03);
515	+	}
516	+	static inline mi_thread_free_t mi_tf_make(mi_block_t* block, mi_delayed_t delayed) {
517	+	return (mi_thread_free_t)((uintptr_t)block \| (uintptr_t)delayed);
518	+	}
519	+	static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) {
520	+	return mi_tf_make(mi_tf_block(tf),delayed);
521	+	}
522	+	static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) {
523	+	return mi_tf_make(block, mi_tf_delayed(tf));
524	+	}
525	+
526	+	// are all blocks in a page freed?
527	+	// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`.
528	+	static inline bool mi_page_all_free(const mi_page_t* page) {
529	+	mi_assert_internal(page != NULL);
530	+	return (page->used == 0);
531	+	}
532	+
533	+	// are there any available blocks?
534	+	static inline bool mi_page_has_any_available(const mi_page_t* page) {
535	+	mi_assert_internal(page != NULL && page->reserved > 0);
536	+	return (page->used < page->reserved \|\| (mi_page_thread_free(page) != NULL));
537	+	}
538	+
539	+	// are there immediately available blocks, i.e. blocks available on the free list.
540	+	static inline bool mi_page_immediate_available(const mi_page_t* page) {
541	+	mi_assert_internal(page != NULL);
542	+	return (page->free != NULL);
543	+	}
544	+
545	+	// is more than 7/8th of a page in use?
546	+	static inline bool mi_page_mostly_used(const mi_page_t* page) {
547	+	if (page==NULL) return true;
548	+	uint16_t frac = page->reserved / 8U;
549	+	return (page->reserved - page->used <= frac);
550	+	}
551	+
552	+	static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) {
553	+	return &((mi_heap_t*)heap)->pages[_mi_bin(size)];
554	+	}
555	+
556	+
557	+
558	+	//-----------------------------------------------------------
559	+	// Page flags
560	+	//-----------------------------------------------------------
561	+	static inline bool mi_page_is_in_full(const mi_page_t* page) {
562	+	return page->flags.x.in_full;
563	+	}
564	+
565	+	static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
566	+	page->flags.x.in_full = in_full;
567	+	}
568	+
569	+	static inline bool mi_page_has_aligned(const mi_page_t* page) {
570	+	return page->flags.x.has_aligned;
571	+	}
572	+
573	+	static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
574	+	page->flags.x.has_aligned = has_aligned;
575	+	}
576	+
577	+
578	+	/* -------------------------------------------------------------------
579	+	Encoding/Decoding the free list next pointers
580	+
581	+	This is to protect against buffer overflow exploits where the
582	+	free list is mutated. Many hardened allocators xor the next pointer `p`
583	+	with a secret key `k1`, as `p^k1`. This prevents overwriting with known
584	+	values but might be still too weak: if the attacker can guess
585	+	the pointer `p` this can reveal `k1` (since `p^k1^p == k1`).
586	+	Moreover, if multiple blocks can be read as well, the attacker can
587	+	xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot
588	+	about the pointers (and subsequently `k1`).
589	+
590	+	Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<<k1)+k1`.
591	+	Since these operations are not associative, the above approaches do not
592	+	work so well any more even if the `p` can be guesstimated. For example,
593	+	for the read case we can subtract two entries to discard the `+k1` term,
594	+	but that leads to `((p1^k2)<<<k1) - ((p2^k2)<<<k1)` at best.
595	+	We include the left-rotation since xor and addition are otherwise linear
596	+	in the lowest bit. Finally, both keys are unique per page which reduces
597	+	the re-use of keys by a large factor.
598	+
599	+	We also pass a separate `null` value to be used as `NULL` or otherwise
600	+	`(k2<<<k1)+k1` would appear (too) often as a sentinel value.
601	+	------------------------------------------------------------------- */
602	+
603	+	static inline bool mi_is_in_same_segment(const void* p, const void* q) {
604	+	return (_mi_ptr_segment(p) == _mi_ptr_segment(q));
605	+	}
606	+
607	+	static inline bool mi_is_in_same_page(const void* p, const void* q) {
608	+	mi_segment_t* segment = _mi_ptr_segment(p);
609	+	if (_mi_ptr_segment(q) != segment) return false;
610	+	// assume q may be invalid // return (_mi_segment_page_of(segment, p) == _mi_segment_page_of(segment, q));
611	+	mi_page_t* page = _mi_segment_page_of(segment, p);
612	+	size_t psize;
613	+	uint8_t* start = _mi_segment_page_start(segment, page, &psize);
614	+	return (start <= (uint8_t)q && (uint8_t)q < start + psize);
615	+	}
616	+
617	+	static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) {
618	+	shift %= MI_INTPTR_BITS;
619	+	return (shift==0 ? x : ((x << shift) \| (x >> (MI_INTPTR_BITS - shift))));
620	+	}
621	+	static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) {
622	+	shift %= MI_INTPTR_BITS;
623	+	return (shift==0 ? x : ((x >> shift) \| (x << (MI_INTPTR_BITS - shift))));
624	+	}
625	+
626	+	static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) {
627	+	void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]);
628	+	return (p==null ? NULL : p);
629	+	}
630	+
631	+	static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const uintptr_t* keys) {
632	+	uintptr_t x = (uintptr_t)(p==NULL ? null : p);
633	+	return mi_rotl(x ^ keys[1], keys[0]) + keys[0];
634	+	}
635	+
636	+	static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) {
637	+	mi_track_mem_defined(block,sizeof(mi_block_t));
638	+	mi_block_t* next;
639	+	#ifdef MI_ENCODE_FREELIST
640	+	next = (mi_block_t*)mi_ptr_decode(null, block->next, keys);
641	+	#else
642	+	MI_UNUSED(keys); MI_UNUSED(null);
643	+	next = (mi_block_t*)block->next;
644	+	#endif
645	+	mi_track_mem_noaccess(block,sizeof(mi_block_t));
646	+	return next;
647	+	}
648	+
649	+	static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) {
650	+	mi_track_mem_undefined(block,sizeof(mi_block_t));
651	+	#ifdef MI_ENCODE_FREELIST
652	+	block->next = mi_ptr_encode(null, next, keys);
653	+	#else
654	+	MI_UNUSED(keys); MI_UNUSED(null);
655	+	block->next = (mi_encoded_t)next;
656	+	#endif
657	+	mi_track_mem_noaccess(block,sizeof(mi_block_t));
658	+	}
659	+
660	+	static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) {
661	+	#ifdef MI_ENCODE_FREELIST
662	+	mi_block_t* next = mi_block_nextx(page,block,page->keys);
663	+	// check for free list corruption: is `next` at least in the same page?
664	+	// TODO: check if `next` is `page->block_size` aligned?
665	+	if mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next)) {
666	+	_mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next);
667	+	next = NULL;
668	+	}
669	+	return next;
670	+	#else
671	+	MI_UNUSED(page);
672	+	return mi_block_nextx(page,block,NULL);
673	+	#endif
674	+	}
675	+
676	+	static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) {
677	+	#ifdef MI_ENCODE_FREELIST
678	+	mi_block_set_nextx(page,block,next, page->keys);
679	+	#else
680	+	MI_UNUSED(page);
681	+	mi_block_set_nextx(page,block,next,NULL);
682	+	#endif
683	+	}
684	+
685	+
686	+	// -------------------------------------------------------------------
687	+	// commit mask
688	+	// -------------------------------------------------------------------
689	+
690	+	static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) {
691	+	for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
692	+	cm->mask[i] = 0;
693	+	}
694	+	}
695	+
696	+	static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) {
697	+	for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
698	+	cm->mask[i] = ~((size_t)0);
699	+	}
700	+	}
701	+
702	+	static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) {
703	+	for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
704	+	if (cm->mask[i] != 0) return false;
705	+	}
706	+	return true;
707	+	}
708	+
709	+	static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) {
710	+	for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
711	+	if (cm->mask[i] != ~((size_t)0)) return false;
712	+	}
713	+	return true;
714	+	}
715	+
716	+	// defined in `segment.c`:
717	+	size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total);
718	+	size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx);
719	+
720	+	#define mi_commit_mask_foreach(cm,idx,count) \
721	+	idx = 0; \
722	+	while ((count = _mi_commit_mask_next_run(cm,&idx)) > 0) {
723	+
724	+	#define mi_commit_mask_foreach_end() \
725	+	idx += count; \
726	+	}
727	+
728	+
729	+
730	+
731	+	// -------------------------------------------------------------------
732	+	// Fast "random" shuffle
733	+	// -------------------------------------------------------------------
734	+
735	+	static inline uintptr_t _mi_random_shuffle(uintptr_t x) {
736	+	if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros
737	+	#if (MI_INTPTR_SIZE==8)
738	+	// by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
739	+	x ^= x >> 30;
740	+	x *= 0xbf58476d1ce4e5b9UL;
741	+	x ^= x >> 27;
742	+	x *= 0x94d049bb133111ebUL;
743	+	x ^= x >> 31;
744	+	#elif (MI_INTPTR_SIZE==4)
745	+	// by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
746	+	x ^= x >> 16;
747	+	x *= 0x7feb352dUL;
748	+	x ^= x >> 15;
749	+	x *= 0x846ca68bUL;
750	+	x ^= x >> 16;
751	+	#endif
752	+	return x;
753	+	}
754	+
755	+	// -------------------------------------------------------------------
756	+	// Optimize numa node access for the common case (= one node)
757	+	// -------------------------------------------------------------------
758	+
759	+	int _mi_os_numa_node_get(mi_os_tld_t* tld);
760	+	size_t _mi_os_numa_node_count_get(void);
761	+
762	+	extern _Atomic(size_t) _mi_numa_node_count;
763	+	static inline int _mi_os_numa_node(mi_os_tld_t* tld) {
764	+	if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; }
765	+	else return _mi_os_numa_node_get(tld);
766	+	}
767	+	static inline size_t _mi_os_numa_node_count(void) {
768	+	const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count);
769	+	if mi_likely(count > 0) { return count; }
770	+	else return _mi_os_numa_node_count_get();
771	+	}
772	+
773	+
774	+
775	+	// -----------------------------------------------------------------------
776	+	// Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero)
777	+	// -----------------------------------------------------------------------
778	+
779	+	#if defined(__GNUC__)
780	+
781	+	#include <limits.h> // LONG_MAX
782	+	#define MI_HAVE_FAST_BITSCAN
783	+	static inline size_t mi_clz(uintptr_t x) {
784	+	if (x==0) return MI_INTPTR_BITS;
785	+	#if (INTPTR_MAX == LONG_MAX)
786	+	return __builtin_clzl(x);
787	+	#else
788	+	return __builtin_clzll(x);
789	+	#endif
790	+	}
791	+	static inline size_t mi_ctz(uintptr_t x) {
792	+	if (x==0) return MI_INTPTR_BITS;
793	+	#if (INTPTR_MAX == LONG_MAX)
794	+	return __builtin_ctzl(x);
795	+	#else
796	+	return __builtin_ctzll(x);
797	+	#endif
798	+	}
799	+
800	+	#elif defined(_MSC_VER)
801	+
802	+	#include <limits.h> // LONG_MAX
803	+	#include <intrin.h> // BitScanReverse64
804	+	#define MI_HAVE_FAST_BITSCAN
805	+	static inline size_t mi_clz(uintptr_t x) {
806	+	if (x==0) return MI_INTPTR_BITS;
807	+	unsigned long idx;
808	+	#if (INTPTR_MAX == LONG_MAX)
809	+	_BitScanReverse(&idx, x);
810	+	#else
811	+	_BitScanReverse64(&idx, x);
812	+	#endif
813	+	return ((MI_INTPTR_BITS - 1) - idx);
814	+	}
815	+	static inline size_t mi_ctz(uintptr_t x) {
816	+	if (x==0) return MI_INTPTR_BITS;
817	+	unsigned long idx;
818	+	#if (INTPTR_MAX == LONG_MAX)
819	+	_BitScanForward(&idx, x);
820	+	#else
821	+	_BitScanForward64(&idx, x);
822	+	#endif
823	+	return idx;
824	+	}
825	+
826	+	#else
// Count trailing zero bits of a 32-bit value (returns 32 for `x == 0`).
static inline size_t mi_ctz32(uint32_t x) {
  // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
  static const unsigned char debruijn[32] = {
    0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
    31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
  };
  if (x==0) return 32;
  // Isolate the lowest set bit with unsigned two's complement negation; negating
  // through `int32_t` would overflow for 0x80000000.
  const uint32_t lowest = x & (~x + 1u);
  // The product must wrap at 32 bits so that the top 5 bits form the table index;
  // a wider `unsigned long` multiply would yield an index far beyond the table.
  const uint32_t hash = (uint32_t)(lowest * UINT32_C(0x077CB531));
  return debruijn[hash >> 27];
}
// Count leading zero bits of a 32-bit value (returns 32 for `x == 0`).
static inline size_t mi_clz32(uint32_t x) {
  // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
  static const uint8_t debruijn[32] = {
    31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
    23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0
  };
  if (x==0) return 32;
  // smear the highest set bit into every lower position
  x |= x >> 1;
  x |= x >> 2;
  x |= x >> 4;
  x |= x >> 8;
  x |= x >> 16;
  // truncate the product to 32 bits so that the top 5 bits form the table index
  return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27];
}
850	+
851	+	static inline size_t mi_clz(uintptr_t x) {
852	+	if (x==0) return MI_INTPTR_BITS;
853	+	#if (MI_INTPTR_BITS <= 32)
854	+	return mi_clz32((uint32_t)x);
855	+	#else
856	+	size_t count = mi_clz32((uint32_t)(x >> 32));
857	+	if (count < 32) return count;
858	+	return (32 + mi_clz32((uint32_t)x));
859	+	#endif
860	+	}
861	+	static inline size_t mi_ctz(uintptr_t x) {
862	+	if (x==0) return MI_INTPTR_BITS;
863	+	#if (MI_INTPTR_BITS <= 32)
864	+	return mi_ctz32((uint32_t)x);
865	+	#else
866	+	size_t count = mi_ctz32((uint32_t)x);
867	+	if (count < 32) return count;
868	+	return (32 + mi_ctz32((uint32_t)(x>>32)));
869	+	#endif
870	+	}
871	+
872	+	#endif
873	+
874	+	// "bit scan reverse": Return index of the highest bit (or MI_INTPTR_BITS if `x` is zero)
875	+	static inline size_t mi_bsr(uintptr_t x) {
876	+	return (x==0 ? MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x));
877	+	}
878	+
879	+
880	+	// ---------------------------------------------------------------------------------
881	+	// Provide our own `_mi_memcpy` for potential performance optimizations.
882	+	//
883	+	// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if
884	+	// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support
885	+	// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017)). See also issue #201 and pr #253.
886	+	// ---------------------------------------------------------------------------------
887	+
#if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
#include <intrin.h>
#include <string.h>
extern bool _mi_cpu_has_fsrm;
// Copy `n` bytes from `src` to `dst`; uses the `__movsb` intrinsic when the
// `_mi_cpu_has_fsrm` flag is set and plain `memcpy` otherwise.
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
  if (_mi_cpu_has_fsrm) {
    __movsb((unsigned char*)dst, (const unsigned char*)src, n);
  }
  else {
    memcpy(dst, src, n);
  }
}
// Zero `n` bytes at `dst`; uses the `__stosb` intrinsic when the
// `_mi_cpu_has_fsrm` flag is set and plain `memset` otherwise.
static inline void _mi_memzero(void* dst, size_t n) {
  if (_mi_cpu_has_fsrm) {
    __stosb((unsigned char*)dst, 0, n);
  }
  else {
    memset(dst, 0, n);
  }
}
#else
#include <string.h>
// Copy `n` bytes from `src` to `dst` (thin wrapper around `memcpy`).
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
  memcpy(dst, src, n);
}
// Zero `n` bytes at `dst` (thin wrapper around `memset`).
static inline void _mi_memzero(void* dst, size_t n) {
  memset(dst, 0, n);
}
#endif
917	+
918	+
919	+	// -------------------------------------------------------------------------------
920	+	// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
921	+	// This is used for example in `mi_realloc`.
922	+	// -------------------------------------------------------------------------------
923	+
924	+	#if (defined(__GNUC__) && (__GNUC__ >= 4)) \|\| defined(__clang__)
925	+	// On GCC/CLang we provide a hint that the pointers are word aligned.
926	+	#include <string.h>
927	+	static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
928	+	mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
929	+	void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);
930	+	const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE);
931	+	_mi_memcpy(adst, asrc, n);
932	+	}
933	+
934	+	static inline void _mi_memzero_aligned(void* dst, size_t n) {
935	+	mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0);
936	+	void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);
937	+	_mi_memzero(adst, n);
938	+	}
939	+	#else
940	+	// Default fallback on `_mi_memcpy`
941	+	static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
942	+	mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
943	+	_mi_memcpy(dst, src, n);
944	+	}
945	+
946	+	static inline void _mi_memzero_aligned(void* dst, size_t n) {
947	+	mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0);
948	+	_mi_memzero(dst, n);
949	+	}
950	+	#endif
951	+
952	+
953	+	#endif
954	+

■ ■ ■ ■ ■ ■

preload-mimalloc/mimalloc/include/mimalloc/prim.h

1	+	/* ----------------------------------------------------------------------------
2	+	Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
3	+	This is free software; you can redistribute it and/or modify it under the
4	+	terms of the MIT license. A copy of the license can be found in the file
5	+	"LICENSE" at the root of this distribution.
6	+	-----------------------------------------------------------------------------*/
7	+	#pragma once
8	+	#ifndef MIMALLOC_PRIM_H
9	+	#define MIMALLOC_PRIM_H
10	+
11	+
12	+	// --------------------------------------------------------------------------
13	+	// This file specifies the primitive portability API.
14	+	// Each OS/host needs to implement these primitives, see `src/prim`
15	+	// for implementations on Windows, macOS, WASI, and Linux/Unix.
16	+	//
17	+	// note: on all primitive functions, we always get:
18	+	// addr != NULL and page aligned
19	+	// size > 0 and page aligned
20	+	// return value is an error code (an `int`) where 0 is success.
21	+	// --------------------------------------------------------------------------
22	+
23	+	// OS memory configuration
24	+	typedef struct mi_os_mem_config_s {
25	+	size_t page_size; // 4KiB
26	+	size_t large_page_size; // 2MiB
27	+	size_t alloc_granularity; // smallest allocation size (on Windows 64KiB)
28	+	bool has_overcommit; // can we reserve more memory than can be actually committed?
29	+	bool must_free_whole; // must allocated blocks free as a whole (false for mmap, true for VirtualAlloc)
30	+	} mi_os_mem_config_t;
31	+
32	+	// Initialize
33	+	void _mi_prim_mem_init( mi_os_mem_config_t* config );
34	+
35	+	// Free OS memory
36	+	int _mi_prim_free(void* addr, size_t size );
37	+
38	+	// Allocate OS memory. Return NULL on error.
39	+	// The `try_alignment` is just a hint and the returned pointer does not have to be aligned.
40	+	// pre: !commit => !allow_large
41	+	// try_alignment >= _mi_os_page_size() and a power of 2
42	+	int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr);
43	+
44	+	// Commit memory. Returns error code or 0 on success.
45	+	int _mi_prim_commit(void* addr, size_t size, bool commit);
46	+
47	+	// Reset memory. The range keeps being accessible but the content might be reset.
48	+	// Returns error code or 0 on success.
49	+	int _mi_prim_reset(void* addr, size_t size);
50	+
51	+	// Protect memory. Returns error code or 0 on success.
52	+	int _mi_prim_protect(void* addr, size_t size, bool protect);
53	+
54	+	// Allocate huge (1GiB) pages possibly associated with a NUMA node.
55	+	// pre: size > 0 and a multiple of 1GiB.
56	+	// addr is either NULL or an address hint.
57	+	// numa_node is either negative (don't care), or a numa node number.
58	+	int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr);
59	+
60	+	// Return the current NUMA node
61	+	size_t _mi_prim_numa_node(void);
62	+
63	+	// Return the number of logical NUMA nodes
64	+	size_t _mi_prim_numa_node_count(void);
65	+
66	+	// Clock ticks
67	+	mi_msecs_t _mi_prim_clock_now(void);
68	+
69	+	// Return process information (only for statistics)
70	+	typedef struct mi_process_info_s {
71	+	mi_msecs_t elapsed;
72	+	mi_msecs_t utime;
73	+	mi_msecs_t stime;
74	+	size_t current_rss;
75	+	size_t peak_rss;
76	+	size_t current_commit;
77	+	size_t peak_commit;
78	+	size_t page_faults;
79	+	} mi_process_info_t;
80	+
81	+	void _mi_prim_process_info(mi_process_info_t* pinfo);
82	+
83	+	// Default stderr output. (only for warnings etc. with verbose enabled)
84	+	// msg != NULL && _mi_strlen(msg) > 0
85	+	void _mi_prim_out_stderr( const char* msg );
86	+
87	+	// Get an environment variable. (only for options)
88	+	// name != NULL, result != NULL, result_size >= 64
89	+	bool _mi_prim_getenv(const char* name, char* result, size_t result_size);
90	+
91	+
92	+	// Fill a buffer with strong randomness; return `false` on error or if
93	+	// there is no strong randomization available.
94	+	bool _mi_prim_random_buf(void* buf, size_t buf_len);
95	+
96	+	// Called on the first thread start, and should ensure `_mi_thread_done` is called on thread termination.
97	+	void _mi_prim_thread_init_auto_done(void);
98	+
99	+	// Called on process exit and may take action to clean up resources associated with the thread auto done.
100	+	void _mi_prim_thread_done_auto_done(void);
101	+
102	+	// Called when the default heap for a thread changes
103	+	void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
104	+
105	+
106	+	//-------------------------------------------------------------------
107	+	// Thread id: `_mi_prim_thread_id()`
108	+	//
109	+	// Getting the thread id should be performant as it is called in the
110	+	// fast path of `_mi_free` and we specialize for various platforms as
111	+	// inlined definitions. Regular code should call `init.c:_mi_thread_id()`.
112	+	// We only require _mi_prim_thread_id() to return a unique id
113	+	// for each thread (unequal to zero).
114	+	//-------------------------------------------------------------------
115	+
116	+	// defined in `init.c`; do not use these directly
117	+	extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
118	+	extern bool _mi_process_is_initialized; // has mi_process_init been called?
119	+
120	+	static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept;
121	+
122	+	#if defined(_WIN32)
123	+
124	+	#define WIN32_LEAN_AND_MEAN
125	+	#include <windows.h>
126	+	static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
127	+	// Windows: works on Intel and ARM in both 32- and 64-bit
128	+	return (uintptr_t)NtCurrentTeb();
129	+	}
130	+
131	+	// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
132	+	// both the OS and libc implementation so we use specific tests for each main platform.
133	+	// If you test on another platform and it works please send a PR :-)
134	+	// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
135	+	#elif defined(__GNUC__) && ( \
136	+	(defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
137	+	|| (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \
138	+	|| (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
139	+	|| (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
140	+	|| (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
141	+	)
142	+
143	+	static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { // read the pointer stored in TLS slot `slot`
144	+	void* res;
145	+	const size_t ofs = (slot*sizeof(void*)); // byte offset of the slot (slots are pointer sized)
146	+	#if defined(__i386__)
147	+	__asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS
148	+	#elif defined(__APPLE__) && defined(__x86_64__)
149	+	__asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS
150	+	#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
151	+	__asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI
152	+	#elif defined(__x86_64__)
153	+	__asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS
154	+	#elif defined(__arm__)
155	+	void** tcb; MI_UNUSED(ofs); // on ARM the slot is indexed through the thread control block pointer instead
156	+	__asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
157	+	res = tcb[slot];
158	+	#elif defined(__aarch64__)
159	+	void** tcb; MI_UNUSED(ofs);
160	+	#if defined(__APPLE__) // M1, issue #343
161	+	__asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb));
162	+	#else
163	+	__asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
164	+	#endif
165	+	res = tcb[slot];
166	+	#endif
167	+	return res; // always assigned: the enclosing #elif only admits the architectures handled above
168	+	}
169	+
170	+	// Store `value` in TLS slot `slot`; setting a tls slot is only used on macOS for now
171	+	static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
172	+	const size_t ofs = (slot*sizeof(void*)); // byte offset of the slot (slots are pointer sized)
173	+	#if defined(__i386__)
174	+	__asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS
175	+	#elif defined(__APPLE__) && defined(__x86_64__)
176	+	__asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOS uses GS
177	+	#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
178	+	__asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI
179	+	#elif defined(__x86_64__)
180	+	__asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS
181	+	#elif defined(__arm__)
182	+	void** tcb; MI_UNUSED(ofs); // on ARM the slot is indexed through the thread control block pointer instead
183	+	__asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
184	+	tcb[slot] = value;
185	+	#elif defined(__aarch64__)
186	+	void** tcb; MI_UNUSED(ofs);
187	+	#if defined(__APPLE__) // M1, issue #343
188	+	__asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb));
189	+	#else
190	+	__asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
191	+	#endif
192	+	tcb[slot] = value;
193	+	#endif
194	+	}
195	+
196	+	static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
197	+	#if defined(__BIONIC__)
198	+	// issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id
199	+	// see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86
200	+	return (uintptr_t)mi_prim_tls_slot(1);
201	+	#else
202	+	// in all our other targets, slot 0 is the thread id
203	+	// glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h
204	+	// apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36
205	+	return (uintptr_t)mi_prim_tls_slot(0);
206	+	#endif
207	+	}
208	+
209	+	#else
210	+
211	+	// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms).
212	+	static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
213	+	return (uintptr_t)&_mi_heap_default;
214	+	}
215	+
216	+	#endif
217	+
218	+
219	+
220	+	/* ----------------------------------------------------------------------------------------
221	+	The thread local default heap: `_mi_prim_get_default_heap()`
222	+	This is inlined here as it is on the fast path for allocation functions.
223	+
224	+	On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a
225	+	__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures
226	+	that the storage will always be available (allocated on the thread stacks).
227	+
228	+	On some platforms though we cannot use that when overriding `malloc` since the underlying
229	+	TLS implementation (or the loader) will call itself `malloc` on a first access and recurse.
230	+	We try to circumvent this in an efficient way:
231	+	- macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the
232	+	loader itself calls `malloc` even before the modules are initialized.
233	+	- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS).
234	+	- DragonFly: defaults are working but seem slow compared to freeBSD (see PR #323)
235	+	------------------------------------------------------------------------------------------- */
236	+
237	+	static inline mi_heap_t* mi_prim_get_default_heap(void);
238	+
239	+	#if defined(MI_MALLOC_OVERRIDE) // pick how the default heap pointer is stored when overriding malloc
240	+	#if defined(__APPLE__) // macOS
241	+	#define MI_TLS_SLOT 89 // seems unused?
242	+	// #define MI_TLS_RECURSE_GUARD 1
243	+	// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
244	+	// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
245	+	#elif defined(__OpenBSD__)
246	+	// use end bytes of a name; goes wrong if anyone uses names > 23 characters (pthread specifies 16)
247	+	// see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
248	+	#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24)
249	+	// #elif defined(__DragonFly__)
250	+	// #warning "mimalloc is not working correctly on DragonFly yet."
251	+	// #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
252	+	#elif defined(__ANDROID__)
253	+	// See issue #381
254	+	#define MI_TLS_PTHREAD
255	+	#endif
256	+	#endif
257	+
258	+
259	+	#if defined(MI_TLS_SLOT)
260	+
261	+	static inline mi_heap_t* mi_prim_get_default_heap(void) {
262	+	mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT);
263	+	if mi_unlikely(heap == NULL) {
264	+	#ifdef __GNUC__
265	+	__asm(""); // prevent conditional load of the address of _mi_heap_empty
266	+	#endif
267	+	heap = (mi_heap_t*)&_mi_heap_empty;
268	+	}
269	+	return heap;
270	+	}
271	+
272	+	#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
273	+
274	+	static inline mi_heap_t** mi_prim_tls_pthread_heap_slot(void) { // address of the heap pointer kept inside the current thread's pthread block
275	+	pthread_t self = pthread_self();
276	+	#if defined(__DragonFly__)
277	+	if (self==NULL) return NULL; // callers treat a NULL slot as "use the main heap"
278	+	#endif
279	+	return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS); // byte offset into the pthread block
280	+	}
281	+
282	+	static inline mi_heap_t* mi_prim_get_default_heap(void) {
283	+	mi_heap_t** pheap = mi_prim_tls_pthread_heap_slot();
284	+	if mi_unlikely(pheap == NULL) return _mi_heap_main_get();
285	+	mi_heap_t* heap = *pheap;
286	+	if mi_unlikely(heap == NULL) return (mi_heap_t*)&_mi_heap_empty;
287	+	return heap;
288	+	}
289	+
290	+	#elif defined(MI_TLS_PTHREAD)
291	+
292	+	extern pthread_key_t _mi_heap_default_key;
293	+	static inline mi_heap_t* mi_prim_get_default_heap(void) {
294	+	mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
295	+	return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
296	+	}
297	+
298	+	#else // default using a thread local variable; used on most platforms.
299	+
300	+	static inline mi_heap_t* mi_prim_get_default_heap(void) {
301	+	#if defined(MI_TLS_RECURSE_GUARD)
302	+	if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get();
303	+	#endif
304	+	return _mi_heap_default;
305	+	}
306	+
307	+	#endif // mi_prim_get_default_heap()
308	+
309	+
310	+
311	+	#endif // MIMALLOC_PRIM_H
312	+

■ ■ ■ ■ ■ ■

preload-mimalloc/mimalloc/include/mimalloc/track.h

1	+	/* ----------------------------------------------------------------------------
2	+	Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
3	+	This is free software; you can redistribute it and/or modify it under the
4	+	terms of the MIT license. A copy of the license can be found in the file
5	+	"LICENSE" at the root of this distribution.
6	+	-----------------------------------------------------------------------------*/
7	+	#pragma once
8	+	#ifndef MIMALLOC_TRACK_H
9	+	#define MIMALLOC_TRACK_H
10	+
11	+	/* ------------------------------------------------------------------------------------------------------
12	+	Track memory ranges with macros for tools like Valgrind, address sanitizer, or other memory checkers.
13	+	These can be defined for tracking allocation:
14	+
15	+	#define mi_track_malloc_size(p,reqsize,size,zero)
16	+	#define mi_track_free_size(p,_size)
17	+
18	+	The macros are set up such that the size passed to `mi_track_free_size`
19	+	always matches the size of `mi_track_malloc_size`. (currently, `size == mi_usable_size(p)`).
20	+	The `reqsize` is what the user requested, and `size >= reqsize`.
21	+	The `size` is either byte precise (and `size==reqsize`) if `MI_PADDING` is enabled,
22	+	or otherwise it is the usable block size which may be larger than the original request.
23	+	Use `_mi_block_size_of(void* p)` to get the full block size that was allocated (including padding etc).
24	+	The `zero` parameter is `true` if the allocated block is zero initialized.
25	+
26	+	Optional:
27	+
28	+	#define mi_track_align(p,alignedp,offset,size)
29	+	#define mi_track_resize(p,oldsize,newsize)
30	+	#define mi_track_init()
31	+
32	+	The `mi_track_align` is called right after a `mi_track_malloc` for aligned pointers in a block.
33	+	The corresponding `mi_track_free` still uses the block start pointer and original size (corresponding to the `mi_track_malloc`).
34	+	The `mi_track_resize` is currently unused but could be called on reallocations within a block.
35	+	`mi_track_init` is called at program start.
36	+
37	+	The following macros are for tools like asan and valgrind to track whether memory is
38	+	defined, undefined, or not accessible at all:
39	+
40	+	#define mi_track_mem_defined(p,size)
41	+	#define mi_track_mem_undefined(p,size)
42	+	#define mi_track_mem_noaccess(p,size)
43	+
44	+	-------------------------------------------------------------------------------------------------------*/
45	+
46	+	#if MI_TRACK_VALGRIND
47	+	// valgrind tool
48	+
49	+	#define MI_TRACK_ENABLED 1
50	+	#define MI_TRACK_HEAP_DESTROY 1 // track free of individual blocks on heap_destroy
51	+	#define MI_TRACK_TOOL "valgrind"
52	+
53	+	#include <valgrind/valgrind.h>
54	+	#include <valgrind/memcheck.h>
55	+
56	+	#define mi_track_malloc_size(p,reqsize,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero)
57	+	#define mi_track_free_size(p,_size) VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/)
58	+	#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/)
59	+	#define mi_track_mem_defined(p,size) VALGRIND_MAKE_MEM_DEFINED(p,size)
60	+	#define mi_track_mem_undefined(p,size) VALGRIND_MAKE_MEM_UNDEFINED(p,size)
61	+	#define mi_track_mem_noaccess(p,size) VALGRIND_MAKE_MEM_NOACCESS(p,size)
62	+
63	+	#elif MI_TRACK_ASAN
64	+	// address sanitizer
65	+
66	+	#define MI_TRACK_ENABLED 1
67	+	#define MI_TRACK_HEAP_DESTROY 0
68	+	#define MI_TRACK_TOOL "asan"
69	+
70	+	#include <sanitizer/asan_interface.h>
71	+
72	+	#define mi_track_malloc_size(p,reqsize,size,zero) ASAN_UNPOISON_MEMORY_REGION(p,size)
73	+	#define mi_track_free_size(p,size) ASAN_POISON_MEMORY_REGION(p,size)
74	+	#define mi_track_mem_defined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size)
75	+	#define mi_track_mem_undefined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size)
76	+	#define mi_track_mem_noaccess(p,size) ASAN_POISON_MEMORY_REGION(p,size)
77	+
78	+	#elif MI_TRACK_ETW
79	+	// windows event tracing
80	+
81	+	#define MI_TRACK_ENABLED 1
82	+	#define MI_TRACK_HEAP_DESTROY 0
83	+	#define MI_TRACK_TOOL "ETW"
84	+
85	+	#define WIN32_LEAN_AND_MEAN
86	+	#include <windows.h>
87	+	#include "../src/prim/windows/etw.h"
88	+
89	+	#define mi_track_init() EventRegistermicrosoft_windows_mimalloc();
90	+	#define mi_track_malloc_size(p,reqsize,size,zero) EventWriteETW_MI_ALLOC((UINT64)(p), size)
91	+	#define mi_track_free_size(p,size) EventWriteETW_MI_FREE((UINT64)(p), size)
92	+
93	+	#else
94	+	// no tracking
95	+
96	+	#define MI_TRACK_ENABLED 0
97	+	#define MI_TRACK_HEAP_DESTROY 0
98	+	#define MI_TRACK_TOOL "none"
99	+
100	+	#define mi_track_malloc_size(p,reqsize,size,zero)
101	+	#define mi_track_free_size(p,_size)
102	+
103	+	#endif
104	+
105	+	// -------------------
106	+	// Utility definitions
107	+
108	+	#ifndef mi_track_resize
109	+	#define mi_track_resize(p,oldsize,newsize) mi_track_free_size(p,oldsize); mi_track_malloc(p,newsize,false)
110	+	#endif
111	+
112	+	#ifndef mi_track_align
113	+	#define mi_track_align(p,alignedp,offset,size) mi_track_mem_noaccess(p,offset)
114	+	#endif
115	+
116	+	#ifndef mi_track_init
117	+	#define mi_track_init()
118	+	#endif
119	+
120	+	#ifndef mi_track_mem_defined
121	+	#define mi_track_mem_defined(p,size)
122	+	#endif
123	+
124	+	#ifndef mi_track_mem_undefined
125	+	#define mi_track_mem_undefined(p,size)
126	+	#endif
127	+
128	+	#ifndef mi_track_mem_noaccess
129	+	#define mi_track_mem_noaccess(p,size)
130	+	#endif
131	+
132	+
133	+	#if MI_PADDING
134	+	#define mi_track_malloc(p,reqsize,zero) \
135	+	if ((p)!=NULL) { \
136	+	mi_assert_internal(mi_usable_size(p)==(reqsize)); \
137	+	mi_track_malloc_size(p,reqsize,reqsize,zero); \
138	+	}
139	+	#else
140	+	#define mi_track_malloc(p,reqsize,zero) \
141	+	if ((p)!=NULL) { \
142	+	mi_assert_internal(mi_usable_size(p)>=(reqsize)); \
143	+	mi_track_malloc_size(p,reqsize,mi_usable_size(p),zero); \
144	+	}
145	+	#endif
146	+
147	+	#endif
148	+

■ ■ ■ ■ ■ ■

preload-mimalloc/mimalloc/include/mimalloc/types.h

1	+	/* ----------------------------------------------------------------------------
2	+	Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
3	+	This is free software; you can redistribute it and/or modify it under the
4	+	terms of the MIT license. A copy of the license can be found in the file
5	+	"LICENSE" at the root of this distribution.
6	+	-----------------------------------------------------------------------------*/
7	+	#pragma once
8	+	#ifndef MIMALLOC_TYPES_H
9	+	#define MIMALLOC_TYPES_H
10	+
11	+	// --------------------------------------------------------------------------
12	+	// This file contains the main type definitions for mimalloc:
13	+	// mi_heap_t : all data for a thread-local heap, contains
14	+	// lists of all managed heap pages.
15	+	// mi_segment_t : a larger chunk of memory (32MiB) from where pages
16	+	// are allocated.
17	+	// mi_page_t : a mimalloc page (usually 64KiB or 512KiB) from
18	+	// where objects are allocated.
19	+	// --------------------------------------------------------------------------
20	+
21	+
22	+	#include <stddef.h> // ptrdiff_t
23	+	#include <stdint.h> // uintptr_t, uint16_t, etc
24	+	#include "mimalloc/atomic.h" // _Atomic
25	+
26	+	#ifdef _MSC_VER
27	+	#pragma warning(disable:4214) // bitfield is not int
28	+	#endif
29	+
30	+	// Minimal alignment necessary. On most platforms 16 bytes are needed
31	+	// due to SSE registers for example. This must be at least `sizeof(void*)`
32	+	#ifndef MI_MAX_ALIGN_SIZE
33	+	#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t)
34	+	#endif
35	+
36	+	// ------------------------------------------------------
37	+	// Variants
38	+	// ------------------------------------------------------
39	+
40	+	// Define NDEBUG in the release version to disable assertions.
41	+	// #define NDEBUG
42	+
43	+	// Define MI_TRACK_<tool> to enable tracking support
44	+	// #define MI_TRACK_VALGRIND 1
45	+	// #define MI_TRACK_ASAN 1
46	+	// #define MI_TRACK_ETW 1
47	+
48	+	// Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance).
49	+	// #define MI_STAT 1
50	+
51	+	// Define MI_SECURE to enable security mitigations
52	+	// #define MI_SECURE 1 // guard page around metadata
53	+	// #define MI_SECURE 2 // guard page around each mimalloc page
54	+	// #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free)
55	+	// #define MI_SECURE 4 // checks for double free. (may be more expensive)
56	+
57	+	#if !defined(MI_SECURE)
58	+	#define MI_SECURE 0
59	+	#endif
60	+
61	+	// Define MI_DEBUG for debug mode
62	+	// #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free.
63	+	// #define MI_DEBUG 2 // + internal assertion checks
64	+	// #define MI_DEBUG 3 // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON)
65	+	#if !defined(MI_DEBUG) // default the debug level from NDEBUG/_DEBUG when not set explicitly
66	+	#if !defined(NDEBUG) || defined(_DEBUG)
67	+	#define MI_DEBUG 2
68	+	#else
69	+	#define MI_DEBUG 0
70	+	#endif
71	+	#endif
72	+
73	+	// Reserve extra padding at the end of each block to be more resilient against heap block overflows.
74	+	// The padding can detect buffer overflow on free.
75	+	#if !defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1 || (MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_TRACK_ETW))
76	+	#define MI_PADDING 1
77	+	#endif
78	+
79	+	// Check padding bytes; allows byte-precise buffer overflow detection
80	+	#if !defined(MI_PADDING_CHECK) && MI_PADDING && (MI_SECURE>=3 || MI_DEBUG>=1)
81	+	#define MI_PADDING_CHECK 1
82	+	#endif
83	+
84	+
85	+	// Encoded free lists allow detection of corrupted free lists
86	+	// and can detect buffer overflows, modify after free, and double `free`s.
87	+	#if (MI_SECURE>=3 || MI_DEBUG>=1)
88	+	#define MI_ENCODE_FREELIST 1
89	+	#endif
90	+
91	+
92	+	// We used to abandon huge pages but to eagerly deallocate if freed from another thread,
93	+	// but that makes it not possible to visit them during a heap walk or include them in a
94	+	// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks if freed from
95	+	// another thread so most memory is available until it gets properly freed by the owning thread.
96	+	// #define MI_HUGE_PAGE_ABANDON 1
97	+
98	+
99	+	// ------------------------------------------------------
100	+	// Platform specific values
101	+	// ------------------------------------------------------
102	+
103	+	// ------------------------------------------------------
104	+	// Size of a pointer.
105	+	// We assume that `sizeof(void*)==sizeof(intptr_t)`
106	+	// and it holds for all platforms we know of.
107	+	//
108	+	// However, the C standard only requires that:
109	+	// p == (void*)((intptr_t)p))
110	+	// but we also need:
111	+	// i == (intptr_t)((void*)i)
112	+	// or otherwise one might define an intptr_t type that is larger than a pointer...
113	+	// ------------------------------------------------------
114	+
115	+	#if INTPTR_MAX > INT64_MAX
116	+	# define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example)
117	+	#elif INTPTR_MAX == INT64_MAX
118	+	# define MI_INTPTR_SHIFT (3)
119	+	#elif INTPTR_MAX == INT32_MAX
120	+	# define MI_INTPTR_SHIFT (2)
121	+	#else
122	+	#error platform pointers must be 32, 64, or 128 bits
123	+	#endif
124	+
125	+	#if SIZE_MAX == UINT64_MAX
126	+	# define MI_SIZE_SHIFT (3)
127	+	typedef int64_t mi_ssize_t;
128	+	#elif SIZE_MAX == UINT32_MAX
129	+	# define MI_SIZE_SHIFT (2)
130	+	typedef int32_t mi_ssize_t;
131	+	#else
132	+	#error platform objects must be 32 or 64 bits
133	+	#endif
134	+
135	+	#if (SIZE_MAX/2) > LONG_MAX
136	+	# define MI_ZU(x) x##ULL
137	+	# define MI_ZI(x) x##LL
138	+	#else
139	+	# define MI_ZU(x) x##UL
140	+	# define MI_ZI(x) x##L
141	+	#endif
142	+
143	+	#define MI_INTPTR_SIZE (1<<MI_INTPTR_SHIFT)
144	+	#define MI_INTPTR_BITS (MI_INTPTR_SIZE*8)
145	+
146	+	#define MI_SIZE_SIZE (1<<MI_SIZE_SHIFT)
147	+	#define MI_SIZE_BITS (MI_SIZE_SIZE*8)
148	+
149	+	#define MI_KiB (MI_ZU(1024))
150	+	#define MI_MiB (MI_KiB*MI_KiB)
151	+	#define MI_GiB (MI_MiB*MI_KiB)
152	+
153	+
154	+	// ------------------------------------------------------
155	+	// Main internal data-structures
156	+	// ------------------------------------------------------
157	+
158	+	// Main tuning parameters for segment and page sizes
159	+	// Sizes for 64-bit (usually divide by two for 32-bit)
160	+	#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit)
161	+
162	+	#if MI_INTPTR_SIZE > 4
163	+	#define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 32MiB
164	+	#else
165	+	#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit
166	+	#endif
167	+
168	+	#define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB
169	+	#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB
170	+
171	+
172	+	// Derived constants
173	+	#define MI_SEGMENT_SIZE (MI_ZU(1)<<MI_SEGMENT_SHIFT)
174	+	#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
175	+	#define MI_SEGMENT_MASK (MI_SEGMENT_ALIGN - 1)
176	+	#define MI_SEGMENT_SLICE_SIZE (MI_ZU(1)<< MI_SEGMENT_SLICE_SHIFT)
177	+	#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 1024
178	+
179	+	#define MI_SMALL_PAGE_SIZE (MI_ZU(1)<<MI_SMALL_PAGE_SHIFT)
180	+	#define MI_MEDIUM_PAGE_SIZE (MI_ZU(1)<<MI_MEDIUM_PAGE_SHIFT)
181	+
182	+	#define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 8KiB on 64-bit
183	+	#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128KiB on 64-bit
184	+	#define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
185	+	#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2) // 16MiB on 64-bit
186	+	#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
187	+
188	+	// Maximum number of size classes. (spaced exponentially in 12.5% increments)
189	+	#define MI_BIN_HUGE (73U)
190	+
191	+	#if (MI_MEDIUM_OBJ_WSIZE_MAX >= 655360)
192	+	#error "mimalloc internal: define more bins"
193	+	#endif
194	+
195	+	// Maximum slice offset (255 on 64-bit: 16MiB max alignment / 64KiB slices - 1)
196	+	#define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1)
197	+
198	+	// Used as a special value to encode block sizes in 32 bits.
199	+	#define MI_HUGE_BLOCK_SIZE ((uint32_t)(2*MI_GiB))
200	+
201	+	// blocks up to this size are always allocated aligned
202	+	#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE)
203	+
204	+	// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments
205	+	#define MI_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1)
206	+
207	+
208	+	// ------------------------------------------------------
209	+	// Mimalloc pages contain allocated blocks
210	+	// ------------------------------------------------------
211	+
212	+	// The free lists use encoded next fields
213	+	// (Only actually encodes when MI_ENCODE_FREELIST is defined.)
214	+	typedef uintptr_t mi_encoded_t;
215	+
216	+	// thread id's
217	+	typedef size_t mi_threadid_t;
218	+
219	+	// free lists contain blocks
220	+	typedef struct mi_block_s {
221	+	mi_encoded_t next;
222	+	} mi_block_t;
223	+
224	+
225	+	// The delayed flags are used for efficient multi-threaded free-ing
226	+	typedef enum mi_delayed_e {
227	+	MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list
228	+	MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap
229	+	MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list
230	+	MI_NEVER_DELAYED_FREE = 3 // sticky, only resets on page reclaim
231	+	} mi_delayed_t;
232	+
233	+
234	+	// The `in_full` and `has_aligned` page flags are put in a union to efficiently
235	+	// test if both are false (`full_aligned == 0`) in the `mi_free` routine.
236	+	#if !MI_TSAN
237	+	typedef union mi_page_flags_s {
238	+	uint8_t full_aligned;
239	+	struct {
240	+	uint8_t in_full : 1;
241	+	uint8_t has_aligned : 1;
242	+	} x;
243	+	} mi_page_flags_t;
244	+	#else
245	+	// under thread sanitizer, use a byte for each flag to suppress warning, issue #130
246	+	typedef union mi_page_flags_s {
247	+	uint16_t full_aligned;
248	+	struct {
249	+	uint8_t in_full;
250	+	uint8_t has_aligned;
251	+	} x;
252	+	} mi_page_flags_t;
253	+	#endif
254	+
255	+	// Thread free list.
256	+	// We use the bottom 2 bits of the pointer for mi_delayed_t flags
257	+	typedef uintptr_t mi_thread_free_t;
258	+
259	+	// A page contains blocks of one specific size (`block_size`).
260	+	// Each page has three lists of free blocks:
261	+	// `free` for blocks that can be allocated,
262	+	// `local_free` for freed blocks that are not yet available to `mi_malloc`
263	+	// `thread_free` for freed blocks by other threads
264	+	// The `local_free` and `thread_free` lists are migrated to the `free` list
265	+	// when it is exhausted. The separate `local_free` list is necessary to
266	+	// implement a monotonic heartbeat. The `thread_free` list is needed for
267	+	// avoiding atomic operations in the common case.
268	+	//
269	+	//
270	+	// `used - |thread_free|` == actual blocks that are in use (alive)
271	+	// `used - |thread_free| + |free| + |local_free| == capacity`
272	+	//
273	+	// We don't count `freed` (as |free|) but use `used` to reduce
274	+	// the number of memory accesses in the `mi_page_all_free` function(s).
275	+	//
276	+	// Notes:
277	+	// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`)
278	+	// - Using `uint16_t` does not seem to slow things down
279	+	// - The size is 8 words on 64-bit which helps the page index calculations
280	+	// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10
281	+	// and 12 are still good for address calculation)
282	+	// - To limit the structure size, the `xblock_size` is 32-bits only; for
283	+	// blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size
284	+	// - `thread_free` uses the bottom bits as a delayed-free flags to optimize
285	+	// concurrent frees where only the first concurrent free adds to the owning
286	+	// heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`).
287	+	// The invariant is that no-delayed-free is only set if there is
288	+	// at least one block that will be added, or has already been added, to
289	+	// the owning heap `thread_delayed_free` list. This guarantees that pages
290	+	// will be freed correctly even if only other threads free blocks.
291	+	typedef struct mi_page_s { // per-page metadata: slice info, block counts, and the three free lists
292	+	// "owned" by the segment
293	+	uint32_t slice_count; // slices in this page (0 if not a page)
294	+	uint32_t slice_offset; // distance from the actual page data slice (0 if a page)
295	+	uint8_t is_reset : 1; // `true` if the page memory was reset
296	+	uint8_t is_committed : 1; // `true` if the page virtual memory is committed
297	+	uint8_t is_zero_init : 1; // `true` if the page was zero initialized
298	+
299	+	// layout like this to optimize access in `mi_malloc` and `mi_free`
300	+	uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
301	+	uint16_t reserved; // number of blocks reserved in memory
302	+	mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
303	+	uint8_t is_zero : 1; // `true` if the blocks in the free list are zero initialized
304	+	uint8_t retire_expire : 7; // expiration count for retired blocks
305	+
306	+	mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
307	+	uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
308	+	uint32_t xblock_size; // size available in each block (always `>0`)
309	+	mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
310	+
311	+	#if (MI_ENCODE_FREELIST || MI_PADDING)
312	+	uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary
313	+	#endif
314	+
315	+	_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
316	+	_Atomic(uintptr_t) xheap;
317	+
318	+	struct mi_page_s* next; // next page owned by this thread with the same `block_size`
319	+	struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
320	+
321	+	// 64-bit 9 words, 32-bit 12 words, (+2 for secure)
322	+	#if MI_INTPTR_SIZE==8
323	+	uintptr_t padding[1];
324	+	#endif
325	+	} mi_page_t;
326	+
327	+
328	+
329	+	typedef enum mi_page_kind_e {
330	+	MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment
331	+	MI_PAGE_MEDIUM, // medium blocks go into medium pages inside a segment
332	+	MI_PAGE_LARGE, // larger blocks go into a page of just one block
333	+	MI_PAGE_HUGE, // huge blocks (> 16 MiB) are put into a single page in a single segment.
334	+	} mi_page_kind_t;
335	+
336	+	typedef enum mi_segment_kind_e {
337	+	MI_SEGMENT_NORMAL, // MI_SEGMENT_SIZE size with pages inside.
338	+	MI_SEGMENT_HUGE, // > MI_LARGE_OBJ_SIZE_MAX segment with just one huge page inside.
339	+	} mi_segment_kind_t;
340	+
341	+	// ------------------------------------------------------
342	+	// A segment holds a commit mask where a bit is set if
343	+	// the corresponding MI_COMMIT_SIZE area is committed.
344	+	// The MI_COMMIT_SIZE must be a multiple of the slice
345	+	// size. If it is equal we have the most fine grained
346	+	// decommit (but setting it higher can be more efficient).
347	+	// The MI_MINIMAL_COMMIT_SIZE is the minimal amount that will
348	+	// be committed in one go which can be set higher than
349	+	// MI_COMMIT_SIZE for efficiency (while the decommit mask
350	+	// is still tracked in fine-grained MI_COMMIT_SIZE chunks)
351	+	// ------------------------------------------------------
352	+
353	+	#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB
354	+	#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB
355	+	#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE)
356	+	#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS
357	+	#define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS)
358	+
359	+	#if (MI_COMMIT_MASK_BITS != (MI_COMMIT_MASK_FIELD_COUNT * MI_COMMIT_MASK_FIELD_BITS))
360	+	#error "the segment size must be exactly divisible by the (commit size * size_t bits)"
361	+	#endif
362	+
363	+	typedef struct mi_commit_mask_s {
364	+	size_t mask[MI_COMMIT_MASK_FIELD_COUNT];
365	+	} mi_commit_mask_t;
366	+
367	+	typedef mi_page_t mi_slice_t;
368	+	typedef int64_t mi_msecs_t;
369	+
370	+
371	+	// Segments are large allocated memory blocks (32MiB on 64 bit) from
372	+	// the OS. Inside segments we allocated fixed size _pages_ that
373	+	// contain blocks.
374	+	typedef struct mi_segment_s {
375	+	size_t memid; // memory id for arena allocation
376	+	bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages)
377	+	bool mem_is_large; // in large/huge os pages?
378	+	bool mem_is_committed; // `true` if the whole segment is eagerly committed
379	+	size_t mem_alignment; // page alignment for huge pages (only used for alignment > MI_ALIGNMENT_MAX)
380	+	size_t mem_align_offset; // offset for huge page alignment (only used for alignment > MI_ALIGNMENT_MAX)
381	+
382	+	bool allow_decommit;
383	+	mi_msecs_t decommit_expire;
384	+	mi_commit_mask_t decommit_mask;
385	+	mi_commit_mask_t commit_mask;
386	+
387	+	_Atomic(struct mi_segment_s*) abandoned_next;
388	+
389	+	// from here is zero initialized
390	+	struct mi_segment_s* next; // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`)
391	+
392	+	size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`)
393	+	size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long)
394	+	size_t used; // count of pages in use
395	+	uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie`
396	+
397	+	size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT`
398	+	size_t segment_info_slices; // initial slices we are using segment info and possible guard pages.
399	+
400	+	// layout like this to optimize access in `mi_free`
401	+	mi_segment_kind_t kind;
402	+	size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT`
403	+	_Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment
404	+
405	+	mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment
406	+	} mi_segment_t;
407	+
408	+
409	+	// ------------------------------------------------------
410	+	// Heaps
411	+	// Provide first-class heaps to allocate from.
412	+	// A heap just owns a set of pages for allocation and
413	+	// can only allocate/reallocate from the thread that created it.
414	+	// Freeing blocks can be done from any thread though.
415	+	// Per thread, the segments are shared among its heaps.
416	+	// Per thread, there is always a default heap that is
417	+	// used for allocation; it is initialized to statically
418	+	// point to an empty heap to avoid initialization checks
419	+	// in the fast path.
420	+	// ------------------------------------------------------
421	+
422	+	// Thread local data
423	+	typedef struct mi_tld_s mi_tld_t;
424	+
425	+	// Pages of a certain block size are held in a queue.
426	+	typedef struct mi_page_queue_s {
427	+	mi_page_t* first;
428	+	mi_page_t* last;
429	+	size_t block_size;
430	+	} mi_page_queue_t;
431	+
432	+	#define MI_BIN_FULL (MI_BIN_HUGE+1)
433	+
434	+	// Random context
435	+	typedef struct mi_random_cxt_s {
436	+	uint32_t input[16];
437	+	uint32_t output[16];
438	+	int output_available;
439	+	bool weak;
440	+	} mi_random_ctx_t;
441	+
442	+
443	+	// In debug mode there is a padding structure at the end of the blocks to check for buffer overflows
444	+	#if (MI_PADDING)
445	+	typedef struct mi_padding_s {
446	+	uint32_t canary; // encoded block value to check validity of the padding (in case of overflow)
447	+	uint32_t delta; // padding bytes before the block. (mi_usable_size(p) - delta == exact allocated bytes)
448	+	} mi_padding_t;
449	+	#define MI_PADDING_SIZE (sizeof(mi_padding_t))
450	+	#define MI_PADDING_WSIZE ((MI_PADDING_SIZE + MI_INTPTR_SIZE - 1) / MI_INTPTR_SIZE)
451	+	#else
452	+	#define MI_PADDING_SIZE 0
453	+	#define MI_PADDING_WSIZE 0
454	+	#endif
455	+
456	+	#define MI_PAGES_DIRECT (MI_SMALL_WSIZE_MAX + MI_PADDING_WSIZE + 1)
457	+
458	+
459	+	// A heap owns a set of pages.
460	+	struct mi_heap_s {
461	+	mi_tld_t* tld;
462	+	mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points to a page with possibly free blocks in the corresponding queue for that size.
463	+	mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin")
464	+	_Atomic(mi_block_t*) thread_delayed_free;
465	+	mi_threadid_t thread_id; // thread this heap belongs to
466	+	mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0)
467	+	uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`)
468	+	uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list
469	+	mi_random_ctx_t random; // random number context used for secure allocation
470	+	size_t page_count; // total number of pages in the `pages` queues.
471	+	size_t page_retired_min; // smallest retired index (retired pages are fully free, but still in the page queues)
472	+	size_t page_retired_max; // largest retired index into the `pages` array.
473	+	mi_heap_t* next; // list of heaps per thread
474	+	bool no_reclaim; // `true` if this heap should not reclaim abandoned pages
475	+	};
476	+
477	+
478	+
479	+	// ------------------------------------------------------
480	+	// Debug
481	+	// ------------------------------------------------------
482	+
483	+	#if !defined(MI_DEBUG_UNINIT)
484	+	#define MI_DEBUG_UNINIT (0xD0)
485	+	#endif
486	+	#if !defined(MI_DEBUG_FREED)
487	+	#define MI_DEBUG_FREED (0xDF)
488	+	#endif
489	+	#if !defined(MI_DEBUG_PADDING)
490	+	#define MI_DEBUG_PADDING (0xDE)
491	+	#endif
492	+
493	+	#if (MI_DEBUG)
494	+	// use our own assertion to print without memory allocation
495	+	void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func );
496	+	#define mi_assert(expr) ((expr) ? (void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__))
497	+	#else
498	+	#define mi_assert(x)
499	+	#endif
500	+
501	+	#if (MI_DEBUG>1)
502	+	#define mi_assert_internal mi_assert
503	+	#else
504	+	#define mi_assert_internal(x)
505	+	#endif
506	+
507	+	#if (MI_DEBUG>2)
508	+	#define mi_assert_expensive mi_assert
509	+	#else
510	+	#define mi_assert_expensive(x)
511	+	#endif
512	+
513	+	// ------------------------------------------------------
514	+	// Statistics
515	+	// ------------------------------------------------------
516	+
517	+	#ifndef MI_STAT
518	+	#if (MI_DEBUG>0)
519	+	#define MI_STAT 2
520	+	#else
521	+	#define MI_STAT 0
522	+	#endif
523	+	#endif
524	+
525	+	typedef struct mi_stat_count_s {
526	+	int64_t allocated;
527	+	int64_t freed;
528	+	int64_t peak;
529	+	int64_t current;
530	+	} mi_stat_count_t;
531	+
532	+	typedef struct mi_stat_counter_s {
533	+	int64_t total;
534	+	int64_t count;
535	+	} mi_stat_counter_t;
536	+
537	+	typedef struct mi_stats_s {
538	+	mi_stat_count_t segments;
539	+	mi_stat_count_t pages;
540	+	mi_stat_count_t reserved;
541	+	mi_stat_count_t committed;
542	+	mi_stat_count_t reset;
543	+	mi_stat_count_t page_committed;
544	+	mi_stat_count_t segments_abandoned;
545	+	mi_stat_count_t pages_abandoned;
546	+	mi_stat_count_t threads;
547	+	mi_stat_count_t normal;
548	+	mi_stat_count_t huge;
549	+	mi_stat_count_t large;
550	+	mi_stat_count_t malloc;
551	+	mi_stat_count_t segments_cache;
552	+	mi_stat_counter_t pages_extended;
553	+	mi_stat_counter_t mmap_calls;
554	+	mi_stat_counter_t commit_calls;
555	+	mi_stat_counter_t page_no_retire;
556	+	mi_stat_counter_t searches;
557	+	mi_stat_counter_t normal_count;
558	+	mi_stat_counter_t huge_count;
559	+	mi_stat_counter_t large_count;
560	+	#if MI_STAT>1
561	+	mi_stat_count_t normal_bins[MI_BIN_HUGE+1];
562	+	#endif
563	+	} mi_stats_t;
564	+
565	+
566	+	void _mi_stat_increase(mi_stat_count_t* stat, size_t amount);
567	+	void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount);
568	+	void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount);
569	+
570	+	#if (MI_STAT)
571	+	#define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount)
572	+	#define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount)
573	+	#define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount)
574	+	#else
575	+	#define mi_stat_increase(stat,amount) (void)0
576	+	#define mi_stat_decrease(stat,amount) (void)0
577	+	#define mi_stat_counter_increase(stat,amount) (void)0
578	+	#endif
579	+
580	+	#define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount)
581	+	#define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount)
582	+	#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount)
583	+
584	+	// ------------------------------------------------------
585	+	// Thread Local data
586	+	// ------------------------------------------------------
587	+
588	+	// A "span" is an available range of slices. The span queues keep
589	+	// track of slice spans of at most the given `slice_count` (but more than the previous size class).
590	+	typedef struct mi_span_queue_s {
591	+	mi_slice_t* first;
592	+	mi_slice_t* last;
593	+	size_t slice_count;
594	+	} mi_span_queue_t;
595	+
596	+	#define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SLICES_PER_SEGMENT)
597	+
598	+	// OS thread local data
599	+	typedef struct mi_os_tld_s {
600	+	size_t region_idx; // start point for next allocation
601	+	mi_stats_t* stats; // points to tld stats
602	+	} mi_os_tld_t;
603	+
604	+
605	+	// Segments thread local data
606	+	typedef struct mi_segments_tld_s {
607	+	mi_span_queue_t spans[MI_SEGMENT_BIN_MAX+1]; // free slice spans inside segments
608	+	size_t count; // current number of segments;
609	+	size_t peak_count; // peak number of segments
610	+	size_t current_size; // current size of all segments
611	+	size_t peak_size; // peak size of all segments
612	+	mi_stats_t* stats; // points to tld stats
613	+	mi_os_tld_t* os; // points to os stats
614	+	} mi_segments_tld_t;
615	+
616	+	// Thread local data
617	+	struct mi_tld_s {
618	+	unsigned long long heartbeat; // monotonic heartbeat count
619	+	bool recurse; // true if deferred was called; used to prevent infinite recursion.
620	+	mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted)
621	+	mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates)
622	+	mi_segments_tld_t segments; // segment tld
623	+	mi_os_tld_t os; // os tld
624	+	mi_stats_t stats; // statistics
625	+	};
626	+
627	+	#endif
628	+

■ ■ ■ ■ ■ ■

preload-mimalloc/mimalloc/include/mimalloc-new-delete.h

1	+	/* ----------------------------------------------------------------------------
2	+	Copyright (c) 2018-2020 Microsoft Research, Daan Leijen
3	+	This is free software; you can redistribute it and/or modify it under the
4	+	terms of the MIT license. A copy of the license can be found in the file
5	+	"LICENSE" at the root of this distribution.
6	+	-----------------------------------------------------------------------------*/
7	+	#pragma once
8	+	#ifndef MIMALLOC_NEW_DELETE_H
9	+	#define MIMALLOC_NEW_DELETE_H
10	+
11	+	// ----------------------------------------------------------------------------
12	+	// This header provides convenient overrides for the new and
13	+	// delete operations in C++.
14	+	//
15	+	// This header should be included in only one source file!
16	+	//
17	+	// On Windows, or when linking dynamically with mimalloc, these
18	+	// can be more performant than the standard new-delete operations.
19	+	// See <https://en.cppreference.com/w/cpp/memory/new/operator_new>
20	+	// ---------------------------------------------------------------------------
21	+	#if defined(__cplusplus)
22	+	#include <new>
23	+	#include <mimalloc.h>
24	+
25	+	#if defined(_MSC_VER) && defined(_Ret_notnull_) && defined(_Post_writable_byte_size_)
26	+	// stay consistent with VCRT definitions
27	+	#define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict _Ret_notnull_ _Post_writable_byte_size_(n)
28	+	#define mi_decl_new_nothrow(n) mi_decl_nodiscard mi_decl_restrict _Ret_maybenull_ _Success_(return != NULL) _Post_writable_byte_size_(n)
29	+	#else
30	+	#define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict
31	+	#define mi_decl_new_nothrow(n) mi_decl_nodiscard mi_decl_restrict
32	+	#endif
33	+
34	+	void operator delete(void* p) noexcept { mi_free(p); };
35	+	void operator delete[](void* p) noexcept { mi_free(p); };
36	+
37	+	void operator delete (void* p, const std::nothrow_t&) noexcept { mi_free(p); }
38	+	void operator delete[](void* p, const std::nothrow_t&) noexcept { mi_free(p); }
39	+
40	+	mi_decl_new(n) void* operator new(std::size_t n) noexcept(false) { return mi_new(n); }
41	+	mi_decl_new(n) void* operator new[](std::size_t n) noexcept(false) { return mi_new(n); }
42	+
43	+	mi_decl_new_nothrow(n) void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); }
44	+	mi_decl_new_nothrow(n) void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); }
45	+
46	+	#if (__cplusplus >= 201402L || _MSC_VER >= 1916)
47	+	void operator delete (void* p, std::size_t n) noexcept { mi_free_size(p,n); };
48	+	void operator delete[](void* p, std::size_t n) noexcept { mi_free_size(p,n); };
49	+	#endif
50	+
51	+	#if (__cplusplus > 201402L || defined(__cpp_aligned_new))
52	+	void operator delete (void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
53	+	void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
54	+	void operator delete (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); };
55	+	void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); };
56	+	void operator delete (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
57	+	void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
58	+
59	+	void* operator new (std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
60	+	void* operator new[](std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
61	+	void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
62	+	void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
63	+	#endif
64	+	#endif
65	+
66	+	#endif // MIMALLOC_NEW_DELETE_H
67	+

■ ■ ■ ■ ■ ■

preload-mimalloc/mimalloc/include/mimalloc-override.h

1	+	/* ----------------------------------------------------------------------------
2	+	Copyright (c) 2018-2020 Microsoft Research, Daan Leijen
3	+	This is free software; you can redistribute it and/or modify it under the
4	+	terms of the MIT license. A copy of the license can be found in the file
5	+	"LICENSE" at the root of this distribution.
6	+	-----------------------------------------------------------------------------*/
7	+	#pragma once
8	+	#ifndef MIMALLOC_OVERRIDE_H
9	+	#define MIMALLOC_OVERRIDE_H
10	+
11	+	/* ----------------------------------------------------------------------------
12	+	This header can be used to statically redirect malloc/free and new/delete
13	+	to the mimalloc variants. This can be useful if one can include this file on
14	+	each source file in a project (but be careful when using external code to
15	+	not accidentally mix pointers from different allocators).
16	+	-----------------------------------------------------------------------------*/
17	+
18	+	#include <mimalloc.h>
19	+
20	+	// Standard C allocation
21	+	#define malloc(n) mi_malloc(n)
22	+	#define calloc(n,c) mi_calloc(n,c)
23	+	#define realloc(p,n) mi_realloc(p,n)
24	+	#define free(p) mi_free(p)
25	+
26	+	#define strdup(s) mi_strdup(s)
27	+	#define strndup(s,n) mi_strndup(s,n)
28	+	#define realpath(f,n) mi_realpath(f,n)
29	+
30	+	// Microsoft extensions
31	+	#define _expand(p,n) mi_expand(p,n)
32	+	#define _msize(p) mi_usable_size(p)
33	+	#define _recalloc(p,n,c) mi_recalloc(p,n,c)
34	+
35	+	#define _strdup(s) mi_strdup(s)
36	+	#define _strndup(s,n) mi_strndup(s,n)
37	+	#define _wcsdup(s) (wchar_t*)mi_wcsdup((const unsigned short*)(s))
38	+	#define _mbsdup(s) mi_mbsdup(s)
39	+	#define _dupenv_s(b,n,v) mi_dupenv_s(b,n,v)
40	+	#define _wdupenv_s(b,n,v) mi_wdupenv_s((unsigned short*)(b),n,(const unsigned short*)(v))
41	+
42	+	// Various Posix and Unix variants
43	+	#define reallocf(p,n) mi_reallocf(p,n)
44	+	#define malloc_size(p) mi_usable_size(p)
45	+	#define malloc_usable_size(p) mi_usable_size(p)
46	+	#define cfree(p) mi_free(p)
47	+
48	+	#define valloc(n) mi_valloc(n)
49	+	#define pvalloc(n) mi_pvalloc(n)
50	+	#define reallocarray(p,s,n) mi_reallocarray(p,s,n)
51	+	#define reallocarr(p,s,n) mi_reallocarr(p,s,n)
52	+	#define memalign(a,n) mi_memalign(a,n)
53	+	#define aligned_alloc(a,n) mi_aligned_alloc(a,n)
54	+	#define posix_memalign(p,a,n) mi_posix_memalign(p,a,n)
55	+	#define _posix_memalign(p,a,n) mi_posix_memalign(p,a,n)
56	+
57	+	// Microsoft aligned variants
58	+	#define _aligned_malloc(n,a) mi_malloc_aligned(n,a)
59	+	#define _aligned_realloc(p,n,a) mi_realloc_aligned(p,n,a)
60	+	#define _aligned_recalloc(p,s,n,a) mi_aligned_recalloc(p,s,n,a)
61	+	#define _aligned_msize(p,a,o) mi_usable_size(p)
62	+	#define _aligned_free(p) mi_free(p)
63	+	#define _aligned_offset_malloc(n,a,o) mi_malloc_aligned_at(n,a,o)
64	+	#define _aligned_offset_realloc(p,n,a,o) mi_realloc_aligned_at(p,n,a,o)
65	+	#define _aligned_offset_recalloc(p,s,n,a,o) mi_recalloc_aligned_at(p,s,n,a,o)
66	+
67	+	#endif // MIMALLOC_OVERRIDE_H
68	+

■ ■ ■ ■ ■ ■

preload-mimalloc/mimalloc/include/mimalloc.h

1	+	/* ----------------------------------------------------------------------------
2	+	Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
3	+	This is free software; you can redistribute it and/or modify it under the
4	+	terms of the MIT license. A copy of the license can be found in the file
5	+	"LICENSE" at the root of this distribution.
6	+	-----------------------------------------------------------------------------*/
7	+	#pragma once
8	+	#ifndef MIMALLOC_H
9	+	#define MIMALLOC_H
10	+
11	+	#define MI_MALLOC_VERSION 211 // major + 2 digits minor
12	+
13	+	// ------------------------------------------------------
14	+	// Compiler specific attributes
15	+	// ------------------------------------------------------
16	+
17	+	#ifdef __cplusplus
18	+	#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11
19	+	#define mi_attr_noexcept noexcept
20	+	#else
21	+	#define mi_attr_noexcept throw()
22	+	#endif
23	+	#else
24	+	#define mi_attr_noexcept
25	+	#endif
26	+
27	+	#if defined(__cplusplus) && (__cplusplus >= 201703)
28	+	#define mi_decl_nodiscard [[nodiscard]]
29	+	#elif (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // includes clang, icc, and clang-cl
30	+	#define mi_decl_nodiscard __attribute__((warn_unused_result))
31	+	#elif defined(_HAS_NODISCARD)
32	+	#define mi_decl_nodiscard _NODISCARD
33	+	#elif (_MSC_VER >= 1700)
34	+	#define mi_decl_nodiscard _Check_return_
35	+	#else
36	+	#define mi_decl_nodiscard
37	+	#endif
38	+
39	+	#if defined(_MSC_VER) || defined(__MINGW32__)
40	+	#if !defined(MI_SHARED_LIB)
41	+	#define mi_decl_export
42	+	#elif defined(MI_SHARED_LIB_EXPORT)
43	+	#define mi_decl_export __declspec(dllexport)
44	+	#else
45	+	#define mi_decl_export __declspec(dllimport)
46	+	#endif
47	+	#if defined(__MINGW32__)
48	+	#define mi_decl_restrict
49	+	#define mi_attr_malloc __attribute__((malloc))
50	+	#else
51	+	#if (_MSC_VER >= 1900) && !defined(__EDG__)
52	+	#define mi_decl_restrict __declspec(allocator) __declspec(restrict)
53	+	#else
54	+	#define mi_decl_restrict __declspec(restrict)
55	+	#endif
56	+	#define mi_attr_malloc
57	+	#endif
58	+	#define mi_cdecl __cdecl
59	+	#define mi_attr_alloc_size(s)
60	+	#define mi_attr_alloc_size2(s1,s2)
61	+	#define mi_attr_alloc_align(p)
62	+	#elif defined(__GNUC__) // includes clang and icc
63	+	#if defined(MI_SHARED_LIB) && defined(MI_SHARED_LIB_EXPORT)
64	+	#define mi_decl_export __attribute__((visibility("default")))
65	+	#else
66	+	#define mi_decl_export
67	+	#endif
68	+	#define mi_cdecl // leads to warnings... __attribute__((cdecl))
69	+	#define mi_decl_restrict
70	+	#define mi_attr_malloc __attribute__((malloc))
71	+	#if (defined(__clang_major__) && (__clang_major__ < 4)) || (__GNUC__ < 5)
72	+	#define mi_attr_alloc_size(s)
73	+	#define mi_attr_alloc_size2(s1,s2)
74	+	#define mi_attr_alloc_align(p)
75	+	#elif defined(__INTEL_COMPILER)
76	+	#define mi_attr_alloc_size(s) __attribute__((alloc_size(s)))
77	+	#define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2)))
78	+	#define mi_attr_alloc_align(p)
79	+	#else
80	+	#define mi_attr_alloc_size(s) __attribute__((alloc_size(s)))
81	+	#define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2)))
82	+	#define mi_attr_alloc_align(p) __attribute__((alloc_align(p)))
83	+	#endif
84	+	#else
85	+	#define mi_cdecl
86	+	#define mi_decl_export
87	+	#define mi_decl_restrict
88	+	#define mi_attr_malloc
89	+	#define mi_attr_alloc_size(s)
90	+	#define mi_attr_alloc_size2(s1,s2)
91	+	#define mi_attr_alloc_align(p)
92	+	#endif
93	+
94	+	// ------------------------------------------------------
95	+	// Includes
96	+	// ------------------------------------------------------
97	+
98	+	#include <stddef.h> // size_t
99	+	#include <stdbool.h> // bool
100	+	#include <stdint.h> // INTPTR_MAX
101	+
102	+	#ifdef __cplusplus
103	+	extern "C" {
104	+	#endif
105	+
106	+	// ------------------------------------------------------
107	+	// Standard malloc interface
108	+	// ------------------------------------------------------
109	+
110	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
111	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2);
112	+	mi_decl_nodiscard mi_decl_export void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2);
113	+	mi_decl_export void* mi_expand(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2);
114	+
115	+	mi_decl_export void mi_free(void* p) mi_attr_noexcept;
116	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept mi_attr_malloc;
117	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept mi_attr_malloc;
118	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept mi_attr_malloc;
119	+
120	+	// ------------------------------------------------------
121	+	// Extended functionality
122	+	// ------------------------------------------------------
123	+	#define MI_SMALL_WSIZE_MAX (128)
124	+	#define MI_SMALL_SIZE_MAX (MI_SMALL_WSIZE_MAX*sizeof(void*))
125	+
126	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
127	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
128	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
129	+
130	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2);
131	+	mi_decl_nodiscard mi_decl_export void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3);
132	+	mi_decl_nodiscard mi_decl_export void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2);
133	+
134	+	mi_decl_nodiscard mi_decl_export size_t mi_usable_size(const void* p) mi_attr_noexcept;
135	+	mi_decl_nodiscard mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept;
136	+
137	+
138	+	// ------------------------------------------------------
139	+	// Internals
140	+	// ------------------------------------------------------
141	+
142	+	typedef void (mi_cdecl mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg);
143	+	mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg) mi_attr_noexcept;
144	+
145	+	typedef void (mi_cdecl mi_output_fun)(const char* msg, void* arg);
146	+	mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept;
147	+
148	+	typedef void (mi_cdecl mi_error_fun)(int err, void* arg);
149	+	mi_decl_export void mi_register_error(mi_error_fun* fun, void* arg);
150	+
151	+	mi_decl_export void mi_collect(bool force) mi_attr_noexcept;
152	+	mi_decl_export int mi_version(void) mi_attr_noexcept;
153	+	mi_decl_export void mi_stats_reset(void) mi_attr_noexcept;
154	+	mi_decl_export void mi_stats_merge(void) mi_attr_noexcept;
155	+	mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept; // backward compatibility: `out` is ignored and should be NULL
156	+	mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept;
157	+
158	+	mi_decl_export void mi_process_init(void) mi_attr_noexcept;
159	+	mi_decl_export void mi_thread_init(void) mi_attr_noexcept;
160	+	mi_decl_export void mi_thread_done(void) mi_attr_noexcept;
161	+	mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept;
162	+
163	+	mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs,
164	+	size_t* current_rss, size_t* peak_rss,
165	+	size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept;
166	+
167	+	// -------------------------------------------------------------------------------------
168	+	// Aligned allocation
169	+	// Note that `alignment` always follows `size` for consistency with unaligned
170	+	// allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`.
171	+	// -------------------------------------------------------------------------------------
172	+
173	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
174	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
175	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
176	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
177	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2) mi_attr_alloc_align(3);
178	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2);
179	+	mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3);
180	+	mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2);
181	+
182	+
183	+	// -------------------------------------------------------------------------------------
184	+	// Heaps: first-class, but can only allocate from the same thread that created it.
185	+	// -------------------------------------------------------------------------------------
186	+
187	+	struct mi_heap_s;
188	+	typedef struct mi_heap_s mi_heap_t;
189	+
190	+	mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new(void);
191	+	mi_decl_export void mi_heap_delete(mi_heap_t* heap);
192	+	mi_decl_export void mi_heap_destroy(mi_heap_t* heap);
193	+	mi_decl_export mi_heap_t* mi_heap_set_default(mi_heap_t* heap);
194	+	mi_decl_export mi_heap_t* mi_heap_get_default(void);
195	+	mi_decl_export mi_heap_t* mi_heap_get_backing(void);
196	+	mi_decl_export void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept;
197	+
198	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
199	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
200	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3);
201	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3);
202	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
203	+
204	+	mi_decl_nodiscard mi_decl_export void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3);
205	+	mi_decl_nodiscard mi_decl_export void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4);
206	+	mi_decl_nodiscard mi_decl_export void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3);
207	+
208	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept mi_attr_malloc;
209	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept mi_attr_malloc;
210	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept mi_attr_malloc;
211	+
212	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3);
213	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
214	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3);
215	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2);
216	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3) mi_attr_alloc_align(4);
217	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3);
218	+	mi_decl_nodiscard mi_decl_export void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(3) mi_attr_alloc_align(4);
219	+	mi_decl_nodiscard mi_decl_export void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(3);
220	+
221	+
222	+	// --------------------------------------------------------------------------------
223	+	// Zero initialized re-allocation.
224	+	// Only valid on memory that was originally allocated with zero initialization too.
225	+	// e.g. `mi_calloc`, `mi_zalloc`, `mi_zalloc_aligned` etc.
226	+	// see <https://github.com/microsoft/mimalloc/issues/63#issuecomment-508272992>
227	+	// --------------------------------------------------------------------------------
228	+
229	+	mi_decl_nodiscard mi_decl_export void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2);
230	+	mi_decl_nodiscard mi_decl_export void* mi_recalloc(void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3);
231	+
232	+	mi_decl_nodiscard mi_decl_export void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3);
233	+	mi_decl_nodiscard mi_decl_export void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2);
234	+	mi_decl_nodiscard mi_decl_export void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_alloc_size2(2,3) mi_attr_alloc_align(4);
235	+	mi_decl_nodiscard mi_decl_export void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(2,3);
236	+
237	+	mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3);
238	+	mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4);
239	+
240	+	mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(3) mi_attr_alloc_align(4);
241	+	mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(3);
242	+	mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_alloc_size2(3,4) mi_attr_alloc_align(5);
243	+	mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(3,4);
244	+
245	+
246	+	// ------------------------------------------------------
247	+	// Analysis
248	+	// ------------------------------------------------------
249	+
250	+	mi_decl_export bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
251	+	mi_decl_export bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
252	+	mi_decl_export bool mi_check_owned(const void* p);
253	+
254	+	// An area of heap space contains blocks of a single size.
255	+	typedef struct mi_heap_area_s { // a const pointer to this struct is one of the arguments of a mi_block_visit_fun visitor
256	+	void* blocks; // start of the area containing heap blocks
257	+	size_t reserved; // bytes reserved for this area (virtual)
258	+	size_t committed; // current available bytes for this area
259	+	size_t used; // number of allocated blocks
260	+	size_t block_size; // size in bytes of each block
261	+	size_t full_block_size; // size in bytes of a full block including padding and metadata.
262	+	} mi_heap_area_t; // NOTE(review): presumably filled in by mi_heap_visit_blocks before each visitor call -- confirm in the implementation
263	+
264	+	typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
265	+
266	+	mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
267	+
268	+	// Experimental
269	+	mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;
270	+	mi_decl_nodiscard mi_decl_export bool mi_is_redirected(void) mi_attr_noexcept;
271	+
272	+	mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept;
273	+	mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept;
274	+
275	+	mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept;
276	+	mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept;
277	+
278	+	mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept;
279	+
280	+	// Experimental: heaps associated with specific memory arenas
281	+	typedef int mi_arena_id_t;
282	+	mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size);
283	+	mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
284	+	mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
285	+	mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
286	+
287	+	#if MI_MALLOC_VERSION >= 200
288	+	// Create a heap that only allocates in the specified arena
289	+	mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id);
290	+	#endif
291	+
292	+	// deprecated
293	+	mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
294	+
295	+
296	+	// ------------------------------------------------------
297	+	// Convenience
298	+	// ------------------------------------------------------
299	+
300	+	#define mi_malloc_tp(type) ((type*)mi_malloc(sizeof(type))) // storage for one `type` from mi_malloc, cast to `type*`
301	+	#define mi_zalloc_tp(type) ((type*)mi_zalloc(sizeof(type))) // same, but through mi_zalloc
302	+	#define mi_calloc_tp(type,count) ((type*)mi_calloc(count,sizeof(type))) // `count` elements of `type` through mi_calloc
303	+	#define mi_mallocn_tp(type,count) ((type*)mi_mallocn(count,sizeof(type))) // `count` elements of `type` through mi_mallocn
304	+	#define mi_reallocn_tp(ptr,type,count) ((type*)mi_reallocn(ptr,count,sizeof(type))) // passes `ptr` to mi_reallocn for `count` elements of `type`
305	+	#define mi_recalloc_tp(ptr,type,count) ((type*)mi_recalloc(ptr,count,sizeof(type))) // passes `ptr` to mi_recalloc for `count` elements of `type`
306	+
307	+	#define mi_heap_malloc_tp(heap,type) ((type*)mi_heap_malloc(heap,sizeof(type))) // heap variants: `heap` is forwarded as the first argument
308	+	#define mi_heap_zalloc_tp(heap,type) ((type*)mi_heap_zalloc(heap,sizeof(type)))
309	+	#define mi_heap_calloc_tp(heap,type,count) ((type*)mi_heap_calloc(heap,count,sizeof(type)))
310	+	#define mi_heap_mallocn_tp(heap,type,count) ((type*)mi_heap_mallocn(heap,count,sizeof(type)))
311	+	#define mi_heap_reallocn_tp(heap,ptr,type,count) ((type*)mi_heap_reallocn(heap,ptr,count,sizeof(type)))
312	+	#define mi_heap_recalloc_tp(heap,ptr,type,count) ((type*)mi_heap_recalloc(heap,ptr,count,sizeof(type)))
313	+
314	+
315	+	// ------------------------------------------------------
316	+	// Options
317	+	// ------------------------------------------------------
318	+
319	+	typedef enum mi_option_e { // no explicit values: enumerators are numbered from 0 in declaration order, so the order fixes each option's value
320	+	// stable options
321	+	mi_option_show_errors,
322	+	mi_option_show_stats,
323	+	mi_option_verbose,
324	+	// some of the following options are experimental
325	+	// (deprecated options are kept for binary backward compatibility with v1.x versions)
326	+	mi_option_eager_commit,
327	+	mi_option_deprecated_eager_region_commit, // deprecated: kept only so later enumerators keep their values
328	+	mi_option_deprecated_reset_decommits, // deprecated: kept only so later enumerators keep their values
329	+	mi_option_large_os_pages, // use large (2MiB) OS pages, implies eager commit
330	+	mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup
331	+	mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node
332	+	mi_option_reserve_os_memory, // reserve specified amount of OS memory at startup
333	+	mi_option_deprecated_segment_cache, // deprecated: kept only so later enumerators keep their values
334	+	mi_option_page_reset,
335	+	mi_option_abandoned_page_decommit,
336	+	mi_option_deprecated_segment_reset, // deprecated: kept only so later enumerators keep their values
337	+	mi_option_eager_commit_delay,
338	+	mi_option_decommit_delay,
339	+	mi_option_use_numa_nodes, // 0 = use available numa nodes, otherwise use at most N nodes.
340	+	mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only reserved arenas)
341	+	mi_option_os_tag,
342	+	mi_option_max_errors,
343	+	mi_option_max_warnings,
344	+	mi_option_max_segment_reclaim,
345	+	mi_option_allow_decommit,
346	+	mi_option_segment_decommit_delay,
347	+	mi_option_decommit_extend_delay,
348	+	mi_option_destroy_on_exit,
349	+	_mi_option_last // not an option itself: as the final enumerator its value equals the number of options above
350	+	} mi_option_t;
351	+
352	+
353	+	mi_decl_nodiscard mi_decl_export bool mi_option_is_enabled(mi_option_t option);
354	+	mi_decl_export void mi_option_enable(mi_option_t option);
355	+	mi_decl_export void mi_option_disable(mi_option_t option);
356	+	mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable);
357	+	mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable);
358	+
359	+	mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option);
360	+	mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max);
361	+	mi_decl_export void mi_option_set(mi_option_t option, long value);
362	+	mi_decl_export void mi_option_set_default(mi_option_t option, long value);
363	+
364	+
365	+	// -------------------------------------------------------------------------------------------------------
366	+	// "mi" prefixed implementations of various posix, Unix, Windows, and C++ allocation functions.
367	+	// (This can be convenient when providing overrides of these functions as done in `mimalloc-override.h`.)
368	+	// note: we use `mi_cfree` as "checked free" and it checks if the pointer is in our heap before free-ing.
369	+	// -------------------------------------------------------------------------------------------------------
370	+
371	+	mi_decl_export void mi_cfree(void* p) mi_attr_noexcept;
372	+	mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept;
373	+	mi_decl_nodiscard mi_decl_export size_t mi_malloc_size(const void* p) mi_attr_noexcept;
374	+	mi_decl_nodiscard mi_decl_export size_t mi_malloc_good_size(size_t size) mi_attr_noexcept;
375	+	mi_decl_nodiscard mi_decl_export size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept;
376	+
377	+	mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept;
378	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1);
379	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
380	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
381	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1);
382	+
383	+	mi_decl_nodiscard mi_decl_export void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3);
384	+	mi_decl_nodiscard mi_decl_export int mi_reallocarr(void* p, size_t count, size_t size) mi_attr_noexcept;
385	+	mi_decl_nodiscard mi_decl_export void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept;
386	+	mi_decl_nodiscard mi_decl_export void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept;
387	+
388	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept mi_attr_malloc;
389	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept mi_attr_malloc;
390	+	mi_decl_export int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept;
391	+	mi_decl_export int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) mi_attr_noexcept;
392	+
393	+	mi_decl_export void mi_free_size(void* p, size_t size) mi_attr_noexcept;
394	+	mi_decl_export void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept;
395	+	mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept;
396	+
397	+	// The `mi_new` wrappers implement C++ semantics on out-of-memory instead of directly returning `NULL`.
398	+	// (and call `std::get_new_handler` and potentially raise a `std::bad_alloc` exception).
399	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1);
400	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
401	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
402	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
403	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1, 2);
404	+	mi_decl_nodiscard mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_alloc_size(2);
405	+	mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_alloc_size2(2, 3);
406	+
407	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) mi_attr_malloc mi_attr_alloc_size(2);
408	+	mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3);
409	+
410	+	#ifdef __cplusplus
411	+	}
412	+	#endif
413	+
414	+	// ---------------------------------------------------------------------------------------------
415	+	// Implement the C++ std::allocator interface for use in STL containers.
416	+	// (note: see `mimalloc-new-delete.h` for overriding the new/delete operators globally)
417	+	// ---------------------------------------------------------------------------------------------
418	+	#ifdef __cplusplus
419	+
420	+	#include <cstddef> // std::size_t
421	+	#include <cstdint> // PTRDIFF_MAX
422	+	#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11
423	+	#include <type_traits> // std::true_type
424	+	#include <utility> // std::forward
425	+	#endif
426	+
427	+	template<class T> struct _mi_stl_allocator_common { // allocator typedefs and construct/destroy helpers inherited by the mimalloc STL allocators
428	+	typedef T value_type;
429	+	typedef std::size_t size_type;
430	+	typedef std::ptrdiff_t difference_type;
431	+	typedef value_type& reference;
432	+	typedef value_type const& const_reference;
433	+	typedef value_type* pointer;
434	+	typedef value_type const* const_pointer;
435	+
436	+	#if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11
437	+	using propagate_on_container_copy_assignment = std::true_type;
438	+	using propagate_on_container_move_assignment = std::true_type;
439	+	using propagate_on_container_swap = std::true_type;
440	+	template <class U, class ...Args> void construct(U* p, Args&& ...args) { ::new(p) U(std::forward<Args>(args)...); } // placement-new a U at p, forwarding args to its constructor
441	+	template <class U> void destroy(U* p) mi_attr_noexcept { p->~U(); } // runs the destructor only; the storage is not released here
442	+	#else
443	+	void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } // pre-C++11 form: copy-construct val in place at p
444	+	void destroy(pointer p) { p->~value_type(); }
445	+	#endif
446	+
447	+	size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); } // largest element count whose total byte size stays within PTRDIFF_MAX
448	+	pointer address(reference x) const { return &x; }
449	+	const_pointer address(const_reference x) const { return &x; }
450	+	};
451	+
452	+	template<class T> struct mi_stl_allocator : public _mi_stl_allocator_common<T> { // stateless STL allocator: allocates with mi_new_n, releases with mi_free
453	+	using typename _mi_stl_allocator_common<T>::size_type;
454	+	using typename _mi_stl_allocator_common<T>::value_type;
455	+	using typename _mi_stl_allocator_common<T>::pointer;
456	+	template <class U> struct rebind { typedef mi_stl_allocator<U> other; };
457	+
458	+	mi_stl_allocator() mi_attr_noexcept = default;
459	+	mi_stl_allocator(const mi_stl_allocator&) mi_attr_noexcept = default;
460	+	template<class U> mi_stl_allocator(const mi_stl_allocator<U>&) mi_attr_noexcept { } // rebinding copy: there is no state to carry over
461	+	mi_stl_allocator select_on_container_copy_construction() const { return *this; }
462	+	void deallocate(T* p, size_type) { mi_free(p); } // the element count is ignored
463	+
464	+	#if (__cplusplus >= 201703L) // C++17
465	+	mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_new_n(count, sizeof(T))); }
466	+	mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); } // the hint argument is ignored
467	+	#else
468	+	mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast<pointer>(mi_new_n(count, sizeof(value_type))); }
469	+	#endif
470	+
471	+	#if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11
472	+	using is_always_equal = std::true_type;
473	+	#endif
474	+	};
475	+
476	+	template<typename T1, typename T2> bool operator==(const mi_stl_allocator<T1>&, const mi_stl_allocator<T2>&) mi_attr_noexcept { return true; } // unconditionally equal, in line with is_always_equal
477	+	template<typename T1, typename T2> bool operator!=(const mi_stl_allocator<T1>&, const mi_stl_allocator<T2>&) mi_attr_noexcept { return false; } // hence never unequal
478	+
479	+
480	+	#if (__cplusplus >= 201103L) || (_MSC_VER >= 1900) // C++11
481	+	#define MI_HAS_HEAP_STL_ALLOCATOR 1 // lets client code test whether the heap-bound STL allocators below are available
482	+
483	+	#include <memory> // std::shared_ptr
484	+
485	+	// Common base class for STL allocators in a specific heap
486	+	template<class T, bool _mi_destroy> struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common<T> { // _mi_destroy picks mi_heap_destroy (true) or mi_heap_delete (false) for heaps created by the default constructor
487	+	using typename _mi_stl_allocator_common<T>::size_type;
488	+	using typename _mi_stl_allocator_common<T>::value_type;
489	+	using typename _mi_stl_allocator_common<T>::pointer;
490	+
491	+	_mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp, [](mi_heap_t*) { }) { } /* no-op deleter: will not delete nor destroy the passed in heap (a plain shared_ptr(hp) would `delete` it) */
492	+
493	+	#if (__cplusplus >= 201703L) // C++17
494	+	mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); }
495	+	mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); } // the hint argument is ignored
496	+	#else
497	+	mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast<pointer>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(value_type))); }
498	+	#endif
499	+
500	+	#if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11
501	+	using is_always_equal = std::false_type; // instances may refer to different heaps, see is_equal
502	+	#endif
503	+
504	+	void collect(bool force) { mi_heap_collect(this->heap.get(), force); }
505	+	template<class U> bool is_equal(const _mi_heap_stl_allocator_common<U, _mi_destroy>& x) const { return (this->heap == x.heap); } // equal iff both refer to the same heap pointer
506	+
507	+	protected:
508	+	std::shared_ptr<mi_heap_t> heap; // shared between all copies (and rebound copies) of an allocator
509	+	template<class U, bool D> friend struct _mi_heap_stl_allocator_common; // lets the rebinding copy constructor and is_equal read x.heap
510	+
511	+	_mi_heap_stl_allocator_common() {
512	+	mi_heap_t* hp = mi_heap_new();
513	+	this->heap.reset(hp, (_mi_destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */
514	+	}
515	+	_mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { }
516	+	template<class U> _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common<U, _mi_destroy>& x) mi_attr_noexcept : heap(x.heap) { }
517	+
518	+	private:
519	+	static void heap_delete(mi_heap_t* hp) { if (hp != NULL) { mi_heap_delete(hp); } } // NULL-safe deleter (hp is whatever mi_heap_new returned)
520	+	static void heap_destroy(mi_heap_t* hp) { if (hp != NULL) { mi_heap_destroy(hp); } }
521	+	};
522	+
523	+	// STL allocator that allocates in a specific heap and releases blocks with mi_free
524	+	template<typename T> struct mi_heap_stl_allocator : public _mi_heap_stl_allocator_common<T, false> {
525	+	using typename _mi_heap_stl_allocator_common<T, false>::size_type;
526	+	template<typename U> struct rebind { typedef mi_heap_stl_allocator<U> other; };
527	+
528	+	mi_heap_stl_allocator() : _mi_heap_stl_allocator_common<T, false>() { } // base class creates a fresh heap for this allocator and its copies
529	+	mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, false>(hp) { } // uses a caller-provided heap; its lifetime handling lives in the base-class constructor
530	+	template<typename U> mi_heap_stl_allocator(const mi_heap_stl_allocator<U>& src) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, false>(src) { }
531	+	mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; }
532	+	void deallocate(T* p, size_type) { mi_free(p); }
533	+	};
534	+
535	+	template<typename T1, typename T2> bool operator==(const mi_heap_stl_allocator<T1>& lhs, const mi_heap_stl_allocator<T2>& rhs) mi_attr_noexcept { return lhs.is_equal(rhs); } // defers to is_equal
536	+	template<typename T1, typename T2> bool operator!=(const mi_heap_stl_allocator<T1>& lhs, const mi_heap_stl_allocator<T2>& rhs) mi_attr_noexcept { return !lhs.is_equal(rhs); }
537	+
538	+
539	+	// STL allocator allocation in a specific heap, where `free` does nothing and
540	+	// the heap is destroyed in one go on destruction -- use with care!
541	+	template<class T> struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common<T, true> {
542	+	using typename _mi_heap_stl_allocator_common<T, true>::size_type;
543	+	mi_heap_destroy_stl_allocator() : _mi_heap_stl_allocator_common<T, true>() { } // creates fresh heap that is destroyed once the last allocator copy sharing it is gone
544	+	mi_heap_destroy_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, true>(hp) { } // uses a caller-provided heap; its lifetime handling lives in the base-class constructor
545	+	template<class U> mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator<U>& x) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, true>(x) { }
546	+
547	+	mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; }
548	+	void deallocate(T*, size_type) { /* do nothing as we destroy the heap on destruct. */ }
549	+	template<class U> struct rebind { typedef mi_heap_destroy_stl_allocator<U> other; };
550	+	};
551	+
552	+	template<typename T1, typename T2> bool operator==(const mi_heap_destroy_stl_allocator<T1>& lhs, const mi_heap_destroy_stl_allocator<T2>& rhs) mi_attr_noexcept { return lhs.is_equal(rhs); } // defers to is_equal
553	+	template<typename T1, typename T2> bool operator!=(const mi_heap_destroy_stl_allocator<T1>& lhs, const mi_heap_destroy_stl_allocator<T2>& rhs) mi_attr_noexcept { return !lhs.is_equal(rhs); }
554	+
555	+	#endif // C++11
556	+
557	+	#endif // __cplusplus
558	+
559	+	#endif
560	+

■ ■ ■ ■ ■ ■

preload-mimalloc/mimalloc/readme.md

1	+
2	+	<img align="left" width="100" height="100" src="doc/mimalloc-logo.png"/>
3	+
4	+	[<img align="right" src="https://dev.azure.com/Daan0324/mimalloc/_apis/build/status/microsoft.mimalloc?branchName=dev"/>](https://dev.azure.com/Daan0324/mimalloc/_build?definitionId=1&_a=summary)
5	+
6	+	# mimalloc
7	+
8	+
9	+
10	+	mimalloc (pronounced "me-malloc")
11	+	is a general purpose allocator with excellent [performance](#performance) characteristics.
12	+	Initially developed by Daan Leijen for the runtime systems of the
13	+	[Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.
14	+
15	+	Latest release tag: `v2.1.1` (2023-04-03).
16	+	Latest stable tag: `v1.8.1` (2023-04-03).
17	+
18	+	mimalloc is a drop-in replacement for `malloc` and can be used in other programs
19	+	without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
20	+	```
21	+	> LD_PRELOAD=/usr/lib/libmimalloc.so myprogram
22	+	```
23	+	It also includes a robust way to override the default allocator in [Windows](#override_on_windows). Notable aspects of the design include:
24	+
25	+	- __small and consistent__: the library is about 8k LOC using simple and
26	+	consistent data structures. This makes it very suitable
27	+	to integrate and adapt in other projects. For runtime systems it
28	+	provides hooks for a monotonic _heartbeat_ and deferred freeing (for
29	+	bounded worst-case times with reference counting).
30	+	Partly due to its simplicity, mimalloc has been ported to many systems (Windows, macOS,
31	+	Linux, WASM, various BSD's, Haiku, MUSL, etc) and has excellent support for dynamic overriding.
32	+	- __free list sharding__: instead of one big free list (per size class) we have
33	+	many smaller lists per "mimalloc page" which reduces fragmentation and
34	+	increases locality --
35	+	things that are allocated close in time get allocated close in memory.
36	+	(A mimalloc page contains blocks of one size class and is usually 64KiB on a 64-bit system).
37	+	- __free list multi-sharding__: the big idea! Not only do we shard the free list
38	+	per mimalloc page, but for each page we have multiple free lists. In particular, there
39	+	is one list for thread-local `free` operations, and another one for concurrent `free`
40	+	operations. Free-ing from another thread can now be a single CAS without needing
41	+	sophisticated coordination between threads. Since there will be
42	+	thousands of separate free lists, contention is naturally distributed over the heap,
43	+	and the chance of contending on a single location will be low -- this is quite
44	+	similar to randomized algorithms like skip lists where adding
45	+	a random oracle removes the need for a more complex algorithm.
46	+	- __eager page reset__: when a "page" becomes empty (with increased chance
47	+	due to free list sharding) the memory is marked to the OS as unused (reset or decommitted)
48	+	reducing (real) memory pressure and fragmentation, especially in long running
49	+	programs.
50	+	- __secure__: _mimalloc_ can be built in secure mode, adding guard pages,
51	+	randomized allocation, encrypted free lists, etc. to protect against various
52	+	heap vulnerabilities. The performance penalty is usually around 10% on average
53	+	over our benchmarks.
54	+	- __first-class heaps__: efficiently create and use multiple heaps to allocate across different regions.
55	+	A heap can be destroyed at once instead of deallocating each object separately.
56	+	- __bounded__: it does not suffer from _blowup_ \[1\], has bounded worst-case allocation
57	+	times (_wcat_) (up to OS primitives), bounded space overhead (~0.2% meta-data, with low
58	+	internal fragmentation), and has no internal points of contention using only atomic operations.
59	+	- __fast__: In our benchmarks (see [below](#performance)),
60	+	_mimalloc_ outperforms other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc),
61	+	and often uses less memory. A nice property is that it does consistently well over a wide range
62	+	of benchmarks. There is also good huge OS page support for larger server programs.
63	+
64	+	The [documentation](https://microsoft.github.io/mimalloc) gives a full overview of the API.
65	+	You can read more on the design of _mimalloc_ in the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action) which also has detailed benchmark results.
66	+
67	+	Enjoy!
68	+
69	+	### Branches
70	+
71	+	* `master`: latest stable release (based on `dev-slice`).
72	+	* `dev`: development branch for mimalloc v1. Use this branch for submitting PR's.
73	+	* `dev-slice`: development branch for mimalloc v2. This branch is downstream of `dev`.
74	+
75	+	### Releases
76	+
77	+	Note: the `v2.x` version has a new algorithm for managing internal mimalloc pages that tends to reduce memory usage
78	+	and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance
79	+	(see [below](#performance)); please report if you observe any significant performance regression.
80	+
81	+	* 2023-04-03, `v1.8.1`, `v2.1.1`: Fixes build issues on some platforms.
82	+
83	+	* 2023-03-29, `v1.8.0`, `v2.1.0`: Improved support for dynamic overriding on Windows 11. Improved tracing precision
84	+	with [asan](#asan) and [Valgrind](#valgrind), and added Windows event tracing [ETW](#ETW) (contributed by Xinglong He). Created an OS
85	+	abstraction layer to make it easier to port and separate platform dependent code (in `src/prim`). Fixed C++ STL compilation on older Microsoft C++ compilers, and various small bug fixes.
86	+
87	+	* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with [asan](#asan) and improved [Valgrind](#valgrind) support.
88	+	Support arbitrarily large alignments (in particular for `std::pmr` pools).
89	+	Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev).
90	+	Heap walks now visit all objects (including huge objects). Support Windows nano server containers (by Johannes Schindelin, @dscho).
91	+	Various small bug fixes.
92	+
93	+	* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind](#valgrind) for leak testing and heap block overflow
94	+	detection. Initial
95	+	support for attaching heaps to a specific memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`.
96	+
97	+	* 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation
98	+	even when compiling with older SDK's, fix dynamic overriding on macOS Monterey, fix MSVC C++ dynamic overriding, fix
99	+	warnings under Clang 14, improve performance if many OS threads are created and destroyed, fix statistics for large object
100	+	allocations, using MIMALLOC_VERBOSE=1 has no maximum on the number of error messages, various small fixes.
101	+
102	+	* 2022-02-14, `v1.7.5`, `v2.0.5` (alpha): fix malloc override on
103	+	Windows 11, fix compilation with musl, potentially reduced
104	+	committed memory, add `bin/minject` for Windows,
105	+	improved wasm support, faster aligned allocation,
106	+	various small fixes.
107	+
108	+	* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including
109	+	M1), improved performance for v2 for large objects, Python integration improvements, more standard
110	+	installation directories, various small fixes.
111	+
112	+	* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix
113	+	thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes.
114	+
115	+	* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental).
116	+
117	+	* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages.
118	+
119	+	* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics,
120	+	improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes.
121	+
122	+	* [Older release notes](#older-release-notes)
123	+
124	+	Special thanks to:
125	+
126	+	* [David Carlier](https://devnexen.blogspot.com/) (@devnexen) for his many contributions, and making
127	+	mimalloc work better on many less common operating systems, like Haiku, Dragonfly, etc.
128	+	* Mary Feofanova (@mary3000), Evgeniy Moiseenko, and Manuel Pöter (@mpoeter) for making mimalloc TSAN checkable, and finding
129	+	memory model bugs using the [genMC] model checker.
130	+	* Weipeng Liu (@pongba), Zhuowei Li, Junhua Wang, and Jakub Szymanski, for their early support of mimalloc and deployment
131	+	at large scale services, leading to many improvements in the mimalloc algorithms for large workloads.
132	+	* Jason Gibson (@jasongibson) for exhaustive testing on large scale workloads and server environments, and finding complex bugs
133	+	in (early versions of) `mimalloc`.
134	+	* Manuel Pöter (@mpoeter) and Sam Gross (@colesbury) for finding an ABA concurrency issue in abandoned segment reclamation. Sam also created the [no GIL](https://github.com/colesbury/nogil) Python fork which
135	+	uses mimalloc internally.
136	+
137	+
138	+	[genMC]: https://plv.mpi-sws.org/genmc/
139	+
140	+	### Usage
141	+
142	+	mimalloc is used in various large scale low-latency services and programs, for example:
143	+
144	+	<a href="https://www.bing.com"><img height="50" align="left" src="https://upload.wikimedia.org/wikipedia/commons/e/e9/Bing_logo.svg"></a>
145	+	<a href="https://azure.microsoft.com/"><img height="50" align="left" src="https://upload.wikimedia.org/wikipedia/commons/a/a8/Microsoft_Azure_Logo.svg"></a>
146	+	<a href="https://deathstrandingpc.505games.com"><img height="100" src="doc/ds-logo.png"></a>
147	+	<a href="https://docs.unrealengine.com/4.26/en-US/WhatsNew/Builds/ReleaseNotes/4_25/"><img height="100" src="doc/unreal-logo.svg"></a>
148	+	<a href="https://cab.spbu.ru/software/spades/"><img height="100" src="doc/spades-logo.png"></a>
149	+
150	+
151	+	# Building
152	+
153	+	## Windows
154	+
155	+	Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build.
156	+	The `mimalloc` project builds a static library (in `out/msvc-x64`), while the
157	+	`mimalloc-override` project builds a DLL for overriding malloc
158	+	in the entire program.
159	+
160	+	## macOS, Linux, BSD, etc.
161	+
162	+	We use [`cmake`](https://cmake.org)<sup>1</sup> as the build system:
163	+
164	+	```
165	+	> mkdir -p out/release
166	+	> cd out/release
167	+	> cmake ../..
168	+	> make
169	+	```
170	+	This builds the library as a shared (dynamic)
171	+	library (`.so` or `.dylib`), a static library (`.a`), and
172	+	as a single object file (`.o`).
173	+
174	+	`> sudo make install` (install the library and header files in `/usr/local/lib` and `/usr/local/include`)
175	+
176	+	You can build the debug version which does many internal checks and
177	+	maintains detailed statistics as:
178	+
179	+	```
180	+	> mkdir -p out/debug
181	+	> cd out/debug
182	+	> cmake -DCMAKE_BUILD_TYPE=Debug ../..
183	+	> make
184	+	```
185	+	This will name the shared library as `libmimalloc-debug.so`.
186	+
187	+	Finally, you can build a _secure_ version that uses guard pages, encrypted
188	+	free lists, etc., as:
189	+	```
190	+	> mkdir -p out/secure
191	+	> cd out/secure
192	+	> cmake -DMI_SECURE=ON ../..
193	+	> make
194	+	```
195	+	This will name the shared library as `libmimalloc-secure.so`.
196	+	Use `ccmake`<sup>2</sup> instead of `cmake`
197	+	to see and customize all the available build options.
198	+
199	+	Notes:
200	+	1. Install CMake: `sudo apt-get install cmake`
201	+	2. Install CCMake: `sudo apt-get install cmake-curses-gui`
202	+
203	+
204	+	## Single source
205	+
206	+	You can also directly build the single `src/static.c` file as part of your project without
207	+	needing `cmake` at all. Make sure to also add the mimalloc `include` directory to the include path.
208	+
209	+
210	+	# Using the library
211	+
212	+	The preferred usage is including `<mimalloc.h>`, linking with
213	+	the shared- or static library, and using the `mi_malloc` API exclusively for allocation. For example,
214	+	```
215	+	> gcc -o myprogram -lmimalloc myfile.c
216	+	```
217	+
218	+	mimalloc uses only safe OS calls (`mmap` and `VirtualAlloc`) and can co-exist
219	+	with other allocators linked to the same program.
220	+	If you use `cmake`, you can simply use:
221	+	```
222	+	find_package(mimalloc 1.4 REQUIRED)
223	+	```
224	+	in your `CMakeLists.txt` to find a locally installed mimalloc. Then use either:
225	+	```
226	+	target_link_libraries(myapp PUBLIC mimalloc)
227	+	```
228	+	to link with the shared (dynamic) library, or:
229	+	```
230	+	target_link_libraries(myapp PUBLIC mimalloc-static)
231	+	```
232	+	to link with the static library. See `test\CMakeLists.txt` for an example.
233	+
234	+	For best performance in C++ programs, it is also recommended to override the
235	+	global `new` and `delete` operators. For convenience, mimalloc provides
236	+	[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project.
237	+	In C++, mimalloc also provides the `mi_stl_allocator` struct which implements the `std::allocator`
238	+	interface.
239	+
240	+	You can pass environment variables to print verbose messages (`MIMALLOC_VERBOSE=1`)
241	+	and statistics (`MIMALLOC_SHOW_STATS=1`) (in the debug version):
242	+	```
243	+	> env MIMALLOC_SHOW_STATS=1 ./cfrac 175451865205073170563711388363
244	+
245	+	175451865205073170563711388363 = 374456281610909315237213 * 468551
246	+
247	+	heap stats: peak total freed unit
248	+	normal 2: 16.4 kb 17.5 mb 17.5 mb 16 b ok
249	+	normal 3: 16.3 kb 15.2 mb 15.2 mb 24 b ok
250	+	normal 4: 64 b 4.6 kb 4.6 kb 32 b ok
251	+	normal 5: 80 b 118.4 kb 118.4 kb 40 b ok
252	+	normal 6: 48 b 48 b 48 b 48 b ok
253	+	normal 17: 960 b 960 b 960 b 320 b ok
254	+
255	+	heap stats: peak total freed unit
256	+	normal: 33.9 kb 32.8 mb 32.8 mb 1 b ok
257	+	huge: 0 b 0 b 0 b 1 b ok
258	+	total: 33.9 kb 32.8 mb 32.8 mb 1 b ok
259	+	malloc requested: 32.8 mb
260	+
261	+	committed: 58.2 kb 58.2 kb 58.2 kb 1 b ok
262	+	reserved: 2.0 mb 2.0 mb 2.0 mb 1 b ok
263	+	reset: 0 b 0 b 0 b 1 b ok
264	+	segments: 1 1 1
265	+	-abandoned: 0
266	+	pages: 6 6 6
267	+	-abandoned: 0
268	+	mmaps: 3
269	+	mmap fast: 0
270	+	mmap slow: 1
271	+	threads: 0
272	+	elapsed: 2.022s
273	+	process: user: 1.781s, system: 0.016s, faults: 756, reclaims: 0, rss: 2.7 mb
274	+	```
275	+
276	+	The above model of using the `mi_` prefixed API is not always possible
277	+	though in existing programs that already use the standard malloc interface,
278	+	and another option is to override the standard malloc interface
279	+	completely and redirect all calls to the _mimalloc_ library instead.
280	+
281	+	## Environment Options
282	+
283	+	You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)),
284	+	or via environment variables:
285	+
286	+	- `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates.
287	+	- `MIMALLOC_VERBOSE=1`: show verbose messages.
288	+	- `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages.
289	+	- `MIMALLOC_PAGE_RESET=0`: by default, mimalloc will reset (or purge) OS pages that are not in use, to signal to the OS
290	+	that the underlying physical memory can be reused. This can reduce memory fragmentation in long running (server)
291	+	programs. By setting it to `0` this will no longer be done which can improve performance for batch-like programs.
292	+	As an alternative, the `MIMALLOC_RESET_DELAY=<msecs>` can be set higher (100ms by default) to make the page
293	+	reset occur less frequently instead of turning it off completely.
294	+	- `MIMALLOC_USE_NUMA_NODES=N`: pretend there are at most `N` NUMA nodes. If not set, the actual NUMA nodes are detected
295	+	at runtime. Setting `N` to 1 may avoid problems in some virtual environments. Also, setting it to a lower number than
296	+	the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA
297	+	nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed).
298	+	- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly
299	+	improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs
300	+	to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes
301	+	the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that
302	+	can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible).
303	+	<!--
304	+	- `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions
305	+	show in the working set even though usually just a small part is committed to physical memory. This is why it
306	+	turned off by default on Windows as it looks not good in the task manager. However, turning it on has no
307	+	real drawbacks and may improve performance by a little.
308	+	-->
309	+	- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB _huge_ OS pages. This reserves the huge pages at
310	+	startup and sometimes this can give a large (latency) performance improvement on big workloads.
311	+	Usually it is better to not use
312	+	`MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving
313	+	contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at
314	+	startup only once).
315	+	Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]).
316	+	With huge OS pages, it may be beneficial to set the setting
317	+	`MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB)
318	+	of a thread to not allocate in the huge OS pages; this prevents threads that are short lived
319	+	and allocate just a little to take up space in the huge OS page area (which cannot be reset).
320	+	The huge pages are usually allocated evenly among NUMA nodes.
321	+	We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all
322	+	the huge pages at a specific numa node instead.
323	+
324	+	Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write
325	+	for all pages in the original process including the huge OS pages. When any memory is now written in that area, the
326	+	OS will copy the entire 1GiB huge page (or 2MiB large page) which can cause the memory usage to grow in large increments.
327	+
328	+	[linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5
329	+	[windows-huge]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017
330	+
331	+	## Secure Mode
332	+
333	+	_mimalloc_ can be built in secure mode by using the `-DMI_SECURE=ON` flag in `cmake`. This build enables various mitigations
334	+	to make mimalloc more robust against exploits. In particular:
335	+
336	+	- All internal mimalloc pages are surrounded by guard pages and the heap metadata is behind a guard page as well (so a buffer overflow
337	+	exploit cannot reach into the metadata).
338	+	- All free list pointers are
339	+	[encoded](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396)
340	+	with per-page keys which are used both to prevent overwrites with a known pointer, as well as to detect heap corruption.
341	+	- Double frees are detected (and ignored).
342	+	- The free lists are initialized in a random order and allocation randomly chooses between extension and reuse within a page to
343	+	mitigate against attacks that rely on a predictable allocation order. Similarly, the larger heap blocks allocated by mimalloc
344	+	from the OS are also address randomized.
345	+
346	+	As always, evaluate with care as part of an overall security strategy as all of the above are mitigations but not guarantees.
347	+
348	+	## Debug Mode
349	+
350	+	When _mimalloc_ is built using debug mode, various checks are done at runtime to catch development errors.
351	+
352	+	- Statistics are maintained in detail for each object size. They can be shown using `MIMALLOC_SHOW_STATS=1` at runtime.
353	+	- All objects have padding at the end to detect (byte precise) heap block overflows.
354	+	- Double frees, and freeing invalid heap pointers are detected.
355	+	- Corrupted free-lists and some forms of use-after-free are detected.
356	+
357	+
358	+	# Overriding Standard Malloc
359	+
360	+	Overriding the standard `malloc` (and `new`) can be done either _dynamically_ or _statically_.
361	+
362	+	## Dynamic override
363	+
364	+	This is the recommended way to override the standard malloc interface.
365	+
366	+	### Dynamic Override on Linux, BSD
367	+
368	+	On these ELF-based systems we preload the mimalloc shared
369	+	library so all calls to the standard `malloc` interface are
370	+	resolved to the _mimalloc_ library.
371	+	```
372	+	> env LD_PRELOAD=/usr/lib/libmimalloc.so myprogram
373	+	```
374	+
375	+	You can set extra environment variables to check that mimalloc is running,
376	+	like:
377	+	```
378	+	> env MIMALLOC_VERBOSE=1 LD_PRELOAD=/usr/lib/libmimalloc.so myprogram
379	+	```
380	+	or run with the debug version to get detailed statistics:
381	+	```
382	+	> env MIMALLOC_SHOW_STATS=1 LD_PRELOAD=/usr/lib/libmimalloc-debug.so myprogram
383	+	```
384	+
385	+	### Dynamic Override on MacOS
386	+
387	+	On macOS we can also preload the mimalloc shared
388	+	library so all calls to the standard `malloc` interface are
389	+	resolved to the _mimalloc_ library.
390	+	```
391	+	> env DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram
392	+	```
393	+
394	+	Note that certain security restrictions may apply when doing this from
395	+	the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash).
396	+
397	+
398	+	### Dynamic Override on Windows
399	+
400	+	<span id="override_on_windows">Overriding on Windows</span> is robust and has the
401	+	particular advantage of being able to redirect all malloc/free calls that go through
402	+	the (dynamic) C runtime allocator, including those from other DLLs or libraries.
403	+
404	+	The overriding on Windows requires that you link your program explicitly with
405	+	the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch).
406	+	Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be put
407	+	in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency).
408	+	The redirection DLL ensures that all calls to the C runtime malloc API get redirected to
409	+	mimalloc (in `mimalloc-override.dll`).
410	+
411	+	To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some
412	+	call to the mimalloc API in the `main` function, like `mi_version()`
413	+	(or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project
414	+	for an example on how to use this. For best performance on Windows with C++, it
415	+	is also recommended to override the `new`/`delete` operations (by including
416	+	[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) in a single(!) source file in your project).
417	+
418	+	The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic
419	+	overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected.
420	+
421	+	(Note: in principle, it is possible to even patch existing executables without any recompilation
422	+	if they are linked with the dynamic C runtime (`ucrtbase.dll`) -- just put the `mimalloc-override.dll`
423	+	into the import table (and put `mimalloc-redirect.dll` in the same folder).
424	+	Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)).
425	+
426	+
427	+	## Static override
428	+
429	+	On Unix-like systems, you can also statically link with _mimalloc_ to override the standard
430	+	malloc interface. The recommended way is to link the final program with the
431	+	_mimalloc_ single object file (`mimalloc.o`). We use
432	+	an object file instead of a library file as linkers give preference to
433	+	that over archives to resolve symbols. To ensure that the standard
434	+	malloc interface resolves to the _mimalloc_ library, link it as the first
435	+	object file. For example:
436	+	```
437	+	> gcc -o myprogram mimalloc.o myfile1.c ...
438	+	```
439	+
440	+	Another way to override statically that works on all platforms, is to
441	+	link statically to mimalloc (as shown in the introduction) and include a
442	+	header file in each source file that re-defines `malloc` etc. to `mi_malloc`.
443	+	This is provided by [`mimalloc-override.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-override.h). This only works reliably though if all sources are
444	+	under your control or otherwise mixing of pointers from different heaps may occur!
445	+
446	+
447	+	## Tools
448	+
449	+	Generally, we recommend using the standard allocator with memory tracking tools, but mimalloc
450	+	can also be built to support the [address sanitizer][asan] or the excellent [Valgrind] tool.
451	+	Moreover, it can be built to support Windows event tracing ([ETW]).
452	+	This has a small performance overhead but does allow detecting memory leaks and byte-precise
453	+	buffer overflows directly on final executables. See also the `test/test-wrong.c` file to test with various tools.
454	+
455	+	### Valgrind
456	+
457	+	To build with [valgrind] support, use the `MI_TRACK_VALGRIND=ON` cmake option:
458	+
459	+	```
460	+	> cmake ../.. -DMI_TRACK_VALGRIND=ON
461	+	```
462	+
463	+	This can also be combined with secure mode or debug mode.
464	+	You can then run your programs directly under valgrind:
465	+
466	+	```
467	+	> valgrind <myprogram>
468	+	```
469	+
470	+	If you rely on overriding `malloc`/`free` by mimalloc (instead of using the `mi_malloc`/`mi_free` API directly),
471	+	you also need to tell `valgrind` to not intercept those calls itself, and use:
472	+
473	+	```
474	+	> MIMALLOC_SHOW_STATS=1 valgrind --soname-synonyms=somalloc=mimalloc -- <myprogram>
475	+	```
476	+
477	+	By setting the `MIMALLOC_SHOW_STATS` environment variable you can check that mimalloc is indeed
478	+	used and not the standard allocator. Even though the [Valgrind option][valgrind-soname]
479	+	is called `--soname-synonyms`, this also
480	+	works when overriding with a static library or object file. Unfortunately, it is not possible to
481	+	dynamically override mimalloc using `LD_PRELOAD` together with `valgrind`.
482	+	See also the `test/test-wrong.c` file to test with `valgrind`.
483	+
484	+	Valgrind support is in its initial development -- please report any issues.
485	+
486	+	[Valgrind]: https://valgrind.org/
487	+	[valgrind-soname]: https://valgrind.org/docs/manual/manual-core.html#opt.soname-synonyms
488	+
489	+	### ASAN
490	+
491	+	To build with the address sanitizer, use the `-DMI_TRACK_ASAN=ON` cmake option:
492	+
493	+	```
494	+	> cmake ../.. -DMI_TRACK_ASAN=ON
495	+	```
496	+
497	+	This can also be combined with secure mode or debug mode.
498	+	You can then run your programs as:
499	+
500	+	```
501	+	> ASAN_OPTIONS=verbosity=1 <myprogram>
502	+	```
503	+
504	+	When you link a program with an address sanitizer build of mimalloc, you should
505	+	generally compile that program too with the address sanitizer enabled.
506	+	For example, assuming you build mimalloc in `out/debug`:
507	+
508	+	```
509	+	clang -g -o test-wrong -Iinclude test/test-wrong.c out/debug/libmimalloc-asan-debug.a -lpthread -fsanitize=address -fsanitize-recover=address
510	+	```
511	+
512	+	Since the address sanitizer redirects the standard allocation functions, on some platforms (macOSX for example)
513	+	it is required to compile mimalloc with `-DMI_OVERRIDE=OFF`.
514	+	Address sanitizer support is in its initial development -- please report any issues.
515	+
516	+	[asan]: https://github.com/google/sanitizers/wiki/AddressSanitizer
517	+
518	+	### ETW
519	+
520	+	Event tracing for Windows ([ETW]) provides a high performance way to capture all allocations through
521	+	mimalloc and analyze them later. To build with ETW support, use the `-DMI_TRACE_ETW=ON` cmake option.
522	+
523	+	You can then capture an allocation trace using the Windows performance recorder (WPR), using the
524	+	`src/prim/windows/etw-mimalloc.wprp` profile. In an admin prompt, you can use:
525	+	```
526	+	> wpr -start src\prim\windows\etw-mimalloc.wprp -filemode
527	+	> <my_mimalloc_program>
528	+	> wpr -stop <my_mimalloc_program>.etl
529	+	```
530	+	and then open `<my_mimalloc_program>.etl` in the Windows Performance Analyzer (WPA), or
531	+	use a tool like [TraceControl] that is specialized for analyzing mimalloc traces.
532	+
533	+	[ETW]: https://learn.microsoft.com/en-us/windows-hardware/test/wpt/event-tracing-for-windows
534	+	[TraceControl]: https://github.com/xinglonghe/TraceControl
535	+
536	+
537	+	# Performance
538	+
539	+	Last update: 2021-01-30
540	+
541	+	We tested _mimalloc_ against many other top allocators over a wide
542	+	range of benchmarks, ranging from various real world programs to
543	+	synthetic benchmarks that see how the allocator behaves under more
544	+	extreme circumstances. In our benchmark suite, _mimalloc_ outperforms other leading
545	+	allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), and has a similar memory footprint. A nice property is that it
546	+	does consistently well over the wide range of benchmarks.
547	+
548	+	General memory allocators are interesting as there exists no algorithm that is
549	+	optimal -- for a given allocator one can usually construct a workload
550	+	where it does not do so well. The goal is thus to find an allocation
551	+	strategy that performs well over a wide range of benchmarks without
552	+	suffering from (too much) underperformance in less common situations.
553	+
554	+	As always, interpret these results with care since some benchmarks test synthetic
555	+	or uncommon situations that may never apply to your workloads. For example, most
556	+	allocators do not do well on `xmalloc-testN` but that includes even the best
557	+	industrial allocators like _jemalloc_ and _tcmalloc_ that are used in some of
558	+	the world's largest systems (like Chrome or FreeBSD).
559	+
560	+	Also, the benchmarks here do not measure the behaviour on very large and long-running server workloads,
561	+	or worst-case latencies of allocation. Much work has gone into `mimalloc` to work well on such
562	+	workloads (for example, to reduce virtual memory fragmentation on long-running services)
563	+	but such optimizations are not always reflected in the current benchmark suite.
564	+
565	+	We show here only an overview -- for
566	+	more specific details and further benchmarks we refer to the
567	+	[technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action).
568	+	The benchmark suite is automated and available separately
569	+	as [mimalloc-bench](https://github.com/daanx/mimalloc-bench).
570	+
571	+
572	+	## Benchmark Results on a 16-core AMD 5950x (Zen3)
573	+
574	+	Testing on the 16-core AMD 5950x processor at 3.4GHz (4.9GHz boost),
575	+	with 32GiB memory at 3600MHz, running Ubuntu 20.04 with glibc 2.31 and GCC 9.3.0.
576	+
577	+	We measure three versions of _mimalloc_: the main version `mi` (tag:v1.7.0),
578	+	the new v2.0 beta version as `xmi` (tag:v2.0.0), and the main version in secure mode as `smi` (tag:v1.7.0).
579	+
580	+	The other allocators are
581	+	Google's [_tcmalloc_](https://github.com/gperftools/gperftools) (`tc`, tag:gperftools-2.8.1) used in Chrome,
582	+	Facebook's [_jemalloc_](https://github.com/jemalloc/jemalloc) (`je`, tag:5.2.1) by Jason Evans used in Firefox and FreeBSD,
583	+	the Intel thread building blocks [allocator](https://github.com/intel/tbb) (`tbb`, tag:v2020.3),
584	+	[rpmalloc](https://github.com/mjansson/rpmalloc) (`rp`,tag:1.4.1) by Mattias Jansson,
585	+	the original scalable [_Hoard_](https://github.com/emeryberger/Hoard) (git:d880f72) allocator by Emery Berger \[1],
586	+	the memory compacting [_Mesh_](https://github.com/plasma-umass/Mesh) (git:67ff31a) allocator by
587	+	Bobby Powers _et al_ \[8],
588	+	and finally the default system allocator (`glibc`, 2.31) (based on _PtMalloc2_).
589	+
590	+	<img width="90%" src="doc/bench-2021/bench-amd5950x-2021-01-30-a.svg"/>
591	+	<img width="90%" src="doc/bench-2021/bench-amd5950x-2021-01-30-b.svg"/>
592	+
593	+	Any benchmarks ending in `N` run on all 32 logical cores in parallel.
594	+	Results are averaged over 10 runs and reported relative
595	+	to mimalloc (where 1.2 means it took 1.2× longer to run).
596	+	The legend also contains the _overall relative score_ between the
597	+	allocators where 100 points is the maximum if an allocator is fastest on
598	+	all benchmarks.
599	+
600	+	The single threaded _cfrac_ benchmark by Dave Barrett is an implementation of
601	+	continued fraction factorization which uses many small short-lived allocations.
602	+	All allocators do well on such common usage, where _mimalloc_ is just a tad
603	+	faster than _tcmalloc_ and
604	+	_jemalloc_.
605	+
606	+	The _leanN_ program is interesting as a large realistic and
607	+	concurrent workload of the [Lean](https://github.com/leanprover/lean)
608	+	theorem prover compiling its own standard library, and there is a 13%
609	+	speedup over _tcmalloc_. This is
610	+	quite significant: if Lean spends 20% of its time in the
611	+	allocator that means that _mimalloc_ is 1.6× faster than _tcmalloc_
612	+	here. (This is surprising as that is not measured in a pure
613	+	allocation benchmark like _alloc-test_. We conjecture that we see this
614	+	outsized improvement here because _mimalloc_ has better locality in
615	+	the allocation which improves performance for the other computations
616	+	in a program as well).
617	+
618	+	The single threaded _redis_ benchmark again shows that most allocators do well on such workloads.
619	+
620	+	The _larsonN_ server benchmark by Larson and Krishnan \[2] allocates and frees between threads. They observed this
621	+	behavior (which they call _bleeding_) in actual server applications, and the benchmark simulates this.
622	+	Here, _mimalloc_ is quite a bit faster than _tcmalloc_ and _jemalloc_ probably due to the object migration between different threads.
623	+
624	+	The _mstressN_ workload performs many allocations and re-allocations,
625	+	and migrates objects between threads (as in _larsonN_). However, it also
626	+	creates and destroys the _N_ worker threads a few times keeping some objects
627	+	alive beyond the life time of the allocating thread. We observed this
628	+	behavior in many larger server applications.
629	+
630	+	The [_rptestN_](https://github.com/mjansson/rpmalloc-benchmark) benchmark
631	+	by Mattias Jansson is an allocator test originally designed
632	+	for _rpmalloc_, and tries to simulate realistic allocation patterns over
633	+	multiple threads. Here the differences between allocators become more apparent.
634	+
635	+	The second benchmark set tests specific aspects of the allocators and
636	+	shows even more extreme differences between them.
637	+
638	+	The _alloc-test_, by
639	+	[OLogN Technologies AG](http://ithare.com/testing-memory-allocators-ptmalloc2-tcmalloc-hoard-jemalloc-while-trying-to-simulate-real-world-loads/), is a very allocation intensive benchmark doing millions of
640	+	allocations in various size classes. The test is scaled such that when an
641	+	allocator performs almost identically on _alloc-test1_ as _alloc-testN_ it
642	+	means that it scales linearly.
643	+
644	+	The _sh6bench_ and _sh8bench_ benchmarks are
645	+	developed by [MicroQuill](http://www.microquill.com/) as part of SmartHeap.
646	+	In _sh6bench_ _mimalloc_ does much
647	+	better than the others (more than 2.5× faster than _jemalloc_).
648	+	We cannot explain this well but believe it is
649	+	caused in part by the "reverse" free-ing pattern in _sh6bench_.
650	+	The _sh8bench_ is a variation with object migration
651	+	between threads; whereas _tcmalloc_ did well on _sh6bench_, the addition of object migration causes it to be 10× slower than before.
652	+
653	+	The _xmalloc-testN_ benchmark by Lever and Boreham \[5] and Christian Eder, simulates an asymmetric workload where
654	+	some threads only allocate, and others only free -- they observed this pattern in
655	+	larger server applications. Here we see that
656	+	the _mimalloc_ technique of having non-contended sharded thread free
657	+	lists pays off as it outperforms others by a very large margin. Only _rpmalloc_, _tbb_, and _glibc_ also scale well on this benchmark.
658	+
659	+	The _cache-scratch_ benchmark by Emery Berger \[1] was introduced with
660	+	the Hoard allocator to test for _passive-false_ sharing of cache lines.
661	+	With a single thread they all
662	+	perform the same, but when running with multiple threads the potential allocator
663	+	induced false sharing of the cache lines can cause large run-time differences.
664	+	Crundal \[6] describes in detail why the false cache line sharing occurs in the _tcmalloc_ design, and also discusses how this
665	+	can be avoided with some small implementation changes.
666	+	Only the _tbb_, _rpmalloc_ and _mesh_ allocators also avoid the
667	+	cache line sharing completely, while _Hoard_ and _glibc_ seem to mitigate
668	+	the effects. Kukanov and Voss \[7] describe in detail
669	+	how the design of _tbb_ avoids the false cache line sharing.
670	+
671	+
672	+	## On a 36-core Intel Xeon
673	+
674	+	For completeness, here are the results on a big Amazon
675	+	[c5.18xlarge](https://aws.amazon.com/ec2/instance-types/#Compute_Optimized) instance
676	+	consisting of a 2×18-core Intel Xeon (Cascade Lake) at 3.4GHz (boost 3.5GHz)
677	+	with 144GiB ECC memory, running Ubuntu 20.04 with glibc 2.31, GCC 9.3.0, and
678	+	Clang 10.0.0. This time, the mimalloc allocators (mi, xmi, and smi) were
679	+	compiled with the Clang compiler instead of GCC.
680	+	The results are similar to the AMD results but it is interesting to
681	+	see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks.
682	+
683	+	<img width="90%" src="doc/bench-2021/bench-c5-18xlarge-2021-01-30-a.svg"/>
684	+	<img width="90%" src="doc/bench-2021/bench-c5-18xlarge-2021-01-30-b.svg"/>
685	+
686	+
687	+	## Peak Working Set
688	+
689	+	The following figure shows the peak working set (rss) of the allocators
690	+	on the benchmarks (on the c5.18xlarge instance).
691	+
692	+	<img width="90%" src="doc/bench-2021/bench-c5-18xlarge-2021-01-30-rss-a.svg"/>
693	+	<img width="90%" src="doc/bench-2021/bench-c5-18xlarge-2021-01-30-rss-b.svg"/>
694	+
695	+	Note that the _xmalloc-testN_ memory usage should be disregarded as it
696	+	allocates more the faster the program runs. Similarly, memory usage of
697	+	_larsonN_, _mstressN_, _rptestN_ and _sh8bench_ can vary depending on scheduling and
698	+	speed. Nevertheless, we hope to improve the memory usage on _mstressN_
699	+	and _rptestN_ (just as _cfrac_, _larsonN_ and _sh8bench_ have a small working set which skews the results).
700	+
701	+	<!--
702	+	# Previous Benchmarks
703	+
704	+	Todo: should we create a separate page for this?
705	+
706	+	## Benchmark Results on 36-core Intel: 2020-01-20
707	+
708	+	Testing on a big Amazon EC2 compute instance
709	+	([c5.18xlarge](https://aws.amazon.com/ec2/instance-types/#Compute_Optimized))
710	+	consisting of a 72 processor Intel Xeon at 3GHz
711	+	with 144GiB ECC memory, running Ubuntu 18.04.1 with glibc 2.27 and GCC 7.4.0.
712	+	The measured allocators are _mimalloc_ (xmi, tag:v1.4.0, page reset enabled)
713	+	and its secure build as _smi_,
714	+	Google's [_tcmalloc_](https://github.com/gperftools/gperftools) (tc, tag:gperftools-2.7) used in Chrome,
715	+	Facebook's [_jemalloc_](https://github.com/jemalloc/jemalloc) (je, tag:5.2.1) by Jason Evans used in Firefox and FreeBSD,
716	+	the Intel thread building blocks [allocator](https://github.com/intel/tbb) (tbb, tag:2020),
717	+	[rpmalloc](https://github.com/mjansson/rpmalloc) (rp,tag:1.4.0) by Mattias Jansson,
718	+	the original scalable [_Hoard_](https://github.com/emeryberger/Hoard) (tag:3.13) allocator by Emery Berger \[1],
719	+	the memory compacting [_Mesh_](https://github.com/plasma-umass/Mesh) (git:51222e7) allocator by
720	+	Bobby Powers _et al_ \[8],
721	+	and finally the default system allocator (glibc, 2.27) (based on _PtMalloc2_).
722	+
723	+	<img width="90%" src="doc/bench-2020/bench-c5-18xlarge-2020-01-20-a.svg"/>
724	+	<img width="90%" src="doc/bench-2020/bench-c5-18xlarge-2020-01-20-b.svg"/>
725	+
726	+	The following figure shows the peak working set (rss) of the allocators
727	+	on the benchmarks (on the c5.18xlarge instance).
728	+
729	+	<img width="90%" src="doc/bench-2020/bench-c5-18xlarge-2020-01-20-rss-a.svg"/>
730	+	<img width="90%" src="doc/bench-2020/bench-c5-18xlarge-2020-01-20-rss-b.svg"/>
731	+
732	+
733	+	## On 24-core AMD Epyc, 2020-01-16
734	+
735	+	For completeness, here are the results on a
736	+	[r5a.12xlarge](https://aws.amazon.com/ec2/instance-types/#Memory_Optimized) instance
737	+	having a 48 processor AMD Epyc 7000 at 2.5GHz with 384GiB of memory.
738	+	The results are similar to the Intel results but it is interesting to
739	+	see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks.
740	+
741	+	<img width="90%" src="doc/bench-2020/bench-r5a-12xlarge-2020-01-16-a.svg"/>
742	+	<img width="90%" src="doc/bench-2020/bench-r5a-12xlarge-2020-01-16-b.svg"/>
743	+
744	+	-->
745	+
746	+
747	+	# References
748	+
749	+	- \[1] Emery D. Berger, Kathryn S. McKinley, Robert D. Blumofe, and Paul R. Wilson.
750	+	_Hoard: A Scalable Memory Allocator for Multithreaded Applications_
751	+	the Ninth International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS-IX). Cambridge, MA, November 2000.
752	+	[pdf](http://www.cs.utexas.edu/users/mckinley/papers/asplos-2000.pdf)
753	+
754	+	- \[2] P. Larson and M. Krishnan. _Memory allocation for long-running server applications_.
755	+	In ISMM, Vancouver, B.C., Canada, 1998. [pdf](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.45.1947&rep=rep1&type=pdf)
756	+
757	+	- \[3] D. Grunwald, B. Zorn, and R. Henderson.
758	+	_Improving the cache locality of memory allocation_. In R. Cartwright, editor,
759	+	Proceedings of the Conference on Programming Language Design and Implementation, pages 177–186, New York, NY, USA, June 1993. [pdf](http://citeseer.ist.psu.edu/viewdoc/download?doi=10.1.1.43.6621&rep=rep1&type=pdf)
760	+
761	+	- \[4] J. Barnes and P. Hut. _A hierarchical O(n*log(n)) force-calculation algorithm_. Nature, 324:446-449, 1986.
762	+
763	+	- \[5] C. Lever, and D. Boreham. _Malloc() Performance in a Multithreaded Linux Environment._
764	+	In USENIX Annual Technical Conference, Freenix Session. San Diego, CA. Jun. 2000.
765	+	Available at <https://github.com/kuszmaul/SuperMalloc/tree/master/tests>
766	+
767	+	- \[6] Timothy Crundal. _Reducing Active-False Sharing in TCMalloc_. 2016. CS16S1 project at the Australian National University. [pdf](http://courses.cecs.anu.edu.au/courses/CSPROJECTS/16S1/Reports/Timothy_Crundal_Report.pdf)
768	+
769	+	- \[7] Alexey Kukanov, and Michael J Voss.
770	+	_The Foundations for Scalable Multi-Core Software in Intel Threading Building Blocks._
771	+	Intel Technology Journal 11 (4). 2007
772	+
773	+	- \[8] Bobby Powers, David Tench, Emery D. Berger, and Andrew McGregor.
774	+	_Mesh: Compacting Memory Management for C/C++_
775	+	In Proceedings of the 40th ACM SIGPLAN Conference on Programming Language Design and Implementation (PLDI'19), June 2019, pages 333–346.
776	+
777	+	<!--
778	+	- \[9] Paul Liétar, Theodore Butler, Sylvan Clebsch, Sophia Drossopoulou, Juliana Franco, Matthew J Parkinson,
779	+	Alex Shamis, Christoph M Wintersteiger, and David Chisnall.
780	+	_Snmalloc: A Message Passing Allocator._
781	+	In Proceedings of the 2019 ACM SIGPLAN International Symposium on Memory Management, 122–135. ACM. 2019.
782	+	-->
783	+
784	+	# Contributing
785	+
786	+	This project welcomes contributions and suggestions. Most contributions require you to agree to a
787	+	Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
788	+	the rights to use your contribution. For details, visit https://cla.microsoft.com.
789	+
790	+	When you submit a pull request, a CLA-bot will automatically determine whether you need to provide
791	+	a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions
792	+	provided by the bot. You will only need to do this once across all repos using our CLA.
793	+
794	+
795	+	# Older Release Notes
796	+
797	+	* 2020-09-24, `v1.6.7`: stable release 1.6: using standard C atomics, passing tsan testing, improved
798	+	handling of failing to commit on Windows, add [`mi_process_info`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc.h#L156) api call.
799	+	* 2020-08-06, `v1.6.4`: stable release 1.6: improved error recovery in low-memory situations,
800	+	support for IllumOS and Haiku, NUMA support for Vista/XP, improved NUMA detection for AMD Ryzen, ubsan support.
801	+	* 2020-05-05, `v1.6.3`: stable release 1.6: improved behavior in out-of-memory situations, improved malloc zones on macOS,
802	+	build PIC static libraries by default, add option to abort on out-of-memory, line buffered statistics.
803	+	* 2020-04-20, `v1.6.2`: stable release 1.6: fix compilation on Android, MingW, Raspberry, and Conda,
804	+	stability fix for Windows 7, fix multiple mimalloc instances in one executable, fix `strnlen` overload,
805	+	fix aligned debug padding.
806	+	* 2020-02-17, `v1.6.1`: stable release 1.6: minor updates (build with clang-cl, fix alignment issue for small objects).
807	+	* 2020-02-09, `v1.6.0`: stable release 1.6: fixed potential memory leak, improved overriding
808	+	and thread local support on FreeBSD, NetBSD, DragonFly, and macOSX. New byte-precise
809	+	heap block overflow detection in debug mode (besides the double-free detection and free-list
810	+	corruption detection). Add `nodiscard` attribute to most allocation functions.
811	+	Enable `MIMALLOC_PAGE_RESET` by default. New reclamation strategy for abandoned heap pages
812	+	for better memory footprint.
813	+	* 2020-02-09, `v1.5.0`: stable release 1.5: improved free performance, small bug fixes.
814	+	* 2020-01-22, `v1.4.0`: stable release 1.4: improved performance for delayed OS page reset,
815	+	more eager concurrent free, addition of STL allocator, fixed potential memory leak.
816	+	* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger
817	+	free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode.
818	+	* 2019-12-22, `v1.2.2`: stable release 1.2: minor updates.
819	+	* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows.
820	+	* 2019-10-07, `v1.1.0`: stable release 1.1.
821	+	* 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support.
822	+	* 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements.
823	+

■ ■ ■ ■ ■ ■

preload-mimalloc/mimalloc/src/alloc-aligned.c

1	+	/* ----------------------------------------------------------------------------
2	+	Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
3	+	This is free software; you can redistribute it and/or modify it under the
4	+	terms of the MIT license. A copy of the license can be found in the file
5	+	"LICENSE" at the root of this distribution.
6	+	-----------------------------------------------------------------------------*/
7	+
8	+	#include "mimalloc.h"
9	+	#include "mimalloc/internal.h"
10	+	#include "mimalloc/prim.h" // mi_prim_get_default_heap
11	+
12	+	#include <string.h> // memset
13	+
14	+	// ------------------------------------------------------
15	+	// Aligned Allocation
16	+	// ------------------------------------------------------
17	+
18	+	// Fallback primitive aligned allocation -- split out for better codegen. Returns a pointer q such that (q + offset) is a multiple of alignment, or NULL if the underlying allocation fails or if a very large alignment (> MI_ALIGNMENT_MAX) is combined with a non-zero offset.
19	+	static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
20	+	{
21	+	mi_assert_internal(size <= PTRDIFF_MAX);
22	+	mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment));
23	+	
24	+	const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)`
25	+	const size_t padsize = size + MI_PADDING_SIZE;
26	+	
27	+	// use regular allocation if it is guaranteed to fit the alignment constraints
28	+	if (offset==0 && alignment<=padsize && padsize<=MI_MAX_ALIGN_GUARANTEE && (padsize&align_mask)==0) {
29	+	void* p = _mi_heap_malloc_zero(heap, size, zero);
30	+	mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0);
31	+	return p;
32	+	}
33	+	
34	+	void* p; // start of the underlying (unaligned) block
35	+	size_t oversize; // number of bytes requested from the underlying allocator
36	+	if mi_unlikely(alignment > MI_ALIGNMENT_MAX) {
37	+	// use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page)
38	+	// This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the
39	+	// first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down)
40	+	if mi_unlikely(offset != 0) {
41	+	// todo: cannot support offset alignment for very large alignments yet
42	+	#if MI_DEBUG > 0
43	+	_mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset);
44	+	#endif
45	+	return NULL;
46	+	}
47	+	oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size);
48	+	p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment); // the page block size should be large enough to align in the single huge page block
49	+	// zero afterwards as only the area from the aligned_p may be committed!
50	+	if (p == NULL) return NULL;
51	+	}
52	+	else {
53	+	// otherwise over-allocate
54	+	oversize = size + alignment - 1; // worst case: alignment - 1 bytes are skipped in front of the object
55	+	p = _mi_heap_malloc_zero(heap, oversize, zero);
56	+	if (p == NULL) return NULL;
57	+	}
58	+	
59	+	// .. and align within the allocation
60	+	const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask; // misalignment of (p + offset)
61	+	const uintptr_t adjust = (poffset == 0 ? 0 : alignment - poffset); // bytes to skip so that (p + adjust + offset) is aligned
62	+	mi_assert_internal(adjust < alignment);
63	+	void* aligned_p = (void*)((uintptr_t)p + adjust);
64	+	if (aligned_p != p) { // the returned pointer lies inside the block rather than at its start
65	+	mi_page_t* page = _mi_ptr_page(p);
66	+	mi_page_set_has_aligned(page, true);
67	+	_mi_padding_shrink(page, (mi_block_t*)p, adjust + size);
68	+	}
69	+	// todo: expand padding if overallocated ?
70	+	
71	+	mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size);
72	+	mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p));
73	+	mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0);
74	+	mi_assert_internal(mi_usable_size(aligned_p)>=size);
75	+	mi_assert_internal(mi_usable_size(p) == mi_usable_size(aligned_p)+adjust);
76	+	
77	+	// now zero the block if needed
78	+	if (alignment > MI_ALIGNMENT_MAX) { // this path allocated with zero == false above, so zeroing happens here
79	+	// for the tracker, on huge aligned allocations only from the start of the large block is defined
80	+	mi_track_mem_undefined(aligned_p, size);
81	+	if (zero) {
82	+	_mi_memzero(aligned_p, mi_usable_size(aligned_p));
83	+	}
84	+	}
85	+	
86	+	if (p != aligned_p) {
87	+	mi_track_align(p,aligned_p,adjust,mi_usable_size(aligned_p));
88	+	}
89	+	return aligned_p;
90	+	}
91	+
92	+	// Primitive aligned allocation: returns NULL unless alignment is a non-zero power of two and size <= PTRDIFF_MAX; first tries an already suitably aligned free small block and otherwise defers to mi_heap_malloc_zero_aligned_at_fallback.
93	+	static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
94	+	{
95	+	// note: we don't require `size > offset`, we just guarantee that the address at offset is aligned regardless of the allocated size.
96	+	mi_assert(alignment > 0);
97	+	if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see <https://en.cppreference.com/w/c/memory/aligned_alloc>)
98	+	#if MI_DEBUG > 0
99	+	_mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment);
100	+	#endif
101	+	return NULL;
102	+	}
103	+	/*
104	+	if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { // we cannot align at a boundary larger than this (or otherwise we cannot find segment headers)
105	+	#if MI_DEBUG > 0
106	+	_mi_error_message(EOVERFLOW, "aligned allocation has a maximum alignment of %zu (size %zu, alignment %zu)\n", MI_ALIGNMENT_MAX, size, alignment);
107	+	#endif
108	+	return NULL;
109	+	}
110	+	*/
111	+	if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
112	+	#if MI_DEBUG > 0
113	+	_mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment);
114	+	#endif
115	+	return NULL;
116	+	}
117	+	const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)`
118	+	const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size > PTRDIFF_MAX check
119	+	
120	+	// try first if there happens to be a small block available with just the right alignment
121	+	if mi_likely(padsize <= MI_SMALL_SIZE_MAX && alignment <= padsize) {
122	+	mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize);
123	+	const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; // computed before the NULL test below; only used when page->free != NULL
124	+	if mi_likely(page->free != NULL && is_aligned)
125	+	{
126	+	#if MI_STAT>1
127	+	mi_heap_stat_increase(heap, malloc, size);
128	+	#endif
129	+	void* p = _mi_page_malloc(heap, page, padsize, zero); // TODO: inline _mi_page_malloc
130	+	mi_assert_internal(p != NULL);
131	+	mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
132	+	mi_track_malloc(p,size,zero);
133	+	return p;
134	+	}
135	+	}
136	+	// fallback
137	+	return mi_heap_malloc_zero_aligned_at_fallback(heap, size, alignment, offset, zero);
138	+	}
139	+
140	+
141	+	// ------------------------------------------------------
142	+	// Optimized mi_heap_malloc_aligned / mi_malloc_aligned
143	+	// ------------------------------------------------------
144	+
145	+	mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
146	+	return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, false); // zero == false: no zero-initialization is requested
147	+	}
148	+
149	+	mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { // aligned allocation at offset 0, with a fast path for common small sizes
150	+	#if !MI_PADDING
151	+	// without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`)
152	+	if (alignment == 0 || !_mi_is_power_of_two(alignment)) return NULL; // reject 0 explicitly, like the general path in mi_heap_malloc_zero_aligned_at, so both build variants agree
153	+	if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX)
154	+	#else
155	+	// with padding, we can only guarantee this for fixed alignments
156	+	if mi_likely((alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2)))
157	+	&& size <= MI_SMALL_SIZE_MAX)
158	+	#endif
159	+	{
160	+	// fast path for common alignment and size
161	+	return mi_heap_malloc_small(heap, size);
162	+	}
163	+	else {
164	+	return mi_heap_malloc_aligned_at(heap, size, alignment, 0); // general path; performs the full argument validation
165	+	}
166	+	}
167	+
168	+	// ------------------------------------------------------
169	+	// Aligned Allocation
170	+	// ------------------------------------------------------
171	+
172	+	mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
173	+	return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, true); // zero == true: request zero-initialized memory
174	+	}
175	+
176	+	mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept {
177	+	return mi_heap_zalloc_aligned_at(heap, size, alignment, 0); // offset 0: the returned address itself is aligned
178	+	}
179	+
180	+	mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
181	+	size_t total; // byte count written by mi_count_size_overflow
182	+	if (mi_count_size_overflow(count, size, &total)) return NULL; // presumably true when count*size overflows -- confirm against the helper
183	+	return mi_heap_zalloc_aligned_at(heap, total, alignment, offset); // zero-initialized allocation of `total` bytes
184	+	}
185	+
186	+	mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept {
187	+	return mi_heap_calloc_aligned_at(heap,count,size,alignment,0); // offset 0: the returned address itself is aligned
188	+	}
189	+
190	+	mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
191	+	return mi_heap_malloc_aligned_at(mi_prim_get_default_heap(), size, alignment, offset); // same as the heap variant, on the heap returned by mi_prim_get_default_heap()
192	+	}
193	+
194	+	mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept {
195	+	return mi_heap_malloc_aligned(mi_prim_get_default_heap(), size, alignment); // same as the heap variant, on the heap returned by mi_prim_get_default_heap()
196	+	}
197	+
198	+	mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
199	+	return mi_heap_zalloc_aligned_at(mi_prim_get_default_heap(), size, alignment, offset); // same as the heap variant, on the heap returned by mi_prim_get_default_heap()
200	+	}
201	+
202	+	mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept {
203	+	return mi_heap_zalloc_aligned(mi_prim_get_default_heap(), size, alignment); // same as the heap variant, on the heap returned by mi_prim_get_default_heap()
204	+	}
205	+
206	+	mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
207	+	return mi_heap_calloc_aligned_at(mi_prim_get_default_heap(), count, size, alignment, offset); // same as the heap variant, on the heap returned by mi_prim_get_default_heap()
208	+	}
209	+
210	+	mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept {
211	+	return mi_heap_calloc_aligned(mi_prim_get_default_heap(), count, size, alignment); // same as the heap variant, on the heap returned by mi_prim_get_default_heap()
212	+	}
213	+
214	+
215	+	// ------------------------------------------------------
216	+	// Aligned re-allocation
217	+	// ------------------------------------------------------
218	+
219	+	static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset, bool zero) mi_attr_noexcept { // resize p so that (result + offset) is aligned; returns NULL and leaves p untouched if a new block cannot be allocated
220	+	mi_assert(alignment > 0);
221	+	if (alignment <= sizeof(uintptr_t)) return _mi_heap_realloc_zero(heap,p,newsize,zero); // small alignments are handed to the regular realloc path (offset is not passed on)
222	+	if (p == NULL) return mi_heap_malloc_zero_aligned_at(heap,newsize,alignment,offset,zero); // nothing to copy: plain aligned allocation
223	+	size_t size = mi_usable_size(p); // usable size of the existing block
224	+	if (newsize <= size && newsize >= (size - (size / 2))
225	+	&& (((uintptr_t)p + offset) % alignment) == 0) {
226	+	return p; // reallocation still fits, is aligned and not more than 50% waste
227	+	}
228	+	else {
229	+	void* newp = mi_heap_malloc_aligned_at(heap,newsize,alignment,offset); // allocated without zeroing; the grown tail is zeroed below when requested
230	+	if (newp != NULL) {
231	+	if (zero && newsize > size) {
232	+	const mi_page_t* page = _mi_ptr_page(newp);
233	+	if (page->is_zero) {
234	+	// already zero initialized
235	+	mi_assert_expensive(mi_mem_is_zero(newp,newsize));
236	+	}
237	+	else {
238	+	// also set last word in the previous allocation to zero to ensure any padding is zero-initialized
239	+	size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
240	+	memset((uint8_t*)newp + start, 0, newsize - start);
241	+	}
242	+	}
243	+	_mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize)); // copies min(size, newsize) bytes; NOTE(review): when growing, this also rewrites the last word zeroed above -- confirm that is intended
244	+	mi_free(p); // only free if successful
245	+	}
246	+	return newp;
247	+	}
248	+	}
249	+
250	+	static void* mi_heap_realloc_zero_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, bool zero) mi_attr_noexcept { // variant without an explicit offset: derives it from p's current address
251	+	mi_assert(alignment > 0);
252	+	if (alignment <= sizeof(uintptr_t)) return _mi_heap_realloc_zero(heap,p,newsize,zero); // small alignments are handed to the regular realloc path
253	+	size_t offset = ((uintptr_t)p % alignment); // use offset of previous allocation (p can be NULL)
254	+	return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,zero);
255	+	}
256	+
257	+	mi_decl_nodiscard void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
258	+	return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,false); // zero == false: newly grown bytes are not explicitly zeroed
259	+	}
260	+
261	+	mi_decl_nodiscard void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
262	+	return mi_heap_realloc_zero_aligned(heap,p,newsize,alignment,false); // zero == false: newly grown bytes are not explicitly zeroed
263	+	}
264	+
265	+	mi_decl_nodiscard void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
266	+	return mi_heap_realloc_zero_aligned_at(heap, p, newsize, alignment, offset, true); // zero == true: newly grown bytes are zeroed
267	+	}
268	+
269	+	mi_decl_nodiscard void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
270	+	return mi_heap_realloc_zero_aligned(heap, p, newsize, alignment, true); // zero == true: newly grown bytes are zeroed
271	+	}
272	+
273	+	mi_decl_nodiscard void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
274	+	size_t total; // byte count written by mi_count_size_overflow
275	+	if (mi_count_size_overflow(newcount, size, &total)) return NULL; // presumably true when newcount*size overflows -- confirm against the helper; p is left untouched
276	+	return mi_heap_rezalloc_aligned_at(heap, p, total, alignment, offset);
277	+	}
278	+
279	+	mi_decl_nodiscard void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept {
280	+	size_t total; // byte count written by mi_count_size_overflow
281	+	if (mi_count_size_overflow(newcount, size, &total)) return NULL; // presumably true when newcount*size overflows -- confirm against the helper; p is left untouched
282	+	return mi_heap_rezalloc_aligned(heap, p, total, alignment);
283	+	}
284	+
285	+	mi_decl_nodiscard void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
286	+	return mi_heap_realloc_aligned_at(mi_prim_get_default_heap(), p, newsize, alignment, offset); // same as the heap variant, on the heap returned by mi_prim_get_default_heap()
287	+	}
288	+
289	+	mi_decl_nodiscard void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
290	+	return mi_heap_realloc_aligned(mi_prim_get_default_heap(), p, newsize, alignment); // same as the heap variant, on the heap returned by mi_prim_get_default_heap()
291	+	}
292	+
293	+	mi_decl_nodiscard void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept {
294	+	return mi_heap_rezalloc_aligned_at(mi_prim_get_default_heap(), p, newsize, alignment, offset); // same as the heap variant, on the heap returned by mi_prim_get_default_heap()
295	+	}
296	+
297	+	mi_decl_nodiscard void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept {
298	+	return mi_heap_rezalloc_aligned(mi_prim_get_default_heap(), p, newsize, alignment); // same as the heap variant, on the heap returned by mi_prim_get_default_heap()
299	+	}
300	+
301	+	mi_decl_nodiscard void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept {
302	+	return mi_heap_recalloc_aligned_at(mi_prim_get_default_heap(), p, newcount, size, alignment, offset); // same as the heap variant, on the heap returned by mi_prim_get_default_heap()
303	+	}
304	+
305	+	mi_decl_nodiscard void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept {
306	+	return mi_heap_recalloc_aligned(mi_prim_get_default_heap(), p, newcount, size, alignment); // same as the heap variant, on the heap returned by mi_prim_get_default_heap()
307	+	}
308	+

■ ■ ■ ■ ■ ■

preload-mimalloc/mimalloc/src/alloc-override.c

1	+	/* ----------------------------------------------------------------------------
2	+	Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
3	+	This is free software; you can redistribute it and/or modify it under the
4	+	terms of the MIT license. A copy of the license can be found in the file
5	+	"LICENSE" at the root of this distribution.
6	+	-----------------------------------------------------------------------------*/
7	+
8	+	#if !defined(MI_IN_ALLOC_C) // refuse to be compiled as a stand-alone translation unit
9	+	#error "this file should be included from 'alloc.c' (so aliases can work)"
10	+	#endif
11	+	
12	+	#if defined(MI_MALLOC_OVERRIDE) && defined(_WIN32) && !(defined(MI_SHARED_LIB) && defined(_DLL)) // overriding on Windows requires both MI_SHARED_LIB and _DLL
13	+	#error "It is only possible to override "malloc" on Windows when building as a DLL (and linking the C runtime as a DLL)"
14	+	#endif
15	+
16	+	#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32))
17	+
18	+	#if defined(__APPLE__)
19	+	#include <AvailabilityMacros.h>
20	+	mi_decl_externc void vfree(void* p);
21	+	mi_decl_externc size_t malloc_size(const void* p);
22	+	mi_decl_externc size_t malloc_good_size(size_t size);
23	+	#endif
24	+
25	+	// helper definition for C override of C++ new; used as the type of the `tag` parameter in the C declarations of the nothrow operator new symbols (e.g. `_ZnwmRKSt9nothrow_t`)
26	+	typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
27	+
28	+	// ------------------------------------------------------
29	+	// Override system malloc
30	+	// ------------------------------------------------------
31	+
32	+	#if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__) && !MI_TRACK_ENABLED
33	+	// gcc, clang: use aliasing to alias the exported function to one of our `mi_` functions
34	+	#if (defined(__GNUC__) && __GNUC__ >= 9)
35	+	#pragma GCC diagnostic ignored "-Wattributes" // or we get warnings that nodiscard is ignored on a forward
36	+	#define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default"), copy(fun)));
37	+	#else
38	+	#define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default")));
39	+	#endif
40	+	#define MI_FORWARD1(fun,x) MI_FORWARD(fun) // with aliasing the argument names are ignored; every variant expands to the same attribute
41	+	#define MI_FORWARD2(fun,x,y) MI_FORWARD(fun)
42	+	#define MI_FORWARD3(fun,x,y,z) MI_FORWARD(fun)
43	+	#define MI_FORWARD0(fun,x) MI_FORWARD(fun)
44	+	#define MI_FORWARD02(fun,x,y) MI_FORWARD(fun)
45	+	#else
46	+	// otherwise use forwarding by calling our `mi_` function
47	+	#define MI_FORWARD1(fun,x) { return fun(x); } // MI_FORWARD1/2/3: function body returning the result of a 1/2/3-argument call
48	+	#define MI_FORWARD2(fun,x,y) { return fun(x,y); }
49	+	#define MI_FORWARD3(fun,x,y,z) { return fun(x,y,z); }
50	+	#define MI_FORWARD0(fun,x) { fun(x); } // MI_FORWARD0/02: function body for calls whose result is not returned (1 or 2 arguments)
51	+	#define MI_FORWARD02(fun,x,y) { fun(x,y); }
52	+	#endif
53	+
54	+
55	+	#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_OSX_INTERPOSE)
56	+	// define MI_OSX_IS_INTERPOSED as we should not provide forwarding definitions for
57	+	// functions that are interposed (or the interposing does not work)
58	+	#define MI_OSX_IS_INTERPOSED
59	+
60	+	mi_decl_externc size_t mi_malloc_size_checked(void *p) { // interposed in place of malloc_size (see the `_mi_interposes` table)
61	+	if (!mi_is_in_heap_region(p)) return 0; // report size 0 when mi_is_in_heap_region rejects the pointer
62	+	return mi_usable_size(p);
63	+	}
64	+
65	+	// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1`
66	+	// See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73>
67	+	struct mi_interpose_s {
68	+	const void* replacement; // address of the function to use instead
69	+	const void* target; // address of the function being replaced
70	+	};
71	+	#define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } // initializer order follows the struct: replacement first, then target
72	+	#define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun) // interpose `fun` with its `mi_`-prefixed counterpart
73	+
74	+	__attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) = // table of C allocation functions to interpose, placed in the `__DATA, __interpose` section
75	+	{
76	+	MI_INTERPOSE_MI(malloc),
77	+	MI_INTERPOSE_MI(calloc),
78	+	MI_INTERPOSE_MI(realloc),
79	+	MI_INTERPOSE_MI(strdup),
80	+	MI_INTERPOSE_MI(strndup),
81	+	MI_INTERPOSE_MI(realpath),
82	+	MI_INTERPOSE_MI(posix_memalign),
83	+	MI_INTERPOSE_MI(reallocf),
84	+	MI_INTERPOSE_MI(valloc),
85	+	MI_INTERPOSE_FUN(malloc_size,mi_malloc_size_checked), // checked variant: returns 0 for pointers outside mimalloc's heap regions
86	+	MI_INTERPOSE_MI(malloc_good_size),
87	+	#if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15
88	+	MI_INTERPOSE_MI(aligned_alloc),
89	+	#endif
90	+	#ifdef MI_OSX_ZONE
91	+	// we interpose malloc_default_zone in alloc-override-osx.c so we can use mi_free safely
92	+	MI_INTERPOSE_MI(free),
93	+	MI_INTERPOSE_FUN(vfree,mi_free),
94	+	#else
95	+	// sometimes code allocates from default zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>)
96	+	MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us
97	+	MI_INTERPOSE_FUN(vfree,mi_cfree),
98	+	#endif
99	+	};
100	+
101	+	#ifdef __cplusplus
102	+	extern "C" {
103	+	#endif
104	+	void _ZdlPv(void* p); // delete
105	+	void _ZdaPv(void* p); // delete[]
106	+	void _ZdlPvm(void* p, size_t n); // delete
107	+	void _ZdaPvm(void* p, size_t n); // delete[]
108	+	void* _Znwm(size_t n); // new
109	+	void* _Znam(size_t n); // new[]
110	+	void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag); // new nothrow
111	+	void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag); // new[] nothrow
112	+	#ifdef __cplusplus
113	+	}
114	+	#endif
115	+	__attribute__((used)) static struct mi_interpose_s _mi_cxx_interposes[] __attribute__((section("__DATA, __interpose"))) = // second interpose table: the mangled C++ operator new/delete symbols declared above
116	+	{
117	+	MI_INTERPOSE_FUN(_ZdlPv,mi_free),
118	+	MI_INTERPOSE_FUN(_ZdaPv,mi_free),
119	+	MI_INTERPOSE_FUN(_ZdlPvm,mi_free_size),
120	+	MI_INTERPOSE_FUN(_ZdaPvm,mi_free_size),
121	+	MI_INTERPOSE_FUN(_Znwm,mi_new),
122	+	MI_INTERPOSE_FUN(_Znam,mi_new),
123	+	MI_INTERPOSE_FUN(_ZnwmRKSt9nothrow_t,mi_new_nothrow),
124	+	MI_INTERPOSE_FUN(_ZnamRKSt9nothrow_t,mi_new_nothrow),
125	+	};
126	+
127	+	#elif defined(_MSC_VER)
128	+	// cannot override malloc unless using a dll.
129	+	// we just override new/delete which does work in a static library.
130	+	#else
131	+	// On all other systems forward to our API (each definition below is completed by a MI_FORWARD* macro: either an alias attribute or a forwarding body)
132	+	mi_decl_export void* malloc(size_t size) MI_FORWARD1(mi_malloc, size)
133	+	mi_decl_export void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n) // arguments are forwarded positionally, so the parameter names here do not matter
134	+	mi_decl_export void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize)
135	+	mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p)
136	+	#endif
137	+
138	+	#if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__)
139	+	#pragma GCC visibility push(default) // declarations that follow get default visibility
140	+	#endif
141	+
142	+	// ------------------------------------------------------
143	+	// Override new/delete
144	+	// This is not really necessary as they usually call
145	+	// malloc/free anyway, but it improves performance.
146	+	// ------------------------------------------------------
147	+	#ifdef __cplusplus
148	+	// ------------------------------------------------------
149	+	// With a C++ compiler we override the new/delete operators.
150	+	// see <https://en.cppreference.com/w/cpp/memory/new/operator_new>
151	+	// ------------------------------------------------------
152	+	#include <new>
153	+
154	+	#ifndef MI_OSX_IS_INTERPOSED // skipped when the operators are already replaced through the interpose tables
155	+	void operator delete(void* p) noexcept MI_FORWARD0(mi_free,p)
156	+	void operator delete[](void* p) noexcept MI_FORWARD0(mi_free,p)
157	+	
158	+	void* operator new(std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n)
159	+	void* operator new[](std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n)
160	+	
161	+	void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { MI_UNUSED(tag); return mi_new_nothrow(n); }
162	+	void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { MI_UNUSED(tag); return mi_new_nothrow(n); }
163	+	
164	+	#if (__cplusplus >= 201402L || _MSC_VER >= 1916) // sized delete overloads
165	+	void operator delete (void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n)
166	+	void operator delete[](void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n)
167	+	#endif
168	+	#endif
169	+
170	+	#if (__cplusplus > 201402L && defined(__cpp_aligned_new)) && (!defined(__GNUC__) || (__GNUC__ > 5)) // over-aligned new/delete overloads (std::align_val_t)
171	+	void operator delete (void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
172	+	void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
173	+	void operator delete (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); }
174	+	void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); }
175	+	void operator delete (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
176	+	void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
177	+	
178	+	void* operator new( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
179	+	void* operator new[]( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
180	+	void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
181	+	void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
182	+	#endif
183	+
184	+	#elif (defined(__GNUC__) || defined(__clang__))
185	+	// ------------------------------------------------------
186	+	// Override by defining the mangled C++ names of the operators (as
187	+	// used by GCC and Clang).
188	+	// See <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling>
189	+	// ------------------------------------------------------
190	+
191	+	void _ZdlPv(void* p) MI_FORWARD0(mi_free,p) // delete
192	+	void _ZdaPv(void* p) MI_FORWARD0(mi_free,p) // delete[]
193	+	void _ZdlPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n)  // sized delete; NOTE(review): 'm' mangles unsigned long, so where size_t is unsigned int the symbol is presumably _ZdlPvj - confirm
194	+	void _ZdaPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n)  // sized delete[]
195	+	void _ZdlPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); }
196	+	void _ZdaPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); }
197	+	void _ZdlPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); }
198	+	void _ZdaPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); }
199	+
200	+	#if (MI_INTPTR_SIZE==8)
201	+	void* _Znwm(size_t n) MI_FORWARD1(mi_new,n) // new 64-bit
202	+	void* _Znam(size_t n) MI_FORWARD1(mi_new,n) // new[] 64-bit
203	+	void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
204	+	void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
205	+	void* _ZnwmSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al)
206	+	void* _ZnamSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al)
207	+	void* _ZnwmSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
208	+	void* _ZnamSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
209	+	#elif (MI_INTPTR_SIZE==4)
210	+	void* _Znwj(size_t n) MI_FORWARD1(mi_new,n) // new 32-bit ('j' = unsigned int size argument)
211	+	void* _Znaj(size_t n) MI_FORWARD1(mi_new,n) // new[] 32-bit
212	+	void* _ZnwjRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
213	+	void* _ZnajRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
214	+	void* _ZnwjSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al)
215	+	void* _ZnajSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al)
216	+	void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
217	+	void* _ZnajSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
218	+	#else
219	+	#error "define overloads for new/delete for this platform (just for performance, can be skipped)"
220	+	#endif
221	+	#endif // __cplusplus
222	+
223	+	// ------------------------------------------------------
224	+	// Further Posix & Unix functions definitions
225	+	// ------------------------------------------------------
226	+
227	+	#ifdef __cplusplus
228	+	extern "C" {
229	+	#endif
230	+
231	+	#ifndef MI_OSX_IS_INTERPOSED
232	+	// Forward Posix/Unix calls as well (the MI_FORWARDn macros are defined earlier in this file, outside this excerpt)
233	+	void* reallocf(void* p, size_t newsize) MI_FORWARD2(mi_reallocf,p,newsize)
234	+	size_t malloc_size(const void* p) MI_FORWARD1(mi_usable_size,p)
235	+	#if !defined(__ANDROID__) && !defined(__FreeBSD__)
236	+	size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p)  // non-const parameter variant
237	+	#else
238	+	size_t malloc_usable_size(const void *p) MI_FORWARD1(mi_usable_size,p)  // const parameter variant, used when __ANDROID__ or __FreeBSD__ is defined
239	+	#endif
240	+
241	+	// No forwarding here due to aliasing/name mangling issues: these are plain wrapper functions
242	+	void* valloc(size_t size) { return mi_valloc(size); }
243	+	void vfree(void* p) { mi_free(p); }
244	+	size_t malloc_good_size(size_t size) { return mi_malloc_good_size(size); }
245	+	int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); }
246	+
247	+	// `aligned_alloc` is only available when __USE_ISOC11 is defined.
248	+	// Note: Conda has a custom glibc where `aligned_alloc` is declared `static inline` and we cannot
249	+	// override it, but both _ISOC11_SOURCE and __USE_ISOC11 are undefined in Conda GCC7 or GCC9.
250	+	// Fortunately, in the case where `aligned_alloc` is declared as `static inline` it
251	+	// uses internally `memalign`, `posix_memalign`, or `_aligned_malloc` so we can avoid overriding it ourselves.
252	+	#if __USE_ISOC11
253	+	void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }
254	+	#endif  // __USE_ISOC11
255	+	#endif  // MI_OSX_IS_INTERPOSED
256	+
257	+	// no forwarding here due to aliasing/name mangling issues; these wrappers sit outside the MI_OSX_IS_INTERPOSED guard
258	+	void cfree(void* p) { mi_free(p); }  // calls mi_free directly, without the heap-region test that mi_cfree applies
259	+	void* pvalloc(size_t size) { return mi_pvalloc(size); }
260	+	void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); }
261	+	int reallocarr(void* p, size_t count, size_t size) { return mi_reallocarr(p, count, size); }  // mi_reallocarr dereferences p and stores the new block through it
262	+	void* memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); }
263	+	void* _aligned_malloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }  // NOTE(review): the Microsoft CRT function of this name takes (size, alignment) - confirm the order here is intended
264	+
265	+	#if defined(__wasi__)
266	+	// forward __libc interface (see PR #667)
267	+	void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc, size)
268	+	void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc, count, size)
269	+	void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc, p, size)
270	+	void __libc_free(void* p) MI_FORWARD0(mi_free, p)
271	+	void* __libc_memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); }
272	+
273	+	#elif defined(__GLIBC__) && defined(__linux__)
274	+	// forward __libc interface (needed for glibc-based Linux distributions)
275	+	void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc,size)
276	+	void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc,count,size)
277	+	void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc,p,size)
278	+	void __libc_free(void* p) MI_FORWARD0(mi_free,p)
279	+	void __libc_cfree(void* p) MI_FORWARD0(mi_free,p)  // defined only in this glibc branch, not in the wasi branch
280	+
281	+	void* __libc_valloc(size_t size) { return mi_valloc(size); }  // plain wrappers rather than MI_FORWARD from here on
282	+	void* __libc_pvalloc(size_t size) { return mi_pvalloc(size); }
283	+	void* __libc_memalign(size_t alignment, size_t size) { return mi_memalign(alignment,size); }
284	+	int __posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p,alignment,size); }
285	+	#endif  // __wasi__ / glibc-based Linux
286	+
287	+	#ifdef __cplusplus
288	+	}
289	+	#endif
290	+
291	+	#if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__)
292	+	#pragma GCC visibility pop  // presumably pairs with a `visibility push` earlier in this file (outside this excerpt)
293	+	#endif
294	+
295	+	#endif // MI_MALLOC_OVERRIDE && !_WIN32
296	+

■ ■ ■ ■ ■ ■

preload-mimalloc/mimalloc/src/alloc-posix.c

1	+	/* ----------------------------------------------------------------------------
2	+	Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
3	+	This is free software; you can redistribute it and/or modify it under the
4	+	terms of the MIT license. A copy of the license can be found in the file
5	+	"LICENSE" at the root of this distribution.
6	+	-----------------------------------------------------------------------------*/
7	+
8	+	// ------------------------------------------------------------------------
9	+	// mi-prefixed public definitions of various Posix, Unix, and C++ functions
10	+	// for convenience and used when overriding these functions.
11	+	// ------------------------------------------------------------------------
12	+	#include "mimalloc.h"
13	+	#include "mimalloc/internal.h"
14	+
15	+	// ------------------------------------------------------
16	+	// Posix & Unix functions definitions
17	+	// ------------------------------------------------------
18	+
19	+	#include <errno.h>
20	+	#include <string.h> // memset
21	+	#include <stdlib.h> // getenv
22	+
23	+	#ifdef _MSC_VER
24	+	#pragma warning(disable:4996) // getenv _wgetenv
25	+	#endif
26	+
27	+	#ifndef EINVAL
28	+	#define EINVAL 22
29	+	#endif
30	+	#ifndef ENOMEM
31	+	#define ENOMEM 12
32	+	#endif
33	+
34	+
35	+	mi_decl_nodiscard size_t mi_malloc_size(const void* p) mi_attr_noexcept {  // thin wrapper: returns mi_usable_size(p) unchanged
36	+	// if (!mi_is_in_heap_region(p)) return 0;   (disabled guard: p is not checked for heap membership)
37	+	return mi_usable_size(p);
38	+	}
39	+
40	+	mi_decl_nodiscard size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept {  // same body as mi_malloc_size: returns mi_usable_size(p)
41	+	// if (!mi_is_in_heap_region(p)) return 0;   (disabled guard: p is not checked for heap membership)
42	+	return mi_usable_size(p);
43	+	}
44	+
45	+	mi_decl_nodiscard size_t mi_malloc_good_size(size_t size) mi_attr_noexcept {  // thin wrapper over mi_good_size
46	+	return mi_good_size(size);
47	+	}
48	+
49	+	void mi_cfree(void* p) mi_attr_noexcept {  // frees p only when mi_is_in_heap_region(p) holds
50	+	if (mi_is_in_heap_region(p)) {
51	+	mi_free(p);
52	+	}  // any other pointer is silently ignored
53	+	}
54	+
55	+	int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept {  // returns 0 and stores the block in *p, or EINVAL/ENOMEM
56	+	// Note: The spec dictates we should not modify `*p` on an error. (issue#27)
57	+	// <http://man7.org/linux/man-pages/man3/posix_memalign.3.html>
58	+	if (p == NULL) return EINVAL;
59	+	if (alignment % sizeof(void*) != 0) return EINVAL; // natural alignment
60	+	if (alignment==0 || !_mi_is_power_of_two(alignment)) return EINVAL; // not a power of 2
61	+	void* q = mi_malloc_aligned(size, alignment);
62	+	if (q==NULL && size != 0) return ENOMEM;  // a NULL result for size 0 is not reported as an error
63	+	mi_assert_internal(((uintptr_t)q % alignment) == 0);
64	+	*p = q;  // *p is written only on the success path
65	+	return 0;
66	+	}
67	+
68	+	mi_decl_nodiscard mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept {  // note the (alignment, size) order; alignment is not validated in this function
69	+	void* p = mi_malloc_aligned(size, alignment);  // arguments are swapped relative to this function's own signature
70	+	mi_assert_internal(((uintptr_t)p % alignment) == 0);
71	+	return p;
72	+	}
73	+
74	+	mi_decl_nodiscard mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept {  // allocation aligned to the OS page size
75	+	return mi_memalign( _mi_os_page_size(), size );
76	+	}
77	+
78	+	mi_decl_nodiscard mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept {  // page-aligned allocation whose size is first rounded up to a page multiple
79	+	size_t psize = _mi_os_page_size();
80	+	if (size >= SIZE_MAX - psize) return NULL; // overflow: rounding up could wrap past SIZE_MAX
81	+	size_t asize = _mi_align_up(size, psize);
82	+	return mi_malloc_aligned(asize, psize);
83	+	}
84	+
85	+	mi_decl_nodiscard mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept {  // forwards to mi_malloc_aligned with no size-multiple check
86	+	// C11 requires the size to be an integral multiple of the alignment, see <https://en.cppreference.com/w/c/memory/aligned_alloc>.
87	+	// unfortunately, it turns out quite some programs pass a size that is not an integral multiple so skip this check..
88	+	/* if mi_unlikely((size & (alignment - 1)) != 0) { // C11 requires alignment>0 && integral multiple, see <https://en.cppreference.com/w/c/memory/aligned_alloc>
89	+	#if MI_DEBUG > 0
90	+	_mi_error_message(EOVERFLOW, "(mi_)aligned_alloc requires the size to be an integral multiple of the alignment (size %zu, alignment %zu)\n", size, alignment);
91	+	#endif
92	+	return NULL;
93	+	}
94	+	*/
95	+	// C11 also requires alignment to be a power-of-two (and > 0) which is checked in mi_malloc_aligned
96	+	void* p = mi_malloc_aligned(size, alignment);
97	+	mi_assert_internal(((uintptr_t)p % alignment) == 0);
98	+	return p;
99	+	}
100	+
101	+	mi_decl_nodiscard void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept { // BSD: mi_reallocn, plus errno on failure
102	+	void* newp = mi_reallocn(p,count,size);
103	+	if (newp==NULL) { errno = ENOMEM; }  // every NULL result is reported as ENOMEM
104	+	return newp;
105	+	}
106	+
107	+	mi_decl_nodiscard int mi_reallocarr( void* p, size_t count, size_t size ) mi_attr_noexcept { // NetBSD: p is the address of the pointer to resize
108	+	mi_assert(p != NULL);
109	+	if (p == NULL) {
110	+	errno = EINVAL;
111	+	return EINVAL;
112	+	}
113	+	void** op = (void**)p;  // p arrives as void* but really points at the caller's pointer variable
114	+	void* newp = mi_reallocarray(*op, count, size);
115	+	if mi_unlikely(newp == NULL) { return errno; }  // errno was set by mi_reallocarray; *op is left unchanged
116	+	*op = newp;  // publish the new block only on success
117	+	return 0;
118	+	}
119	+
120	+	void* mi__expand(void* p, size_t newsize) mi_attr_noexcept { // Microsoft: mi_expand, plus errno on failure
121	+	void* res = mi_expand(p, newsize);
122	+	if (res == NULL) { errno = ENOMEM; }  // a NULL result is reported as ENOMEM
123	+	return res;
124	+	}
125	+
126	+	mi_decl_nodiscard mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept {  // copies a 0-terminated unsigned short string into a new mi_malloc block
127	+	if (s==NULL) return NULL;  // NULL in, NULL out
128	+	size_t len;
129	+	for(len = 0; s[len] != 0; len++) { }  // len = number of units before the terminating 0
130	+	size_t size = (len+1)*sizeof(unsigned short);  // byte size including the terminator
131	+	unsigned short* p = (unsigned short*)mi_malloc(size);
132	+	if (p != NULL) {
133	+	_mi_memcpy(p,s,size);
134	+	}
135	+	return p;  // NULL when the allocation failed
136	+	}
137	+
138	+	mi_decl_nodiscard mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept {  // mi_strdup for unsigned char strings
139	+	return (unsigned char*)mi_strdup((const char*)s);  // only the pointer types are cast; the bytes are copied by mi_strdup
140	+	}
141	+
142	+	int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept {  // duplicates getenv(name) into *buf; returns 0, EINVAL or ENOMEM
143	+	if (buf==NULL || name==NULL) return EINVAL;
144	+	if (size != NULL) *size = 0;  // size is optional
145	+	char* p = getenv(name); // mscver warning 4996
146	+	if (p==NULL) {
147	+	*buf = NULL;  // an unset variable is not an error: *buf = NULL and 0 is returned
148	+	}
149	+	else {
150	+	*buf = mi_strdup(p);
151	+	if (*buf==NULL) return ENOMEM;
152	+	if (size != NULL) *size = _mi_strlen(p);  // stores _mi_strlen(p)
153	+	}
154	+	return 0;
155	+	}
156	+
157	+	int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) mi_attr_noexcept {  // wide-character variant of mi_dupenv_s
158	+	if (buf==NULL || name==NULL) return EINVAL;
159	+	if (size != NULL) *size = 0;  // size is optional
160	+	#if !defined(_WIN32) || (defined(WINAPI_FAMILY) && (WINAPI_FAMILY != WINAPI_FAMILY_DESKTOP_APP))
161	+	// not supported
162	+	*buf = NULL;
163	+	return EINVAL;
164	+	#else
165	+	unsigned short* p = (unsigned short*)_wgetenv((const wchar_t*)name); // msvc warning 4996
166	+	if (p==NULL) {
167	+	*buf = NULL;  // an unset variable is not an error: *buf = NULL and 0 is returned
168	+	}
169	+	else {
170	+	*buf = mi_wcsdup(p);
171	+	if (*buf==NULL) return ENOMEM;
172	+	if (size != NULL) *size = wcslen((const wchar_t*)p);  // write through the pointer, not to the local parameter
173	+	}
174	+	return 0;
175	+	#endif
176	+	}
177	+
178	+	mi_decl_nodiscard void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { // Microsoft: alias for mi_recalloc_aligned_at
179	+	return mi_recalloc_aligned_at(p, newcount, size, alignment, offset);  // arguments passed through in the same order
180	+	}
181	+
182	+	mi_decl_nodiscard void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { // Microsoft: alias for mi_recalloc_aligned
183	+	return mi_recalloc_aligned(p, newcount, size, alignment);  // arguments passed through in the same order
184	+	}
185	+