Skip to content

Commit 992ebd4

Browse files
committed
Merge branch 'dev' into dev2
2 parents 6e5ed1e + 26fa8be commit 992ebd4

File tree

6 files changed

+119
-26
lines changed

6 files changed

+119
-26
lines changed

include/mimalloc/internal.h

+16
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ bool _mi_os_has_virtual_reserve(void);
127127

128128
bool _mi_os_reset(void* addr, size_t size);
129129
bool _mi_os_commit(void* p, size_t size, bool* is_zero);
130+
bool _mi_os_commit_ex(void* addr, size_t size, bool* is_zero, size_t stat_size);
130131
bool _mi_os_decommit(void* addr, size_t size);
131132
bool _mi_os_protect(void* addr, size_t size);
132133
bool _mi_os_unprotect(void* addr, size_t size);
@@ -1030,6 +1031,21 @@ static inline size_t mi_bsr(size_t x) {
10301031
return (x==0 ? MI_SIZE_BITS : MI_SIZE_BITS - 1 - mi_clz(x));
10311032
}
10321033

1034+
size_t _mi_popcount_generic(size_t x);
1035+
1036+
static inline size_t mi_popcount(size_t x) {
1037+
if (x<=1) return x;
1038+
if (x==SIZE_MAX) return MI_SIZE_BITS;
1039+
#if defined(__GNUC__)
1040+
#if (SIZE_MAX == ULONG_MAX)
1041+
return __builtin_popcountl(x);
1042+
#else
1043+
return __builtin_popcountll(x);
1044+
#endif
1045+
#else
1046+
return _mi_popcount_generic(x);
1047+
#endif
1048+
}
10331049

10341050
// ---------------------------------------------------------------------------------
10351051
// Provide our own `_mi_memcpy` for potential performance optimizations.

src/arena.c

+26-13
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
259259

260260
// set the dirty bits (todo: no need for an atomic op here?)
261261
if (arena->memid.initially_zero && arena->blocks_dirty != NULL) {
262-
memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
262+
memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL, NULL);
263263
}
264264

265265
// set commit state
@@ -271,10 +271,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
271271
// commit requested, but the range may not be committed as a whole: ensure it is committed now
272272
memid->initially_committed = true;
273273
bool any_uncommitted;
274-
_mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
274+
size_t already_committed = 0;
275+
_mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted, &already_committed);
275276
if (any_uncommitted) {
277+
mi_assert_internal(already_committed < needed_bcount);
278+
const size_t commit_size = mi_arena_block_size(needed_bcount);
279+
const size_t stat_commit_size = commit_size - mi_arena_block_size(already_committed);
276280
bool commit_zero = false;
277-
if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero)) {
281+
if (!_mi_os_commit_ex(p, commit_size, &commit_zero, stat_commit_size)) {
278282
memid->initially_committed = false;
279283
}
280284
else {
@@ -284,7 +288,14 @@ static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t ar
284288
}
285289
else {
286290
// no need to commit, but check if already fully committed
287-
memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
291+
size_t already_committed = 0;
292+
memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &already_committed);
293+
if (!memid->initially_committed && already_committed > 0) {
294+
// partially committed: as it will be committed at some time, adjust the stats and pretend the range is fully uncommitted.
295+
mi_assert_internal(already_committed < needed_bcount);
296+
_mi_stat_decrease(&_mi_stats_main.committed, mi_arena_block_size(already_committed));
297+
_mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
298+
}
288299
}
289300

290301
return p;
@@ -468,17 +479,19 @@ static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks)
468479
const size_t size = mi_arena_block_size(blocks);
469480
void* const p = mi_arena_block_start(arena, bitmap_idx);
470481
bool needs_recommit;
471-
if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) {
482+
size_t already_committed = 0;
483+
if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx, &already_committed)) {
472484
// all blocks are committed, we can purge freely
485+
mi_assert_internal(already_committed == blocks);
473486
needs_recommit = _mi_os_purge(p, size);
474487
}
475488
else {
476489
// some blocks are not committed -- this can happen when a partially committed block is freed
477490
// in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge
478-
// we need to ensure we do not try to reset (as that may be invalid for uncommitted memory),
479-
// and also undo the decommit stats (as it was already adjusted)
491+
// we need to ensure we do not try to reset (as that may be invalid for uncommitted memory).
492+
mi_assert_internal(already_committed < blocks);
480493
mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits));
481-
needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, 0);
494+
needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, mi_arena_block_size(already_committed));
482495
}
483496

484497
// clear the purged blocks
@@ -512,7 +525,7 @@ static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t
512525
else {
513526
// already an expiration was set
514527
}
515-
_mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL);
528+
_mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL, NULL);
516529
}
517530
}
518531

@@ -652,7 +665,7 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
652665
if (p==NULL) return;
653666
if (size==0) return;
654667
const bool all_committed = (committed_size == size);
655-
const bool decommitted_size = (committed_size <= size ? size - committed_size : 0);
668+
const size_t decommitted_size = (committed_size <= size ? size - committed_size : 0);
656669

657670
// need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.)
658671
mi_track_mem_undefined(p,size);
@@ -695,14 +708,14 @@ void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memi
695708
mi_assert_internal(arena->blocks_purge != NULL);
696709

697710
if (!all_committed) {
698-
// mark the entire range as no longer committed (so we recommit the full range when re-using)
711+
// mark the entire range as no longer committed (so we will recommit the full range when re-using)
699712
_mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
700713
mi_track_mem_noaccess(p,size);
701-
if (committed_size > 0) {
714+
//if (committed_size > 0) {
702715
// if partially committed, adjust the committed stats (as it will be recommitted when re-using)
703716
// in the delayed purge, we no longer decrease the commit if the range is not marked entirely as committed.
704717
_mi_stat_decrease(&_mi_stats_main.committed, committed_size);
705-
}
718+
//}
706719
// note: if not all committed, it may be that the purge will reset/decommit the entire range
707720
// that contains already decommitted parts. Since purge consistently uses reset or decommit that
708721
// works (as we should never reset decommitted parts).

src/bitmap.c

+17-11
Original file line numberDiff line numberDiff line change
@@ -369,67 +369,73 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
369369

370370
// Set `count` bits at `bitmap_idx` to 1 atomically
371371
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
372-
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) {
372+
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set) {
373373
size_t idx = mi_bitmap_index_field(bitmap_idx);
374374
size_t pre_mask;
375375
size_t mid_mask;
376376
size_t post_mask;
377377
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
378378
bool all_zero = true;
379379
bool any_zero = false;
380+
size_t one_count = 0;
380381
_Atomic(size_t)*field = &bitmap[idx];
381382
size_t prev = mi_atomic_or_acq_rel(field++, pre_mask);
382-
if ((prev & pre_mask) != 0) all_zero = false;
383+
if ((prev & pre_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & pre_mask); }
383384
if ((prev & pre_mask) != pre_mask) any_zero = true;
384385
while (mid_count-- > 0) {
385386
prev = mi_atomic_or_acq_rel(field++, mid_mask);
386-
if ((prev & mid_mask) != 0) all_zero = false;
387+
if ((prev & mid_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & mid_mask); }
387388
if ((prev & mid_mask) != mid_mask) any_zero = true;
388389
}
389390
if (post_mask!=0) {
390391
prev = mi_atomic_or_acq_rel(field, post_mask);
391-
if ((prev & post_mask) != 0) all_zero = false;
392+
if ((prev & post_mask) != 0) { all_zero = false; one_count += mi_popcount(prev & post_mask); }
392393
if ((prev & post_mask) != post_mask) any_zero = true;
393394
}
394395
if (pany_zero != NULL) { *pany_zero = any_zero; }
396+
if (already_set != NULL) { *already_set = one_count; };
397+
mi_assert_internal(all_zero ? one_count == 0 : one_count <= count);
395398
return all_zero;
396399
}
397400

398401

399402
// Returns `true` if all `count` bits were 1.
400403
// `any_ones` is `true` if there was at least one bit set to one.
401-
static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) {
404+
static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones, size_t* already_set) {
402405
size_t idx = mi_bitmap_index_field(bitmap_idx);
403406
size_t pre_mask;
404407
size_t mid_mask;
405408
size_t post_mask;
406409
size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
407410
bool all_ones = true;
408411
bool any_ones = false;
412+
size_t one_count = 0;
409413
mi_bitmap_field_t* field = &bitmap[idx];
410414
size_t prev = mi_atomic_load_relaxed(field++);
411415
if ((prev & pre_mask) != pre_mask) all_ones = false;
412-
if ((prev & pre_mask) != 0) any_ones = true;
416+
if ((prev & pre_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & pre_mask); }
413417
while (mid_count-- > 0) {
414418
prev = mi_atomic_load_relaxed(field++);
415419
if ((prev & mid_mask) != mid_mask) all_ones = false;
416-
if ((prev & mid_mask) != 0) any_ones = true;
420+
if ((prev & mid_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & mid_mask); }
417421
}
418422
if (post_mask!=0) {
419423
prev = mi_atomic_load_relaxed(field);
420424
if ((prev & post_mask) != post_mask) all_ones = false;
421-
if ((prev & post_mask) != 0) any_ones = true;
425+
if ((prev & post_mask) != 0) { any_ones = true; one_count += mi_popcount(prev & post_mask); }
422426
}
423427
if (pany_ones != NULL) { *pany_ones = any_ones; }
428+
if (already_set != NULL) { *already_set = one_count; }
429+
mi_assert_internal(all_ones ? one_count == count : one_count < count);
424430
return all_ones;
425431
}
426432

427-
// Returns `true` if all `count` bits at `bitmap_idx` are 1.
// `already_set` (if not NULL) receives the number of bits in the range that are 1.
// Thin wrapper around `mi_bitmap_is_claimedx_across` that does not request the "any ones" flag.
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set) {
  return mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, NULL, already_set);
}
430436

431437
// Returns `true` if at least one of the `count` bits at `bitmap_idx` is 1.
// The bit count is not needed here, so NULL is passed for `already_set`.
bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) {
  bool any_ones = false;  // overwritten by the callee since a non-NULL pointer is passed
  mi_bitmap_is_claimedx_across(bitmap, bitmap_fields, count, bitmap_idx, &any_ones, NULL);
  return any_ones;
}

src/bitmap.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -111,9 +111,9 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
111111

112112
// Set `count` bits at `bitmap_idx` to 1 atomically
113113
// Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit.
114-
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero);
114+
bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero, size_t* already_set);
115115

116-
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
116+
bool _mi_bitmap_is_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, size_t* already_set);
117117
bool _mi_bitmap_is_any_claimed_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx);
118118

119119
#endif

src/libc.c

+57
Original file line numberDiff line numberDiff line change
@@ -275,3 +275,60 @@ int _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) {
275275
va_end(args);
276276
return written;
277277
}
278+
279+
280+
#if MI_SIZE_SIZE == 4
281+
// Masks used by the parallel bit count below: every even bit, every even
// 2-bit pair, and every even nibble of a 32-bit word (explicitly unsigned).
#define mi_mask_even_bits32      (0x55555555u)
#define mi_mask_even_pairs32     (0x33333333u)
#define mi_mask_even_nibbles32   (0x0F0F0F0Fu)

// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
static size_t mi_byte_sum32(uint32_t x) {
  // perform `x * 0x01010101`: the highest byte contains the sum of all bytes.
  x += (x << 8);
  x += (x << 16);
  return (size_t)(x >> 24);
}

// Count the bits set to 1 in the 32-bit value `x` using only shifts, masks and adds.
static size_t mi_popcount_generic32(uint32_t x) {
  // first count each 2-bit group `a`, where: a==0b00 -> 00, a==0b01 -> 01, a==0b10 -> 01, a==0b11 -> 10
  // in other words, `a - (a>>1)`; to do this in parallel, we need to mask to prevent spilling a bit pair
  // into the lower bit-pair:
  x = x - ((x >> 1) & mi_mask_even_bits32);
  // add the 2-bit pair results
  x = (x & mi_mask_even_pairs32) + ((x >> 2) & mi_mask_even_pairs32);
  // add the 4-bit nibble results
  x = (x + (x >> 4)) & mi_mask_even_nibbles32;
  // each byte now has a count of its bits (at most 8 each, so the total is at most 32 < 256),
  // we can sum them now:
  return mi_byte_sum32(x);
}
305+
306+
// Generic bit count; this definition is the one compiled when `MI_SIZE_SIZE == 4`.
// Forwards to the 32-bit implementation (the conversion of `x` is made explicit).
mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
  return mi_popcount_generic32((uint32_t)x);
}
309+
310+
#else
311+
// Masks used by the parallel bit count below: every even bit, every even
// 2-bit pair, and every even nibble of a 64-bit word (explicitly unsigned).
#define mi_mask_even_bits64      (0x5555555555555555ull)
#define mi_mask_even_pairs64     (0x3333333333333333ull)
#define mi_mask_even_nibbles64   (0x0F0F0F0F0F0F0F0Full)

// sum of all the bytes in `x` if it is guaranteed that the sum < 256!
static size_t mi_byte_sum64(uint64_t x) {
  // perform `x * 0x0101010101010101`: the highest byte contains the sum of all bytes.
  x += (x << 8);
  x += (x << 16);
  x += (x << 32);
  return (size_t)(x >> 56);
}

// Count the bits set to 1 in the 64-bit value `x` using only shifts, masks and adds.
static size_t mi_popcount_generic64(uint64_t x) {
  // count each 2-bit group `a` as `a - (a>>1)`, masked so a pair does not spill into its neighbour
  x = x - ((x >> 1) & mi_mask_even_bits64);
  // add the 2-bit pair results
  x = (x & mi_mask_even_pairs64) + ((x >> 2) & mi_mask_even_pairs64);
  // add the 4-bit nibble results
  x = (x + (x >> 4)) & mi_mask_even_nibbles64;
  // each byte now has a count of its bits (at most 8 each, so the total is at most 64 < 256)
  return mi_byte_sum64(x);
}
329+
330+
// Generic bit count; this definition is the one compiled when `MI_SIZE_SIZE` is not 4.
// Forwards to the 64-bit implementation (the conversion of `x` is made explicit).
mi_decl_noinline size_t _mi_popcount_generic(size_t x) {
  return mi_popcount_generic64((uint64_t)x);
}
333+
#endif
334+

src/stats.c

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
3030
{
3131
// add atomically (for abandoned pages)
3232
int64_t current = mi_atomic_addi64_relaxed(&stat->current, amount);
33+
// if (stat == &_mi_stats_main.committed) { mi_assert_internal(current + amount >= 0); };
3334
mi_atomic_maxi64_relaxed(&stat->peak, current + amount);
3435
if (amount > 0) {
3536
mi_atomic_addi64_relaxed(&stat->total,amount);

0 commit comments

Comments
 (0)