From 757c2366d11ccc9c0b19c3f501cbbb3f6ff18e13 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 21 Mar 2025 16:07:28 +0000 Subject: [PATCH 01/19] Rename corealloc.h to alloc.h --- src/snmalloc/mem/{corealloc.h => alloc.h} | 0 src/snmalloc/mem/mem.h | 2 +- src/snmalloc/mem/pool.h | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename src/snmalloc/mem/{corealloc.h => alloc.h} (100%) diff --git a/src/snmalloc/mem/corealloc.h b/src/snmalloc/mem/alloc.h similarity index 100% rename from src/snmalloc/mem/corealloc.h rename to src/snmalloc/mem/alloc.h diff --git a/src/snmalloc/mem/mem.h b/src/snmalloc/mem/mem.h index fc5e59965..e9c80765c 100644 --- a/src/snmalloc/mem/mem.h +++ b/src/snmalloc/mem/mem.h @@ -1,7 +1,7 @@ +#include "alloc.h" #include "backend_concept.h" #include "backend_wrappers.h" #include "check_init.h" -#include "corealloc.h" #include "entropy.h" #include "freelist.h" #include "metadata.h" diff --git a/src/snmalloc/mem/pool.h b/src/snmalloc/mem/pool.h index 9b6294d67..1ea7b1369 100644 --- a/src/snmalloc/mem/pool.h +++ b/src/snmalloc/mem/pool.h @@ -81,7 +81,7 @@ namespace snmalloc * The third template argument is a method to retrieve the actual PoolState. * * For the pool of allocators, refer to the AllocPool alias defined in - * corealloc.h. + * alloc.h. * * For a pool of another type, it is recommended to leave the * third template argument with its default value. The SingletonPoolState From 6447c87725a65328dd82e7a0efdeb115c208761a Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 21 Mar 2025 17:11:58 +0000 Subject: [PATCH 02/19] Some outstanding tests. 
--- src/test/perf/combininglock/combininglock.cc | 37 ++++++++++++++++ src/test/perf/realloc/realloc.cc | 46 ++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 src/test/perf/combininglock/combininglock.cc create mode 100644 src/test/perf/realloc/realloc.cc diff --git a/src/test/perf/combininglock/combininglock.cc b/src/test/perf/combininglock/combininglock.cc new file mode 100644 index 000000000..6a9437c70 --- /dev/null +++ b/src/test/perf/combininglock/combininglock.cc @@ -0,0 +1,37 @@ +#include +#include +#include + +snmalloc::CombiningLock cl; + +std::atomic run{true}; + +void loop() +{ + size_t j = 0; + size_t i = 0; + while (run) + { + i++; + snmalloc::with(cl, [&]() { j++; }); + if (i != j) + snmalloc::error("i != j"); + } +} + +int main() +{ + std::vector threads; + for (size_t i = 0; i < 8; i++) + { + threads.emplace_back(std::thread(loop)); + } + + std::this_thread::sleep_for(std::chrono::seconds(100)); + run = false; + + for (auto& t : threads) + { + t.join(); + } +} \ No newline at end of file diff --git a/src/test/perf/realloc/realloc.cc b/src/test/perf/realloc/realloc.cc new file mode 100644 index 000000000..5efcfbaeb --- /dev/null +++ b/src/test/perf/realloc/realloc.cc @@ -0,0 +1,46 @@ +#include "test/opt.h" +#include "test/setup.h" +#include "test/usage.h" +#include "test/xoroshiro.h" + +#include +#include +#include +#include +#include + +using namespace snmalloc; + +NOINLINE +void* myrealloc(void* p, size_t size) +{ + return snmalloc::libc::realloc(p, size); +} + +void grow() +{ + void* base = nullptr; + for (size_t i = 1; i < 1000; i++) + { + base = myrealloc(base, i * 8); + } + snmalloc::libc::free(base); +} + +int main() +{ + auto start = Aal::tick(); + + for (size_t i = 0; i < 10000; i++) + { + grow(); + if (i % 10 == 0) + { + std::cout << "." 
<< std::flush; + } + } + + auto end = Aal::tick(); + + std::cout << "Taken: " << end - start << std::endl; +} \ No newline at end of file From d29fffa6b6ae6e427d9d89f1e1c005fd5711e4cc Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 21 Mar 2025 17:13:53 +0000 Subject: [PATCH 03/19] Add statistic to snmalloc. This adds a collection of per-sizeclass statistics for tracking how many allocations have occurred on each thread. These are racily combined to provide basic tracking information. --- src/snmalloc/backend_helpers/statsrange.h | 25 ++---- src/snmalloc/ds_core/ds_core.h | 3 +- src/snmalloc/ds_core/stats.h | 97 +++++++++++++++++++++++ src/snmalloc/global/globalalloc.h | 78 ++++++++++++++++++ src/snmalloc/mem/alloc.h | 51 ++++++++++-- src/snmalloc/mem/allocstats.h | 44 ++++++++++ src/snmalloc/mem/pool.h | 8 ++ src/snmalloc/mem/remotecache.h | 31 ++++---- src/test/func/alloc_churn/alloc_churn.cc | 32 ++++++++ src/test/func/cleanup/cleanup.cc | 61 ++++++++++++++ src/test/func/statistics/stats.cc | 17 +++- src/test/perf/batchblitz/batchblitz.cc | 94 ++++++++++++++++++++++ src/test/perf/churn/churn.cc | 94 ++++++++++++++++++++++ 13 files changed, 594 insertions(+), 41 deletions(-) create mode 100644 src/snmalloc/ds_core/stats.h create mode 100644 src/snmalloc/mem/allocstats.h create mode 100644 src/test/func/alloc_churn/alloc_churn.cc create mode 100644 src/test/func/cleanup/cleanup.cc create mode 100644 src/test/perf/batchblitz/batchblitz.cc create mode 100644 src/test/perf/churn/churn.cc diff --git a/src/snmalloc/backend_helpers/statsrange.h b/src/snmalloc/backend_helpers/statsrange.h index d1e213777..8fe676fb6 100644 --- a/src/snmalloc/backend_helpers/statsrange.h +++ b/src/snmalloc/backend_helpers/statsrange.h @@ -16,8 +16,7 @@ namespace snmalloc { using ContainsParent::parent; - static inline stl::Atomic current_usage{}; - static inline stl::Atomic peak_usage{}; + static inline Stat usage{}; public: static constexpr bool Aligned = ParentRange::Aligned; 
@@ -30,34 +29,26 @@ namespace snmalloc CapPtr alloc_range(size_t size) { - auto result = parent.alloc_range(size); - if (result != nullptr) - { - auto prev = current_usage.fetch_add(size); - auto curr = peak_usage.load(); - while (curr < prev + size) - { - if (peak_usage.compare_exchange_weak(curr, prev + size)) - break; - } - } - return result; + auto r = parent.alloc_range(size); + if (r != nullptr) + usage += size; + return r; } void dealloc_range(CapPtr base, size_t size) { - current_usage -= size; + usage -= size; parent.dealloc_range(base, size); } size_t get_current_usage() { - return current_usage.load(); + return usage.get_curr(); } size_t get_peak_usage() { - return peak_usage.load(); + return usage.get_peak(); } }; }; diff --git a/src/snmalloc/ds_core/ds_core.h b/src/snmalloc/ds_core/ds_core.h index 38e99dce2..ebc55e6cc 100644 --- a/src/snmalloc/ds_core/ds_core.h +++ b/src/snmalloc/ds_core/ds_core.h @@ -16,4 +16,5 @@ #include "ptrwrap.h" #include "redblacktree.h" #include "seqset.h" -#include "tid.h" \ No newline at end of file +#include "stats.h" +#include "tid.h" diff --git a/src/snmalloc/ds_core/stats.h b/src/snmalloc/ds_core/stats.h new file mode 100644 index 000000000..55bdd2dd6 --- /dev/null +++ b/src/snmalloc/ds_core/stats.h @@ -0,0 +1,97 @@ +#include "defines.h" + +#include +#include + +namespace snmalloc +{ + /** + * Very basic statistic that tracks current and peak values. 
+ */ + class Stat + { + private: + stl::Atomic curr{0}; + stl::Atomic peak{0}; + + public: + void increase(size_t amount) + { + size_t c = (curr += amount); + size_t p = peak.load(std::memory_order_relaxed); + while (c > p) + { + if (peak.compare_exchange_strong(p, c)) + break; + } + } + + void decrease(size_t amount) + { + size_t prev = curr.fetch_sub(amount); + SNMALLOC_ASSERT_MSG( + prev >= amount, "prev = {}, amount = {}", prev, amount); + UNUSED(prev); + } + + size_t get_curr() + { + return curr.load(std::memory_order_relaxed); + } + + size_t get_peak() + { + return peak.load(std::memory_order_relaxed); + } + + void operator+=(size_t amount) + { + increase(amount); + } + + void operator-=(size_t amount) + { + decrease(amount); + } + + void operator++() + { + increase(1); + } + + void operator--() + { + decrease(1); + } + }; + + /** + * Very basic statistic that can only grow. Not thread-safe. + */ + class MonotoneLocalStat + { + std::atomic value{0}; + + public: + void operator++(int) + { + value.fetch_add(1, std::memory_order_relaxed); + } + + void operator+=(const MonotoneLocalStat& other) + { + auto v = other.value.load(std::memory_order_relaxed); + value.fetch_add(v, std::memory_order_relaxed); + } + + void operator+=(size_t v) + { + value.fetch_add(v, std::memory_order_relaxed); + } + + size_t operator*() + { + return value.load(std::memory_order_relaxed); + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/global/globalalloc.h b/src/snmalloc/global/globalalloc.h index fa769e3e4..e0209d447 100644 --- a/src/snmalloc/global/globalalloc.h +++ b/src/snmalloc/global/globalalloc.h @@ -84,6 +84,9 @@ namespace snmalloc } } + if (result == nullptr) + SNMALLOC_CHECK(RemoteDeallocCache::remote_inflight.get_curr() == 0); + if (result != nullptr) { *result = okay; @@ -128,6 +131,81 @@ namespace snmalloc } } + template + inline static void get_stats(AllocStats& stats) + { + auto alloc = AllocPool::iterate(); + while (alloc != nullptr) + { + stats += 
alloc->get_stats(); + alloc = AllocPool::iterate(alloc); + } + } + + template + inline static void print_alloc_stats() + { + static std::atomic dump{0}; + + auto l_dump = dump++; + if (l_dump == 0) + { + message<1024>( + "snmalloc_allocs,dumpid,sizeclass,size,allocated,deallocated,in_use," + "bytes,slabs allocated,slabs deallocated,slabs in_use,slabs bytes"); + message<1024>( + "snmalloc_totals,dumpid,backend bytes,peak backend " + "bytes,requested,slabs requested bytes,remote inflight bytes,allocator " + "count"); + } + + AllocStats stats; + snmalloc::get_stats(stats); + size_t total_live{0}; + size_t total_live_slabs{0}; + for (size_t i = 0; i < snmalloc::SIZECLASS_REP_SIZE; i++) + { + auto sc = snmalloc::sizeclass_t::from_raw(i); + auto allocated = *stats[sc].objects_allocated; + auto deallocated = *stats[sc].objects_deallocated; + auto slabs_allocated = *stats[sc].slabs_allocated; + auto slabs_deallocated = *stats[sc].slabs_deallocated; + if (allocated == 0 && deallocated == 0) + continue; + auto size = snmalloc::sizeclass_full_to_size(sc); + auto slab_size = snmalloc::sizeclass_full_to_slab_size(sc); + auto in_use = allocated - deallocated; + auto amount = in_use * size; + total_live += amount; + auto in_use_slabs = slabs_allocated - slabs_deallocated; + auto amount_slabs = in_use_slabs * slab_size; + total_live_slabs += amount_slabs; + + snmalloc::message<1024>( + "snmalloc_allocs,{},{},{},{},{},{},{},{},{},{},{}", + l_dump, + i, + size, + allocated, + deallocated, + in_use, + amount, + slabs_allocated, + slabs_deallocated, + in_use_slabs, + amount_slabs); + } + snmalloc::message<1024>( + "snmalloc_totals,{},{},{},{},{},{},{}", + l_dump, + Config::Backend::get_current_usage(), + Config::Backend::get_peak_usage(), + total_live, + total_live_slabs, + RemoteDeallocCache::remote_inflight.get_curr(), + Config::pool().get_count()); + } + /** * Returns the number of remaining bytes in an object. 
* diff --git a/src/snmalloc/mem/alloc.h b/src/snmalloc/mem/alloc.h index 1b7f7f5b5..e9c25028a 100644 --- a/src/snmalloc/mem/alloc.h +++ b/src/snmalloc/mem/alloc.h @@ -1,6 +1,7 @@ #pragma once #include "../ds/ds.h" +#include "allocstats.h" #include "check_init.h" #include "freelist.h" #include "metadata.h" @@ -156,6 +157,11 @@ namespace snmalloc */ Ticker ticker; + /** + * Tracks this allocators memory usage + */ + AllocStats stats; + /** * The message queue needs to be accessible from other threads * @@ -437,6 +443,9 @@ namespace snmalloc post(); } + // Push size to global statistics + RemoteDeallocCache::remote_inflight -= bytes_freed; + return action(args...); } @@ -488,16 +497,15 @@ namespace snmalloc freelist::Object::key_root, entry.get_slab_metadata()->as_key_tweak(), domesticate); - if (!need_post && !remote_dealloc_cache.reserve_space(entry, nelem)) - { - need_post = true; - } + + need_post |= remote_dealloc_cache.reserve_space(entry, nelem); + remote_dealloc_cache.template forward( entry.get_remote()->trunc_id(), msg); } template - SNMALLOC_FAST_PATH static auto dealloc_local_objects_fast( + SNMALLOC_FAST_PATH auto dealloc_local_objects_fast( capptr::Alloc msg, const PagemapEntry& entry, BackendSlabMetadata* meta, @@ -523,6 +531,8 @@ namespace snmalloc bytes_freed += objsize * length; + stats[entry.get_sizeclass()].objects_deallocated += static_cast(length); + // Update the head and the next pointer in the free list. 
meta->free_queue.append_segment( curr, @@ -606,6 +616,7 @@ namespace snmalloc if (SNMALLOC_LIKELY(!fl->empty())) { auto p = fl->take(key, domesticate); + stats[sizeclass].objects_allocated++; return finish_alloc(p, sizeclass); } @@ -694,6 +705,13 @@ namespace snmalloc chunk.unsafe_ptr(), bits::next_pow2(size)); } + if (chunk.unsafe_ptr() != nullptr) + { + auto sc = size_to_sizeclass_full(size); + self->stats[sc].objects_allocated++; + self->stats[sc].slabs_allocated++; + } + return capptr_chunk_is_alloc( capptr_to_user_address_control(chunk)); }, @@ -773,6 +791,7 @@ namespace snmalloc laden.insert(meta); } + stats[sizeclass].objects_allocated++; auto r = finish_alloc(p, sizeclass); return ticker.check_tick(r); } @@ -831,6 +850,9 @@ namespace snmalloc laden.insert(meta); } + stats[sizeclass].slabs_allocated++; + stats[sizeclass].objects_allocated++; + auto r = finish_alloc(p, sizeclass); return ticker.check_tick(r); }, @@ -1006,6 +1028,7 @@ namespace snmalloc */ if (SNMALLOC_LIKELY(public_state() == entry.get_remote())) { + stats[entry.get_sizeclass()].objects_deallocated++; dealloc_cheri_checks(p_tame.unsafe_ptr()); dealloc_local_object(p_tame, entry); return; @@ -1074,6 +1097,8 @@ namespace snmalloc // Remove from set of fully used slabs. meta->node.remove(); + stats[entry.get_sizeclass()].slabs_deallocated++; + Config::Backend::dealloc_chunk( get_backend_local_state(), *meta, p, size, entry.get_sizeclass()); @@ -1170,6 +1195,8 @@ namespace snmalloc // don't touch the cache lines at this point in snmalloc_check_client. auto start = clear_slab(meta, sizeclass); + stats[sizeclass].slabs_deallocated++; + Config::Backend::dealloc_chunk( get_backend_local_state(), *meta, @@ -1336,7 +1363,7 @@ namespace snmalloc return capptr_domesticate(local_state, p); }; - size_t bytes_flushed = 0; // Not currently used. 
+ size_t bytes_flushed = 0; if (destroy_queue) { @@ -1349,6 +1376,8 @@ namespace snmalloc entry, m, need_post, domesticate, bytes_flushed); }; + RemoteDeallocCache::remote_inflight -= bytes_flushed; + message_queue().destroy_and_iterate(domesticate, cb); } else @@ -1397,8 +1426,9 @@ namespace snmalloc } }); - // Set the remote_dealloc_cache to immediately slow path. - remote_dealloc_cache.capacity = 0; + // TODO: I don't think this is needed. + // // Set the remote_dealloc_cache to immediately slow path. + // remote_dealloc_cache.cache_bytes = REMOTE_CACHE; return posted; } @@ -1467,6 +1497,11 @@ namespace snmalloc #endif return sent_something; } + + const AllocStats& get_stats() + { + return stats; + } }; template diff --git a/src/snmalloc/mem/allocstats.h b/src/snmalloc/mem/allocstats.h new file mode 100644 index 000000000..bfa789c36 --- /dev/null +++ b/src/snmalloc/mem/allocstats.h @@ -0,0 +1,44 @@ +#include "../ds_core/ds_core.h" +#include "sizeclasstable.h" + +#include + +namespace snmalloc +{ + struct AllocStat + { + MonotoneLocalStat objects_allocated{}; + MonotoneLocalStat objects_deallocated{}; + MonotoneLocalStat slabs_allocated{}; + MonotoneLocalStat slabs_deallocated{}; + }; + + class AllocStats + { + std::array sizeclass{}; + + public: + AllocStat& operator[](sizeclass_t index) + { + auto i = index.raw(); + return sizeclass[i]; + } + + AllocStat& operator[](smallsizeclass_t index) + { + return sizeclass[sizeclass_t::from_small_class(index).raw()]; + } + + void operator+=(const AllocStats& other) + { + for (size_t i = 0; i < SIZECLASS_REP_SIZE; i++) + { + sizeclass[i].objects_allocated += other.sizeclass[i].objects_allocated; + sizeclass[i].objects_deallocated += + other.sizeclass[i].objects_deallocated; + sizeclass[i].slabs_allocated += other.sizeclass[i].slabs_allocated; + sizeclass[i].slabs_deallocated += other.sizeclass[i].slabs_deallocated; + } + } + }; +} // namespace snmalloc \ No newline at end of file diff --git a/src/snmalloc/mem/pool.h 
b/src/snmalloc/mem/pool.h index 1ea7b1369..9a7898ff7 100644 --- a/src/snmalloc/mem/pool.h +++ b/src/snmalloc/mem/pool.h @@ -32,9 +32,15 @@ namespace snmalloc FlagWord lock{}; capptr::Alloc list{nullptr}; + std::atomic count{0}; public: constexpr PoolState() = default; + + size_t get_count() + { + return count.load(std::memory_order_relaxed); + } }; /** @@ -124,6 +130,8 @@ namespace snmalloc p->list_next = pool.list; pool.list = p; + pool.count++; + p->set_in_use(); }); return p.unsafe_ptr(); diff --git a/src/snmalloc/mem/remotecache.h b/src/snmalloc/mem/remotecache.h index 3d5ed70b8..1c0269cb1 100644 --- a/src/snmalloc/mem/remotecache.h +++ b/src/snmalloc/mem/remotecache.h @@ -194,14 +194,17 @@ namespace snmalloc RemoteDeallocCacheBatchingImpl batching; + static inline Stat remote_inflight; + /** - * The total amount of memory we are waiting for before we will dispatch - * to other allocators. Zero can mean we have not initialised the allocator - * yet. This is initialised to the 0 so that we always hit a slow path to - * start with, when we hit the slow path and need to dispatch everything, we - * can check if we are a real allocator and lazily provide a real allocator. + * The total amount of bytes of memory in the cache. + * + * REMOTE_CACHE is used as the initial value, so that we always hit a slow + * path to start with, when we hit the slow path and need to dispatch + * everything, we can check if we are a real allocator and lazily provide a + * real allocator. 
*/ - int64_t capacity{0}; + size_t cache_bytes{REMOTE_CACHE}; #ifndef NDEBUG bool initialised = false; @@ -236,10 +239,8 @@ namespace snmalloc auto size = n * static_cast(sizeclass_full_to_size(entry.get_sizeclass())); - bool result = capacity > size; - if (result) - capacity -= size; - return result; + cache_bytes += size; + return cache_bytes < REMOTE_CACHE; } template @@ -288,6 +289,9 @@ namespace snmalloc return capptr_domesticate(local_state, p); }; + // We are about to post cache_bytes bytes to other allocators. + remote_inflight += cache_bytes; + batching.close_all([this]( RemoteAllocator::alloc_id_t target_id, capptr::Alloc msg) { @@ -356,8 +360,8 @@ namespace snmalloc } } - // Reset capacity as we have emptied everything - capacity = REMOTE_CACHE; + // Reset capacity as we have empty everything + cache_bytes = 0; return sent_something; } @@ -382,9 +386,8 @@ namespace snmalloc // a null address. l.init(0, RemoteAllocator::key_global, NO_KEY_TWEAK); } - capacity = REMOTE_CACHE; - batching.init(); + cache_bytes = 0; } }; } // namespace snmalloc diff --git a/src/test/func/alloc_churn/alloc_churn.cc b/src/test/func/alloc_churn/alloc_churn.cc new file mode 100644 index 000000000..a2d6dd436 --- /dev/null +++ b/src/test/func/alloc_churn/alloc_churn.cc @@ -0,0 +1,32 @@ +#include "snmalloc/snmalloc.h" + +#include + +void test_step() +{ + auto b = snmalloc::get_scoped_allocator(); + auto a = snmalloc::get_scoped_allocator(); + + for (size_t j = 0; j < 32; j++) + for (size_t i = 0; i < 20; i++) + { + auto p = a->alloc(snmalloc::bits::one_at_bit(i)); + if (p != nullptr) + b->dealloc(p); + p = b->alloc(snmalloc::bits::one_at_bit(i)); + if (p != nullptr) + a->dealloc(p); + } +} + +int main() +{ + for (size_t i = 0; i < 10000; i++) + { + if (i % 1000 == 0) { + std::cout << "Step " << i << std::endl; + snmalloc::print_alloc_stats(); + } + test_step(); + } +} \ No newline at end of file diff --git a/src/test/func/cleanup/cleanup.cc b/src/test/func/cleanup/cleanup.cc new 
file mode 100644 index 000000000..5e3666dc8 --- /dev/null +++ b/src/test/func/cleanup/cleanup.cc @@ -0,0 +1,61 @@ +#include +#include +#include +#include + +void ecall() +{ + auto a = snmalloc::get_scoped_allocator(); + std::vector allocs; + for (size_t j = 0; j < 1000; j++) + { + allocs.push_back(a->alloc(j % 1024)); + } + auto p = a->alloc(1 * 1024 * 1024); + memset(p, 0, 1 * 1024 * 1024); + + for (size_t j = 0; j < allocs.size(); j++) + a->dealloc(allocs[j]); + + a->dealloc(p); +} + +void thread_body() +{ + for (int i = 0; i < 1000; i++) + { + ecall(); + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } +} + +void monitor_body() +{ + for (int i = 0; i < 60; i++) + { + std::cout << "Current: " + << snmalloc::Alloc::Config::Backend::get_current_usage() + << std::endl; + std::cout << "Peak : " + << snmalloc::Alloc::Config::Backend::get_peak_usage() + << std::endl; + std::cout << "Allocs : " << snmalloc::Alloc::Config::pool().get_count() + << std::endl; + std::cout << "--------------------------------------------" << std::endl; + std::this_thread::sleep_for(std::chrono::seconds(1)); + } +} + +int main() +{ + std::vector threads; + for (int i = 0; i < 8; i++) + { + threads.push_back(std::thread(thread_body)); + } + threads.push_back(std::thread(monitor_body)); + + for (auto& t : threads) + t.join(); + return 0; +} \ No newline at end of file diff --git a/src/test/func/statistics/stats.cc b/src/test/func/statistics/stats.cc index d66f060a1..032884a0d 100644 --- a/src/test/func/statistics/stats.cc +++ b/src/test/func/statistics/stats.cc @@ -11,6 +11,7 @@ void debug_check_empty_1() auto r = snmalloc::alloc(size); snmalloc::debug_check_empty(&result); + snmalloc::print_alloc_stats(); if (result != false) { std::cout << "debug_check_empty failed to detect leaked memory:" << size @@ -18,7 +19,11 @@ void debug_check_empty_1() abort(); } - snmalloc::dealloc(r); +snmalloc::print_alloc_stats(); + +snmalloc::dealloc(r); + + snmalloc::print_alloc_stats(); 
snmalloc::debug_check_empty(&result); if (result != true) @@ -27,8 +32,12 @@ void debug_check_empty_1() abort(); } + snmalloc::print_alloc_stats(); + r = snmalloc::alloc(size); + snmalloc::print_alloc_stats(); + snmalloc::debug_check_empty(&result); if (result != false) { @@ -37,14 +46,20 @@ void debug_check_empty_1() abort(); } + snmalloc::print_alloc_stats(); + snmalloc::dealloc(r); + snmalloc::print_alloc_stats(); + snmalloc::debug_check_empty(&result); if (result != true) { std::cout << "debug_check_empty failed to say empty:" << size << std::endl; abort(); } + + snmalloc::print_alloc_stats(); } template diff --git a/src/test/perf/batchblitz/batchblitz.cc b/src/test/perf/batchblitz/batchblitz.cc new file mode 100644 index 000000000..496bc5a5d --- /dev/null +++ b/src/test/perf/batchblitz/batchblitz.cc @@ -0,0 +1,94 @@ +#include +#include +#include +#include + +size_t threads{0}; +size_t memory{0}; +size_t iterations{0}; + +// Global barrier for synchronising threads. +std::atomic barrier{0}; +std::atomic incarnation{0}; + +std::atomic stop{false}; + +std::vector> allocations; + +NOINLINE bool wait() +{ + auto old_incarnation = incarnation.load(); + // Register we have arrived at the barrier. 
+ if (--barrier == 0) + { + printf("."); + fflush(stdout); + barrier = threads; + incarnation++; + return stop; + } + + while (incarnation.load() == old_incarnation) + { + if (stop) + return true; + snmalloc::Aal::pause(); + } + + return stop; +} + +void thread_func(size_t tid) +{ + size_t size = 4097; + size_t mem = memory / size; + for (size_t j = 0; j < iterations; j++) + { + if (wait()) + return; + std::vector& allocs = allocations[tid]; + for (size_t i = 0; i < mem; i++) + { + allocs.push_back(snmalloc::alloc(4097)); + } + if (wait()) + return; + std::vector& deallocs = allocations[(tid + 1) % threads]; + for (auto p : deallocs) + { + snmalloc::dealloc(p); + } + deallocs.clear(); + } +} + +int main() +{ + threads = std::thread::hardware_concurrency(); + barrier = threads; + + if (snmalloc::DefaultPal::address_bits == 32) + memory = snmalloc::bits::one_at_bit(30) / threads; + else + memory = snmalloc::bits::one_at_bit(32) / threads; + iterations = 1000; + + for (size_t i = 0; i < threads; i++) + allocations.emplace_back(); + + std::vector thread_pool; + for (size_t i = 0; i < threads; i++) + thread_pool.emplace_back(thread_func, i); + + for (size_t i = 0; i < 30; i++) + { + std::this_thread::sleep_for(std::chrono::seconds(1)); + snmalloc::print_alloc_stats(); + } + stop = true; + + + + for (auto& t : thread_pool) + t.join(); +} diff --git a/src/test/perf/churn/churn.cc b/src/test/perf/churn/churn.cc new file mode 100644 index 000000000..1a540c0d9 --- /dev/null +++ b/src/test/perf/churn/churn.cc @@ -0,0 +1,94 @@ +#include +#include +#include +#include +#include + +int main() +{ + std::vector threads; + std::atomic running; + snmalloc::Stat requests; + std::atomic done{false}; + + for (size_t i = 0; i < 16; i++) + { + threads.push_back(std::thread([&running, &requests, &done]() { + std::queue q; + while (!done) + { + snmalloc::ScopedAllocator alloc; + running++; + + if (rand() % 1000 == 0) + { + // Deallocate everything in the queue + while (q.size() > 0) + { + 
auto p = q.front(); + requests -= *p; + alloc->dealloc(p); + q.pop(); + } + } + + for (size_t j = 0; j < 1000; j++) + { + if (q.size() >= 20000 || (q.size() > 0 && (rand() % 10 == 0))) + { + auto p = q.front(); + requests -= *p; + alloc->dealloc(p); + q.pop(); + } + else + { + size_t size = + (rand() % 1024 == 0) ? 16 * 1024 * (1 << (rand() % 3)) : 48; + requests += size; + auto p = (size_t*)alloc->alloc(size); + *p = size; + q.push(p); + } + } + + running--; + std::this_thread::sleep_for(std::chrono::microseconds(rand() % 2000)); + } + })); + } + + std::thread([&requests]() { + size_t count = 0; + while (count < 60) + { + count++; + std::this_thread::sleep_for(std::chrono::seconds(1)); + // std::cout << "Inflight: " << + // snmalloc::RemoteDeallocCache::remote_inflight << std::endl; std::cout + // << "Current reservation: " << snmalloc::Globals::get_current_usage() << + // std::endl; std::cout << "Peak reservation: " << + // snmalloc::Globals::get_peak_usage() << std::endl; std::cout << + // "Allocator count: " << snmalloc::Globals::pool().get_count() << + // std::endl; std::cout << "Running threads: " << running << + // std::endl; std::cout << "Index: " << count << std::endl; + // std::cout << "------------------------------------------" << std::endl; + std::cout << count << "," + << snmalloc::Alloc::Config::Backend::get_peak_usage() << "," + << snmalloc::Alloc::Config::Backend::get_current_usage() << "," + << requests.get_curr() << "," << requests.get_peak() << "," + << snmalloc::RemoteDeallocCache::remote_inflight.get_peak() + << "," + << snmalloc::RemoteDeallocCache::remote_inflight.get_curr() + << std::endl; + snmalloc::print_alloc_stats(); + } + }).join(); + + done = true; + + for (auto& t : threads) + t.join(); + + return 0; +} \ No newline at end of file From 8eda79dcda3172ff1319e40f6f13ae1463ae91dc Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 21 Mar 2025 17:26:26 +0000 Subject: [PATCH 04/19] conversion fix. 
--- src/snmalloc/mem/remotecache.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/snmalloc/mem/remotecache.h b/src/snmalloc/mem/remotecache.h index 1c0269cb1..c0aef4ac7 100644 --- a/src/snmalloc/mem/remotecache.h +++ b/src/snmalloc/mem/remotecache.h @@ -236,8 +236,7 @@ namespace snmalloc { static_assert(sizeof(n) * 8 > MAX_CAPACITY_BITS); - auto size = - n * static_cast(sizeclass_full_to_size(entry.get_sizeclass())); + size_t size = n * sizeclass_full_to_size(entry.get_sizeclass()); cache_bytes += size; return cache_bytes < REMOTE_CACHE; From 462402d0db074c95d7e3c128383b937bc2142767 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 21 Mar 2025 17:32:19 +0000 Subject: [PATCH 05/19] Fix header --- src/snmalloc/ds_core/stats.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/snmalloc/ds_core/stats.h b/src/snmalloc/ds_core/stats.h index 55bdd2dd6..57ffcc9a4 100644 --- a/src/snmalloc/ds_core/stats.h +++ b/src/snmalloc/ds_core/stats.h @@ -1,7 +1,8 @@ -#include "defines.h" +#pragma once -#include -#include +#include "defines.h" +#include "snmalloc/stl/atomic.h" +#include "stddef.h" namespace snmalloc { From 99879876792e560a06a862a32b337c17c83be7d4 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 21 Mar 2025 17:32:39 +0000 Subject: [PATCH 06/19] Clangformat --- src/snmalloc/global/globalalloc.h | 3 ++- src/snmalloc/mem/alloc.h | 3 ++- src/test/func/alloc_churn/alloc_churn.cc | 3 ++- src/test/func/statistics/stats.cc | 4 ++-- src/test/perf/batchblitz/batchblitz.cc | 2 -- src/test/perf/churn/churn.cc | 21 ++++++++++++--------- 6 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/snmalloc/global/globalalloc.h b/src/snmalloc/global/globalalloc.h index e0209d447..2ce108642 100644 --- a/src/snmalloc/global/globalalloc.h +++ b/src/snmalloc/global/globalalloc.h @@ -85,7 +85,8 @@ namespace snmalloc } if (result == nullptr) - SNMALLOC_CHECK(RemoteDeallocCache::remote_inflight.get_curr() == 0); + 
SNMALLOC_CHECK( + RemoteDeallocCache::remote_inflight.get_curr() == 0); if (result != nullptr) { diff --git a/src/snmalloc/mem/alloc.h b/src/snmalloc/mem/alloc.h index e9c25028a..1272fc0ff 100644 --- a/src/snmalloc/mem/alloc.h +++ b/src/snmalloc/mem/alloc.h @@ -531,7 +531,8 @@ namespace snmalloc bytes_freed += objsize * length; - stats[entry.get_sizeclass()].objects_deallocated += static_cast(length); + stats[entry.get_sizeclass()].objects_deallocated += + static_cast(length); // Update the head and the next pointer in the free list. meta->free_queue.append_segment( diff --git a/src/test/func/alloc_churn/alloc_churn.cc b/src/test/func/alloc_churn/alloc_churn.cc index a2d6dd436..23d3c3015 100644 --- a/src/test/func/alloc_churn/alloc_churn.cc +++ b/src/test/func/alloc_churn/alloc_churn.cc @@ -23,7 +23,8 @@ int main() { for (size_t i = 0; i < 10000; i++) { - if (i % 1000 == 0) { + if (i % 1000 == 0) + { std::cout << "Step " << i << std::endl; snmalloc::print_alloc_stats(); } diff --git a/src/test/func/statistics/stats.cc b/src/test/func/statistics/stats.cc index 032884a0d..3bedcd55b 100644 --- a/src/test/func/statistics/stats.cc +++ b/src/test/func/statistics/stats.cc @@ -19,9 +19,9 @@ void debug_check_empty_1() abort(); } -snmalloc::print_alloc_stats(); + snmalloc::print_alloc_stats(); -snmalloc::dealloc(r); + snmalloc::dealloc(r); snmalloc::print_alloc_stats(); diff --git a/src/test/perf/batchblitz/batchblitz.cc b/src/test/perf/batchblitz/batchblitz.cc index 496bc5a5d..3dce75353 100644 --- a/src/test/perf/batchblitz/batchblitz.cc +++ b/src/test/perf/batchblitz/batchblitz.cc @@ -87,8 +87,6 @@ int main() } stop = true; - - for (auto& t : thread_pool) t.join(); } diff --git a/src/test/perf/churn/churn.cc b/src/test/perf/churn/churn.cc index 1a540c0d9..910204a1d 100644 --- a/src/test/perf/churn/churn.cc +++ b/src/test/perf/churn/churn.cc @@ -65,7 +65,8 @@ int main() count++; std::this_thread::sleep_for(std::chrono::seconds(1)); // std::cout << "Inflight: " << - // 
snmalloc::RemoteDeallocCache::remote_inflight << std::endl; std::cout + // snmalloc::RemoteDeallocCache::remote_inflight << + // std::endl; std::cout // << "Current reservation: " << snmalloc::Globals::get_current_usage() << // std::endl; std::cout << "Peak reservation: " << // snmalloc::Globals::get_peak_usage() << std::endl; std::cout << @@ -73,14 +74,16 @@ int main() // std::endl; std::cout << "Running threads: " << running << // std::endl; std::cout << "Index: " << count << std::endl; // std::cout << "------------------------------------------" << std::endl; - std::cout << count << "," - << snmalloc::Alloc::Config::Backend::get_peak_usage() << "," - << snmalloc::Alloc::Config::Backend::get_current_usage() << "," - << requests.get_curr() << "," << requests.get_peak() << "," - << snmalloc::RemoteDeallocCache::remote_inflight.get_peak() - << "," - << snmalloc::RemoteDeallocCache::remote_inflight.get_curr() - << std::endl; + std::cout + << count << "," << snmalloc::Alloc::Config::Backend::get_peak_usage() + << "," << snmalloc::Alloc::Config::Backend::get_current_usage() << "," + << requests.get_curr() << "," << requests.get_peak() << "," + << snmalloc::RemoteDeallocCache::remote_inflight + .get_peak() + << "," + << snmalloc::RemoteDeallocCache::remote_inflight + .get_curr() + << std::endl; snmalloc::print_alloc_stats(); } }).join(); From 0f4477c85ab628d89d30bb25e890ee16c57faf0c Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 21 Mar 2025 17:58:26 +0000 Subject: [PATCH 07/19] Do not write to the default allocators state. 
--- src/snmalloc/mem/remotecache.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/snmalloc/mem/remotecache.h b/src/snmalloc/mem/remotecache.h index c0aef4ac7..ec60839f4 100644 --- a/src/snmalloc/mem/remotecache.h +++ b/src/snmalloc/mem/remotecache.h @@ -206,9 +206,7 @@ namespace snmalloc */ size_t cache_bytes{REMOTE_CACHE}; -#ifndef NDEBUG bool initialised = false; -#endif /// Used to find the index into the array of queues for remote /// deallocation @@ -238,8 +236,21 @@ namespace snmalloc size_t size = n * sizeclass_full_to_size(entry.get_sizeclass()); - cache_bytes += size; - return cache_bytes < REMOTE_CACHE; + size_t new_cache_bytes = cache_bytes + size; + if (SNMALLOC_UNLIKELY(new_cache_bytes > REMOTE_CACHE)) + { + // Check if this is the default allocator, and if not, we + // can update the state. + if (initialised) + { + cache_bytes = new_cache_bytes; + } + + return false; + } + + cache_bytes = new_cache_bytes; + return true; } template @@ -376,9 +387,8 @@ namespace snmalloc */ void init() { -#ifndef NDEBUG initialised = true; -#endif + for (auto& l : list) { // We do not need to initialise with a particular slab, so pass From 19dc26a2d5203c9290356d062a6f27ba77cf73c4 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Sat, 22 Mar 2025 21:13:12 +0000 Subject: [PATCH 08/19] temporarily disable test to get a cleaner CI run --- src/snmalloc/global/globalalloc.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/snmalloc/global/globalalloc.h b/src/snmalloc/global/globalalloc.h index 2ce108642..3a75079c0 100644 --- a/src/snmalloc/global/globalalloc.h +++ b/src/snmalloc/global/globalalloc.h @@ -84,9 +84,10 @@ namespace snmalloc } } - if (result == nullptr) - SNMALLOC_CHECK( - RemoteDeallocCache::remote_inflight.get_curr() == 0); + // Check why this doesn't hold. 
+ // if (result == nullptr) + // SNMALLOC_CHECK( + // RemoteDeallocCache::remote_inflight.get_curr() == 0); if (result != nullptr) { From 77e14ebb7252b79ddc5643729f2ee1fcfaf414d9 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Sat, 22 Mar 2025 21:43:00 +0000 Subject: [PATCH 09/19] Change headers slightly. --- src/snmalloc/ds_core/seqset.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/snmalloc/ds_core/seqset.h b/src/snmalloc/ds_core/seqset.h index 6046bca70..d59c53530 100644 --- a/src/snmalloc/ds_core/seqset.h +++ b/src/snmalloc/ds_core/seqset.h @@ -1,7 +1,7 @@ #pragma once #include "../aal/aal.h" -#include "../ds_core/ds_core.h" +#include "defines.h" #include "snmalloc/stl/type_traits.h" #include "snmalloc/stl/utility.h" From 098b1c49acc47c21b4520ef2e5aa5d0afe3d17ad Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Sat, 22 Mar 2025 21:54:25 +0000 Subject: [PATCH 10/19] Move seqset as it uses pointeroffset and that is aal. --- src/snmalloc/ds_aal/ds_aal.h | 1 + src/snmalloc/{ds_core => ds_aal}/seqset.h | 3 +-- src/snmalloc/ds_core/ds_core.h | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) rename src/snmalloc/{ds_core => ds_aal}/seqset.h (98%) diff --git a/src/snmalloc/ds_aal/ds_aal.h b/src/snmalloc/ds_aal/ds_aal.h index 21eeb8dd6..e0b4ac202 100644 --- a/src/snmalloc/ds_aal/ds_aal.h +++ b/src/snmalloc/ds_aal/ds_aal.h @@ -7,4 +7,5 @@ #include "../aal/aal.h" #include "flaglock.h" #include "prevent_fork.h" +#include "seqset.h" #include "singleton.h" \ No newline at end of file diff --git a/src/snmalloc/ds_core/seqset.h b/src/snmalloc/ds_aal/seqset.h similarity index 98% rename from src/snmalloc/ds_core/seqset.h rename to src/snmalloc/ds_aal/seqset.h index d59c53530..0ad18fb4d 100644 --- a/src/snmalloc/ds_core/seqset.h +++ b/src/snmalloc/ds_aal/seqset.h @@ -1,7 +1,6 @@ #pragma once -#include "../aal/aal.h" -#include "defines.h" +#include "../ds_core/ds_core.h" #include "snmalloc/stl/type_traits.h" #include 
"snmalloc/stl/utility.h" diff --git a/src/snmalloc/ds_core/ds_core.h b/src/snmalloc/ds_core/ds_core.h index ebc55e6cc..2292b8118 100644 --- a/src/snmalloc/ds_core/ds_core.h +++ b/src/snmalloc/ds_core/ds_core.h @@ -15,6 +15,5 @@ #include "mitigations.h" #include "ptrwrap.h" #include "redblacktree.h" -#include "seqset.h" #include "stats.h" #include "tid.h" From 5bc8fd84c71ab7d3f2cd6d4a4bd258267385bcf0 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Sat, 22 Mar 2025 22:14:30 +0000 Subject: [PATCH 11/19] Fix stl --- src/snmalloc/ds_core/stats.h | 21 +++++++++++---------- src/snmalloc/global/globalalloc.h | 2 +- src/snmalloc/mem/pool.h | 4 ++-- src/snmalloc/stl/gnu/atomic.h | 2 +- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/snmalloc/ds_core/stats.h b/src/snmalloc/ds_core/stats.h index 57ffcc9a4..f949dda5d 100644 --- a/src/snmalloc/ds_core/stats.h +++ b/src/snmalloc/ds_core/stats.h @@ -18,8 +18,9 @@ namespace snmalloc public: void increase(size_t amount) { - size_t c = (curr += amount); - size_t p = peak.load(std::memory_order_relaxed); + size_t old = curr.fetch_add(amount); + size_t c = old + amount; + size_t p = peak.load(stl::memory_order_relaxed); while (c > p) { if (peak.compare_exchange_strong(p, c)) @@ -37,12 +38,12 @@ namespace snmalloc size_t get_curr() { - return curr.load(std::memory_order_relaxed); + return curr.load(stl::memory_order_relaxed); } size_t get_peak() { - return peak.load(std::memory_order_relaxed); + return peak.load(stl::memory_order_relaxed); } void operator+=(size_t amount) @@ -71,28 +72,28 @@ namespace snmalloc */ class MonotoneLocalStat { - std::atomic value{0}; + stl::Atomic value{0}; public: void operator++(int) { - value.fetch_add(1, std::memory_order_relaxed); + value.fetch_add(1, stl::memory_order_relaxed); } void operator+=(const MonotoneLocalStat& other) { - auto v = other.value.load(std::memory_order_relaxed); - value.fetch_add(v, std::memory_order_relaxed); + auto v = 
other.value.load(stl::memory_order_relaxed); + value.fetch_add(v, stl::memory_order_relaxed); } void operator+=(size_t v) { - value.fetch_add(v, std::memory_order_relaxed); + value.fetch_add(v, stl::memory_order_relaxed); } size_t operator*() { - return value.load(std::memory_order_relaxed); + return value.load(stl::memory_order_relaxed); } }; } // namespace snmalloc diff --git a/src/snmalloc/global/globalalloc.h b/src/snmalloc/global/globalalloc.h index 3a75079c0..818ebce03 100644 --- a/src/snmalloc/global/globalalloc.h +++ b/src/snmalloc/global/globalalloc.h @@ -147,7 +147,7 @@ namespace snmalloc template inline static void print_alloc_stats() { - static std::atomic dump{0}; + static stl::Atomic dump{0}; auto l_dump = dump++; if (l_dump == 0) diff --git a/src/snmalloc/mem/pool.h b/src/snmalloc/mem/pool.h index 9a7898ff7..6bce43f06 100644 --- a/src/snmalloc/mem/pool.h +++ b/src/snmalloc/mem/pool.h @@ -32,14 +32,14 @@ namespace snmalloc FlagWord lock{}; capptr::Alloc list{nullptr}; - std::atomic count{0}; + stl::Atomic count{0}; public: constexpr PoolState() = default; size_t get_count() { - return count.load(std::memory_order_relaxed); + return count.load(stl::memory_order_relaxed); } }; diff --git a/src/snmalloc/stl/gnu/atomic.h b/src/snmalloc/stl/gnu/atomic.h index 7a193972e..82fff30d9 100644 --- a/src/snmalloc/stl/gnu/atomic.h +++ b/src/snmalloc/stl/gnu/atomic.h @@ -89,7 +89,7 @@ namespace snmalloc return load(); } - SNMALLOC_FAST_PATH T load(MemoryOrder mem_ord = MemoryOrder::SEQ_CST) + SNMALLOC_FAST_PATH T load(MemoryOrder mem_ord = MemoryOrder::SEQ_CST) const { T res; __atomic_load(addressof(val), addressof(res), order(mem_ord)); From 0338c70c92d583ba82cff42e83a5ef5e21178e5b Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Sun, 23 Mar 2025 08:50:19 +0000 Subject: [PATCH 12/19] stl const? 
--- src/snmalloc/stl/gnu/atomic.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/snmalloc/stl/gnu/atomic.h b/src/snmalloc/stl/gnu/atomic.h index 82fff30d9..83fb8ce57 100644 --- a/src/snmalloc/stl/gnu/atomic.h +++ b/src/snmalloc/stl/gnu/atomic.h @@ -63,6 +63,11 @@ namespace snmalloc return __builtin_addressof(ref); } + SNMALLOC_FAST_PATH static const T* addressof(const T& ref) + { + return __builtin_addressof(ref); + } + // From libc++: // require types that are 1, 2, 4, 8, or 16 bytes in length to be aligned // to at least their size to be potentially From 1768968eaffeda38255eb559d3bf73d765bc89e8 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Sun, 23 Mar 2025 09:04:12 +0000 Subject: [PATCH 13/19] Fixing CI --- src/snmalloc/stl/gnu/atomic.h | 3 ++- src/test/func/alloc_churn/alloc_churn.cc | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/snmalloc/stl/gnu/atomic.h b/src/snmalloc/stl/gnu/atomic.h index 83fb8ce57..bb7145f65 100644 --- a/src/snmalloc/stl/gnu/atomic.h +++ b/src/snmalloc/stl/gnu/atomic.h @@ -94,7 +94,8 @@ namespace snmalloc return load(); } - SNMALLOC_FAST_PATH T load(MemoryOrder mem_ord = MemoryOrder::SEQ_CST) const + SNMALLOC_FAST_PATH T + load(MemoryOrder mem_ord = MemoryOrder::SEQ_CST) const { T res; __atomic_load(addressof(val), addressof(res), order(mem_ord)); diff --git a/src/test/func/alloc_churn/alloc_churn.cc b/src/test/func/alloc_churn/alloc_churn.cc index 23d3c3015..1cc320ea8 100644 --- a/src/test/func/alloc_churn/alloc_churn.cc +++ b/src/test/func/alloc_churn/alloc_churn.cc @@ -21,9 +21,9 @@ void test_step() int main() { - for (size_t i = 0; i < 10000; i++) + for (size_t i = 0; i < 1000; i++) { - if (i % 1000 == 0) + if (i % 100 == 0) { std::cout << "Step " << i << std::endl; snmalloc::print_alloc_stats(); From fc395ddb531e9f01e6fba07164e390715b3ab33a Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 24 Mar 2025 16:21:24 +0000 Subject: [PATCH 14/19] Fix inflight check --- 
src/snmalloc/global/globalalloc.h | 12 ++++++++---- src/snmalloc/mem/alloc.h | 4 ++-- src/test/func/alloc_churn/alloc_churn.cc | 1 + 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/snmalloc/global/globalalloc.h b/src/snmalloc/global/globalalloc.h index 818ebce03..e9c9ccc7b 100644 --- a/src/snmalloc/global/globalalloc.h +++ b/src/snmalloc/global/globalalloc.h @@ -84,10 +84,14 @@ namespace snmalloc } } - // Check why this doesn't hold. - // if (result == nullptr) - // SNMALLOC_CHECK( - // RemoteDeallocCache::remote_inflight.get_curr() == 0); + if ( + result == nullptr && + RemoteDeallocCache::remote_inflight.get_curr() != 0) + { + report_fatal_error( + "debug_check_empty: remote inflight deallocations left {}}", + RemoteDeallocCache::remote_inflight.get_curr()); + } if (result != nullptr) { diff --git a/src/snmalloc/mem/alloc.h b/src/snmalloc/mem/alloc.h index 1272fc0ff..9c917d288 100644 --- a/src/snmalloc/mem/alloc.h +++ b/src/snmalloc/mem/alloc.h @@ -1377,9 +1377,9 @@ namespace snmalloc entry, m, need_post, domesticate, bytes_flushed); }; - RemoteDeallocCache::remote_inflight -= bytes_flushed; - message_queue().destroy_and_iterate(domesticate, cb); + + RemoteDeallocCache::remote_inflight -= bytes_flushed; } else { diff --git a/src/test/func/alloc_churn/alloc_churn.cc b/src/test/func/alloc_churn/alloc_churn.cc index 1cc320ea8..ebfe87774 100644 --- a/src/test/func/alloc_churn/alloc_churn.cc +++ b/src/test/func/alloc_churn/alloc_churn.cc @@ -27,6 +27,7 @@ int main() { std::cout << "Step " << i << std::endl; snmalloc::print_alloc_stats(); + snmalloc::debug_check_empty(); } test_step(); } From 5071db0d2df380909c4e7c0fc11d72cfc6edf0fa Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 24 Mar 2025 20:30:18 +0000 Subject: [PATCH 15/19] Fix inflight statistic --- src/snmalloc/mem/alloc.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/snmalloc/mem/alloc.h b/src/snmalloc/mem/alloc.h index 9c917d288..8a01217a4 100644 
--- a/src/snmalloc/mem/alloc.h +++ b/src/snmalloc/mem/alloc.h @@ -498,6 +498,10 @@ namespace snmalloc entry.get_slab_metadata()->as_key_tweak(), domesticate); + // Need to account for forwarded bytes. + size_t size = nelem * sizeclass_full_to_size(entry.get_sizeclass()); + bytes_returned += size; + need_post |= remote_dealloc_cache.reserve_space(entry, nelem); remote_dealloc_cache.template forward( @@ -1364,10 +1368,10 @@ namespace snmalloc return capptr_domesticate(local_state, p); }; - size_t bytes_flushed = 0; - if (destroy_queue) { + size_t bytes_flushed = 0; + auto cb = [this, domesticate, &bytes_flushed](capptr::Alloc m) { bool need_post = true; // Always going to post, so ignore. From 94d7a268738f64ed86a7600abeeed6159f09cbb1 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 24 Mar 2025 21:04:12 +0000 Subject: [PATCH 16/19] Shrink test size. --- src/test/func/memory/memory.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/func/memory/memory.cc b/src/test/func/memory/memory.cc index 891737843..9f876d459 100644 --- a/src/test/func/memory/memory.cc +++ b/src/test/func/memory/memory.cc @@ -558,7 +558,7 @@ int main(int argc, char** argv) #endif #define TEST(testname) \ std::cout << "Running " #testname << std::endl; \ - for (size_t i = 0; i < 100; i++) \ + for (size_t i = 0; i < 50; i++) \ testname(); TEST(test_alloc_dealloc_64k); From 647438084440baa038b15b3e3fb99533c65a73f3 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Mon, 24 Mar 2025 21:22:14 +0000 Subject: [PATCH 17/19] Remove interlocked from fast path. 
--- src/snmalloc/ds_core/stats.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/snmalloc/ds_core/stats.h b/src/snmalloc/ds_core/stats.h index f949dda5d..29d4c9a42 100644 --- a/src/snmalloc/ds_core/stats.h +++ b/src/snmalloc/ds_core/stats.h @@ -77,7 +77,8 @@ namespace snmalloc public: void operator++(int) { - value.fetch_add(1, stl::memory_order_relaxed); + auto old = value.load(stl::memory_order_relaxed); + value.store(old + 1, stl::memory_order_relaxed); } void operator+=(const MonotoneLocalStat& other) @@ -88,7 +89,8 @@ namespace snmalloc void operator+=(size_t v) { - value.fetch_add(v, stl::memory_order_relaxed); + auto old = value.load(stl::memory_order_relaxed); + value.store(old + v, stl::memory_order_relaxed); } size_t operator*() From 1a7c372cff29020934d273711973686defe09942 Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Tue, 25 Mar 2025 12:08:20 +0000 Subject: [PATCH 18/19] capptr_reveal position was causing GCC to emit a frame which wasn't required after inlining. --- src/snmalloc/mem/alloc.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/snmalloc/mem/alloc.h b/src/snmalloc/mem/alloc.h index 8a01217a4..ccdd2d5a3 100644 --- a/src/snmalloc/mem/alloc.h +++ b/src/snmalloc/mem/alloc.h @@ -33,7 +33,7 @@ namespace snmalloc } template - inline static SNMALLOC_FAST_PATH capptr::Alloc + inline static SNMALLOC_FAST_PATH void* finish_alloc(freelist::HeadPtr p, smallsizeclass_t sizeclass) { auto r = finish_alloc_no_zero(p, sizeclass); @@ -44,7 +44,7 @@ namespace snmalloc // TODO: Should this be zeroing the free Object state, in the non-zeroing // case? - return r; + return capptr_reveal(r); } struct FastFreeLists @@ -598,17 +598,17 @@ namespace snmalloc { // Small allocations are more likely. Improve // branch prediction by placing this case first. 
- return capptr_reveal(small_alloc(size)); + return small_alloc(size); } - return capptr_reveal(alloc_not_small(size, this)); + return alloc_not_small(size, this); } /** * Fast allocation for small objects. */ template - SNMALLOC_FAST_PATH capptr::Alloc small_alloc(size_t size) + SNMALLOC_FAST_PATH void* small_alloc(size_t size) { auto domesticate = [this](freelist::QueuePtr p) SNMALLOC_FAST_PATH_LAMBDA { @@ -627,7 +627,7 @@ namespace snmalloc return handle_message_queue( [](Allocator* alloc, smallsizeclass_t sizeclass, freelist::Iter<>* fl) - -> capptr::Alloc { + -> void* { return alloc->small_refill(sizeclass, *fl); }, this, @@ -645,7 +645,7 @@ namespace snmalloc * register. */ template - static SNMALLOC_SLOW_PATH capptr::Alloc + static SNMALLOC_SLOW_PATH void* alloc_not_small(size_t size, Allocator* self) { if (size == 0) @@ -657,15 +657,15 @@ namespace snmalloc } return self->handle_message_queue( - [](Allocator* self, size_t size) -> capptr::Alloc { + [](Allocator* self, size_t size) -> void* { return CheckInit::check_init( - [self, size]() { + [self, size]() -> void* { if (size > bits::one_at_bit(bits::BITS - 1)) { // Cannot allocate something that is more that half the size of // the address space errno = ENOMEM; - return capptr::Alloc{nullptr}; + return nullptr; } // Check if secondary allocator wants to offer the memory @@ -677,7 +677,7 @@ namespace snmalloc { if constexpr (zero_mem == YesZero) Config::Pal::zero(result, size); - return capptr::Alloc::unsafe_from(result); + return result; } // Grab slab of correct size @@ -717,10 +717,10 @@ namespace snmalloc self->stats[sc].slabs_allocated++; } - return capptr_chunk_is_alloc( - capptr_to_user_address_control(chunk)); + return capptr_reveal(capptr_chunk_is_alloc( + capptr_to_user_address_control(chunk))); }, - [](Allocator* a, size_t size) { + [](Allocator* a, size_t size) -> void* { return alloc_not_small(size, a); }, size); @@ -730,7 +730,7 @@ namespace snmalloc } template - SNMALLOC_FAST_PATH 
capptr::Alloc + SNMALLOC_FAST_PATH void* small_refill(smallsizeclass_t sizeclass, freelist::Iter<>& fast_free_list) { void* result = SecondaryAllocator::allocate( @@ -750,9 +750,9 @@ namespace snmalloc // deallocated, before snmalloc is initialised, then it will fail // to access the pagemap. return CheckInit::check_init( - [result]() { return capptr::Alloc::unsafe_from(result); }, + [result]() { return result; }, [](Allocator*, void* result) { - return capptr::Alloc::unsafe_from(result); + return result; }, result); } @@ -804,11 +804,11 @@ namespace snmalloc } template - SNMALLOC_SLOW_PATH capptr::Alloc small_refill_slow( + SNMALLOC_SLOW_PATH void* small_refill_slow( smallsizeclass_t sizeclass, freelist::Iter<>& fast_free_list) { return CheckInit::check_init( - [this, sizeclass, &fast_free_list]() -> capptr::Alloc { + [this, sizeclass, &fast_free_list]() -> void* { size_t rsize = sizeclass_to_size(sizeclass); // No existing free list get a new slab. From 85dac300fc77e60f15c8469e810ea982d8334a7f Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Tue, 25 Mar 2025 12:28:37 +0000 Subject: [PATCH 19/19] Alter inlining for GCC --- src/snmalloc/mem/alloc.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/snmalloc/mem/alloc.h b/src/snmalloc/mem/alloc.h index ccdd2d5a3..d2ef84113 100644 --- a/src/snmalloc/mem/alloc.h +++ b/src/snmalloc/mem/alloc.h @@ -561,7 +561,7 @@ namespace snmalloc * - alloc(size_t) * - small_alloc(size_t) * - gets allocation from a fast free list and is done. - * - if no fast free list, + * - otherwise no fast free list and calls small_alloc_slow * - check for message queue * - small_refill(size_t) * - If another free list is available, use it. 
@@ -625,6 +625,13 @@ namespace snmalloc return finish_alloc(p, sizeclass); } + return small_alloc_slow(sizeclass, fl); + } + + template + SNMALLOC_SLOW_PATH void* + small_alloc_slow(smallsizeclass_t sizeclass, freelist::Iter<>* fl) + { return handle_message_queue( [](Allocator* alloc, smallsizeclass_t sizeclass, freelist::Iter<>* fl) -> void* { @@ -717,8 +724,8 @@ namespace snmalloc self->stats[sc].slabs_allocated++; } - return capptr_reveal(capptr_chunk_is_alloc( - capptr_to_user_address_control(chunk))); + return capptr_reveal( + capptr_chunk_is_alloc(capptr_to_user_address_control(chunk))); }, [](Allocator* a, size_t size) -> void* { return alloc_not_small(size, a); @@ -751,9 +758,7 @@ namespace snmalloc // to access the pagemap. return CheckInit::check_init( [result]() { return result; }, - [](Allocator*, void* result) { - return result; - }, + [](Allocator*, void* result) { return result; }, result); }