diff --git a/include/customizer/cell_customizer.hpp b/include/customizer/cell_customizer.hpp index 00d03163a..0fd6dcbc1 100644 --- a/include/customizer/cell_customizer.hpp +++ b/include/customizer/cell_customizer.hpp @@ -116,8 +116,8 @@ class CellCustomizer const std::vector &allowed_nodes, CellMetric &metric) const { - Heap heap_exemplar(graph.GetNumberOfNodes()); - HeapPtr heaps(heap_exemplar); + const auto number_of_nodes = graph.GetNumberOfNodes(); + HeapPtr heaps([number_of_nodes] { return Heap{number_of_nodes}; }); for (std::size_t level = 1; level < partition.GetNumberOfLevels(); ++level) { diff --git a/include/util/pool_allocator.hpp b/include/util/pool_allocator.hpp new file mode 100644 index 000000000..35ed97075 --- /dev/null +++ b/include/util/pool_allocator.hpp @@ -0,0 +1,158 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace osrm::util +{ + +/* Rounds n up to the nearest multiple of alignment. The mask form is only correct when alignment is a power of two, which holds for every call and test visible in this patch but is not asserted. */ +inline size_t align_up(size_t n, size_t alignment) +{ + return (n + alignment - 1) & ~(alignment - 1); +} + +/* Returns the exponent e of the smallest power of two that is not below n, computed as the bit width of size_t minus the leading zero count of n - 1; n must be positive (asserted). NOTE(review): for n above half the size_t range the result equals the full bit width (the unit test expects sizeof(size_t) * 8 for SIZE_MAX), which is one past the last slot of MemoryPool::free_lists_ - confirm such sizes never reach the pool. */ +inline size_t get_next_power_of_two_exponent(size_t n) +{ + BOOST_ASSERT(n > 0); + return (sizeof(size_t) * 8) - std::countl_zero(n - 1); +} + +/* Memory pool with one instance per thread: instance() lazily creates it in a thread_local shared_ptr. Storage comes from std::malloc in chunks of at least MIN_CHUNK_SIZE_BYTES, is handed out in power-of-two sized blocks, and freed blocks are kept in one free list per size class; the chunks themselves are released only by the destructor. */ +class MemoryPool +{ + private: + constexpr static size_t MIN_CHUNK_SIZE_BYTES = 4096; + + public: + static std::shared_ptr instance() + { + static thread_local std::shared_ptr instance; + if (!instance) + { + instance = std::shared_ptr(new MemoryPool()); + } + return instance; + } + + /* Returns storage for items_count objects of T. The request is rounded up to a power-of-two size class; a block is carved from the current chunk only when the free list of that class is empty, otherwise a recycled block is reused. NOTE(review): items_count == 0 passes 0 to get_next_power_of_two_exponent, which asserts n > 0. */ + template T *allocate(std::size_t items_count) + { + static_assert(alignof(T) <= alignof(std::max_align_t), + "Type is over-aligned for this allocator."); + + size_t free_list_index = get_next_power_of_two_exponent(items_count * sizeof(T)); + auto &free_list = free_lists_[free_list_index]; + if (free_list.empty()) + { + /* NOTE(review): 1u has type unsigned int, so this shift is evaluated in unsigned int before the conversion to size_t; a free_list_index at or beyond the width of unsigned int is undefined behaviour. Writing size_t{1} would keep the shift in size_t. */ + size_t block_size_in_bytes = 1u << free_list_index; + block_size_in_bytes = align_up(block_size_in_bytes, alignof(std::max_align_t)); + // check if there is space in current memory chunk + if 
(current_chunk_left_bytes_ < block_size_in_bytes) + { + allocate_chunk(block_size_in_bytes); + } + + free_list.push_back(current_chunk_ptr_); + current_chunk_left_bytes_ -= block_size_in_bytes; + current_chunk_ptr_ += block_size_in_bytes; + } + auto ptr = reinterpret_cast(free_list.back()); + free_list.pop_back(); + return ptr; + } + + /* Puts p back on the free list of the size class derived from n * sizeof(T); no memory is returned to the system. The class is recomputed from n, so n presumably has to match the count given to allocate - TODO confirm with callers. */ + template void deallocate(T *p, std::size_t n) noexcept + { + size_t free_list_index = get_next_power_of_two_exponent(n * sizeof(T)); + // NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion) + free_lists_[free_list_index].push_back(reinterpret_cast(p)); + } + + ~MemoryPool() + { + for (auto chunk : chunks_) + { + // NOLINTNEXTLINE(cppcoreguidelines-no-malloc) + std::free(chunk); + } + } + + private: + MemoryPool() = default; + MemoryPool(const MemoryPool &) = delete; + MemoryPool &operator=(const MemoryPool &) = delete; + + /* Mallocs a new chunk of max(bytes, MIN_CHUNK_SIZE_BYTES), records it in chunks_ and makes it the current chunk; throws std::bad_alloc when malloc fails. Any bytes still unused in the previous chunk are no longer reachable through current_chunk_ptr_. */ + void allocate_chunk(size_t bytes) + { + auto chunk_size = std::max(bytes, MIN_CHUNK_SIZE_BYTES); + // NOLINTNEXTLINE(cppcoreguidelines-no-malloc) + void *chunk = std::malloc(chunk_size); + if (!chunk) + { + throw std::bad_alloc(); + } + chunks_.push_back(chunk); + current_chunk_ptr_ = static_cast(chunk); + current_chunk_left_bytes_ = chunk_size; + } + + // we have 64 free lists, one for each possible power of two + std::array, sizeof(std::size_t) * 8> free_lists_; + + // list of allocated memory chunks, we don't free them until the pool is destroyed + std::vector chunks_; + + uint8_t *current_chunk_ptr_ = nullptr; + size_t current_chunk_left_bytes_ = 0; +}; + +/* Allocator adapter that lets standard containers draw from MemoryPool. Every instance stores the shared_ptr returned by MemoryPool::instance() on the constructing thread and forwards allocate and deallocate to it. */ +template class PoolAllocator +{ + public: + using value_type = T; + + PoolAllocator() noexcept : pool(MemoryPool::instance()){}; + + template + PoolAllocator(const PoolAllocator &) noexcept : pool(MemoryPool::instance()) + { + } + + template struct rebind + { + using other = PoolAllocator; + }; + + T *allocate(std::size_t n) { return pool->allocate(n); } + + void deallocate(T *p, std::size_t n) noexcept { pool->deallocate(p, n); } + + 
PoolAllocator(const PoolAllocator &) = default; + PoolAllocator &operator=(const PoolAllocator &) = default; + PoolAllocator(PoolAllocator &&) noexcept = default; + PoolAllocator &operator=(PoolAllocator &&) noexcept = default; + + private: + // using shared_ptr guarantees that memory pool won't be destroyed before all allocators using + // it (important if there are static instances of PoolAllocator) + std::shared_ptr pool; +}; +/* NOTE(review): every pair of PoolAllocator objects compares equal, yet each object keeps the pool of the thread that constructed it and MemoryPool shows no locking in this patch - confirm that containers using this allocator are never handed to another thread. */ +template +bool operator==(const PoolAllocator &, const PoolAllocator &) +{ + return true; +} + +template +bool operator!=(const PoolAllocator &, const PoolAllocator &) +{ + return false; +} + +} // namespace osrm::util diff --git a/include/util/query_heap.hpp b/include/util/query_heap.hpp index e2678f2c0..256335064 100644 --- a/include/util/query_heap.hpp +++ b/include/util/query_heap.hpp @@ -1,17 +1,16 @@ #ifndef OSRM_UTIL_QUERY_HEAP_HPP #define OSRM_UTIL_QUERY_HEAP_HPP +#include "util/pool_allocator.hpp" +#include #include #include - -#include #include #include #include #include #include #include - namespace osrm::util { @@ -56,7 +55,11 @@ template class UnorderedMapStorage void Clear() { nodes.clear(); } private: - std::unordered_map nodes; + template + using UnorderedMap = std:: + unordered_map, std::equal_to, PoolAllocator>>; + + UnorderedMap nodes; }; template other.weight; } }; + using HeapContainer = boost::heap::d_ary_heap, boost::heap::mutable_, - boost::heap::compare>>; + boost::heap::compare>, + boost::heap::allocator>>; using HeapHandle = typename HeapContainer::handle_type; public: @@ -160,6 +165,9 @@ class QueryHeap Data data; }; + QueryHeap(const QueryHeap &other) = delete; + QueryHeap(QueryHeap &&other) = delete; + template explicit QueryHeap(StorageArgs... args) : node_index(args...) 
{ Clear(); diff --git a/unit_tests/util/pool_allocator.cpp b/unit_tests/util/pool_allocator.cpp new file mode 100644 index 000000000..7f88dfb85 --- /dev/null +++ b/unit_tests/util/pool_allocator.cpp @@ -0,0 +1,158 @@ +#include "util/pool_allocator.hpp" +#include "util/typedefs.hpp" +#include + +#include + +BOOST_AUTO_TEST_SUITE(pool_allocator) + +using namespace osrm; +using namespace osrm::util; + +/* Checks align_up on values below, at and above a multiple of the alignment, on zero, and with an alignment of 1. */ +BOOST_AUTO_TEST_CASE(test_align_up) +{ + BOOST_CHECK_EQUAL(align_up(5, 4), 8); + BOOST_CHECK_EQUAL(align_up(9, 8), 16); + BOOST_CHECK_EQUAL(align_up(17, 16), 32); + BOOST_CHECK_EQUAL(align_up(4, 4), 4); + BOOST_CHECK_EQUAL(align_up(8, 8), 8); + BOOST_CHECK_EQUAL(align_up(16, 16), 16); + BOOST_CHECK_EQUAL(align_up(32, 16), 32); + BOOST_CHECK_EQUAL(align_up(0, 4), 0); + BOOST_CHECK_EQUAL(align_up(0, 8), 0); + BOOST_CHECK_EQUAL(align_up(0, 16), 0); + BOOST_CHECK_EQUAL(align_up(1000000, 256), 1000192); + BOOST_CHECK_EQUAL(align_up(999999, 512), 1000448); + BOOST_CHECK_EQUAL(align_up(123456789, 1024), 123457536); + BOOST_CHECK_EQUAL(align_up(0, 1), 0); + BOOST_CHECK_EQUAL(align_up(5, 1), 5); + BOOST_CHECK_EQUAL(align_up(123456, 1), 123456); +} + +/* Checks the exponent helper on exact powers of two, on values between them, and on SIZE_MAX, where the expected result is the full bit width of size_t. */ +BOOST_AUTO_TEST_CASE(test_get_next_power_of_two_exponent) +{ + BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(1), 0); + BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(2), 1); + BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(4), 2); + BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(8), 3); + BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(16), 4); + BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(3), 2); + BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(5), 3); + BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(9), 4); + BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(15), 4); + BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(17), 5); + BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(1), 0); + BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(SIZE_MAX), sizeof(size_t) * 8); +} + +// in many of these tests we hope on 
address sanitizer to alert in the case if we are doing +// something wrong +/* NOTE(review): the allocator tests below write through ptr before BOOST_CHECK_NE(ptr, nullptr) runs, so the null check cannot guard the store. */ +BOOST_AUTO_TEST_CASE(smoke) +{ + PoolAllocator pool; + auto ptr = pool.allocate(1); + *ptr = 42; + BOOST_CHECK_NE(ptr, nullptr); + pool.deallocate(ptr, 1); + + ptr = pool.allocate(2); + *ptr = 42; + *(ptr + 1) = 43; + BOOST_CHECK_NE(ptr, nullptr); + pool.deallocate(ptr, 2); +} + +BOOST_AUTO_TEST_CASE(a_lot_of_items) +{ + PoolAllocator pool; + auto ptr = pool.allocate(2048); + for (int i = 0; i < 2048; ++i) + { + ptr[i] = i; + } + + for (int i = 0; i < 2048; ++i) + { + BOOST_CHECK_EQUAL(ptr[i], i); + } + + pool.deallocate(ptr, 2048); +} + +BOOST_AUTO_TEST_CASE(copy) +{ + PoolAllocator pool; + auto ptr = pool.allocate(1); + *ptr = 42; + BOOST_CHECK_NE(ptr, nullptr); + pool.deallocate(ptr, 1); + + PoolAllocator pool2(pool); + ptr = pool2.allocate(1); + *ptr = 42; + BOOST_CHECK_NE(ptr, nullptr); + pool2.deallocate(ptr, 1); +} + +BOOST_AUTO_TEST_CASE(move) +{ + PoolAllocator pool; + auto ptr = pool.allocate(1); + *ptr = 42; + BOOST_CHECK_NE(ptr, nullptr); + pool.deallocate(ptr, 1); + + PoolAllocator pool2(std::move(pool)); + ptr = pool2.allocate(1); + *ptr = 42; + BOOST_CHECK_NE(ptr, nullptr); + pool2.deallocate(ptr, 1); +} + +BOOST_AUTO_TEST_CASE(unordered_map) +{ + std::unordered_map, + std::equal_to, + PoolAllocator>> + map; + map[1] = 42; + BOOST_CHECK_EQUAL(map[1], 42); + + map.clear(); + + map[2] = 43; + + BOOST_CHECK_EQUAL(map[2], 43); +} + +BOOST_AUTO_TEST_CASE(alignment) +{ + PoolAllocator pool_char; + PoolAllocator pool_double; + + auto ptr_char = pool_char.allocate(1); + auto ptr_double = pool_double.allocate(1); + + BOOST_CHECK_NE(ptr_double, nullptr); + BOOST_CHECK_EQUAL(reinterpret_cast(ptr_double) % alignof(double), 0); + BOOST_CHECK_NE(ptr_char, nullptr); + BOOST_CHECK_EQUAL(reinterpret_cast(ptr_char) % alignof(char), 0); + + pool_char.deallocate(ptr_char, 1); + pool_double.deallocate(ptr_double, 1); + + ptr_char = pool_char.allocate(2); + ptr_double = pool_double.allocate(1); + 
+ BOOST_CHECK_NE(ptr_double, nullptr); + BOOST_CHECK_EQUAL(reinterpret_cast(ptr_double) % alignof(double), 0); + BOOST_CHECK_NE(ptr_char, nullptr); + BOOST_CHECK_EQUAL(reinterpret_cast(ptr_char) % alignof(char), 0); + + pool_char.deallocate(ptr_char, 2); + pool_double.deallocate(ptr_double, 1); +} + +BOOST_AUTO_TEST_SUITE_END()