Compare commits

...

49 Commits

Author SHA1 Message Date
Siarhei Fedartsou
d47012a3e6 Merge branch 'master' into sf-pool-alloc 2024-07-28 21:05:31 +02:00
Siarhei Fedartsou
bff349f1b1 wip 2024-07-13 08:53:15 +02:00
Siarhei Fedartsou
9ce059f216 wip 2024-07-12 22:45:56 +02:00
Siarhei Fedartsou
6090387c5e wip 2024-07-12 22:10:05 +02:00
Siarhei Fedartsou
3096440505 wip 2024-07-12 21:32:52 +02:00
Siarhei Fedartsou
8bd26dc8b6 wip 2024-07-12 20:37:41 +02:00
Siarhei Fedartsou
70d67d0eea wip 2024-07-12 20:36:47 +02:00
Siarhei Fedartsou
fdd1ca05df wip 2024-07-12 20:13:40 +02:00
Siarhei Fedartsou
81d128b100 wip 2024-07-12 19:57:23 +02:00
Siarhei Fedartsou
f18791a71b wip 2024-07-12 18:39:32 +02:00
Siarhei Fedartsou
1436e96ff4 wip 2024-07-12 17:50:32 +02:00
Siarhei Fedartsou
21f53ed6dd Use pool in std::unordered_map 2024-07-11 23:22:45 +02:00
Siarhei Fedartsou
e045dea04c Use pool in std::unordered_map 2024-07-11 23:21:48 +02:00
Siarhei Fedartsou
ac05d36102
Update bench.cpp 2024-07-11 23:05:45 +02:00
Siarhei Fedartsou
e9cdb317f6 wip 2024-07-11 22:39:31 +02:00
Siarhei Fedartsou
270f187e2a wip 2024-07-11 22:32:41 +02:00
Siarhei Fedartsou
fb8182a10e wip 2024-07-11 21:40:00 +02:00
Siarhei Fedartsou
69bc6c035d wip 2024-07-11 21:26:19 +02:00
Siarhei Fedartsou
a18ad919af wip 2024-07-11 21:06:03 +02:00
Siarhei Fedartsou
434cab4952 wip 2024-07-11 20:54:00 +02:00
Siarhei Fedartsou
a90e9dde33 wip 2024-07-11 20:47:20 +02:00
Siarhei Fedartsou
9ef1911eae wip 2024-07-11 20:42:47 +02:00
Siarhei Fedartsou
18b3c5f8ed wip 2024-07-11 20:25:55 +02:00
Siarhei Fedartsou
3691f90e23 wip 2024-07-11 20:11:11 +02:00
Siarhei Fedartsou
4d940ab096 wip 2024-07-11 20:05:08 +02:00
Siarhei Fedartsou
c5aae5148e wip 2024-07-11 19:33:45 +02:00
Siarhei Fedartsou
058c26e39a wip 2024-07-11 18:21:20 +02:00
Siarhei Fedartsou
7337771aaf wip 2024-07-11 18:16:07 +02:00
Siarhei Fedartsou
abbe5e25a5 wip 2024-07-11 18:05:16 +02:00
Siarhei Fedartsou
6d2fc45476 wip 2024-07-10 22:12:59 +02:00
Siarhei Fedartsou
6f04aa9587 wip 2024-07-10 22:01:25 +02:00
Siarhei Fedartsou
1037256a30 wip 2024-07-10 21:49:40 +02:00
Siarhei Fedartsou
f9358ed031 wip 2024-07-10 21:13:50 +02:00
Siarhei Fedartsou
13448e4f9a wip 2024-07-10 20:30:29 +02:00
Siarhei Fedartsou
7eb2d93a82 wip 2024-07-10 20:16:37 +02:00
Siarhei Fedartsou
49f875c0f8 wip 2024-07-10 19:49:28 +02:00
Siarhei Fedartsou
53032e556a wip 2024-07-09 22:53:24 +02:00
Siarhei Fedartsou
233a7563b6 wip 2024-07-09 22:39:43 +02:00
Siarhei Fedartsou
d1f04abf42 wip 2024-07-09 22:27:21 +02:00
Siarhei Fedartsou
c578698da0 wip 2024-07-09 21:28:22 +02:00
Siarhei Fedartsou
8df282b2e6 wip 2024-07-09 21:15:50 +02:00
Siarhei Fedartsou
5f166a5e4e wip 2024-07-06 12:43:21 +03:00
Siarhei Fedartsou
f62e917226 wip 2024-07-06 12:40:57 +03:00
Siarhei Fedartsou
611a3c250b wip 2024-07-03 22:27:47 +03:00
Siarhei Fedartsou
bbdac63362 wip 2024-07-03 21:55:16 +03:00
Siarhei Fedartsou
0723dc073c wip 2024-07-03 21:51:09 +03:00
Siarhei Fedartsou
3bd897ffe1 wip 2024-07-03 21:51:09 +03:00
Siarhei Fedartsou
8c7b80e7ee wip 2024-07-03 21:51:09 +03:00
Siarhei Fedartsou
4e5bf05518 Try to use boost::fast_pool_allocator in QueryHeap 2024-07-03 21:51:09 +03:00
4 changed files with 331 additions and 7 deletions

View File

@ -116,8 +116,8 @@ class CellCustomizer
const std::vector<bool> &allowed_nodes,
CellMetric &metric) const
{
Heap heap_exemplar(graph.GetNumberOfNodes());
HeapPtr heaps(heap_exemplar);
const auto number_of_nodes = graph.GetNumberOfNodes();
HeapPtr heaps([number_of_nodes] { return Heap{number_of_nodes}; });
for (std::size_t level = 1; level < partition.GetNumberOfLevels(); ++level)
{

View File

@ -0,0 +1,158 @@
#pragma once
#include <algorithm>
#include <array>
#include <bit>
#include <boost/assert.hpp>
#include <cstddef>
#include <cstdlib>
#include <memory>
#include <mutex>
#include <new>
#include <vector>
namespace osrm::util
{
// Rounds `n` up to the nearest multiple of `alignment`.
// The bit-mask trick below only yields multiples when `alignment` is a power of two
// (an alignment of 1 returns `n` unchanged).
inline size_t align_up(size_t n, size_t alignment)
{
    const size_t low_bits_mask = alignment - 1;
    const size_t bumped = n + low_bits_mask;
    return bumped & ~low_bits_mask;
}
// Returns the smallest exponent `e` such that `2^e >= n`.
// `n` must be positive. For values above the largest representable power of two the result
// is the full bit width of size_t (e.g. 64 for SIZE_MAX on a 64-bit target).
inline size_t get_next_power_of_two_exponent(size_t n)
{
    BOOST_ASSERT(n > 0);
    constexpr size_t bits_in_size_t = sizeof(size_t) * 8;
    // the number of significant bits in (n - 1) is exactly the exponent we are looking for
    const auto leading_zero_bits = std::countl_zero(n - 1);
    return bits_in_size_t - leading_zero_bits;
}
class MemoryPool
{
private:
constexpr static size_t MIN_CHUNK_SIZE_BYTES = 4096;
public:
static std::shared_ptr<MemoryPool> instance()
{
static thread_local std::shared_ptr<MemoryPool> instance;
if (!instance)
{
instance = std::shared_ptr<MemoryPool>(new MemoryPool());
}
return instance;
}
template <typename T> T *allocate(std::size_t items_count)
{
static_assert(alignof(T) <= alignof(std::max_align_t),
"Type is over-aligned for this allocator.");
size_t free_list_index = get_next_power_of_two_exponent(items_count * sizeof(T));
auto &free_list = free_lists_[free_list_index];
if (free_list.empty())
{
size_t block_size_in_bytes = 1u << free_list_index;
block_size_in_bytes = align_up(block_size_in_bytes, alignof(std::max_align_t));
// check if there is space in current memory chunk
if (current_chunk_left_bytes_ < block_size_in_bytes)
{
allocate_chunk(block_size_in_bytes);
}
free_list.push_back(current_chunk_ptr_);
current_chunk_left_bytes_ -= block_size_in_bytes;
current_chunk_ptr_ += block_size_in_bytes;
}
auto ptr = reinterpret_cast<T *>(free_list.back());
free_list.pop_back();
return ptr;
}
template <typename T> void deallocate(T *p, std::size_t n) noexcept
{
size_t free_list_index = get_next_power_of_two_exponent(n * sizeof(T));
// NOLINTNEXTLINE(bugprone-multi-level-implicit-pointer-conversion)
free_lists_[free_list_index].push_back(reinterpret_cast<void *>(p));
}
~MemoryPool()
{
for (auto chunk : chunks_)
{
// NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
std::free(chunk);
}
}
private:
MemoryPool() = default;
MemoryPool(const MemoryPool &) = delete;
MemoryPool &operator=(const MemoryPool &) = delete;
void allocate_chunk(size_t bytes)
{
auto chunk_size = std::max(bytes, MIN_CHUNK_SIZE_BYTES);
// NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
void *chunk = std::malloc(chunk_size);
if (!chunk)
{
throw std::bad_alloc();
}
chunks_.push_back(chunk);
current_chunk_ptr_ = static_cast<uint8_t *>(chunk);
current_chunk_left_bytes_ = chunk_size;
}
// we have 64 free lists, one for each possible power of two
std::array<std::vector<void *>, sizeof(std::size_t) * 8> free_lists_;
// list of allocated memory chunks, we don't free them until the pool is destroyed
std::vector<void *> chunks_;
uint8_t *current_chunk_ptr_ = nullptr;
size_t current_chunk_left_bytes_ = 0;
};
// Standard-library compatible allocator that forwards to a MemoryPool.
//
// A default-constructed allocator binds to the calling thread's pool. Copies, moves and rebound
// copies all share the pool of the allocator they were created from, so memory obtained through
// one of them can be released through any of the others.
template <typename T> class PoolAllocator
{
  public:
    using value_type = T;

    PoolAllocator() noexcept : pool(MemoryPool::instance()) {}

    // Rebinding copy: keeps using the source allocator's pool instead of looking up the pool of
    // whichever thread happens to perform the conversion.
    template <typename U>
    PoolAllocator(const PoolAllocator<U> &other) noexcept : pool(other.pool)
    {
    }

    template <typename U> struct rebind
    {
        using other = PoolAllocator<U>;
    };

    // Returns uninitialized storage for `n` objects of type T.
    T *allocate(std::size_t n) { return pool->allocate<T>(n); }

    // Releases storage previously obtained from allocate(n) with the same `n`.
    void deallocate(T *p, std::size_t n) noexcept { pool->deallocate<T>(p, n); }

    PoolAllocator(const PoolAllocator &) = default;
    PoolAllocator &operator=(const PoolAllocator &) = default;

    // Moving deliberately copies the pool handle: the allocator requirements demand that the
    // source of a move stays unchanged, and containers keep using their allocator after they
    // have been moved from. Stealing the shared_ptr would leave the source with a null pool.
    PoolAllocator(PoolAllocator &&other) noexcept : pool(other.pool) {}
    PoolAllocator &operator=(PoolAllocator &&other) noexcept
    {
        pool = other.pool;
        return *this;
    }

  private:
    // lets the rebinding constructor read the pool of a PoolAllocator<U>
    template <typename U> friend class PoolAllocator;

    // using shared_ptr guarantees that memory pool won't be destroyed before all allocators using
    // it (important if there are static instances of PoolAllocator)
    std::shared_ptr<MemoryPool> pool;
};
// Equality for pool allocators: any two PoolAllocator instances compare equal, whatever their
// value types are.
template <typename Lhs, typename Rhs>
bool operator==(const PoolAllocator<Lhs> & /*lhs*/, const PoolAllocator<Rhs> & /*rhs*/)
{
    constexpr bool always_equal = true;
    return always_equal;
}
// Inequality for pool allocators: never true, since every pair of PoolAllocator instances is
// reported as equal.
template <typename Lhs, typename Rhs>
bool operator!=(const PoolAllocator<Lhs> & /*lhs*/, const PoolAllocator<Rhs> & /*rhs*/)
{
    constexpr bool ever_different = false;
    return ever_different;
}
} // namespace osrm::util

View File

@ -1,17 +1,16 @@
#ifndef OSRM_UTIL_QUERY_HEAP_HPP
#define OSRM_UTIL_QUERY_HEAP_HPP
#include "util/pool_allocator.hpp"
#include <algorithm>
#include <boost/assert.hpp>
#include <boost/heap/d_ary_heap.hpp>
#include <algorithm>
#include <cstdint>
#include <limits>
#include <map>
#include <optional>
#include <unordered_map>
#include <vector>
namespace osrm::util
{
@ -56,7 +55,11 @@ template <typename NodeID, typename Key> class UnorderedMapStorage
void Clear() { nodes.clear(); }
private:
std::unordered_map<NodeID, Key> nodes;
template <typename K, typename V>
using UnorderedMap = std::
unordered_map<K, V, std::hash<K>, std::equal_to<K>, PoolAllocator<std::pair<const K, V>>>;
UnorderedMap<NodeID, Key> nodes;
};
template <typename NodeID,
@ -142,10 +145,12 @@ class QueryHeap
return weight > other.weight;
}
};
using HeapContainer = boost::heap::d_ary_heap<HeapData,
boost::heap::arity<4>,
boost::heap::mutable_<true>,
boost::heap::compare<std::greater<HeapData>>>;
boost::heap::compare<std::greater<HeapData>>,
boost::heap::allocator<PoolAllocator<HeapData>>>;
using HeapHandle = typename HeapContainer::handle_type;
public:
@ -160,6 +165,9 @@ class QueryHeap
Data data;
};
QueryHeap(const QueryHeap &other) = delete;
QueryHeap(QueryHeap &&other) = delete;
template <typename... StorageArgs> explicit QueryHeap(StorageArgs... args) : node_index(args...)
{
Clear();

View File

@ -0,0 +1,158 @@
#include "util/pool_allocator.hpp"
#include "util/typedefs.hpp"
#include <boost/test/unit_test.hpp>
#include <unordered_map>
BOOST_AUTO_TEST_SUITE(pool_allocator)
using namespace osrm;
using namespace osrm::util;
// Checks align_up() against hand-computed results; every alignment used is a power of two.
BOOST_AUTO_TEST_CASE(test_align_up)
{
// unaligned values are rounded up to the next multiple
BOOST_CHECK_EQUAL(align_up(5, 4), 8);
BOOST_CHECK_EQUAL(align_up(9, 8), 16);
BOOST_CHECK_EQUAL(align_up(17, 16), 32);
// values that are already a multiple of the alignment stay unchanged
BOOST_CHECK_EQUAL(align_up(4, 4), 4);
BOOST_CHECK_EQUAL(align_up(8, 8), 8);
BOOST_CHECK_EQUAL(align_up(16, 16), 16);
BOOST_CHECK_EQUAL(align_up(32, 16), 32);
// zero is a multiple of every alignment
BOOST_CHECK_EQUAL(align_up(0, 4), 0);
BOOST_CHECK_EQUAL(align_up(0, 8), 0);
BOOST_CHECK_EQUAL(align_up(0, 16), 0);
// larger values and alignments
BOOST_CHECK_EQUAL(align_up(1000000, 256), 1000192);
BOOST_CHECK_EQUAL(align_up(999999, 512), 1000448);
BOOST_CHECK_EQUAL(align_up(123456789, 1024), 123457536);
// an alignment of 1 never changes the value
BOOST_CHECK_EQUAL(align_up(0, 1), 0);
BOOST_CHECK_EQUAL(align_up(5, 1), 5);
BOOST_CHECK_EQUAL(align_up(123456, 1), 123456);
}
// Checks that get_next_power_of_two_exponent(n) returns the smallest e with 2^e >= n.
BOOST_AUTO_TEST_CASE(test_get_next_power_of_two_exponent)
{
// exact powers of two map to their own exponent
BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(1), 0);
BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(2), 1);
BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(4), 2);
BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(8), 3);
BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(16), 4);
// values between two powers are rounded up to the larger one
BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(3), 2);
BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(5), 3);
BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(9), 4);
BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(15), 4);
BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(17), 5);
// smallest valid input (repeats the n == 1 check from the first group)
BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(1), 0);
// largest input: the result is the full bit width of size_t
BOOST_CHECK_EQUAL(get_next_power_of_two_exponent(SIZE_MAX), sizeof(size_t) * 8);
}
// Many of the tests below rely on AddressSanitizer to report an error if the allocator
// mishandles memory.
// Allocates, writes, reads back and releases first a single int and then a pair of ints.
BOOST_AUTO_TEST_CASE(smoke)
{
    PoolAllocator<int> pool;

    auto ptr = pool.allocate(1);
    // validate the pointer before writing through it; abort the test case if it is null
    BOOST_REQUIRE(ptr != nullptr);
    *ptr = 42;
    BOOST_CHECK_EQUAL(*ptr, 42);
    pool.deallocate(ptr, 1);

    ptr = pool.allocate(2);
    BOOST_REQUIRE(ptr != nullptr);
    ptr[0] = 42;
    ptr[1] = 43;
    BOOST_CHECK_EQUAL(ptr[0], 42);
    BOOST_CHECK_EQUAL(ptr[1], 43);
    pool.deallocate(ptr, 2);
}
// Allocates one block of 2048 ints, fills it, and verifies that every element reads back intact.
BOOST_AUTO_TEST_CASE(a_lot_of_items)
{
PoolAllocator<int> pool;
auto ptr = pool.allocate(2048);
// write a distinct value into every slot of the block
for (int i = 0; i < 2048; ++i)
{
ptr[i] = i;
}
// read all slots back only after the whole block has been written
for (int i = 0; i < 2048; ++i)
{
BOOST_CHECK_EQUAL(ptr[i], i);
}
pool.deallocate(ptr, 2048);
}
// A copy-constructed allocator must be able to allocate and release memory just like the
// allocator it was copied from.
BOOST_AUTO_TEST_CASE(copy)
{
    PoolAllocator<int> pool;
    auto ptr = pool.allocate(1);
    // validate the pointer before writing through it; abort the test case if it is null
    BOOST_REQUIRE(ptr != nullptr);
    *ptr = 42;
    BOOST_CHECK_EQUAL(*ptr, 42);
    pool.deallocate(ptr, 1);

    PoolAllocator<int> pool2(pool);
    ptr = pool2.allocate(1);
    BOOST_REQUIRE(ptr != nullptr);
    *ptr = 42;
    BOOST_CHECK_EQUAL(*ptr, 42);
    pool2.deallocate(ptr, 1);
}
// A move-constructed allocator must be able to allocate and release memory just like the
// allocator it was constructed from.
BOOST_AUTO_TEST_CASE(move)
{
    PoolAllocator<int> pool;
    auto ptr = pool.allocate(1);
    // validate the pointer before writing through it; abort the test case if it is null
    BOOST_REQUIRE(ptr != nullptr);
    *ptr = 42;
    BOOST_CHECK_EQUAL(*ptr, 42);
    pool.deallocate(ptr, 1);

    PoolAllocator<int> pool2(std::move(pool));
    ptr = pool2.allocate(1);
    BOOST_REQUIRE(ptr != nullptr);
    *ptr = 42;
    BOOST_CHECK_EQUAL(*ptr, 42);
    pool2.deallocate(ptr, 1);
}
// Uses PoolAllocator as the allocator of a std::unordered_map and checks that entries can be
// inserted and read back, both before and after the map has been cleared.
BOOST_AUTO_TEST_CASE(unordered_map)
{
std::unordered_map<int,
int,
std::hash<int>,
std::equal_to<int>,
PoolAllocator<std::pair<const int, int>>>
map;
map[1] = 42;
BOOST_CHECK_EQUAL(map[1], 42);
// clearing destroys the existing entry before the next insertion
map.clear();
map[2] = 43;
BOOST_CHECK_EQUAL(map[2], 43);
}
// Interleaves char and double allocations and checks that every returned pointer is non-null
// and aligned for its value type.
BOOST_AUTO_TEST_CASE(alignment)
{
PoolAllocator<char> pool_char;
PoolAllocator<double> pool_double;
// first round: a single char followed by a single double
auto ptr_char = pool_char.allocate(1);
auto ptr_double = pool_double.allocate(1);
BOOST_CHECK_NE(ptr_double, nullptr);
BOOST_CHECK_EQUAL(reinterpret_cast<uintptr_t>(ptr_double) % alignof(double), 0);
BOOST_CHECK_NE(ptr_char, nullptr);
BOOST_CHECK_EQUAL(reinterpret_cast<uintptr_t>(ptr_char) % alignof(char), 0);
pool_char.deallocate(ptr_char, 1);
pool_double.deallocate(ptr_double, 1);
// second round: two chars this time, then a single double again
ptr_char = pool_char.allocate(2);
ptr_double = pool_double.allocate(1);
BOOST_CHECK_NE(ptr_double, nullptr);
BOOST_CHECK_EQUAL(reinterpret_cast<uintptr_t>(ptr_double) % alignof(double), 0);
BOOST_CHECK_NE(ptr_char, nullptr);
BOOST_CHECK_EQUAL(reinterpret_cast<uintptr_t>(ptr_char) % alignof(char), 0);
pool_char.deallocate(ptr_char, 2);
pool_double.deallocate(ptr_double, 1);
}
BOOST_AUTO_TEST_SUITE_END()