Add packed storage of partition information

Right now we need a 64bit integer for every node in the graph.
Depending on the number of cells we will see, we might get away with
32bit eventually.
This commit is contained in:
Patrick Niklaus 2017-02-17 13:26:36 +00:00 committed by Patrick Niklaus
parent 5739c27ef4
commit 438aa66b4c
3 changed files with 404 additions and 4 deletions

View File

@ -1,17 +1,54 @@
#ifndef OSRM_UTIL_MULTI_LEVEL_PARTITION_HPP
#define OSRM_UTIL_MULTI_LEVEL_PARTITION_HPP
#include "util/exception.hpp"
#include "util/for_each_pair.hpp"
#include "util/typedefs.hpp"
#include <algorithm>
#include <array>
#include <climits>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <numeric>
#include <string>
#include <type_traits>
#include <vector>
#include <boost/assert.hpp>
#include <boost/range/adaptor/reversed.hpp>
namespace osrm
{
namespace util
{
namespace detail
{
// Returns the zero-based index of the most significant set bit of `value`,
// e.g. 0 for 1, 1 for 2 and 3, 3 for 8.
// Returns 0 for integers without a set bit to scan (zero and negative values).
// The index convention matches the builtin-based overload for std::uint64_t, so
// the result can be used directly as an index into a per-bit lookup table.
template <typename T> std::size_t highestMSB(T value)
{
    static_assert(std::is_integral<T>::value, "Integer required.");
    std::size_t msb = 0;
    // Every shift moves the top set bit one position down; once only the
    // lowest bit is left we have counted its original index.
    while (value > 1)
    {
        value >>= 1u;
        msb++;
    }
    return msb;
}
#if (defined(__clang__) || defined(__GNUC__) || defined(__GNUG__)) && __x86_64__
// Overload for 64bit values that uses the count-leading-zeros builtin.
// Returns the zero-based index of the most significant set bit.
// `v` must not be 0: the result of the builtin is undefined for 0.
inline std::size_t highestMSB(std::uint64_t v)
{
    BOOST_ASSERT(v > 0);
    // __builtin_clzll takes an unsigned long long which is 64 bits wide on all
    // supported ABIs. __builtin_clzl takes an unsigned long, which is only
    // 32 bits wide on LLP64 targets and would silently truncate `v` there.
    return std::size_t{63} - static_cast<std::size_t>(__builtin_clzll(v));
}
#endif
}
// Identifier of a level of the partition; stored in 8 bits.
using LevelID = std::uint8_t;
// Identifier of a cell within one level; stored in 32 bits.
using CellID = std::uint32_t;
// Largest representable CellID, reserved as marker for "no valid cell".
static constexpr CellID INVALID_CELL_ID = std::numeric_limits<CellID>::max();
// Abstract interface for querying a multi-level partition of the graph nodes.
// Introduced as a mock interface; PackedMultiLevelPartition implements it.
class MultiLevelPartition
{
  public:
    // Polymorphic base: allow safe destruction through a base pointer.
    virtual ~MultiLevelPartition() = default;

    // Returns the cell id of `node` at `level`
    virtual CellID GetCell(LevelID level, NodeID node) const = 0;

    // Returns the lowest cell id (at `level - 1`) of all children `cell` at `level`
    virtual CellID BeginChildren(LevelID level, CellID cell) const = 0;

    // Returns the end of the range of child cell ids (at `level - 1`) of `cell` at `level`.
    // PackedMultiLevelPartition returns one past the highest child cell id, so that
    // [BeginChildren, EndChildren) is a half-open range.
    virtual CellID EndChildren(LevelID level, CellID cell) const = 0;

    // Returns the highest level in which `first` and `second` are still in different cells
    virtual LevelID GetHighestDifferentLevel(NodeID first, NodeID second) const = 0;

    // Returns the level at which a `node` is relevant for a query from start to target
    virtual LevelID GetQueryLevel(NodeID start, NodeID target, NodeID node) const = 0;

    // Returns the number of levels of the partition
    virtual std::uint8_t GetNumberOfLevels() const = 0;

    // Returns the number of cells at `level`
    virtual std::uint32_t GetNumberOfCells(LevelID level) const = 0;
};
class PackedMultiLevelPartition final : public MultiLevelPartition
{
using PartitionID = std::uint64_t;
static const constexpr std::uint8_t NUM_PARTITION_BITS = sizeof(PartitionID) * CHAR_BIT;
public:
// cell_sizes is index by level (starting at 0, the base graph).
// However level 0 always needs to have cell size 1, since it is the
// basegraph.
PackedMultiLevelPartition(const std::vector<std::vector<CellID>> &partitions,
const std::vector<std::uint32_t> &level_to_num_cells)
: level_offsets(makeLevelOffsets(level_to_num_cells)), level_masks(makeLevelMasks()),
bit_to_level(makeBitToLevel())
{
initializePartitionIDs(partitions);
}
// returns the index of the cell the vertex is contained at level l
CellID GetCell(LevelID l, NodeID node) const final override
{
auto p = partition[node];
auto lidx = LevelIDToIndex(l);
auto masked = p & level_masks[lidx];
return masked >> level_offsets[lidx];
}
LevelID GetQueryLevel(NodeID start, NodeID target, NodeID node) const final override
{
return std::min(GetHighestDifferentLevel(start, node),
GetHighestDifferentLevel(target, node));
}
LevelID GetHighestDifferentLevel(NodeID first, NodeID second) const final override
{
if (partition[first] == partition[second])
return 0;
auto msb = detail::highestMSB(partition[first] ^ partition[second]);
return bit_to_level[msb];
}
std::uint8_t GetNumberOfLevels() const final override { return level_offsets.size(); }
std::uint32_t GetNumberOfCells(LevelID level) const final override
{
return GetCell(level, GetSenitileNode());
}
// Returns the lowest cell id (at `level - 1`) of all children `cell` at `level`
CellID BeginChildren(LevelID level, CellID cell) const final override
{
BOOST_ASSERT(level > 1);
auto lidx = LevelIDToIndex(level);
auto offset = level_to_children_offset[lidx];
return cell_to_children[offset + cell];
}
// Returns the highest cell id (at `level - 1`) of all children `cell` at `level`
CellID EndChildren(LevelID level, CellID cell) const final override
{
BOOST_ASSERT(level > 1);
auto lidx = LevelIDToIndex(level);
auto offset = level_to_children_offset[lidx];
return cell_to_children[offset + cell + 1];
}
private:
inline std::size_t LevelIDToIndex(LevelID l) const { return l - 1; }
// We save the sentinel as last node in the partition information.
// It has the highest cell id in each level so we can derived the range
// of cell ids efficiently.
inline NodeID GetSenitileNode() const { return partition.size() - 1; }
void SetCellID(LevelID l, NodeID node, std::size_t cell_id)
{
auto lidx = LevelIDToIndex(l);
auto shifted_id = cell_id << level_offsets[lidx];
auto cleared_cell = partition[node] & ~level_masks[lidx];
partition[node] = cleared_cell | shifted_id;
}
// If we have N cells per level we need log_2 bits for every cell ID
std::vector<std::uint8_t>
makeLevelOffsets(const std::vector<std::uint32_t> &level_to_num_cells) const
{
std::vector<std::uint8_t> offsets;
offsets.reserve(level_to_num_cells.size());
auto sum_bits = 0;
for (auto num_cells : level_to_num_cells)
{
// bits needed to number all contained vertexes
auto bits = static_cast<std::uint64_t>(std::ceil(std::log2(num_cells + 1)));
offsets.push_back(sum_bits);
sum_bits += bits;
if (sum_bits > 64)
{
throw util::exception("Can't pack the partition information at level " +
std::to_string(offsets.size()) +
" into a 64bit integer. Would require " +
std::to_string(sum_bits) + " bits.");
}
}
// sentinel
offsets.push_back(sum_bits);
return offsets;
}
std::vector<PartitionID> makeLevelMasks() const
{
std::vector<PartitionID> masks;
masks.reserve(level_offsets.size());
util::for_each_pair(level_offsets.begin(),
level_offsets.end(),
[&](const auto offset, const auto next_offset) {
// create mask that has `bits` ones at its LSBs.
// 000011
BOOST_ASSERT(offset < NUM_PARTITION_BITS);
PartitionID mask = (1UL << offset) - 1UL;
// 001111
BOOST_ASSERT(next_offset < NUM_PARTITION_BITS);
PartitionID next_mask = (1UL << next_offset) - 1UL;
// 001100
masks.push_back(next_mask ^ mask);
});
return masks;
}
std::array<LevelID, NUM_PARTITION_BITS> makeBitToLevel() const
{
std::array<LevelID, NUM_PARTITION_BITS> bit_to_level;
LevelID l = 1;
for (auto bits : level_offsets)
{
// set all bits to point to the correct level.
for (auto idx = bits; idx < NUM_PARTITION_BITS; ++idx)
{
bit_to_level[idx] = l;
}
l++;
}
return bit_to_level;
}
void initializePartitionIDs(const std::vector<std::vector<CellID>> &partitions)
{
auto num_nodes = partitions.front().size();
std::vector<NodeID> permutation(num_nodes);
std::iota(permutation.begin(), permutation.end(), 0);
// We include a sentinel element at the end of the partition
partition.resize(num_nodes + 1, 0);
NodeID sentinel = num_nodes;
// Sort nodes bottum-up by cell id.
// This ensures that we get a nice grouping from parent to child cells:
//
// intitial:
// level 0: 0 1 2 3 4 5
// level 1: 2 1 3 4 3 4
// level 2: 2 2 0 1 0 1
//
// first round:
// level 0: 1 0 2 4 3 5
// level 1: 1 2 3 3 4 4 (< sorted)
// level 2: 2 2 0 0 1 1
//
// second round:
// level 0: 2 4 3 5 1 0
// level 1: 3 3 4 4 1 2
// level 2: 0 0 1 1 2 2 (< sorted)
for (const auto &partition : partitions)
{
std::stable_sort(permutation.begin(),
permutation.end(),
[&partition](const auto lhs, const auto rhs) {
return partition[lhs] < partition[rhs];
});
}
// top down assign new cell ids
LevelID level = partitions.size();
for (const auto &partition : boost::adaptors::reverse(partitions))
{
BOOST_ASSERT(permutation.size() > 0);
CellID last_cell_id = partition[permutation.front()];
CellID cell_id = 0;
for (const auto node : permutation)
{
if (last_cell_id != partition[node])
{
cell_id++;
last_cell_id = partition[node];
}
SetCellID(level, node, cell_id);
}
// Store the number of cells of the level in the sentinel
SetCellID(level, sentinel, cell_id + 1);
level--;
}
// level 1 does not have child cells
level_to_children_offset.push_back(0);
for (auto level_idx = 0UL; level_idx < partitions.size() - 1; ++level_idx)
{
const auto &parent_partition = partitions[level_idx + 1];
level_to_children_offset.push_back(cell_to_children.size());
CellID last_parent_id = parent_partition[permutation.front()];
cell_to_children.push_back(GetCell(level_idx + 1, permutation.front()));
for (const auto node : permutation)
{
if (last_parent_id != parent_partition[node])
{
// Note: we use the new cell id here, not the ones contained
// in the input partition
cell_to_children.push_back(GetCell(level_idx + 1, node));
last_parent_id = parent_partition[node];
}
}
// insert sentinel for the last cell
cell_to_children.push_back(GetCell(level_idx + 1, permutation.back()) + 1);
}
}
std::vector<PartitionID> partition;
std::vector<std::uint8_t> level_offsets;
std::vector<PartitionID> level_masks;
std::vector<std::uint32_t> level_to_children_offset;
std::vector<CellID> cell_to_children;
std::array<LevelID, NUM_PARTITION_BITS> bit_to_level;
};
}
}

View File

@ -22,10 +22,11 @@ class MockMLP final : public MultiLevelPartition
CellID GetCell(LevelID level, NodeID node) const { return levels[level - 1][node]; };
LevelID GetHighestDifferentLevel(NodeID, NodeID) const { return 3; };
LevelID GetQueryLevel(NodeID, NodeID, NodeID) const { return 3; };
std::size_t GetNumberOfLevels() const { return levels.size() + 1; }
std::uint8_t GetNumberOfLevels() const { return levels.size() + 1; }
std::size_t GetNumberOfCells(LevelID level) const
std::uint32_t GetNumberOfCells(LevelID level) const
{
auto max_id = 0;
for (auto cell : levels[level - 1])
@ -33,6 +34,9 @@ class MockMLP final : public MultiLevelPartition
return max_id + 1;
}
CellID BeginChildren(LevelID, CellID) const { return 0; }
CellID EndChildren(LevelID, CellID) const { return 0; }
MockMLP(std::vector<std::vector<CellID>> levels_) : levels(std::move(levels_)) {}
std::vector<std::vector<CellID>> levels;

View File

@ -0,0 +1,109 @@
#include <boost/numeric/conversion/cast.hpp>
#include <boost/test/unit_test.hpp>
#include "util/multi_level_partition.hpp"
// Checks that the iterator pair `range` (accessed via `.first` / `.second`)
// spans exactly `ref` elements. `range` is evaluated once and bound to a local,
// so arbitrary expressions such as `*ptr` or a function call can be passed.
#define CHECK_SIZE_RANGE(range, ref)                                                               \
    do                                                                                             \
    {                                                                                              \
        const auto &checked_range = (range);                                                       \
        BOOST_CHECK_EQUAL(checked_range.second - checked_range.first, (ref));                      \
    } while (0)
// Checks that the elements of the iterator pair `range` equal the elements of
// the container `ref` (accessed via `.begin()` / `.end()`).
#define CHECK_EQUAL_RANGE(range, ref)                                                              \
    do                                                                                             \
    {                                                                                              \
        const auto &lhs = (range);                                                                 \
        const auto &rhs = (ref);                                                                   \
        BOOST_CHECK_EQUAL_COLLECTIONS(lhs.first, lhs.second, rhs.begin(), rhs.end());              \
    } while (0)
using namespace osrm;
using namespace osrm::util;
BOOST_AUTO_TEST_SUITE(multi_level_partition_tests)
BOOST_AUTO_TEST_CASE(packed_mlp)
{
    // Four nested levels over twelve nodes; the cell ids are already grouped,
    // so the renumbering done by the partition has to reproduce the input.
    // node:                  0  1  2  3  4  5  6  7  8  9 10 11
    std::vector<CellID> l1{{0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5}};
    std::vector<CellID> l2{{0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3}};
    std::vector<CellID> l3{{0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}};
    std::vector<CellID> l4{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};

    PackedMultiLevelPartition mlp{{l1, l2, l3, l4}, {6, 4, 2, 1}};

    // The number of cells of every level is recovered from the sentinel
    const int expected_num_cells[] = {6, 4, 2, 1};
    for (LevelID level = 1; level <= 4; ++level)
    {
        BOOST_CHECK_EQUAL(mlp.GetNumberOfCells(level), expected_num_cells[level - 1]);
    }

    // Every node keeps its cell id on every level
    const std::vector<CellID> *reference_levels[] = {&l1, &l2, &l3, &l4};
    for (LevelID level = 1; level <= 4; ++level)
    {
        const auto &reference = *reference_levels[level - 1];
        for (NodeID node = 0; node < 12; ++node)
        {
            BOOST_CHECK_EQUAL(mlp.GetCell(level, node), reference[node]);
        }
    }

    const auto check_highest_different_level =
        [&mlp](const NodeID first, const NodeID second, const int expected_level) {
            BOOST_CHECK_EQUAL(mlp.GetHighestDifferentLevel(first, second), expected_level);
        };
    check_highest_different_level(0, 1, 0);
    check_highest_different_level(0, 2, 1);
    check_highest_different_level(0, 4, 3);
    check_highest_different_level(7, 8, 2);

    // Children of a cell form the range [begin, end) on the level below
    const auto check_children =
        [&mlp](const LevelID level, const CellID cell, const int begin, const int end) {
            BOOST_CHECK_EQUAL(mlp.BeginChildren(level, cell), begin);
            BOOST_CHECK_EQUAL(mlp.EndChildren(level, cell), end);
        };
    check_children(2, 0, 0, 2);
    check_children(2, 1, 2, 4);
    check_children(2, 2, 4, 5);
    check_children(2, 3, 5, 6);

    check_children(3, 0, 0, 1);
    check_children(3, 1, 1, 4);

    check_children(4, 0, 0, 2);
}
BOOST_AUTO_TEST_SUITE_END()