From 438aa66b4c0e3fc40ca486276e0ef06b8a43eed4 Mon Sep 17 00:00:00 2001 From: Patrick Niklaus Date: Fri, 17 Feb 2017 13:26:36 +0000 Subject: [PATCH] Add packed storage of partition information Right now we need a 64bit integer for every node in the graph. Depending on the number of cells we will see, we might get away with 32bit eventually. --- include/util/multi_level_partition.hpp | 291 +++++++++++++++++++++- unit_tests/util/cell_storage.cpp | 8 +- unit_tests/util/multi_level_partition.cpp | 109 ++++++++ 3 files changed, 404 insertions(+), 4 deletions(-) create mode 100644 unit_tests/util/multi_level_partition.cpp diff --git a/include/util/multi_level_partition.hpp b/include/util/multi_level_partition.hpp index d3bad229a..ac65e6afc 100644 --- a/include/util/multi_level_partition.hpp +++ b/include/util/multi_level_partition.hpp @@ -1,17 +1,54 @@ #ifndef OSRM_UTIL_MULTI_LEVEL_PARTITION_HPP #define OSRM_UTIL_MULTI_LEVEL_PARTITION_HPP +#include "util/exception.hpp" +#include "util/for_each_pair.hpp" #include "util/typedefs.hpp" + +#include +#include +#include +#include #include +#include +#include + +#include namespace osrm { namespace util { +namespace detail +{ +// get the msb of an integer +// return 0 for integers without msb +template std::size_t highestMSB(T value) +{ + static_assert(std::is_integral::value, "Integer required."); + std::size_t msb = 0; + while (value > 0) + { + value >>= 1u; + msb++; + } + return msb; +} + +#if (defined(__clang__) || defined(__GNUC__) || defined(__GNUG__)) && __x86_64__ +inline std::size_t highestMSB(std::uint64_t v) +{ + BOOST_ASSERT(v > 0); + return 63UL - __builtin_clzl(v); +} +#endif +} using LevelID = std::uint8_t; using CellID = std::uint32_t; +static constexpr CellID INVALID_CELL_ID = std::numeric_limits::max(); + // Mock interface, can be removed when we have an actual implementation class MultiLevelPartition { @@ -19,12 +56,262 @@ class MultiLevelPartition // Returns the cell id of `node` at `level` virtual CellID 
GetCell(LevelID level, NodeID node) const = 0; + // Returns the lowest cell id (at `level - 1`) of all children of `cell` at `level` + virtual CellID BeginChildren(LevelID level, CellID cell) const = 0; + + // Returns one past the highest cell id (at `level - 1`) of all children of `cell` at `level` + virtual CellID EndChildren(LevelID level, CellID cell) const = 0; + // Returns the highest level in which `first` and `second` are still in different cells virtual LevelID GetHighestDifferentLevel(NodeID first, NodeID second) const = 0; - virtual std::size_t GetNumberOfLevels() const = 0; + // Returns the level at which a `node` is relevant for a query from start to target + virtual LevelID GetQueryLevel(NodeID start, NodeID target, NodeID node) const = 0; - virtual std::size_t GetNumberOfCells(LevelID level) const = 0; + virtual std::uint8_t GetNumberOfLevels() const = 0; + + virtual std::uint32_t GetNumberOfCells(LevelID level) const = 0; +}; + +class PackedMultiLevelPartition final : public MultiLevelPartition +{ + using PartitionID = std::uint64_t; + static const constexpr std::uint8_t NUM_PARTITION_BITS = sizeof(PartitionID) * CHAR_BIT; + + public: + // cell_sizes is indexed by level (starting at 0, the base graph). + // However level 0 always needs to have cell size 1, since it is the + // basegraph. 
+ PackedMultiLevelPartition(const std::vector> &partitions, + const std::vector &level_to_num_cells) + : level_offsets(makeLevelOffsets(level_to_num_cells)), level_masks(makeLevelMasks()), + bit_to_level(makeBitToLevel()) + { + initializePartitionIDs(partitions); + } + + // Returns the id of the cell that contains `node` at level `l` + CellID GetCell(LevelID l, NodeID node) const final override + { + auto p = partition[node]; + auto lidx = LevelIDToIndex(l); + auto masked = p & level_masks[lidx]; + return masked >> level_offsets[lidx]; + } + + LevelID GetQueryLevel(NodeID start, NodeID target, NodeID node) const final override + { + return std::min(GetHighestDifferentLevel(start, node), + GetHighestDifferentLevel(target, node)); + } + + LevelID GetHighestDifferentLevel(NodeID first, NodeID second) const final override + { + if (partition[first] == partition[second]) + return 0; + + auto msb = detail::highestMSB(partition[first] ^ partition[second]); + return bit_to_level[msb]; + } + + std::uint8_t GetNumberOfLevels() const final override { return level_offsets.size(); } + + std::uint32_t GetNumberOfCells(LevelID level) const final override + { + return GetCell(level, GetSenitileNode()); + } + + // Returns the lowest cell id (at `level - 1`) of all children of `cell` at `level` + CellID BeginChildren(LevelID level, CellID cell) const final override + { + BOOST_ASSERT(level > 1); + auto lidx = LevelIDToIndex(level); + auto offset = level_to_children_offset[lidx]; + return cell_to_children[offset + cell]; + } + + // Returns one past the highest cell id (at `level - 1`) of all children of `cell` at `level` + CellID EndChildren(LevelID level, CellID cell) const final override + { + BOOST_ASSERT(level > 1); + auto lidx = LevelIDToIndex(level); + auto offset = level_to_children_offset[lidx]; + return cell_to_children[offset + cell + 1]; + } + + private: + inline std::size_t LevelIDToIndex(LevelID l) const { return l - 1; } + + // We save the sentinel as last node in the partition 
information. + // It has the highest cell id in each level so we can derive the range + // of cell ids efficiently. + inline NodeID GetSenitileNode() const { return partition.size() - 1; } + + void SetCellID(LevelID l, NodeID node, std::size_t cell_id) + { + auto lidx = LevelIDToIndex(l); + + auto shifted_id = cell_id << level_offsets[lidx]; + auto cleared_cell = partition[node] & ~level_masks[lidx]; + partition[node] = cleared_cell | shifted_id; + } + + // If a level has N cells we need ceil(log_2(N + 1)) bits for every cell ID on that level + std::vector + makeLevelOffsets(const std::vector &level_to_num_cells) const + { + std::vector offsets; + offsets.reserve(level_to_num_cells.size()); + + auto sum_bits = 0; + for (auto num_cells : level_to_num_cells) + { + // bits needed to number all contained vertexes + auto bits = static_cast(std::ceil(std::log2(num_cells + 1))); + offsets.push_back(sum_bits); + sum_bits += bits; + if (sum_bits > 64) + { + throw util::exception("Can't pack the partition information at level " + + std::to_string(offsets.size()) + + " into a 64bit integer. Would require " + + std::to_string(sum_bits) + " bits."); + } + } + // sentinel + offsets.push_back(sum_bits); + + return offsets; + } + + std::vector makeLevelMasks() const + { + std::vector masks; + masks.reserve(level_offsets.size()); + + util::for_each_pair(level_offsets.begin(), + level_offsets.end(), + [&](const auto offset, const auto next_offset) { + // create mask that has `bits` ones at its LSBs. + // 000011 + BOOST_ASSERT(offset < NUM_PARTITION_BITS); + PartitionID mask = (1UL << offset) - 1UL; + // 001111 + BOOST_ASSERT(next_offset < NUM_PARTITION_BITS); + PartitionID next_mask = (1UL << next_offset) - 1UL; + // 001100 + masks.push_back(next_mask ^ mask); + }); + + return masks; + } + + std::array makeBitToLevel() const + { + std::array bit_to_level; + + LevelID l = 1; + for (auto bits : level_offsets) + { + // set all bits to point to the correct level. 
+ for (auto idx = bits; idx < NUM_PARTITION_BITS; ++idx) + { + bit_to_level[idx] = l; + } + l++; + } + + return bit_to_level; + } + + void initializePartitionIDs(const std::vector> &partitions) + { + auto num_nodes = partitions.front().size(); + std::vector permutation(num_nodes); + std::iota(permutation.begin(), permutation.end(), 0); + // We include a sentinel element at the end of the partition + partition.resize(num_nodes + 1, 0); + NodeID sentinel = num_nodes; + + // Sort nodes bottom-up by cell id. + // This ensures that we get a nice grouping from parent to child cells: + // + // initial: + // level 0: 0 1 2 3 4 5 + // level 1: 2 1 3 4 3 4 + // level 2: 2 2 0 1 0 1 + // + // first round: + // level 0: 1 0 2 4 3 5 + // level 1: 1 2 3 3 4 4 (< sorted) + // level 2: 2 2 0 0 1 1 + // + // second round: + // level 0: 2 4 3 5 1 0 + // level 1: 3 3 4 4 1 2 + // level 2: 0 0 1 1 2 2 (< sorted) + for (const auto &partition : partitions) + { + std::stable_sort(permutation.begin(), + permutation.end(), + [&partition](const auto lhs, const auto rhs) { + return partition[lhs] < partition[rhs]; + }); + } + + // assign new cell ids top-down + LevelID level = partitions.size(); + for (const auto &partition : boost::adaptors::reverse(partitions)) + { + BOOST_ASSERT(permutation.size() > 0); + CellID last_cell_id = partition[permutation.front()]; + CellID cell_id = 0; + for (const auto node : permutation) + { + if (last_cell_id != partition[node]) + { + cell_id++; + last_cell_id = partition[node]; + } + SetCellID(level, node, cell_id); + } + // Store the number of cells of the level in the sentinel + SetCellID(level, sentinel, cell_id + 1); + level--; + } + + // level 1 does not have child cells + level_to_children_offset.push_back(0); + + for (auto level_idx = 0UL; level_idx < partitions.size() - 1; ++level_idx) + { + const auto &parent_partition = partitions[level_idx + 1]; + + level_to_children_offset.push_back(cell_to_children.size()); + + CellID last_parent_id = 
parent_partition[permutation.front()]; + cell_to_children.push_back(GetCell(level_idx + 1, permutation.front())); + for (const auto node : permutation) + { + if (last_parent_id != parent_partition[node]) + { + // Note: we use the new cell id here, not the ones contained + // in the input partition + cell_to_children.push_back(GetCell(level_idx + 1, node)); + last_parent_id = parent_partition[node]; + } + } + // insert sentinel for the last cell + cell_to_children.push_back(GetCell(level_idx + 1, permutation.back()) + 1); + } + } + + std::vector partition; + std::vector level_offsets; + std::vector level_masks; + std::vector level_to_children_offset; + std::vector cell_to_children; + std::array bit_to_level; }; } } diff --git a/unit_tests/util/cell_storage.cpp b/unit_tests/util/cell_storage.cpp index 1fd88abe4..79abf4cf5 100644 --- a/unit_tests/util/cell_storage.cpp +++ b/unit_tests/util/cell_storage.cpp @@ -22,10 +22,11 @@ class MockMLP final : public MultiLevelPartition CellID GetCell(LevelID level, NodeID node) const { return levels[level - 1][node]; }; LevelID GetHighestDifferentLevel(NodeID, NodeID) const { return 3; }; + LevelID GetQueryLevel(NodeID, NodeID, NodeID) const { return 3; }; - std::size_t GetNumberOfLevels() const { return levels.size() + 1; } + std::uint8_t GetNumberOfLevels() const { return levels.size() + 1; } - std::size_t GetNumberOfCells(LevelID level) const + std::uint32_t GetNumberOfCells(LevelID level) const { auto max_id = 0; for (auto cell : levels[level - 1]) @@ -33,6 +34,9 @@ class MockMLP final : public MultiLevelPartition return max_id + 1; } + CellID BeginChildren(LevelID, CellID) const { return 0; } + CellID EndChildren(LevelID, CellID) const { return 0; } + MockMLP(std::vector> levels_) : levels(std::move(levels_)) {} std::vector> levels; diff --git a/unit_tests/util/multi_level_partition.cpp b/unit_tests/util/multi_level_partition.cpp new file mode 100644 index 000000000..aaddc0420 --- /dev/null +++ 
b/unit_tests/util/multi_level_partition.cpp @@ -0,0 +1,109 @@ +#include +#include + +#include "util/multi_level_partition.hpp" + +#define CHECK_SIZE_RANGE(range, ref) BOOST_CHECK_EQUAL(range.second - range.first, ref) +#define CHECK_EQUAL_RANGE(range, ref) \ + do \ + { \ + const auto &lhs = range; \ + const auto &rhs = ref; \ + BOOST_CHECK_EQUAL_COLLECTIONS(lhs.first, lhs.second, rhs.begin(), rhs.end()); \ + } while (0) + +using namespace osrm; +using namespace osrm::util; + +BOOST_AUTO_TEST_SUITE(multi_level_partition_tests) + +BOOST_AUTO_TEST_CASE(packed_mlp) +{ + // node: 0 1 2 3 4 5 6 7 8 9 10 11 + std::vector l1{{0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5}}; + std::vector l2{{0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3}}; + std::vector l3{{0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}}; + std::vector l4{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; + PackedMultiLevelPartition mlp{{l1, l2, l3, l4}, {6, 4, 2, 1}}; + + BOOST_CHECK_EQUAL(mlp.GetNumberOfCells(1), 6); + BOOST_CHECK_EQUAL(mlp.GetNumberOfCells(2), 4); + BOOST_CHECK_EQUAL(mlp.GetNumberOfCells(3), 2); + BOOST_CHECK_EQUAL(mlp.GetNumberOfCells(4), 1); + + BOOST_CHECK_EQUAL(mlp.GetCell(1, 0), l1[0]); + BOOST_CHECK_EQUAL(mlp.GetCell(1, 1), l1[1]); + BOOST_CHECK_EQUAL(mlp.GetCell(1, 2), l1[2]); + BOOST_CHECK_EQUAL(mlp.GetCell(1, 3), l1[3]); + BOOST_CHECK_EQUAL(mlp.GetCell(1, 4), l1[4]); + BOOST_CHECK_EQUAL(mlp.GetCell(1, 5), l1[5]); + BOOST_CHECK_EQUAL(mlp.GetCell(1, 6), l1[6]); + BOOST_CHECK_EQUAL(mlp.GetCell(1, 7), l1[7]); + BOOST_CHECK_EQUAL(mlp.GetCell(1, 8), l1[8]); + BOOST_CHECK_EQUAL(mlp.GetCell(1, 9), l1[9]); + BOOST_CHECK_EQUAL(mlp.GetCell(1, 10), l1[10]); + BOOST_CHECK_EQUAL(mlp.GetCell(1, 11), l1[11]); + + BOOST_CHECK_EQUAL(mlp.GetCell(2, 0), l2[0]); + BOOST_CHECK_EQUAL(mlp.GetCell(2, 1), l2[1]); + BOOST_CHECK_EQUAL(mlp.GetCell(2, 2), l2[2]); + BOOST_CHECK_EQUAL(mlp.GetCell(2, 3), l2[3]); + BOOST_CHECK_EQUAL(mlp.GetCell(2, 4), l2[4]); + BOOST_CHECK_EQUAL(mlp.GetCell(2, 5), l2[5]); + BOOST_CHECK_EQUAL(mlp.GetCell(2, 6), l2[6]); + 
BOOST_CHECK_EQUAL(mlp.GetCell(2, 7), l2[7]); + BOOST_CHECK_EQUAL(mlp.GetCell(2, 8), l2[8]); + BOOST_CHECK_EQUAL(mlp.GetCell(2, 9), l2[9]); + BOOST_CHECK_EQUAL(mlp.GetCell(2, 10), l2[10]); + BOOST_CHECK_EQUAL(mlp.GetCell(2, 11), l2[11]); + + BOOST_CHECK_EQUAL(mlp.GetCell(3, 0), l3[0]); + BOOST_CHECK_EQUAL(mlp.GetCell(3, 1), l3[1]); + BOOST_CHECK_EQUAL(mlp.GetCell(3, 2), l3[2]); + BOOST_CHECK_EQUAL(mlp.GetCell(3, 3), l3[3]); + BOOST_CHECK_EQUAL(mlp.GetCell(3, 4), l3[4]); + BOOST_CHECK_EQUAL(mlp.GetCell(3, 5), l3[5]); + BOOST_CHECK_EQUAL(mlp.GetCell(3, 6), l3[6]); + BOOST_CHECK_EQUAL(mlp.GetCell(3, 7), l3[7]); + BOOST_CHECK_EQUAL(mlp.GetCell(3, 8), l3[8]); + BOOST_CHECK_EQUAL(mlp.GetCell(3, 9), l3[9]); + BOOST_CHECK_EQUAL(mlp.GetCell(3, 10), l3[10]); + BOOST_CHECK_EQUAL(mlp.GetCell(3, 11), l3[11]); + + BOOST_CHECK_EQUAL(mlp.GetCell(4, 0), l4[0]); + BOOST_CHECK_EQUAL(mlp.GetCell(4, 1), l4[1]); + BOOST_CHECK_EQUAL(mlp.GetCell(4, 2), l4[2]); + BOOST_CHECK_EQUAL(mlp.GetCell(4, 3), l4[3]); + BOOST_CHECK_EQUAL(mlp.GetCell(4, 4), l4[4]); + BOOST_CHECK_EQUAL(mlp.GetCell(4, 5), l4[5]); + BOOST_CHECK_EQUAL(mlp.GetCell(4, 6), l4[6]); + BOOST_CHECK_EQUAL(mlp.GetCell(4, 7), l4[7]); + BOOST_CHECK_EQUAL(mlp.GetCell(4, 8), l4[8]); + BOOST_CHECK_EQUAL(mlp.GetCell(4, 9), l4[9]); + BOOST_CHECK_EQUAL(mlp.GetCell(4, 10), l4[10]); + BOOST_CHECK_EQUAL(mlp.GetCell(4, 11), l4[11]); + + BOOST_CHECK_EQUAL(mlp.GetHighestDifferentLevel(0, 1), 0); + BOOST_CHECK_EQUAL(mlp.GetHighestDifferentLevel(0, 2), 1); + BOOST_CHECK_EQUAL(mlp.GetHighestDifferentLevel(0, 4), 3); + BOOST_CHECK_EQUAL(mlp.GetHighestDifferentLevel(7, 8), 2); + + BOOST_CHECK_EQUAL(mlp.BeginChildren(2, 0), 0); + BOOST_CHECK_EQUAL(mlp.EndChildren(2, 0), 2); + BOOST_CHECK_EQUAL(mlp.BeginChildren(2, 1), 2); + BOOST_CHECK_EQUAL(mlp.EndChildren(2, 1), 4); + BOOST_CHECK_EQUAL(mlp.BeginChildren(2, 2), 4); + BOOST_CHECK_EQUAL(mlp.EndChildren(2, 2), 5); + BOOST_CHECK_EQUAL(mlp.BeginChildren(2, 3), 5); + BOOST_CHECK_EQUAL(mlp.EndChildren(2, 
3), 6); + + BOOST_CHECK_EQUAL(mlp.BeginChildren(3, 0), 0); + BOOST_CHECK_EQUAL(mlp.EndChildren(3, 0), 1); + BOOST_CHECK_EQUAL(mlp.BeginChildren(3, 1), 1); + BOOST_CHECK_EQUAL(mlp.EndChildren(3, 1), 4); + + BOOST_CHECK_EQUAL(mlp.BeginChildren(4, 0), 0); + BOOST_CHECK_EQUAL(mlp.EndChildren(4, 0), 2); +} + +BOOST_AUTO_TEST_SUITE_END()