From 3e5c978719dcacca5f51c8c8916f4655951cd328 Mon Sep 17 00:00:00 2001 From: Michael Krasnyk Date: Fri, 27 May 2016 09:37:57 +0200 Subject: [PATCH] Reduce ramIndex file size PR #2472: the bottom-most node of the r-tree contains only a single index to a leaf node, so out of 532 bytes only 4 are used. --- include/util/static_rtree.hpp | 283 ++++++++++++++-------------------- 1 file changed, 119 insertions(+), 164 deletions(-) diff --git a/include/util/static_rtree.hpp b/include/util/static_rtree.hpp index f62c5c2db..7a4789f7e 100644 --- a/include/util/static_rtree.hpp +++ b/include/util/static_rtree.hpp @@ -62,12 +62,9 @@ class StaticRTree using EdgeData = EdgeDataT; using CoordinateList = CoordinateListT; - static_assert(LEAF_PAGE_SIZE >= sizeof(uint32_t) + sizeof(EdgeDataT), - "LEAF_PAGE_SIZE is too small"); - static_assert(((LEAF_PAGE_SIZE - 1) & LEAF_PAGE_SIZE) == 0, - "LEAF_PAGE_SIZE is not a power of 2"); - static constexpr std::uint32_t LEAF_NODE_SIZE = - (LEAF_PAGE_SIZE - sizeof(uint32_t)) / sizeof(EdgeDataT); + static_assert(LEAF_PAGE_SIZE >= sizeof(uint32_t) + sizeof(EdgeDataT), "LEAF_PAGE_SIZE is too small"); + static_assert(((LEAF_PAGE_SIZE - 1) & LEAF_PAGE_SIZE) == 0, "LEAF_PAGE_SIZE is not a power of 2"); + static constexpr std::uint32_t LEAF_NODE_SIZE = (LEAF_PAGE_SIZE - sizeof(uint32_t) - sizeof(Rectangle)) / sizeof(EdgeDataT); struct CandidateSegment { @@ -75,19 +72,27 @@ class StaticRTree EdgeDataT data; }; + struct TreeNodeIndex + { + TreeNodeIndex() : index(0), is_leaf(false) {} + TreeNodeIndex(std::size_t index, bool is_leaf) : index(index), is_leaf(is_leaf) {} + std::uint32_t index : 31; + std::uint32_t is_leaf : 1; + }; + struct TreeNode { - TreeNode() : child_count(0), child_is_on_disk(false) {} + TreeNode() : child_count(0) {} + std::uint32_t child_count; Rectangle minimum_bounding_rectangle; - std::uint32_t child_count : 31; - bool child_is_on_disk : 1; - std::uint32_t children[BRANCHING_FACTOR]; + TreeNodeIndex children[BRANCHING_FACTOR]; }; struct ALIGNED(LEAF_PAGE_SIZE) LeafNode { LeafNode() : object_count(0), objects() {} std::uint32_t object_count; + Rectangle minimum_bounding_rectangle; std::array objects; }; static_assert(sizeof(LeafNode) == LEAF_PAGE_SIZE, "LeafNode size does not fit the page size"); @@ -95,8 +100,7 @@ class StaticRTree private: struct WrappedInputElement { - explicit WrappedInputElement(const uint64_t _hilbert_value, - const std::uint32_t _array_index) + explicit WrappedInputElement(const uint64_t _hilbert_value, const std::uint32_t _array_index) : m_hilbert_value(_hilbert_value), m_array_index(_array_index) { } @@ -112,16 +116,8 @@ class StaticRTree } }; - struct TreeIndex - { - std::uint32_t index; - }; - struct SegmentIndex - { - std::uint32_t index; - std::uint32_t object; - Coordinate fixed_projected_coordinate; - }; + struct TreeIndex {}; + struct SegmentIndex { std::uint32_t object; Coordinate fixed_projected_coordinate; }; using QueryNodeType = mapbox::util::variant; struct QueryCandidate { @@ -132,6 +128,7 @@ class StaticRTree } std::uint64_t squared_min_dist; + TreeNodeIndex index; QueryNodeType node; }; @@ -158,12 +155,10 @@ class StaticRTree std::vector input_wrapper_vector(element_count); // generate auxiliary vector of hilbert-values - tbb::parallel_for( - tbb::blocked_range(0, element_count), - [&input_data_vector, &input_wrapper_vector, this]( - const tbb::blocked_range &range) { - for (uint64_t element_counter = range.begin(), end = range.end(); - element_counter != end; + tbb::parallel_for(tbb::blocked_range(0, element_count), + [&input_data_vector, &input_wrapper_vector, this](const tbb::blocked_range &range) + { + for (uint64_t element_counter = range.begin(), end = range.end(); element_counter != end; ++element_counter) { WrappedInputElement ¤t_wrapper = input_wrapper_vector[element_counter]; @@ -177,9 +172,7 @@ class StaticRTree Coordinate current_centroid = coordinate_calculation::centroid( m_coordinate_list[current_element.u], m_coordinate_list[current_element.v]); - current_centroid.lat = - FixedLatitude(COORDINATE_PRECISION * - web_mercator::latToY(toFloating(current_centroid.lat))); + current_centroid.lat = FixedLatitude(COORDINATE_PRECISION * web_mercator::latToY(toFloating(current_centroid.lat))); current_wrapper.m_hilbert_value = hilbertCode(current_centroid); } @@ -193,38 +186,53 @@ class StaticRTree std::vector tree_nodes_in_level; // pack M elements into leaf node and write to leaf file - uint64_t processed_objects_count = 0; - while (processed_objects_count < element_count) + uint64_t wrapped_element_index = 0; + for (std::uint32_t node_index = 0; wrapped_element_index < element_count; ++node_index) { - - LeafNode current_leaf; TreeNode current_node; - for (std::uint32_t current_element_index = 0; LEAF_NODE_SIZE > current_element_index; - ++current_element_index) + for (std::uint32_t leaf_index = 0; leaf_index < BRANCHING_FACTOR && wrapped_element_index < element_count; ++leaf_index) { - if (element_count > (processed_objects_count + current_element_index)) + LeafNode current_leaf; + Rectangle &rectangle = current_leaf.minimum_bounding_rectangle; + for (std::uint32_t object_index = 0; object_index < LEAF_NODE_SIZE && wrapped_element_index < element_count; + ++object_index, ++wrapped_element_index) { - std::uint32_t index_of_next_object = - input_wrapper_vector[processed_objects_count + current_element_index] - .m_array_index; - current_leaf.objects[current_element_index] = - input_data_vector[index_of_next_object]; - ++current_leaf.object_count; + const std::uint32_t input_object_index = input_wrapper_vector[wrapped_element_index].m_array_index; + const EdgeDataT &object = input_data_vector[input_object_index]; + + current_leaf.object_count += 1; + current_leaf.objects[object_index] = object; + + Coordinate projected_u{web_mercator::fromWGS84(Coordinate{m_coordinate_list[object.u]})}; + Coordinate projected_v{web_mercator::fromWGS84(Coordinate{m_coordinate_list[object.v]})}; + + rectangle.min_lon = std::min(rectangle.min_lon, std::min(projected_u.lon, projected_v.lon)); + rectangle.max_lon = std::max(rectangle.max_lon, std::max(projected_u.lon, projected_v.lon)); + + rectangle.min_lat = std::min(rectangle.min_lat, std::min(projected_u.lat, projected_v.lat)); + rectangle.max_lat = std::max(rectangle.max_lat, std::max(projected_u.lat, projected_v.lat)); + + BOOST_ASSERT(std::abs(static_cast(toFloating(projected_u.lon))) <= 180.); + BOOST_ASSERT(std::abs(static_cast(toFloating(projected_u.lat))) <= 180.); + BOOST_ASSERT(std::abs(static_cast(toFloating(projected_v.lon))) <= 180.); + BOOST_ASSERT(std::abs(static_cast(toFloating(projected_v.lat))) <= 180.); + + BOOST_ASSERT(rectangle.min_lon != FixedLongitude(std::numeric_limits::min())); + BOOST_ASSERT(rectangle.min_lat != FixedLatitude(std::numeric_limits::min())); + BOOST_ASSERT(rectangle.max_lon != FixedLongitude(std::numeric_limits::min())); + BOOST_ASSERT(rectangle.max_lat != FixedLatitude(std::numeric_limits::min())); } + + // append the leaf node to the current tree node + current_node.child_count += 1; + current_node.children[leaf_index] = TreeNodeIndex{node_index * BRANCHING_FACTOR + leaf_index, true}; + current_node.minimum_bounding_rectangle.MergeBoundingBoxes(current_leaf.minimum_bounding_rectangle); + + // write leaf_node to leaf node file + leaf_node_file.write((char *)¤t_leaf, sizeof(current_leaf)); } - // generate tree node that resemble the objects in leaf and store it for next level - InitializeMBRectangle(current_node.minimum_bounding_rectangle, - current_leaf.objects, - current_leaf.object_count, - m_coordinate_list); - current_node.child_is_on_disk = true; - current_node.children[0] = tree_nodes_in_level.size(); tree_nodes_in_level.emplace_back(current_node); - - // write leaf_node to leaf node file - leaf_node_file.write((char *)¤t_leaf, sizeof(current_leaf)); - processed_objects_count += current_leaf.object_count; } leaf_node_file.flush(); leaf_node_file.close(); @@ -238,16 +246,14 @@ class StaticRTree { TreeNode parent_node; // pack BRANCHING_FACTOR elements into tree_nodes each - for (std::uint32_t current_child_node_index = 0; - BRANCHING_FACTOR > current_child_node_index; + for (std::uint32_t current_child_node_index = 0; current_child_node_index < BRANCHING_FACTOR; ++current_child_node_index) { if (processed_tree_nodes_in_level < tree_nodes_in_level.size()) { - TreeNode ¤t_child_node = - tree_nodes_in_level[processed_tree_nodes_in_level]; + TreeNode ¤t_child_node = tree_nodes_in_level[processed_tree_nodes_in_level]; // add tree node to parent entry - parent_node.children[current_child_node_index] = m_search_tree.size(); + parent_node.children[current_child_node_index] = TreeNodeIndex{m_search_tree.size(), false}; m_search_tree.emplace_back(current_child_node); // merge MBRs parent_node.minimum_bounding_rectangle.MergeBoundingBoxes( @@ -262,7 +268,7 @@ class StaticRTree tree_nodes_in_level.swap(tree_nodes_in_next_level); ++processing_level; } - BOOST_ASSERT_MSG(1 == tree_nodes_in_level.size(), "tree broken, more than one root node"); + BOOST_ASSERT_MSG(tree_nodes_in_level.size() == 1, "tree broken, more than one root node"); // last remaining entry is the root node, store it m_search_tree.emplace_back(tree_nodes_in_level[0]); @@ -270,17 +276,20 @@ class StaticRTree std::reverse(m_search_tree.begin(), m_search_tree.end()); std::uint32_t search_tree_size = m_search_tree.size(); - tbb::parallel_for( - tbb::blocked_range(0, search_tree_size), - [this, &search_tree_size](const tbb::blocked_range &range) { + tbb::parallel_for(tbb::blocked_range(0, search_tree_size), + [this, &search_tree_size](const tbb::blocked_range &range) + { for (std::uint32_t i = range.begin(), end = range.end(); i != end; ++i) { TreeNode ¤t_tree_node = this->m_search_tree[i]; for (std::uint32_t j = 0; j < current_tree_node.child_count; ++j) { - const std::uint32_t old_id = current_tree_node.children[j]; - const std::uint32_t new_id = search_tree_size - old_id - 1; - current_tree_node.children[j] = new_id; + if (!current_tree_node.children[j].is_leaf) + { + const std::uint32_t old_id = current_tree_node.children[j].index; + const std::uint32_t new_id = search_tree_size - old_id - 1; + current_tree_node.children[j].index = new_id; + } } } }); @@ -306,7 +315,7 @@ class StaticRTree { throw exception("ram index file does not exist"); } - if (0 == boost::filesystem::file_size(node_file)) + if (boost::filesystem::file_size(node_file) == 0) { throw exception("ram index file is empty"); } @@ -344,8 +353,7 @@ class StaticRTree } catch (std::exception &exc) { - throw exception(boost::str(boost::format("Leaf file %1% mapping failed: %2%") % - leaf_file % exc.what())); + throw exception(boost::str(boost::format("Leaf file %1% mapping failed: %2%") % leaf_file % exc.what())); } } @@ -354,25 +362,22 @@ class StaticRTree std::vector SearchInBox(const Rectangle &search_rectangle) const { const Rectangle projected_rectangle{ - search_rectangle.min_lon, - search_rectangle.max_lon, - toFixed(FloatLatitude{ - web_mercator::latToY(toFloating(FixedLatitude(search_rectangle.min_lat)))}), - toFixed(FloatLatitude{ - web_mercator::latToY(toFloating(FixedLatitude(search_rectangle.max_lat)))})}; + search_rectangle.min_lon, search_rectangle.max_lon, + toFixed(FloatLatitude{web_mercator::latToY(toFloating(FixedLatitude(search_rectangle.min_lat)))}), + toFixed(FloatLatitude{web_mercator::latToY(toFloating(FixedLatitude(search_rectangle.max_lat)))})}; std::vector results; - std::queue traversal_queue; - traversal_queue.push(0); + std::queue traversal_queue; + traversal_queue.push(TreeNodeIndex{}); while (!traversal_queue.empty()) { - auto const ¤t_tree_node = m_search_tree[traversal_queue.front()]; + auto const current_tree_index = traversal_queue.front(); traversal_queue.pop(); - if (current_tree_node.child_is_on_disk) + if (current_tree_index.is_leaf) { - const LeafNode ¤t_leaf_node = m_leaves[current_tree_node.children[0]]; + const LeafNode ¤t_leaf_node = m_leaves[current_tree_index.index]; for (const auto i : irange(0u, current_leaf_node.object_count)) { @@ -380,14 +385,11 @@ class StaticRTree // we don't need to project the coordinates here, // because we use the unprojected rectangle to test against - const Rectangle bbox{std::min(m_coordinate_list[current_edge.u].lon, - m_coordinate_list[current_edge.v].lon), - std::max(m_coordinate_list[current_edge.u].lon, - m_coordinate_list[current_edge.v].lon), - std::min(m_coordinate_list[current_edge.u].lat, - m_coordinate_list[current_edge.v].lat), - std::max(m_coordinate_list[current_edge.u].lat, - m_coordinate_list[current_edge.v].lat)}; + const Rectangle bbox{ + std::min(m_coordinate_list[current_edge.u].lon, m_coordinate_list[current_edge.v].lon), + std::max(m_coordinate_list[current_edge.u].lon, m_coordinate_list[current_edge.v].lon), + std::min(m_coordinate_list[current_edge.u].lat, m_coordinate_list[current_edge.v].lat), + std::max(m_coordinate_list[current_edge.u].lat, m_coordinate_list[current_edge.v].lat)}; // use the _unprojected_ input rectangle here if (bbox.Intersects(search_rectangle)) @@ -398,13 +400,16 @@ class StaticRTree } else { + const TreeNode ¤t_tree_node = m_search_tree[current_tree_index.index]; + // If it's a tree node, look at all children and add them // to the search queue if their bounding boxes intersect for (std::uint32_t i = 0; i < current_tree_node.child_count; ++i) { - const std::int32_t child_id = current_tree_node.children[i]; - const auto &child_tree_node = m_search_tree[child_id]; - const auto &child_rectangle = child_tree_node.minimum_bounding_rectangle; + const TreeNodeIndex child_id = current_tree_node.children[i]; + const auto &child_rectangle = child_id.is_leaf + ? m_leaves[child_id.index].minimum_bounding_rectangle + : m_search_tree[child_id.index].minimum_bounding_rectangle; if (child_rectangle.Intersects(projected_rectangle)) { @@ -420,8 +425,7 @@ class StaticRTree std::vector Nearest(const Coordinate input_coordinate, const std::size_t max_results) const { - return Nearest(input_coordinate, - [](const CandidateSegment &) { return std::make_pair(true, true); }, + return Nearest(input_coordinate, [](const CandidateSegment &) { return std::make_pair(true, true); }, [max_results](const std::size_t num_results, const CandidateSegment &) { return num_results >= max_results; }); @@ -439,36 +443,30 @@ class StaticRTree // initialize queue with root element std::priority_queue traversal_queue; - traversal_queue.push(QueryCandidate{0, TreeIndex{0}}); + traversal_queue.push(QueryCandidate{0, TreeNodeIndex{}, TreeIndex{}}); while (!traversal_queue.empty()) { QueryCandidate current_query_node = traversal_queue.top(); traversal_queue.pop(); + const TreeNodeIndex ¤t_tree_index = current_query_node.index; if (current_query_node.node.template is()) { // current object is a tree node - const TreeNode ¤t_tree_node = - m_search_tree[current_query_node.node.template get().index]; - if (current_tree_node.child_is_on_disk) + if (current_tree_index.is_leaf) { - ExploreLeafNode(current_tree_node.children[0], - fixed_projected_coordinate, - projected_coordinate, - traversal_queue); + ExploreLeafNode(current_tree_index, fixed_projected_coordinate, projected_coordinate, traversal_queue); } else { - ExploreTreeNode(current_tree_node, fixed_projected_coordinate, traversal_queue); + ExploreTreeNode(current_tree_index, fixed_projected_coordinate, traversal_queue); } } else - { - // inspecting an actual road segment + { // current candidate is an actual road segment const auto &segment_index = current_query_node.node.template get(); - auto edge_data = m_leaves[segment_index.index].objects[segment_index.object]; - const auto ¤t_candidate = - CandidateSegment{segment_index.fixed_projected_coordinate, edge_data}; + auto edge_data = m_leaves[current_tree_index.index].objects[segment_index.object]; + const auto ¤t_candidate = CandidateSegment{segment_index.fixed_projected_coordinate, edge_data}; // to allow returns of no-results if too restrictive filtering, this needs to be // done here @@ -476,7 +474,6 @@ class StaticRTree // first candidate if (terminate(results.size(), current_candidate)) { - traversal_queue = std::priority_queue{}; break; } @@ -498,12 +495,12 @@ class StaticRTree private: template - void ExploreLeafNode(const std::uint32_t leaf_id, + void ExploreLeafNode(const TreeNodeIndex &leaf_id, const Coordinate projected_input_coordinate_fixed, const FloatCoordinate &projected_input_coordinate, QueueT &traversal_queue) const { - const LeafNode ¤t_leaf_node = m_leaves[leaf_id]; + const LeafNode ¤t_leaf_node = m_leaves[leaf_id.index]; // current object represents a block on disk for (const auto i : irange(0u, current_leaf_node.object_count)) @@ -514,75 +511,33 @@ class StaticRTree FloatCoordinate projected_nearest; std::tie(std::ignore, projected_nearest) = - coordinate_calculation::projectPointOnSegment( - projected_u, projected_v, projected_input_coordinate); + coordinate_calculation::projectPointOnSegment(projected_u, projected_v, projected_input_coordinate); - const auto squared_distance = coordinate_calculation::squaredEuclideanDistance( - projected_input_coordinate_fixed, projected_nearest); + const auto squared_distance = + coordinate_calculation::squaredEuclideanDistance(projected_input_coordinate_fixed, projected_nearest); // distance must be non-negative BOOST_ASSERT(0. <= squared_distance); - traversal_queue.push(QueryCandidate{ - squared_distance, SegmentIndex{leaf_id, i, Coordinate{projected_nearest}}}); + traversal_queue.push(QueryCandidate{squared_distance, leaf_id, SegmentIndex{i, Coordinate{projected_nearest}}}); } } template - void ExploreTreeNode(const TreeNode &parent, + void ExploreTreeNode(const TreeNodeIndex &parent_id, const Coordinate fixed_projected_input_coordinate, QueueT &traversal_queue) const { + const TreeNode &parent = m_search_tree[parent_id.index]; for (std::uint32_t i = 0; i < parent.child_count; ++i) { - const std::int32_t child_id = parent.children[i]; - const auto &child_tree_node = m_search_tree[child_id]; - const auto &child_rectangle = child_tree_node.minimum_bounding_rectangle; + const TreeNodeIndex child_id = parent.children[i]; + const auto &child_rectangle = child_id.is_leaf + ? m_leaves[child_id.index].minimum_bounding_rectangle + : m_search_tree[child_id.index].minimum_bounding_rectangle; const auto squared_lower_bound_to_element = child_rectangle.GetMinSquaredDist(fixed_projected_input_coordinate); - traversal_queue.push(QueryCandidate{squared_lower_bound_to_element, - TreeIndex{static_cast(child_id)}}); + traversal_queue.push(QueryCandidate{squared_lower_bound_to_element, child_id, TreeIndex{}}); } } - - template - void InitializeMBRectangle(Rectangle &rectangle, - const std::array &objects, - const std::uint32_t element_count, - const std::vector &coordinate_list) - { - for (std::uint32_t i = 0; i < element_count; ++i) - { - BOOST_ASSERT(objects[i].u < coordinate_list.size()); - BOOST_ASSERT(objects[i].v < coordinate_list.size()); - - Coordinate projected_u{ - web_mercator::fromWGS84(Coordinate{coordinate_list[objects[i].u]})}; - Coordinate projected_v{ - web_mercator::fromWGS84(Coordinate{coordinate_list[objects[i].v]})}; - - BOOST_ASSERT(toFloating(projected_u.lon) <= FloatLongitude(180.)); - BOOST_ASSERT(toFloating(projected_u.lon) >= FloatLongitude(-180.)); - BOOST_ASSERT(toFloating(projected_u.lat) <= FloatLatitude(180.)); - BOOST_ASSERT(toFloating(projected_u.lat) >= FloatLatitude(-180.)); - BOOST_ASSERT(toFloating(projected_v.lon) <= FloatLongitude(180.)); - BOOST_ASSERT(toFloating(projected_v.lon) >= FloatLongitude(-180.)); - BOOST_ASSERT(toFloating(projected_v.lat) <= FloatLatitude(180.)); - BOOST_ASSERT(toFloating(projected_v.lat) >= FloatLatitude(-180.)); - - rectangle.min_lon = - std::min(rectangle.min_lon, std::min(projected_u.lon, projected_v.lon)); - rectangle.max_lon = - std::max(rectangle.max_lon, std::max(projected_u.lon, projected_v.lon)); - - rectangle.min_lat = - std::min(rectangle.min_lat, std::min(projected_u.lat, projected_v.lat)); - rectangle.max_lat = - std::max(rectangle.max_lat, std::max(projected_u.lat, projected_v.lat)); - } - BOOST_ASSERT(rectangle.min_lon != FixedLongitude(std::numeric_limits::min())); - BOOST_ASSERT(rectangle.min_lat != FixedLatitude(std::numeric_limits::min())); - BOOST_ASSERT(rectangle.max_lon != FixedLongitude(std::numeric_limits::min())); - BOOST_ASSERT(rectangle.max_lat != FixedLatitude(std::numeric_limits::min())); - } }; //[1] "On Packing R-Trees"; I. Kamel, C. Faloutsos; 1993; DOI: 10.1145/170088.170403