Reduce ramIndex file size

PR #2472: the bottom-most node of the r-tree contains
only a single index to a leaf node, so out of 532 bytes
only 4 are used.
This commit is contained in:
Michael Krasnyk 2016-05-27 09:37:57 +02:00
parent 843f1a6356
commit 3e5c978719
No known key found for this signature in database
GPG Key ID: A277FCFBE39A658D

View File

@ -62,12 +62,9 @@ class StaticRTree
using EdgeData = EdgeDataT;
using CoordinateList = CoordinateListT;
static_assert(LEAF_PAGE_SIZE >= sizeof(uint32_t) + sizeof(EdgeDataT),
"LEAF_PAGE_SIZE is too small");
static_assert(((LEAF_PAGE_SIZE - 1) & LEAF_PAGE_SIZE) == 0,
"LEAF_PAGE_SIZE is not a power of 2");
static constexpr std::uint32_t LEAF_NODE_SIZE =
(LEAF_PAGE_SIZE - sizeof(uint32_t)) / sizeof(EdgeDataT);
static_assert(LEAF_PAGE_SIZE >= sizeof(uint32_t) + sizeof(EdgeDataT), "LEAF_PAGE_SIZE is too small");
static_assert(((LEAF_PAGE_SIZE - 1) & LEAF_PAGE_SIZE) == 0, "LEAF_PAGE_SIZE is not a power of 2");
static constexpr std::uint32_t LEAF_NODE_SIZE = (LEAF_PAGE_SIZE - sizeof(uint32_t) - sizeof(Rectangle)) / sizeof(EdgeDataT);
struct CandidateSegment
{
@ -75,19 +72,27 @@ class StaticRTree
EdgeDataT data;
};
struct TreeNodeIndex
{
TreeNodeIndex() : index(0), is_leaf(false) {}
TreeNodeIndex(std::size_t index, bool is_leaf) : index(index), is_leaf(is_leaf) {}
std::uint32_t index : 31;
std::uint32_t is_leaf : 1;
};
struct TreeNode
{
TreeNode() : child_count(0), child_is_on_disk(false) {}
TreeNode() : child_count(0) {}
std::uint32_t child_count;
Rectangle minimum_bounding_rectangle;
std::uint32_t child_count : 31;
bool child_is_on_disk : 1;
std::uint32_t children[BRANCHING_FACTOR];
TreeNodeIndex children[BRANCHING_FACTOR];
};
struct ALIGNED(LEAF_PAGE_SIZE) LeafNode
{
LeafNode() : object_count(0), objects() {}
std::uint32_t object_count;
Rectangle minimum_bounding_rectangle;
std::array<EdgeDataT, LEAF_NODE_SIZE> objects;
};
static_assert(sizeof(LeafNode) == LEAF_PAGE_SIZE, "LeafNode size does not fit the page size");
@ -95,8 +100,7 @@ class StaticRTree
private:
struct WrappedInputElement
{
explicit WrappedInputElement(const uint64_t _hilbert_value,
const std::uint32_t _array_index)
explicit WrappedInputElement(const uint64_t _hilbert_value, const std::uint32_t _array_index)
: m_hilbert_value(_hilbert_value), m_array_index(_array_index)
{
}
@ -112,16 +116,8 @@ class StaticRTree
}
};
struct TreeIndex
{
std::uint32_t index;
};
struct SegmentIndex
{
std::uint32_t index;
std::uint32_t object;
Coordinate fixed_projected_coordinate;
};
struct TreeIndex {};
struct SegmentIndex { std::uint32_t object; Coordinate fixed_projected_coordinate; };
using QueryNodeType = mapbox::util::variant<TreeIndex, SegmentIndex>;
struct QueryCandidate
{
@ -132,6 +128,7 @@ class StaticRTree
}
std::uint64_t squared_min_dist;
TreeNodeIndex index;
QueryNodeType node;
};
@ -158,12 +155,10 @@ class StaticRTree
std::vector<WrappedInputElement> input_wrapper_vector(element_count);
// generate auxiliary vector of hilbert-values
tbb::parallel_for(
tbb::blocked_range<uint64_t>(0, element_count),
[&input_data_vector, &input_wrapper_vector, this](
const tbb::blocked_range<uint64_t> &range) {
for (uint64_t element_counter = range.begin(), end = range.end();
element_counter != end;
tbb::parallel_for(tbb::blocked_range<uint64_t>(0, element_count),
[&input_data_vector, &input_wrapper_vector, this](const tbb::blocked_range<uint64_t> &range)
{
for (uint64_t element_counter = range.begin(), end = range.end(); element_counter != end;
++element_counter)
{
WrappedInputElement &current_wrapper = input_wrapper_vector[element_counter];
@ -177,9 +172,7 @@ class StaticRTree
Coordinate current_centroid = coordinate_calculation::centroid(
m_coordinate_list[current_element.u], m_coordinate_list[current_element.v]);
current_centroid.lat =
FixedLatitude(COORDINATE_PRECISION *
web_mercator::latToY(toFloating(current_centroid.lat)));
current_centroid.lat = FixedLatitude(COORDINATE_PRECISION * web_mercator::latToY(toFloating(current_centroid.lat)));
current_wrapper.m_hilbert_value = hilbertCode(current_centroid);
}
@ -193,38 +186,53 @@ class StaticRTree
std::vector<TreeNode> tree_nodes_in_level;
// pack M elements into leaf node and write to leaf file
uint64_t processed_objects_count = 0;
while (processed_objects_count < element_count)
uint64_t wrapped_element_index = 0;
for (std::uint32_t node_index = 0; wrapped_element_index < element_count; ++node_index)
{
LeafNode current_leaf;
TreeNode current_node;
for (std::uint32_t current_element_index = 0; LEAF_NODE_SIZE > current_element_index;
++current_element_index)
for (std::uint32_t leaf_index = 0; leaf_index < BRANCHING_FACTOR && wrapped_element_index < element_count; ++leaf_index)
{
if (element_count > (processed_objects_count + current_element_index))
LeafNode current_leaf;
Rectangle &rectangle = current_leaf.minimum_bounding_rectangle;
for (std::uint32_t object_index = 0; object_index < LEAF_NODE_SIZE && wrapped_element_index < element_count;
++object_index, ++wrapped_element_index)
{
std::uint32_t index_of_next_object =
input_wrapper_vector[processed_objects_count + current_element_index]
.m_array_index;
current_leaf.objects[current_element_index] =
input_data_vector[index_of_next_object];
++current_leaf.object_count;
}
const std::uint32_t input_object_index = input_wrapper_vector[wrapped_element_index].m_array_index;
const EdgeDataT &object = input_data_vector[input_object_index];
current_leaf.object_count += 1;
current_leaf.objects[object_index] = object;
Coordinate projected_u{web_mercator::fromWGS84(Coordinate{m_coordinate_list[object.u]})};
Coordinate projected_v{web_mercator::fromWGS84(Coordinate{m_coordinate_list[object.v]})};
rectangle.min_lon = std::min(rectangle.min_lon, std::min(projected_u.lon, projected_v.lon));
rectangle.max_lon = std::max(rectangle.max_lon, std::max(projected_u.lon, projected_v.lon));
rectangle.min_lat = std::min(rectangle.min_lat, std::min(projected_u.lat, projected_v.lat));
rectangle.max_lat = std::max(rectangle.max_lat, std::max(projected_u.lat, projected_v.lat));
BOOST_ASSERT(std::abs(static_cast<double>(toFloating(projected_u.lon))) <= 180.);
BOOST_ASSERT(std::abs(static_cast<double>(toFloating(projected_u.lat))) <= 180.);
BOOST_ASSERT(std::abs(static_cast<double>(toFloating(projected_v.lon))) <= 180.);
BOOST_ASSERT(std::abs(static_cast<double>(toFloating(projected_v.lat))) <= 180.);
BOOST_ASSERT(rectangle.min_lon != FixedLongitude(std::numeric_limits<int>::min()));
BOOST_ASSERT(rectangle.min_lat != FixedLatitude(std::numeric_limits<int>::min()));
BOOST_ASSERT(rectangle.max_lon != FixedLongitude(std::numeric_limits<int>::min()));
BOOST_ASSERT(rectangle.max_lat != FixedLatitude(std::numeric_limits<int>::min()));
}
// generate tree node that resemble the objects in leaf and store it for next level
InitializeMBRectangle(current_node.minimum_bounding_rectangle,
current_leaf.objects,
current_leaf.object_count,
m_coordinate_list);
current_node.child_is_on_disk = true;
current_node.children[0] = tree_nodes_in_level.size();
tree_nodes_in_level.emplace_back(current_node);
// append the leaf node to the current tree node
current_node.child_count += 1;
current_node.children[leaf_index] = TreeNodeIndex{node_index * BRANCHING_FACTOR + leaf_index, true};
current_node.minimum_bounding_rectangle.MergeBoundingBoxes(current_leaf.minimum_bounding_rectangle);
// write leaf_node to leaf node file
leaf_node_file.write((char *)&current_leaf, sizeof(current_leaf));
processed_objects_count += current_leaf.object_count;
}
tree_nodes_in_level.emplace_back(current_node);
}
leaf_node_file.flush();
leaf_node_file.close();
@ -238,16 +246,14 @@ class StaticRTree
{
TreeNode parent_node;
// pack BRANCHING_FACTOR elements into tree_nodes each
for (std::uint32_t current_child_node_index = 0;
BRANCHING_FACTOR > current_child_node_index;
for (std::uint32_t current_child_node_index = 0; current_child_node_index < BRANCHING_FACTOR;
++current_child_node_index)
{
if (processed_tree_nodes_in_level < tree_nodes_in_level.size())
{
TreeNode &current_child_node =
tree_nodes_in_level[processed_tree_nodes_in_level];
TreeNode &current_child_node = tree_nodes_in_level[processed_tree_nodes_in_level];
// add tree node to parent entry
parent_node.children[current_child_node_index] = m_search_tree.size();
parent_node.children[current_child_node_index] = TreeNodeIndex{m_search_tree.size(), false};
m_search_tree.emplace_back(current_child_node);
// merge MBRs
parent_node.minimum_bounding_rectangle.MergeBoundingBoxes(
@ -262,7 +268,7 @@ class StaticRTree
tree_nodes_in_level.swap(tree_nodes_in_next_level);
++processing_level;
}
BOOST_ASSERT_MSG(1 == tree_nodes_in_level.size(), "tree broken, more than one root node");
BOOST_ASSERT_MSG(tree_nodes_in_level.size() == 1, "tree broken, more than one root node");
// last remaining entry is the root node, store it
m_search_tree.emplace_back(tree_nodes_in_level[0]);
@ -270,17 +276,20 @@ class StaticRTree
std::reverse(m_search_tree.begin(), m_search_tree.end());
std::uint32_t search_tree_size = m_search_tree.size();
tbb::parallel_for(
tbb::blocked_range<std::uint32_t>(0, search_tree_size),
[this, &search_tree_size](const tbb::blocked_range<std::uint32_t> &range) {
tbb::parallel_for(tbb::blocked_range<std::uint32_t>(0, search_tree_size),
[this, &search_tree_size](const tbb::blocked_range<std::uint32_t> &range)
{
for (std::uint32_t i = range.begin(), end = range.end(); i != end; ++i)
{
TreeNode &current_tree_node = this->m_search_tree[i];
for (std::uint32_t j = 0; j < current_tree_node.child_count; ++j)
{
const std::uint32_t old_id = current_tree_node.children[j];
if (!current_tree_node.children[j].is_leaf)
{
const std::uint32_t old_id = current_tree_node.children[j].index;
const std::uint32_t new_id = search_tree_size - old_id - 1;
current_tree_node.children[j] = new_id;
current_tree_node.children[j].index = new_id;
}
}
}
});
@ -306,7 +315,7 @@ class StaticRTree
{
throw exception("ram index file does not exist");
}
if (0 == boost::filesystem::file_size(node_file))
if (boost::filesystem::file_size(node_file) == 0)
{
throw exception("ram index file is empty");
}
@ -344,8 +353,7 @@ class StaticRTree
}
catch (std::exception &exc)
{
throw exception(boost::str(boost::format("Leaf file %1% mapping failed: %2%") %
leaf_file % exc.what()));
throw exception(boost::str(boost::format("Leaf file %1% mapping failed: %2%") % leaf_file % exc.what()));
}
}
@ -354,25 +362,22 @@ class StaticRTree
std::vector<EdgeDataT> SearchInBox(const Rectangle &search_rectangle) const
{
const Rectangle projected_rectangle{
search_rectangle.min_lon,
search_rectangle.max_lon,
toFixed(FloatLatitude{
web_mercator::latToY(toFloating(FixedLatitude(search_rectangle.min_lat)))}),
toFixed(FloatLatitude{
web_mercator::latToY(toFloating(FixedLatitude(search_rectangle.max_lat)))})};
search_rectangle.min_lon, search_rectangle.max_lon,
toFixed(FloatLatitude{web_mercator::latToY(toFloating(FixedLatitude(search_rectangle.min_lat)))}),
toFixed(FloatLatitude{web_mercator::latToY(toFloating(FixedLatitude(search_rectangle.max_lat)))})};
std::vector<EdgeDataT> results;
std::queue<std::uint32_t> traversal_queue;
traversal_queue.push(0);
std::queue<TreeNodeIndex> traversal_queue;
traversal_queue.push(TreeNodeIndex{});
while (!traversal_queue.empty())
{
auto const &current_tree_node = m_search_tree[traversal_queue.front()];
auto const current_tree_index = traversal_queue.front();
traversal_queue.pop();
if (current_tree_node.child_is_on_disk)
if (current_tree_index.is_leaf)
{
const LeafNode &current_leaf_node = m_leaves[current_tree_node.children[0]];
const LeafNode &current_leaf_node = m_leaves[current_tree_index.index];
for (const auto i : irange(0u, current_leaf_node.object_count))
{
@ -380,14 +385,11 @@ class StaticRTree
// we don't need to project the coordinates here,
// because we use the unprojected rectangle to test against
const Rectangle bbox{std::min(m_coordinate_list[current_edge.u].lon,
m_coordinate_list[current_edge.v].lon),
std::max(m_coordinate_list[current_edge.u].lon,
m_coordinate_list[current_edge.v].lon),
std::min(m_coordinate_list[current_edge.u].lat,
m_coordinate_list[current_edge.v].lat),
std::max(m_coordinate_list[current_edge.u].lat,
m_coordinate_list[current_edge.v].lat)};
const Rectangle bbox{
std::min(m_coordinate_list[current_edge.u].lon, m_coordinate_list[current_edge.v].lon),
std::max(m_coordinate_list[current_edge.u].lon, m_coordinate_list[current_edge.v].lon),
std::min(m_coordinate_list[current_edge.u].lat, m_coordinate_list[current_edge.v].lat),
std::max(m_coordinate_list[current_edge.u].lat, m_coordinate_list[current_edge.v].lat)};
// use the _unprojected_ input rectangle here
if (bbox.Intersects(search_rectangle))
@ -398,13 +400,16 @@ class StaticRTree
}
else
{
const TreeNode &current_tree_node = m_search_tree[current_tree_index.index];
// If it's a tree node, look at all children and add them
// to the search queue if their bounding boxes intersect
for (std::uint32_t i = 0; i < current_tree_node.child_count; ++i)
{
const std::int32_t child_id = current_tree_node.children[i];
const auto &child_tree_node = m_search_tree[child_id];
const auto &child_rectangle = child_tree_node.minimum_bounding_rectangle;
const TreeNodeIndex child_id = current_tree_node.children[i];
const auto &child_rectangle = child_id.is_leaf
? m_leaves[child_id.index].minimum_bounding_rectangle
: m_search_tree[child_id.index].minimum_bounding_rectangle;
if (child_rectangle.Intersects(projected_rectangle))
{
@ -420,8 +425,7 @@ class StaticRTree
std::vector<EdgeDataT> Nearest(const Coordinate input_coordinate,
const std::size_t max_results) const
{
return Nearest(input_coordinate,
[](const CandidateSegment &) { return std::make_pair(true, true); },
return Nearest(input_coordinate, [](const CandidateSegment &) { return std::make_pair(true, true); },
[max_results](const std::size_t num_results, const CandidateSegment &) {
return num_results >= max_results;
});
@ -439,36 +443,30 @@ class StaticRTree
// initialize queue with root element
std::priority_queue<QueryCandidate> traversal_queue;
traversal_queue.push(QueryCandidate{0, TreeIndex{0}});
traversal_queue.push(QueryCandidate{0, TreeNodeIndex{}, TreeIndex{}});
while (!traversal_queue.empty())
{
QueryCandidate current_query_node = traversal_queue.top();
traversal_queue.pop();
const TreeNodeIndex &current_tree_index = current_query_node.index;
if (current_query_node.node.template is<TreeIndex>())
{ // current object is a tree node
const TreeNode &current_tree_node =
m_search_tree[current_query_node.node.template get<TreeIndex>().index];
if (current_tree_node.child_is_on_disk)
if (current_tree_index.is_leaf)
{
ExploreLeafNode(current_tree_node.children[0],
fixed_projected_coordinate,
projected_coordinate,
traversal_queue);
ExploreLeafNode(current_tree_index, fixed_projected_coordinate, projected_coordinate, traversal_queue);
}
else
{
ExploreTreeNode(current_tree_node, fixed_projected_coordinate, traversal_queue);
ExploreTreeNode(current_tree_index, fixed_projected_coordinate, traversal_queue);
}
}
else
{
// inspecting an actual road segment
{ // current candidate is an actual road segment
const auto &segment_index = current_query_node.node.template get<SegmentIndex>();
auto edge_data = m_leaves[segment_index.index].objects[segment_index.object];
const auto &current_candidate =
CandidateSegment{segment_index.fixed_projected_coordinate, edge_data};
auto edge_data = m_leaves[current_tree_index.index].objects[segment_index.object];
const auto &current_candidate = CandidateSegment{segment_index.fixed_projected_coordinate, edge_data};
// to allow returns of no-results if too restrictive filtering, this needs to be
// done here
@ -476,7 +474,6 @@ class StaticRTree
// first candidate
if (terminate(results.size(), current_candidate))
{
traversal_queue = std::priority_queue<QueryCandidate>{};
break;
}
@ -498,12 +495,12 @@ class StaticRTree
private:
template <typename QueueT>
void ExploreLeafNode(const std::uint32_t leaf_id,
void ExploreLeafNode(const TreeNodeIndex &leaf_id,
const Coordinate projected_input_coordinate_fixed,
const FloatCoordinate &projected_input_coordinate,
QueueT &traversal_queue) const
{
const LeafNode &current_leaf_node = m_leaves[leaf_id];
const LeafNode &current_leaf_node = m_leaves[leaf_id.index];
// current object represents a block on disk
for (const auto i : irange(0u, current_leaf_node.object_count))
@ -514,75 +511,33 @@ class StaticRTree
FloatCoordinate projected_nearest;
std::tie(std::ignore, projected_nearest) =
coordinate_calculation::projectPointOnSegment(
projected_u, projected_v, projected_input_coordinate);
coordinate_calculation::projectPointOnSegment(projected_u, projected_v, projected_input_coordinate);
const auto squared_distance = coordinate_calculation::squaredEuclideanDistance(
projected_input_coordinate_fixed, projected_nearest);
const auto squared_distance =
coordinate_calculation::squaredEuclideanDistance(projected_input_coordinate_fixed, projected_nearest);
// distance must be non-negative
BOOST_ASSERT(0. <= squared_distance);
traversal_queue.push(QueryCandidate{
squared_distance, SegmentIndex{leaf_id, i, Coordinate{projected_nearest}}});
traversal_queue.push(QueryCandidate{squared_distance, leaf_id, SegmentIndex{i, Coordinate{projected_nearest}}});
}
}
template <class QueueT>
void ExploreTreeNode(const TreeNode &parent,
void ExploreTreeNode(const TreeNodeIndex &parent_id,
const Coordinate fixed_projected_input_coordinate,
QueueT &traversal_queue) const
{
const TreeNode &parent = m_search_tree[parent_id.index];
for (std::uint32_t i = 0; i < parent.child_count; ++i)
{
const std::int32_t child_id = parent.children[i];
const auto &child_tree_node = m_search_tree[child_id];
const auto &child_rectangle = child_tree_node.minimum_bounding_rectangle;
const TreeNodeIndex child_id = parent.children[i];
const auto &child_rectangle = child_id.is_leaf
? m_leaves[child_id.index].minimum_bounding_rectangle
: m_search_tree[child_id.index].minimum_bounding_rectangle;
const auto squared_lower_bound_to_element =
child_rectangle.GetMinSquaredDist(fixed_projected_input_coordinate);
traversal_queue.push(QueryCandidate{squared_lower_bound_to_element,
TreeIndex{static_cast<std::uint32_t>(child_id)}});
traversal_queue.push(QueryCandidate{squared_lower_bound_to_element, child_id, TreeIndex{}});
}
}
template <typename CoordinateT>
void InitializeMBRectangle(Rectangle &rectangle,
const std::array<EdgeDataT, LEAF_NODE_SIZE> &objects,
const std::uint32_t element_count,
const std::vector<CoordinateT> &coordinate_list)
{
for (std::uint32_t i = 0; i < element_count; ++i)
{
BOOST_ASSERT(objects[i].u < coordinate_list.size());
BOOST_ASSERT(objects[i].v < coordinate_list.size());
Coordinate projected_u{
web_mercator::fromWGS84(Coordinate{coordinate_list[objects[i].u]})};
Coordinate projected_v{
web_mercator::fromWGS84(Coordinate{coordinate_list[objects[i].v]})};
BOOST_ASSERT(toFloating(projected_u.lon) <= FloatLongitude(180.));
BOOST_ASSERT(toFloating(projected_u.lon) >= FloatLongitude(-180.));
BOOST_ASSERT(toFloating(projected_u.lat) <= FloatLatitude(180.));
BOOST_ASSERT(toFloating(projected_u.lat) >= FloatLatitude(-180.));
BOOST_ASSERT(toFloating(projected_v.lon) <= FloatLongitude(180.));
BOOST_ASSERT(toFloating(projected_v.lon) >= FloatLongitude(-180.));
BOOST_ASSERT(toFloating(projected_v.lat) <= FloatLatitude(180.));
BOOST_ASSERT(toFloating(projected_v.lat) >= FloatLatitude(-180.));
rectangle.min_lon =
std::min(rectangle.min_lon, std::min(projected_u.lon, projected_v.lon));
rectangle.max_lon =
std::max(rectangle.max_lon, std::max(projected_u.lon, projected_v.lon));
rectangle.min_lat =
std::min(rectangle.min_lat, std::min(projected_u.lat, projected_v.lat));
rectangle.max_lat =
std::max(rectangle.max_lat, std::max(projected_u.lat, projected_v.lat));
}
BOOST_ASSERT(rectangle.min_lon != FixedLongitude(std::numeric_limits<int>::min()));
BOOST_ASSERT(rectangle.min_lat != FixedLatitude(std::numeric_limits<int>::min()));
BOOST_ASSERT(rectangle.max_lon != FixedLongitude(std::numeric_limits<int>::min()));
BOOST_ASSERT(rectangle.max_lat != FixedLatitude(std::numeric_limits<int>::min()));
}
};
//[1] "On Packing R-Trees"; I. Kamel, C. Faloutsos; 1993; DOI: 10.1145/170088.170403