Do not generate intermediate .osrm file in osrm-extract. (#6354)

This commit is contained in:
Siarhei Fedartsou
2022-09-30 14:29:10 +02:00
committed by GitHub
parent 395cc6e9df
commit 21888334dd
19 changed files with 193 additions and 184 deletions
+63 -95
View File
@@ -5,8 +5,8 @@
#include "extractor/name_table.hpp"
#include "extractor/restriction.hpp"
#include "extractor/serialization.hpp"
#include "util/coordinate_calculation.hpp"
#include "util/integer_range.hpp"
#include "util/exception.hpp"
#include "util/exception_utils.hpp"
@@ -16,6 +16,7 @@
#include "util/timing_util.hpp"
#include <boost/assert.hpp>
#include <boost/core/ignore_unused.hpp>
#include <boost/numeric/conversion/cast.hpp>
#include <tbb/parallel_sort.h>
@@ -407,22 +408,14 @@ ExtractionContainers::ExtractionContainers()
*
*/
void ExtractionContainers::PrepareData(ScriptingEnvironment &scripting_environment,
const std::string &osrm_path,
const std::string &name_file_name)
{
storage::tar::FileWriter writer(osrm_path, storage::tar::FileWriter::GenerateFingerprint);
const auto restriction_ways = IdentifyRestrictionWays();
const auto maneuver_override_ways = IdentifyManeuverOverrideWays();
const auto traffic_signals = IdentifyTrafficSignals();
PrepareNodes();
WriteNodes(writer);
PrepareEdges(scripting_environment);
all_nodes_list.clear(); // free all_nodes_list before allocation of normal_edges
all_nodes_list.shrink_to_fit();
WriteEdges(writer);
WriteMetadata(writer);
PrepareTrafficSignals(traffic_signals);
PrepareManeuverOverrides(maneuver_override_ways);
@@ -519,6 +512,60 @@ void ExtractionContainers::PrepareNodes()
TIMER_STOP(id_map);
log << "ok, after " << TIMER_SEC(id_map) << "s";
}
{
util::UnbufferedLog log;
log << "Confirming/Writing used nodes ... ";
TIMER_START(write_nodes);
// identify all used nodes by a merging step of two sorted lists
auto node_iterator = all_nodes_list.begin();
auto node_id_iterator = used_node_id_list.begin();
const auto all_nodes_list_end = all_nodes_list.end();
for (const auto index : util::irange<NodeID>(0, used_node_id_list.size()))
{
boost::ignore_unused(index);
BOOST_ASSERT(node_id_iterator != used_node_id_list.end());
BOOST_ASSERT(node_iterator != all_nodes_list_end);
BOOST_ASSERT(*node_id_iterator >= node_iterator->node_id);
while (*node_id_iterator > node_iterator->node_id &&
node_iterator != all_nodes_list_end)
{
++node_iterator;
}
if (node_iterator == all_nodes_list_end || *node_id_iterator < node_iterator->node_id)
{
throw util::exception(
"Invalid OSM data: Referenced non-existing node with ID " +
std::to_string(static_cast<std::uint64_t>(*node_id_iterator)));
}
BOOST_ASSERT(*node_id_iterator == node_iterator->node_id);
++node_id_iterator;
used_nodes.emplace_back(*node_iterator++);
}
TIMER_STOP(write_nodes);
log << "ok, after " << TIMER_SEC(write_nodes) << "s";
}
{
util::UnbufferedLog log;
log << "Writing barrier nodes ... ";
TIMER_START(write_nodes);
for (const auto osm_id : barrier_nodes)
{
const auto node_id = mapExternalToInternalNodeID(
used_node_id_list.begin(), used_node_id_list.end(), osm_id);
if (node_id != SPECIAL_NODEID)
{
used_barrier_nodes.emplace(node_id);
}
}
log << "ok, after " << TIMER_SEC(write_nodes) << "s";
}
util::Log() << "Processed " << max_internal_node_id << " nodes";
}
void ExtractionContainers::PrepareEdges(ScriptingEnvironment &scripting_environment)
@@ -804,12 +851,11 @@ void ExtractionContainers::PrepareEdges(ScriptingEnvironment &scripting_environm
all_edges_list[j].result.target = SPECIAL_NODEID;
}
}
}
void ExtractionContainers::WriteEdges(storage::tar::FileWriter &writer) const
{
std::vector<NodeBasedEdge> normal_edges;
normal_edges.reserve(all_edges_list.size());
all_nodes_list.clear(); // free all_nodes_list before allocation of used_edges
all_nodes_list.shrink_to_fit();
used_edges.reserve(all_edges_list.size());
{
util::UnbufferedLog log;
log << "Writing used edges ... " << std::flush;
@@ -825,98 +871,20 @@ void ExtractionContainers::WriteEdges(storage::tar::FileWriter &writer) const
// IMPORTANT: here, we're using slicing to only write the data from the base
// class of NodeBasedEdgeWithOSM
normal_edges.push_back(edge.result);
used_edges.push_back(edge.result);
}
if (normal_edges.size() > std::numeric_limits<uint32_t>::max())
if (used_edges.size() > std::numeric_limits<uint32_t>::max())
{
throw util::exception("There are too many edges, OSRM only supports 2^32" + SOURCE_REF);
}
storage::serialization::write(writer, "/extractor/edges", normal_edges);
TIMER_STOP(write_edges);
log << "ok, after " << TIMER_SEC(write_edges) << "s";
log << " -- Processed " << normal_edges.size() << " edges";
log << " -- Processed " << used_edges.size() << " edges";
}
}
// Serialises `all_edges_annotation_data_list` into the "/extractor/annotations"
// entry of `writer`, then logs the elapsed time and the number of entries written.
void ExtractionContainers::WriteMetadata(storage::tar::FileWriter &writer) const
{
    util::UnbufferedLog log;
    log << "Writing way meta-data ... " << std::flush;
    TIMER_START(write_meta_data);

    storage::serialization::write(writer, "/extractor/annotations", all_edges_annotation_data_list);

    TIMER_STOP(write_meta_data);
    log << "ok, after " << TIMER_SEC(write_meta_data) << "s";
    // The literal previously contained a stray "<<" that was printed verbatim in the log line.
    log << " -- Metadata contains " << all_edges_annotation_data_list.size() << " entries.";
}
// Writes the used nodes and the barrier nodes into `writer`.
//
// Used nodes: `all_nodes_list` and `used_node_id_list` are walked in lock-step (a merge
// of two sorted lists); every id in `used_node_id_list` must have a matching entry in
// `all_nodes_list`. The matching nodes are streamed to "/extractor/nodes".
//
// Barrier nodes: every OSM id in `barrier_nodes` is translated through
// mapExternalToInternalNodeID; ids that map to SPECIAL_NODEID are dropped, the rest are
// written to "/extractor/barriers".
//
// Throws util::exception when a used node id has no corresponding node.
void ExtractionContainers::WriteNodes(storage::tar::FileWriter &writer) const
{
    {
        util::UnbufferedLog log;
        log << "Confirming/Writing used nodes ... ";
        TIMER_START(write_nodes);
        // identify all used nodes by a merging step of two sorted lists
        auto node_iterator = all_nodes_list.begin();
        auto node_id_iterator = used_node_id_list.begin();
        const auto all_nodes_list_end = all_nodes_list.end();

        // Produces the next used node on each call; invoked exactly
        // used_node_id_list.size() times by WriteStreaming below.
        const std::function<QueryNode()> encode_function = [&]() -> QueryNode {
            BOOST_ASSERT(node_id_iterator != used_node_id_list.end());
            BOOST_ASSERT(node_iterator != all_nodes_list_end);
            BOOST_ASSERT(*node_id_iterator >= node_iterator->node_id);

            // Check for the end of the list *before* dereferencing node_iterator:
            // the previous operand order read through the end iterator whenever the
            // wanted id was larger than every remaining node.
            while (node_iterator != all_nodes_list_end &&
                   *node_id_iterator > node_iterator->node_id)
            {
                ++node_iterator;
            }

            if (node_iterator == all_nodes_list_end || *node_id_iterator < node_iterator->node_id)
            {
                throw util::exception(
                    "Invalid OSM data: Referenced non-existing node with ID " +
                    std::to_string(static_cast<std::uint64_t>(*node_id_iterator)));
            }

            BOOST_ASSERT(*node_id_iterator == node_iterator->node_id);

            ++node_id_iterator;
            return *node_iterator++;
        };

        writer.WriteElementCount64("/extractor/nodes", used_node_id_list.size());
        writer.WriteStreaming<QueryNode>(
            "/extractor/nodes",
            boost::make_function_input_iterator(encode_function, boost::infinite()),
            used_node_id_list.size());

        TIMER_STOP(write_nodes);
        log << "ok, after " << TIMER_SEC(write_nodes) << "s";
    }

    {
        util::UnbufferedLog log;
        log << "Writing barrier nodes ... ";
        TIMER_START(write_nodes);

        std::vector<NodeID> internal_barrier_nodes;
        for (const auto osm_id : barrier_nodes)
        {
            const auto node_id = mapExternalToInternalNodeID(
                used_node_id_list.begin(), used_node_id_list.end(), osm_id);
            // SPECIAL_NODEID is skipped — presumably it marks an OSM id that is not
            // among the used nodes (confirm against mapExternalToInternalNodeID).
            if (node_id != SPECIAL_NODEID)
            {
                internal_barrier_nodes.push_back(node_id);
            }
        }
        storage::serialization::write(writer, "/extractor/barriers", internal_barrier_nodes);

        // Stop the timer before reporting it; it was previously never stopped in this scope.
        TIMER_STOP(write_nodes);
        log << "ok, after " << TIMER_SEC(write_nodes) << "s";
    }

    util::Log() << "Processed " << max_internal_node_id << " nodes";
}
ExtractionContainers::ReferencedWays ExtractionContainers::IdentifyManeuverOverrideWays()
{
ReferencedWays maneuver_override_ways;
+52 -31
View File
@@ -204,12 +204,7 @@ int Extractor::run(ScriptingEnvironment &scripting_environment)
tbb::global_control gc(tbb::global_control::max_allowed_parallelism,
config.requested_num_threads);
LaneDescriptionMap turn_lane_map;
std::vector<TurnRestriction> turn_restrictions;
std::vector<UnresolvedManeuverOverride> unresolved_maneuver_overrides;
TrafficSignals traffic_signals;
std::tie(turn_lane_map, turn_restrictions, unresolved_maneuver_overrides, traffic_signals) =
ParseOSMData(scripting_environment, number_of_threads);
auto parsed_osm_data = ParseOSMData(scripting_environment, number_of_threads);
// Transform the node-based graph that OSM is based on into an edge-based graph
// that is better for routing. Every edge becomes a node, and every valid
@@ -227,11 +222,15 @@ int Extractor::run(ScriptingEnvironment &scripting_environment)
std::uint32_t ebg_connectivity_checksum = 0;
// Create a node-based graph from the OSRM file
NodeBasedGraphFactory node_based_graph_factory(config.GetPath(".osrm"),
scripting_environment,
turn_restrictions,
unresolved_maneuver_overrides,
traffic_signals);
NodeBasedGraphFactory node_based_graph_factory(scripting_environment,
parsed_osm_data.turn_restrictions,
parsed_osm_data.unresolved_maneuver_overrides,
parsed_osm_data.traffic_signals,
std::move(parsed_osm_data.barriers),
std::move(parsed_osm_data.osm_coordinates),
std::move(parsed_osm_data.osm_node_ids),
parsed_osm_data.edge_list,
std::move(parsed_osm_data.annotation_data));
NameTable name_table;
files::readNames(config.GetPath(".osrm.names"), name_table);
@@ -270,10 +269,11 @@ int Extractor::run(ScriptingEnvironment &scripting_environment)
edge_based_nodes_container =
EdgeBasedNodeDataContainer({}, std::move(node_based_graph_factory.GetAnnotationData()));
turn_restrictions = removeInvalidTurnPaths(std::move(turn_restrictions), node_based_graph);
unresolved_maneuver_overrides =
removeInvalidTurnPaths(std::move(unresolved_maneuver_overrides), node_based_graph);
auto restriction_graph = constructRestrictionGraph(turn_restrictions);
parsed_osm_data.turn_restrictions =
removeInvalidTurnPaths(std::move(parsed_osm_data.turn_restrictions), node_based_graph);
parsed_osm_data.unresolved_maneuver_overrides = removeInvalidTurnPaths(
std::move(parsed_osm_data.unresolved_maneuver_overrides), node_based_graph);
auto restriction_graph = constructRestrictionGraph(parsed_osm_data.turn_restrictions);
const auto number_of_node_based_nodes = node_based_graph.GetNumberOfNodes();
@@ -282,12 +282,12 @@ int Extractor::run(ScriptingEnvironment &scripting_environment)
coordinates,
node_based_graph_factory.GetCompressedEdges(),
barrier_nodes,
traffic_signals,
parsed_osm_data.traffic_signals,
restriction_graph,
segregated_edges,
name_table,
unresolved_maneuver_overrides,
turn_lane_map,
parsed_osm_data.unresolved_maneuver_overrides,
parsed_osm_data.turn_lane_map,
scripting_environment,
edge_based_nodes_container,
edge_based_node_segments,
@@ -304,7 +304,7 @@ int Extractor::run(ScriptingEnvironment &scripting_environment)
barrier_nodes,
restriction_graph,
name_table,
std::move(turn_lane_map),
std::move(parsed_osm_data.turn_lane_map),
scripting_environment);
TIMER_STOP(expansion);
@@ -356,17 +356,13 @@ int Extractor::run(ScriptingEnvironment &scripting_environment)
util::Log() << "Expansion: " << nodes_per_second << " nodes/sec and " << edges_per_second
<< " edges/sec";
util::Log() << "To prepare the data for routing, run: "
<< "./osrm-contract " << config.GetPath(".osrm");
<< "./osrm-contract " << config.base_path;
return 0;
}
std::tuple<LaneDescriptionMap,
std::vector<TurnRestriction>,
std::vector<UnresolvedManeuverOverride>,
TrafficSignals>
Extractor::ParseOSMData(ScriptingEnvironment &scripting_environment,
const unsigned number_of_threads)
Extractor::ParsedOSMData Extractor::ParseOSMData(ScriptingEnvironment &scripting_environment,
const unsigned number_of_threads)
{
TIMER_START(extracting);
@@ -617,7 +613,6 @@ Extractor::ParseOSMData(ScriptingEnvironment &scripting_environment,
}
extraction_containers.PrepareData(scripting_environment,
config.GetPath(".osrm").string(),
config.GetPath(".osrm.names").string());
auto profile_properties = scripting_environment.GetProfileProperties();
@@ -629,10 +624,36 @@ Extractor::ParseOSMData(ScriptingEnvironment &scripting_environment,
TIMER_STOP(extracting);
util::Log() << "extraction finished after " << TIMER_SEC(extracting) << "s";
return std::make_tuple(std::move(turn_lane_map),
std::move(extraction_containers.turn_restrictions),
std::move(extraction_containers.internal_maneuver_overrides),
std::move(extraction_containers.internal_traffic_signals));
std::vector<util::Coordinate> osm_coordinates;
extractor::PackedOSMIDs osm_node_ids;
osm_coordinates.resize(extraction_containers.used_nodes.size());
osm_node_ids.reserve(extraction_containers.used_nodes.size());
for (size_t index = 0; index < extraction_containers.used_nodes.size(); ++index)
{
const auto &current_node = extraction_containers.used_nodes[index];
osm_coordinates[index].lon = current_node.lon;
osm_coordinates[index].lat = current_node.lat;
osm_node_ids.push_back(current_node.node_id);
}
if (config.dump_nbg_graph)
{
storage::tar::FileWriter writer(config.GetPath(".osrm.nbg").string(),
storage::tar::FileWriter::GenerateFingerprint);
storage::serialization::write(writer, "/extractor/nodes", extraction_containers.used_nodes);
storage::serialization::write(writer, "/extractor/edges", extraction_containers.used_edges);
}
return ParsedOSMData{std::move(turn_lane_map),
std::move(extraction_containers.turn_restrictions),
std::move(extraction_containers.internal_maneuver_overrides),
std::move(extraction_containers.internal_traffic_signals),
std::move(extraction_containers.used_barrier_nodes),
std::move(osm_coordinates),
std::move(osm_node_ids),
std::move(extraction_containers.used_edges),
std::move(extraction_containers.all_edges_annotation_data_list)};
}
void Extractor::FindComponents(unsigned number_of_edge_based_nodes,
+11 -13
View File
@@ -16,32 +16,30 @@ namespace extractor
{
NodeBasedGraphFactory::NodeBasedGraphFactory(
const boost::filesystem::path &input_file,
ScriptingEnvironment &scripting_environment,
std::vector<TurnRestriction> &turn_restrictions,
std::vector<UnresolvedManeuverOverride> &maneuver_overrides,
const TrafficSignals &traffic_signals)
const TrafficSignals &traffic_signals,
std::unordered_set<NodeID> &&barriers,
std::vector<util::Coordinate> &&coordinates,
extractor::PackedOSMIDs &&osm_node_ids,
const std::vector<NodeBasedEdge> &edge_list,
std::vector<NodeBasedEdgeAnnotation> &&annotation_data)
: annotation_data(std::move(annotation_data)), barriers(std::move(barriers)),
coordinates(std::move(coordinates)), osm_node_ids(std::move(osm_node_ids))
{
LoadDataFromFile(input_file);
BuildCompressedOutputGraph(edge_list);
Compress(scripting_environment, turn_restrictions, maneuver_overrides, traffic_signals);
CompressGeometry();
CompressAnnotationData();
}
// load the data serialised during the extraction run
void NodeBasedGraphFactory::LoadDataFromFile(const boost::filesystem::path &input_file)
void NodeBasedGraphFactory::BuildCompressedOutputGraph(const std::vector<NodeBasedEdge> &edge_list)
{
auto barriers_iter = inserter(barriers, end(barriers));
std::vector<NodeBasedEdge> edge_list;
files::readRawNBGraph(
input_file, barriers_iter, coordinates, osm_node_ids, edge_list, annotation_data);
const auto number_of_node_based_nodes = coordinates.size();
if (edge_list.empty())
{
throw util::exception("Node-based-graph (" + input_file.string() + ") contains no edges." +
SOURCE_REF);
throw util::exception("Node-based-graph contains no edges." + SOURCE_REF);
}
// at this point, the data isn't compressed, but since we update the graph in-place, we assign
+3 -3
View File
@@ -55,8 +55,8 @@ NAN_MODULE_INIT(Engine::Init)
// clang-format off
/**
* The `OSRM` method is the main constructor for creating an OSRM instance.
* An OSRM instance requires a `.osrm` dataset, which is prepared by the OSRM toolchain.
* You can create such a `.osrm` file by running the OSRM binaries we ship in `node_modules/osrm/lib/binding/` and default
* An OSRM instance requires a `.osrm.*` dataset (`.osrm.*` because it contains several files), which is prepared by the OSRM toolchain.
* You can create such a `.osrm.*` dataset by running the OSRM binaries we ship in `node_modules/osrm/lib/binding/` and default
* profiles (e.g. for setting speeds and determining road types to route on) in `node_modules/osrm/profiles/`:
*
* node_modules/osrm/lib/binding/osrm-extract data.osm.pbf -p node_modules/osrm/profiles/car.lua
@@ -64,7 +64,7 @@ NAN_MODULE_INIT(Engine::Init)
*
* Consult the [osrm-backend](https://github.com/Project-OSRM/osrm-backend) documentation for further details.
*
* Once you have a complete `network.osrm` file, you can calculate routes in javascript with this object.
* Once you have a complete `network.osrm.*` dataset, you can calculate routes in javascript with this object.
*
* ```javascript
* var osrm = new OSRM('network.osrm');
+1 -5
View File
@@ -39,12 +39,8 @@ std::size_t loadGraph(const std::string &path,
std::vector<TarjanEdge> &graph_edge_list)
{
std::vector<extractor::NodeBasedEdge> edge_list;
std::vector<extractor::NodeBasedEdgeAnnotation> annotation_data;
auto nop = boost::make_function_output_iterator([](auto) {});
extractor::files::readRawNBGraph(
path, nop, coordinate_list, osm_node_ids, edge_list, annotation_data);
extractor::files::readRawNBGraph(path, coordinate_list, osm_node_ids, edge_list);
// Building a node-based graph
for (const auto &input_edge : edge_list)
+1 -1
View File
@@ -77,7 +77,7 @@ return_code parseArguments(int argc,
hidden_options.add_options()(
"input,i",
boost::program_options::value<boost::filesystem::path>(&customization_config.base_path),
"Input file in .osrm format");
"Input base file path");
// positional option
boost::program_options::positional_options_description positional_options;
+6 -1
View File
@@ -74,7 +74,12 @@ return_code parseArguments(int argc,
boost::program_options::bool_switch(&extractor_config.use_locations_cache)
->implicit_value(false)
->default_value(true),
"Use internal nodes locations cache for location-dependent data lookups");
"Use internal nodes locations cache for location-dependent data lookups")(
"dump-nbg-graph",
boost::program_options::bool_switch(&extractor_config.dump_nbg_graph)
->implicit_value(true)
->default_value(false),
"Dump raw node-based graph to *.osrm file for debug purposes.");
bool dummy;
// hidden options, will be allowed on command line, but will not be
+1 -1
View File
@@ -119,7 +119,7 @@ return_code parseArguments(int argc,
hidden_options.add_options()(
"input,i",
boost::program_options::value<boost::filesystem::path>(&config.base_path),
"Input file in .osrm format");
"Input base file path");
// positional option
boost::program_options::positional_options_description positional_options;