Remove STXXL from OSM parsing and enable STXXL in CMake by default

This commit is contained in:
Michael Krasnyk 2017-07-10 23:26:31 +02:00 committed by Patrick Niklaus
parent 960e9178ba
commit 924a8a7e38
3 changed files with 26 additions and 109 deletions

View File

@ -21,7 +21,7 @@ option(BUILD_PACKAGE "Build OSRM package" OFF)
option(ENABLE_ASSERTIONS "Use assertions in release mode" OFF) option(ENABLE_ASSERTIONS "Use assertions in release mode" OFF)
option(ENABLE_COVERAGE "Build with coverage instrumentalisation" OFF) option(ENABLE_COVERAGE "Build with coverage instrumentalisation" OFF)
option(ENABLE_SANITIZER "Use memory sanitizer for Debug build" OFF) option(ENABLE_SANITIZER "Use memory sanitizer for Debug build" OFF)
option(ENABLE_STXXL "Use STXXL library" OFF) option(ENABLE_STXXL "Use STXXL library" ON)
option(ENABLE_LTO "Use LTO if available" OFF) option(ENABLE_LTO "Use LTO if available" OFF)
option(ENABLE_FUZZING "Fuzz testing using LLVM's libFuzzer" OFF) option(ENABLE_FUZZING "Fuzz testing using LLVM's libFuzzer" OFF)
option(ENABLE_GOLD_LINKER "Use GNU gold linker if available" ON) option(ENABLE_GOLD_LINKER "Use GNU gold linker if available" ON)

View File

@ -10,33 +10,19 @@
#include "storage/io.hpp" #include "storage/io.hpp"
#include <cstdint>
#include <unordered_map>
#if USE_STXXL_LIBRARY
#include <stxxl/vector>
#endif
namespace osrm namespace osrm
{ {
namespace extractor namespace extractor
{ {
/** /**
* Uses external memory containers from stxxl to store all the data that * Uses in-memory containers to store all the data that
* is collected by the extractor callbacks. * is collected by the extractor callbacks.
* *
* The data is then filtered, aggregated and finally written to disk. * The data is then filtered, aggregated and finally written to disk.
*/ */
class ExtractionContainers class ExtractionContainers
{ {
#if USE_STXXL_LIBRARY
template <typename T> using ExternalVector = stxxl::vector<T>;
#else
template <typename T> using ExternalVector = std::vector<T>;
#endif
void FlushVectors();
void PrepareNodes(); void PrepareNodes();
void PrepareRestrictions(); void PrepareRestrictions();
void PrepareEdges(ScriptingEnvironment &scripting_environment); void PrepareEdges(ScriptingEnvironment &scripting_environment);
@ -47,13 +33,13 @@ class ExtractionContainers
void WriteCharData(const std::string &file_name); void WriteCharData(const std::string &file_name);
public: public:
using NodeIDVector = ExternalVector<OSMNodeID>; using NodeIDVector = std::vector<OSMNodeID>;
using NodeVector = ExternalVector<QueryNode>; using NodeVector = std::vector<QueryNode>;
using EdgeVector = ExternalVector<InternalExtractorEdge>; using EdgeVector = std::vector<InternalExtractorEdge>;
using RestrictionsVector = std::vector<InputRestrictionContainer>; using RestrictionsVector = std::vector<InputRestrictionContainer>;
using WayIDStartEndVector = ExternalVector<FirstAndLastSegmentOfWay>; using WayIDStartEndVector = std::vector<FirstAndLastSegmentOfWay>;
using NameCharData = ExternalVector<unsigned char>; using NameCharData = std::vector<unsigned char>;
using NameOffsets = ExternalVector<unsigned>; using NameOffsets = std::vector<unsigned>;
std::vector<OSMNodeID> barrier_nodes; std::vector<OSMNodeID> barrier_nodes;
std::vector<OSMNodeID> traffic_lights; std::vector<OSMNodeID> traffic_lights;

View File

@ -28,27 +28,10 @@
#include <mutex> #include <mutex>
#include <sstream> #include <sstream>
#if USE_STXXL_LIBRARY
#include <stxxl/sort>
#endif
namespace namespace
{ {
namespace oe = osrm::extractor; namespace oe = osrm::extractor;
// Needed for STXXL comparison - STXXL requires max_value(), min_value(), so we can not use
// std::less<OSMNodeId>{}. Anonymous namespace to keep translation unit local.
struct OSMNodeIDLess
{
OSMNodeIDLess() {}
using value_type = OSMNodeID;
bool operator()(const value_type left, const value_type right) const { return left < right; }
value_type max_value() { return MAX_OSM_NODEID; }
value_type min_value() { return MIN_OSM_NODEID; }
};
struct CmpEdgeByOSMStartID struct CmpEdgeByOSMStartID
{ {
using value_type = oe::InternalExtractorEdge; using value_type = oe::InternalExtractorEdge;
@ -56,9 +39,6 @@ struct CmpEdgeByOSMStartID
{ {
return lhs.result.osm_source_id < rhs.result.osm_source_id; return lhs.result.osm_source_id < rhs.result.osm_source_id;
} }
value_type max_value() { return value_type::max_osm_value(); }
value_type min_value() { return value_type::min_osm_value(); }
}; };
struct CmpEdgeByOSMTargetID struct CmpEdgeByOSMTargetID
@ -68,9 +48,6 @@ struct CmpEdgeByOSMTargetID
{ {
return lhs.result.osm_target_id < rhs.result.osm_target_id; return lhs.result.osm_target_id < rhs.result.osm_target_id;
} }
value_type max_value() { return value_type::max_osm_value(); }
value_type min_value() { return value_type::min_osm_value(); }
}; };
struct CmpEdgeByInternalSourceTargetAndName struct CmpEdgeByInternalSourceTargetAndName
@ -99,7 +76,6 @@ struct CmpEdgeByInternalSourceTargetAndName
if (rhs.result.name_id == EMPTY_NAMEID) if (rhs.result.name_id == EMPTY_NAMEID)
return true; return true;
std::lock_guard<std::mutex> lock(mutex);
BOOST_ASSERT(!name_offsets.empty() && name_offsets.back() == name_data.size()); BOOST_ASSERT(!name_offsets.empty() && name_offsets.back() == name_data.size());
const oe::ExtractionContainers::NameCharData::const_iterator data = name_data.begin(); const oe::ExtractionContainers::NameCharData::const_iterator data = name_data.begin();
return std::lexicographical_compare(data + name_offsets[lhs.result.name_id], return std::lexicographical_compare(data + name_offsets[lhs.result.name_id],
@ -108,10 +84,6 @@ struct CmpEdgeByInternalSourceTargetAndName
data + name_offsets[rhs.result.name_id + 1]); data + name_offsets[rhs.result.name_id + 1]);
} }
value_type max_value() { return value_type::max_internal_value(); }
value_type min_value() { return value_type::min_internal_value(); }
std::mutex &mutex;
const oe::ExtractionContainers::NameCharData &name_data; const oe::ExtractionContainers::NameCharData &name_data;
const oe::ExtractionContainers::NameOffsets &name_offsets; const oe::ExtractionContainers::NameOffsets &name_offsets;
}; };
@ -119,26 +91,9 @@ struct CmpEdgeByInternalSourceTargetAndName
template <typename Iter> template <typename Iter>
inline NodeID mapExternalToInternalNodeID(Iter first, Iter last, const OSMNodeID value) inline NodeID mapExternalToInternalNodeID(Iter first, Iter last, const OSMNodeID value)
{ {
const OSMNodeIDLess compare; const auto it = std::lower_bound(first, last, value);
const auto it = std::lower_bound(first, last, value, compare); return (it == last || value < *it) ? SPECIAL_NODEID
return (it == last || compare(value, *it)) ? SPECIAL_NODEID : static_cast<NodeID>(std::distance(first, it));
: static_cast<NodeID>(std::distance(first, it));
}
template <typename T, typename Func> void sort_external_vector(T &vector, const Func &func)
{
#if USE_STXXL_LIBRARY
#ifndef _MSC_VER
constexpr static unsigned stxxl_memory =
((sizeof(std::size_t) == 4) ? std::numeric_limits<int>::max()
: std::numeric_limits<unsigned>::max());
#else
const static unsigned stxxl_memory = ((sizeof(std::size_t) == 4) ? INT_MAX : UINT_MAX);
#endif
stxxl::sort(vector.begin(), vector.end(), func, stxxl_memory);
#else
tbb::parallel_sort(vector.begin(), vector.end(), func);
#endif
} }
} }
@ -149,11 +104,6 @@ namespace extractor
ExtractionContainers::ExtractionContainers() ExtractionContainers::ExtractionContainers()
{ {
#if USE_STXXL_LIBRARY
// Check if stxxl can be instantiated
stxxl::vector<unsigned> dummy_vector;
#endif
// Insert empty string offsets for name, ref, destination, pronunciation, and exits // Insert empty string offsets for name, ref, destination, pronunciation, and exits
name_offsets.push_back(0); name_offsets.push_back(0);
name_offsets.push_back(0); name_offsets.push_back(0);
@ -164,18 +114,6 @@ ExtractionContainers::ExtractionContainers()
name_offsets.push_back(0); name_offsets.push_back(0);
} }
void ExtractionContainers::FlushVectors()
{
#if USE_STXXL_LIBRARY
used_node_id_list.flush();
all_nodes_list.flush();
all_edges_list.flush();
name_char_data.flush();
name_offsets.flush();
way_start_end_id_list.flush();
#endif
}
/** /**
* Processes the collected data and serializes it. * Processes the collected data and serializes it.
* At this point nodes are still referenced by their OSM id. * At this point nodes are still referenced by their OSM id.
@ -194,11 +132,11 @@ void ExtractionContainers::PrepareData(ScriptingEnvironment &scripting_environme
storage::io::FileWriter file_out(output_file_name, storage::io::FileWriter file_out(output_file_name,
storage::io::FileWriter::GenerateFingerprint); storage::io::FileWriter::GenerateFingerprint);
FlushVectors();
PrepareNodes(); PrepareNodes();
WriteNodes(file_out); WriteNodes(file_out);
PrepareEdges(scripting_environment); PrepareEdges(scripting_environment);
all_nodes_list.clear(); // free all_nodes_list before allocation of normal_edges
all_nodes_list.shrink_to_fit();
WriteEdges(file_out); WriteEdges(file_out);
PrepareRestrictions(); PrepareRestrictions();
@ -226,7 +164,7 @@ void ExtractionContainers::PrepareNodes()
util::UnbufferedLog log; util::UnbufferedLog log;
log << "Sorting used nodes ... " << std::flush; log << "Sorting used nodes ... " << std::flush;
TIMER_START(sorting_used_nodes); TIMER_START(sorting_used_nodes);
sort_external_vector(used_node_id_list, OSMNodeIDLess()); tbb::parallel_sort(used_node_id_list.begin(), used_node_id_list.end());
TIMER_STOP(sorting_used_nodes); TIMER_STOP(sorting_used_nodes);
log << "ok, after " << TIMER_SEC(sorting_used_nodes) << "s"; log << "ok, after " << TIMER_SEC(sorting_used_nodes) << "s";
} }
@ -242,22 +180,13 @@ void ExtractionContainers::PrepareNodes()
} }
{ {
struct QueryNodeCompare
{
using value_type = QueryNode;
value_type max_value() { return value_type::max_value(); }
value_type min_value() { return value_type::min_value(); }
bool operator()(const value_type &left, const value_type &right) const
{
return left.node_id < right.node_id;
}
};
util::UnbufferedLog log; util::UnbufferedLog log;
log << "Sorting all nodes ... " << std::flush; log << "Sorting all nodes ... " << std::flush;
TIMER_START(sorting_nodes); TIMER_START(sorting_nodes);
sort_external_vector(all_nodes_list, QueryNodeCompare()); tbb::parallel_sort(
all_nodes_list.begin(), all_nodes_list.end(), [](const auto &left, const auto &right) {
return left.node_id < right.node_id;
});
TIMER_STOP(sorting_nodes); TIMER_STOP(sorting_nodes);
log << "ok, after " << TIMER_SEC(sorting_nodes) << "s"; log << "ok, after " << TIMER_SEC(sorting_nodes) << "s";
} }
@ -313,7 +242,7 @@ void ExtractionContainers::PrepareEdges(ScriptingEnvironment &scripting_environm
util::UnbufferedLog log; util::UnbufferedLog log;
log << "Sorting edges by start ... " << std::flush; log << "Sorting edges by start ... " << std::flush;
TIMER_START(sort_edges_by_start); TIMER_START(sort_edges_by_start);
sort_external_vector(all_edges_list, CmpEdgeByOSMStartID()); tbb::parallel_sort(all_edges_list.begin(), all_edges_list.end(), CmpEdgeByOSMStartID());
TIMER_STOP(sort_edges_by_start); TIMER_STOP(sort_edges_by_start);
log << "ok, after " << TIMER_SEC(sort_edges_by_start) << "s"; log << "ok, after " << TIMER_SEC(sort_edges_by_start) << "s";
} }
@ -384,7 +313,7 @@ void ExtractionContainers::PrepareEdges(ScriptingEnvironment &scripting_environm
util::UnbufferedLog log; util::UnbufferedLog log;
log << "Sorting edges by target ... " << std::flush; log << "Sorting edges by target ... " << std::flush;
TIMER_START(sort_edges_by_target); TIMER_START(sort_edges_by_target);
sort_external_vector(all_edges_list, CmpEdgeByOSMTargetID()); tbb::parallel_sort(all_edges_list.begin(), all_edges_list.end(), CmpEdgeByOSMTargetID());
TIMER_STOP(sort_edges_by_target); TIMER_STOP(sort_edges_by_target);
log << "ok, after " << TIMER_SEC(sort_edges_by_target) << "s"; log << "ok, after " << TIMER_SEC(sort_edges_by_target) << "s";
} }
@ -487,9 +416,9 @@ void ExtractionContainers::PrepareEdges(ScriptingEnvironment &scripting_environm
log << "Sorting edges by renumbered start ... "; log << "Sorting edges by renumbered start ... ";
TIMER_START(sort_edges_by_renumbered_start); TIMER_START(sort_edges_by_renumbered_start);
std::mutex name_data_mutex; std::mutex name_data_mutex;
sort_external_vector( tbb::parallel_sort(all_edges_list.begin(),
all_edges_list, all_edges_list.end(),
CmpEdgeByInternalSourceTargetAndName{name_data_mutex, name_char_data, name_offsets}); CmpEdgeByInternalSourceTargetAndName{name_char_data, name_offsets});
TIMER_STOP(sort_edges_by_renumbered_start); TIMER_STOP(sort_edges_by_renumbered_start);
log << "ok, after " << TIMER_SEC(sort_edges_by_renumbered_start) << "s"; log << "ok, after " << TIMER_SEC(sort_edges_by_renumbered_start) << "s";
} }
@ -749,7 +678,9 @@ void ExtractionContainers::PrepareRestrictions()
util::UnbufferedLog log; util::UnbufferedLog log;
log << "Sorting used ways ... "; log << "Sorting used ways ... ";
TIMER_START(sort_ways); TIMER_START(sort_ways);
sort_external_vector(way_start_end_id_list, FirstAndLastSegmentOfWayCompare()); tbb::parallel_sort(way_start_end_id_list.begin(),
way_start_end_id_list.end(),
FirstAndLastSegmentOfWayCompare());
TIMER_STOP(sort_ways); TIMER_STOP(sort_ways);
log << "ok, after " << TIMER_SEC(sort_ways) << "s"; log << "ok, after " << TIMER_SEC(sort_ways) << "s";
} }