diff --git a/include/updater/csv_file_parser.hpp b/include/updater/csv_file_parser.hpp new file mode 100644 index 000000000..40010b1c3 --- /dev/null +++ b/include/updater/csv_file_parser.hpp @@ -0,0 +1,134 @@ +#ifndef OSRM_UPDATER_CSV_FILE_PARSER_HPP +#define OSRM_UPDATER_CSV_FILE_PARSER_HPP + +#include "updater/source.hpp" + +#include "util/log.hpp" +#include "util/exception.hpp" +#include "util/exception_utils.hpp" + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +namespace osrm +{ +namespace updater +{ + +namespace +{ +namespace qi = boost::spirit::qi; +} + +// Functor to parse a list of CSV files using "key,value,comment" grammar. +// Key and Value structures must be a model of Random Access Sequence. +// Also the Value structure must have source member that will be filled +// with the corresponding file index in the CSV filenames vector. +template struct CSVFilesParser +{ + using Iterator = boost::spirit::line_pos_iterator; + using KeyRule = qi::rule; + using ValueRule = qi::rule; + + CSVFilesParser(std::size_t start_index, const KeyRule &key_rule, const ValueRule &value_rule) + : start_index(start_index), key_rule(key_rule), value_rule(value_rule) + { + } + + // Operator returns a LookupTable functor that maps an input Key to boost::optional. 
+ auto operator()(const std::vector &csv_filenames) const + { + try + { + tbb::spin_mutex mutex; + std::vector> lookup; + tbb::parallel_for(std::size_t{0}, csv_filenames.size(), [&](const std::size_t idx) { + auto local = ParseCSVFile(csv_filenames[idx], start_index + idx); + + { // Merge local CSV results into a flat global vector + tbb::spin_mutex::scoped_lock _{mutex}; + lookup.insert(end(lookup), + std::make_move_iterator(begin(local)), + std::make_move_iterator(end(local))); + } + }); + + // With flattened map-ish view of all the files, make a stable sort on key and source + // and unique them on key to keep only the value with the largest file index. + // NOTE(review): for equal (key, source) the stable sort keeps the FIRST line of a file. + // The operands order is swapped to make descending ordering on (key, source) + std::stable_sort(begin(lookup), end(lookup), [](const auto &lhs, const auto &rhs) { + return rhs.first < lhs.first || + (rhs.first == lhs.first && rhs.second.source < lhs.second.source); + }); + + // Unique only on key to take the source precedence into account and remove duplicates. 
+ const auto it = + std::unique(begin(lookup), end(lookup), [](const auto &lhs, const auto &rhs) { + return lhs.first == rhs.first; + }); + lookup.erase(it, end(lookup)); + + util::Log() << "In total loaded " << csv_filenames.size() + << " file(s) with a total of " << lookup.size() << " unique values"; + + return LookupTable{std::move(lookup)}; + } + catch (const tbb::captured_exception &e) + { + throw util::exception(e.what() + SOURCE_REF); + } + } + + private: + // Parse a single CSV file and return result as a vector. + // Throws util::exception if the file cannot be opened or is malformed. + auto ParseCSVFile(const std::string &filename, std::size_t file_id) const + { + std::ifstream input_stream(filename, std::ios::binary); + if (!input_stream) + { + throw util::exception("Unable to open CSV file " + filename + SOURCE_REF); + } + input_stream.unsetf(std::ios::skipws); + + boost::spirit::istream_iterator sfirst(input_stream), slast; + Iterator first(sfirst), last(slast); + + BOOST_ASSERT(file_id <= std::numeric_limits::max()); + ValueRule value_source = + value_rule[qi::_val = qi::_1, boost::phoenix::bind(&Value::source, qi::_val) = file_id]; + qi::rule()> csv_line = + (key_rule >> ',' >> value_source) >> -(',' >> *(qi::char_ - qi::eol)); + std::vector> result; + const auto ok = qi::parse(first, last, -(csv_line % qi::eol) >> *qi::eol, result); + + if (!ok || first != last) + { + const auto message = + boost::format("CSV file %1% malformed on line %2%") % filename % first.position(); + throw util::exception(message.str() + SOURCE_REF); + } + + util::Log() << "Loaded " << filename << " with " << result.size() << " values"; + + return result; + } + + const std::size_t start_index; + const KeyRule key_rule; + const ValueRule value_rule; +}; +} +} + +#endif diff --git a/include/updater/csv_source.hpp b/include/updater/csv_source.hpp new file mode 100644 index 000000000..f60b5979b --- /dev/null +++ b/include/updater/csv_source.hpp @@ -0,0 +1,21 @@ +#ifndef OSRM_UPDATER_CSV_SOURCE_HPP +#define OSRM_UPDATER_CSV_SOURCE_HPP + +#include "updater/source.hpp" + +namespace osrm +{ +namespace updater +{ +namespace csv +{ + // Reads "from,to,speed[,weight][,comment]" rows; on duplicate keys the later file wins. + SegmentLookupTable 
readSegmentValues(const std::vector &paths); + // Reads "from,via,to,duration[,weight][,comment]" rows; on duplicate keys the later file wins. + TurnLookupTable readTurnValues(const std::vector &paths); +} +} +} + + +#endif diff --git a/include/updater/source.hpp b/include/updater/source.hpp new file mode 100644 index 000000000..4a9abb1cf --- /dev/null +++ b/include/updater/source.hpp @@ -0,0 +1,98 @@ +#ifndef OSRM_UPDATER_SOURCE_HPP +#define OSRM_UPDATER_SOURCE_HPP + +#include "util/typedefs.hpp" + +#include + +#include + +namespace osrm +{ +namespace updater +{ + +// Sorted key -> value table. `lookup` must be ordered by DESCENDING key (as CSVFilesParser +// produces it), because operator() binary-searches it with a reversed comparator. +template struct LookupTable +{ + boost::optional operator()(const Key &key) const + { + using Result = boost::optional; + const auto it = std::lower_bound( + lookup.begin(), lookup.end(), key, [](const auto &lhs, const auto &rhs) { + return rhs < lhs.first; + }); + return it != std::end(lookup) && !(it->first < key) ? Result(it->second) : Result(); + } + + std::vector> lookup; +}; + +struct Segment final +{ + std::uint64_t from, to; + Segment() : from(0), to(0) {} + Segment(const std::uint64_t from, const std::uint64_t to) : from(from), to(to) {} + Segment(const OSMNodeID from, const OSMNodeID to) + : from(static_cast(from)), to(static_cast(to)) + { + } + + bool operator<(const Segment &rhs) const + { + return std::tie(from, to) < std::tie(rhs.from, rhs.to); + } + bool operator==(const Segment &rhs) const + { + return std::tie(from, to) == std::tie(rhs.from, rhs.to); + } +}; + +struct SpeedSource final +{ + SpeedSource() : speed(0), weight(std::numeric_limits::quiet_NaN()), source(0) {} + unsigned speed; + double weight; + std::uint8_t source; +}; + +struct Turn final +{ + std::uint64_t from, via, to; + Turn() : from(0), via(0), to(0) {} + Turn(const std::uint64_t from, const std::uint64_t via, const std::uint64_t to) + : from(from), via(via), to(to) + { + } + template + Turn(const Other &turn) + : from(static_cast(turn.from_id)), + via(static_cast(turn.via_id)), to(static_cast(turn.to_id)) + { + } + bool operator<(const Turn &rhs) const + { + return std::tie(from, via, to) < std::tie(rhs.from, rhs.via, 
rhs.to); + } + bool operator==(const Turn &rhs) const + { + return std::tie(from, via, to) == std::tie(rhs.from, rhs.via, rhs.to); + } +}; + +struct PenaltySource final +{ + PenaltySource() : duration(0.), weight(std::numeric_limits::quiet_NaN()), source(0) {} + double duration; + double weight; + std::uint8_t source; +}; + +using SegmentLookupTable = LookupTable; +using TurnLookupTable = LookupTable; + +} +} + +#endif diff --git a/src/updater/csv_source.cpp b/src/updater/csv_source.cpp new file mode 100644 index 000000000..383911d76 --- /dev/null +++ b/src/updater/csv_source.cpp @@ -0,0 +1,49 @@ +#include "updater/csv_source.hpp" + +#include "updater/csv_file_parser.hpp" + +#include +#include + +BOOST_FUSION_ADAPT_STRUCT(osrm::updater::Segment, + (decltype(osrm::updater::Segment::from), + from)(decltype(osrm::updater::Segment::to), to)) +BOOST_FUSION_ADAPT_STRUCT(osrm::updater::SpeedSource, + (decltype(osrm::updater::SpeedSource::speed), + speed)(decltype(osrm::updater::SpeedSource::weight), weight)) +BOOST_FUSION_ADAPT_STRUCT(osrm::updater::Turn, + (decltype(osrm::updater::Turn::from), + from)(decltype(osrm::updater::Turn::via), + via)(decltype(osrm::updater::Turn::to), to)) +BOOST_FUSION_ADAPT_STRUCT(osrm::updater::PenaltySource, + (decltype(osrm::updater::PenaltySource::duration), + duration)(decltype(osrm::updater::PenaltySource::weight), weight)) +namespace +{ +namespace qi = boost::spirit::qi; +} + +namespace osrm +{ +namespace updater +{ +namespace csv +{ +SegmentLookupTable readSegmentValues(const std::vector &paths) +{ + return CSVFilesParser( + 1, qi::ulong_long >> ',' >> qi::ulong_long, qi::uint_ >> -(',' >> qi::double_))(paths); +} + +TurnLookupTable readTurnValues(const std::vector &paths) +{ + // NOTE(review): before this refactor the turn parser started at index + // 1 + segment_speed_lookup_paths.size(); confirm that source ids may now restart at 1. + return CSVFilesParser(1, + qi::ulong_long >> ',' >> qi::ulong_long >> ',' >> + qi::ulong_long, + qi::double_ >> -(',' >> qi::double_))(paths); +} +} +} +} diff --git a/src/updater/updater.cpp b/src/updater/updater.cpp index 97bb4a9e2..32ee19921 100644 --- 
a/src/updater/updater.cpp +++ b/src/updater/updater.cpp @@ -1,5 +1,7 @@ #include "updater/updater.hpp" +#include "updater/csv_source.hpp" + #include "extractor/compressed_edge_container.hpp" #include "extractor/edge_based_graph_factory.hpp" #include "extractor/node_based_edge.hpp" @@ -18,24 +20,16 @@ #include "util/typedefs.hpp" #include +#include +#include #include #include -#include -#include -#include -#include -#include -#include -#include #include #include #include -#include #include #include -#include -#include #include #include @@ -49,64 +43,6 @@ namespace { -struct Segment final -{ - std::uint64_t from, to; - Segment() : from(0), to(0) {} - Segment(const std::uint64_t from, const std::uint64_t to) : from(from), to(to) {} - Segment(const OSMNodeID from, const OSMNodeID to) - : from(static_cast(from)), to(static_cast(to)) - { - } - - bool operator<(const Segment &rhs) const - { - return std::tie(from, to) < std::tie(rhs.from, rhs.to); - } - bool operator==(const Segment &rhs) const - { - return std::tie(from, to) == std::tie(rhs.from, rhs.to); - } -}; - -struct SpeedSource final -{ - SpeedSource() : speed(0), weight(std::numeric_limits::quiet_NaN()) {} - unsigned speed; - double weight; - std::uint8_t source; -}; - -struct Turn final -{ - std::uint64_t from, via, to; - Turn() : from(0), via(0), to(0) {} - Turn(const std::uint64_t from, const std::uint64_t via, const std::uint64_t to) - : from(from), via(via), to(to) - { - } - Turn(const osrm::extractor::lookup::TurnIndexBlock &turn) - : from(static_cast(turn.from_id)), - via(static_cast(turn.via_id)), to(static_cast(turn.to_id)) - { - } - bool operator<(const Turn &rhs) const - { - return std::tie(from, via, to) < std::tie(rhs.from, rhs.via, rhs.to); - } - bool operator==(const Turn &rhs) const - { - return std::tie(from, via, to) == std::tie(rhs.from, rhs.via, rhs.to); - } -}; - -struct PenaltySource final -{ - PenaltySource() : duration(0.), weight(std::numeric_limits::quiet_NaN()) {} - double duration; - 
double weight; - std::uint8_t source; -}; template inline bool is_aligned(const void *pointer) { @@ -116,128 +52,12 @@ template inline bool is_aligned(const void *pointer) } // anon ns -BOOST_FUSION_ADAPT_STRUCT(Segment, (decltype(Segment::from), from)(decltype(Segment::to), to)) -BOOST_FUSION_ADAPT_STRUCT(SpeedSource, - (decltype(SpeedSource::speed), speed)(decltype(SpeedSource::weight), - weight)) -BOOST_FUSION_ADAPT_STRUCT(Turn, - (decltype(Turn::from), from)(decltype(Turn::via), via)(decltype(Turn::to), - to)) -BOOST_FUSION_ADAPT_STRUCT(PenaltySource, - (decltype(PenaltySource::duration), - duration)(decltype(PenaltySource::weight), weight)) - -namespace -{ -namespace qi = boost::spirit::qi; -} namespace osrm { namespace updater { -// Functor to parse a list of CSV files using "key,value,comment" grammar. -// Key and Value structures must be a model of Random Access Sequence. -// Also the Value structure must have source member that will be filled -// with the corresponding file index in the CSV filenames vector. -template struct CSVFilesParser -{ - using Iterator = boost::spirit::line_pos_iterator; - using KeyRule = qi::rule; - using ValueRule = qi::rule; - - CSVFilesParser(std::size_t start_index, const KeyRule &key_rule, const ValueRule &value_rule) - : start_index(start_index), key_rule(key_rule), value_rule(value_rule) - { - } - - // Operator returns a lambda function that maps input Key to boost::optional. 
- auto operator()(const std::vector &csv_filenames) const - { - try - { - tbb::spin_mutex mutex; - std::vector> lookup; - tbb::parallel_for(std::size_t{0}, csv_filenames.size(), [&](const std::size_t idx) { - auto local = ParseCSVFile(csv_filenames[idx], start_index + idx); - - { // Merge local CSV results into a flat global vector - tbb::spin_mutex::scoped_lock _{mutex}; - lookup.insert(end(lookup), - std::make_move_iterator(begin(local)), - std::make_move_iterator(end(local))); - } - }); - - // With flattened map-ish view of all the files, make a stable sort on key and source - // and unique them on key to keep only the value with the largest file index - // and the largest line number in a file. - // The operands order is swapped to make descending ordering on (key, source) - std::stable_sort(begin(lookup), end(lookup), [](const auto &lhs, const auto &rhs) { - return rhs.first < lhs.first || - (rhs.first == lhs.first && rhs.second.source < lhs.second.source); - }); - - // Unique only on key to take the source precedence into account and remove duplicates. - const auto it = - std::unique(begin(lookup), end(lookup), [](const auto &lhs, const auto &rhs) { - return lhs.first == rhs.first; - }); - lookup.erase(it, end(lookup)); - - osrm::util::Log() << "In total loaded " << csv_filenames.size() - << " file(s) with a total of " << lookup.size() << " unique values"; - - return [lookup](const Key &key) { - using Result = boost::optional; - const auto it = std::lower_bound( - lookup.begin(), lookup.end(), key, [](const auto &lhs, const auto &rhs) { - return rhs < lhs.first; - }); - return it != std::end(lookup) && !(it->first < key) ? 
Result(it->second) : Result(); - }; - } - catch (const tbb::captured_exception &e) - { - throw osrm::util::exception(e.what() + SOURCE_REF); - } - } - - private: - // Parse a single CSV file and return result as a vector - auto ParseCSVFile(const std::string &filename, std::size_t file_id) const - { - std::ifstream input_stream(filename, std::ios::binary); - input_stream.unsetf(std::ios::skipws); - - boost::spirit::istream_iterator sfirst(input_stream), slast; - Iterator first(sfirst), last(slast); - - BOOST_ASSERT(file_id <= std::numeric_limits::max()); - ValueRule value_source = - value_rule[qi::_val = qi::_1, boost::phoenix::bind(&Value::source, qi::_val) = file_id]; - qi::rule()> csv_line = - (key_rule >> ',' >> value_source) >> -(',' >> *(qi::char_ - qi::eol)); - std::vector> result; - const auto ok = qi::parse(first, last, -(csv_line % qi::eol) >> *qi::eol, result); - - if (!ok || first != last) - { - const auto message = - boost::format("CSV file %1% malformed on line %2%") % filename % first.position(); - throw osrm::util::exception(message.str() + SOURCE_REF); - } - - osrm::util::Log() << "Loaded " << filename << " with " << result.size() << "values"; - - return std::move(result); - } - - const std::size_t start_index; - const KeyRule key_rule; - const ValueRule value_rule; -}; // Returns duration in deci-seconds inline EdgeWeight ConvertToDuration(double distance_in_meters, double speed_in_kmh) @@ -440,14 +260,8 @@ Updater::LoadAndUpdateEdgeExpandedGraph(std::vector &e edge_based_edge_list.reserve(graph_header.number_of_edges); util::Log() << "Reading " << graph_header.number_of_edges << " edges from the edge based graph"; - auto segment_speed_lookup = CSVFilesParser( - 1, qi::ulong_long >> ',' >> qi::ulong_long, qi::uint_ >> -(',' >> qi::double_))( - config.segment_speed_lookup_paths); - - auto turn_penalty_lookup = CSVFilesParser( - 1 + config.segment_speed_lookup_paths.size(), - qi::ulong_long >> ',' >> qi::ulong_long >> ',' >> qi::ulong_long, - 
qi::double_ >> -(',' >> qi::double_))(config.turn_penalty_lookup_paths); + auto segment_speed_lookup = csv::readSegmentValues(config.segment_speed_lookup_paths); + auto turn_penalty_lookup = csv::readTurnValues(config.turn_penalty_lookup_paths); // If we update the edge weights, this file will hold the datasource information for each // segment; the other files will also be conditionally filled concurrently if we make an update