Split CSV parsing into nicer interface

2017-03-08 00:38:08 +00:00 · 2017-03-08 00:38:08 +00:00 · 83820bf82c
commit 83820bf82c
parent 20e028c47b
5 changed files with 297 additions and 192 deletions
--- a/include/updater/csv_file_parser.hpp
+++ b/include/updater/csv_file_parser.hpp
@ -0,0 +1,129 @@
+#ifndef OSRM_UPDATER_CSV_FILE_PARSER_HPP
+#define OSRM_UPDATER_CSV_FILE_PARSER_HPP
+
+#include "updater/source.hpp"
+
+#include "util/log.hpp"
+#include "util/exception.hpp"
+#include "util/exception_utils.hpp"
+
+#include <tbb/parallel_for.h>
+#include <tbb/spin_mutex.h>
+
+#include <boost/interprocess/file_mapping.hpp>
+#include <boost/interprocess/mapped_region.hpp>
+#include <boost/spirit/include/phoenix.hpp>
+#include <boost/spirit/include/qi.hpp>
+#include <boost/spirit/include/support_line_pos_iterator.hpp>
+
+#include <fstream>
+#include <vector>
+
+namespace osrm
+{
+namespace updater
+{
+
+namespace
+{
+namespace qi = boost::spirit::qi;
+}
+
+// Functor to parse a list of CSV files using "key,value,comment" grammar.
+// Key and Value structures must be a model of Random Access Sequence.
+// Also the Value structure must have source member that will be filled
+// with the corresponding file index in the CSV filenames vector.
+template <typename Key, typename Value> struct CSVFilesParser
+{
+    using Iterator = boost::spirit::line_pos_iterator<boost::spirit::istream_iterator>;
+    using KeyRule = qi::rule<Iterator, Key()>;
+    using ValueRule = qi::rule<Iterator, Value()>;
+
+    CSVFilesParser(std::size_t start_index, const KeyRule &key_rule, const ValueRule &value_rule)
+        : start_index(start_index), key_rule(key_rule), value_rule(value_rule)
+    {
+    }
+
+    // Operator returns a lambda function that maps input Key to boost::optional<Value>.
+    auto operator()(const std::vector<std::string> &csv_filenames) const
+    {
+        try
+        {
+            tbb::spin_mutex mutex;
+            std::vector<std::pair<Key, Value>> lookup;
+            tbb::parallel_for(std::size_t{0}, csv_filenames.size(), [&](const std::size_t idx) {
+                auto local = ParseCSVFile(csv_filenames[idx], start_index + idx);
+
+                { // Merge local CSV results into a flat global vector
+                    tbb::spin_mutex::scoped_lock _{mutex};
+                    lookup.insert(end(lookup),
+                                  std::make_move_iterator(begin(local)),
+                                  std::make_move_iterator(end(local)));
+                }
+            });
+
+            // With flattened map-ish view of all the files, make a stable sort on key and source
+            // and unique them on key to keep only the value with the largest file index
+            // and the largest line number in a file.
+            // The operands order is swapped to make descending ordering on (key, source)
+            std::stable_sort(begin(lookup), end(lookup), [](const auto &lhs, const auto &rhs) {
+                return rhs.first < lhs.first ||
+                       (rhs.first == lhs.first && rhs.second.source < lhs.second.source);
+            });
+
+            // Unique only on key to take the source precedence into account and remove duplicates.
+            const auto it =
+                std::unique(begin(lookup), end(lookup), [](const auto &lhs, const auto &rhs) {
+                    return lhs.first == rhs.first;
+                });
+            lookup.erase(it, end(lookup));
+
+            util::Log() << "In total loaded " << csv_filenames.size()
+                              << " file(s) with a total of " << lookup.size() << " unique values";
+
+            return LookupTable<Key, Value>{lookup};
+        }
+        catch (const tbb::captured_exception &e)
+        {
+            throw util::exception(e.what() + SOURCE_REF);
+        }
+    }
+
+  private:
+    // Parse a single CSV file and return result as a vector<Key, Value>
+    auto ParseCSVFile(const std::string &filename, std::size_t file_id) const
+    {
+        std::ifstream input_stream(filename, std::ios::binary);
+        input_stream.unsetf(std::ios::skipws);
+
+        boost::spirit::istream_iterator sfirst(input_stream), slast;
+        Iterator first(sfirst), last(slast);
+
+        BOOST_ASSERT(file_id <= std::numeric_limits<std::uint8_t>::max());
+        ValueRule value_source =
+            value_rule[qi::_val = qi::_1, boost::phoenix::bind(&Value::source, qi::_val) = file_id];
+        qi::rule<Iterator, std::pair<Key, Value>()> csv_line =
+            (key_rule >> ',' >> value_source) >> -(',' >> *(qi::char_ - qi::eol));
+        std::vector<std::pair<Key, Value>> result;
+        const auto ok = qi::parse(first, last, -(csv_line % qi::eol) >> *qi::eol, result);
+
+        if (!ok || first != last)
+        {
+            const auto message =
+                boost::format("CSV file %1% malformed on line %2%") % filename % first.position();
+            throw util::exception(message.str() + SOURCE_REF);
+        }
+
+        util::Log() << "Loaded " << filename << " with " << result.size() << "values";
+
+        return std::move(result);
+    }
+
+    const std::size_t start_index;
+    const KeyRule key_rule;
+    const ValueRule value_rule;
+};
+}
+}
+
+#endif
--- a/include/updater/csv_source.hpp
+++ b/include/updater/csv_source.hpp
@ -0,0 +1,19 @@
+#ifndef OSRM_UPDATER_CSV_SOURCE_HPP
+#define OSRM_UPDATER_CSV_SOURCE_HPP
+
+#include "updater/source.hpp"
+
+namespace osrm
+{
+namespace updater
+{
+namespace csv
+{
+    SegmentLookupTable readSegmentValues(const std::vector<std::string> &paths);
+    TurnLookupTable readTurnValues(const std::vector<std::string> &paths);
+}
+}
+}
+
+
+#endif
--- a/include/updater/source.hpp
+++ b/include/updater/source.hpp
@ -0,0 +1,96 @@
+#ifndef OSRM_UPDATER_SOURCE_HPP
+#define OSRM_UPDATER_SOURCE_HPP
+
+#include "util/typedefs.hpp"
+
+#include <boost/optional.hpp>
+
+#include <vector>
+
+namespace osrm
+{
+namespace updater
+{
+
+template <typename Key, typename Value> struct LookupTable
+{
+    boost::optional<Value> operator()(const Key &key) const
+    {
+        using Result = boost::optional<Value>;
+        const auto it = std::lower_bound(
+            lookup.begin(), lookup.end(), key, [](const auto &lhs, const auto &rhs) {
+                return rhs < lhs.first;
+            });
+        return it != std::end(lookup) && !(it->first < key) ? Result(it->second) : Result();
+    }
+
+    std::vector<std::pair<Key, Value>> lookup;
+};
+
+struct Segment final
+{
+    std::uint64_t from, to;
+    Segment() : from(0), to(0) {}
+    Segment(const std::uint64_t from, const std::uint64_t to) : from(from), to(to) {}
+    Segment(const OSMNodeID from, const OSMNodeID to)
+        : from(static_cast<std::uint64_t>(from)), to(static_cast<std::uint64_t>(to))
+    {
+    }
+
+    bool operator<(const Segment &rhs) const
+    {
+        return std::tie(from, to) < std::tie(rhs.from, rhs.to);
+    }
+    bool operator==(const Segment &rhs) const
+    {
+        return std::tie(from, to) == std::tie(rhs.from, rhs.to);
+    }
+};
+
+struct SpeedSource final
+{
+    SpeedSource() : speed(0), weight(std::numeric_limits<double>::quiet_NaN()) {}
+    unsigned speed;
+    double weight;
+    std::uint8_t source;
+};
+
+struct Turn final
+{
+    std::uint64_t from, via, to;
+    Turn() : from(0), via(0), to(0) {}
+    Turn(const std::uint64_t from, const std::uint64_t via, const std::uint64_t to)
+        : from(from), via(via), to(to)
+    {
+    }
+    template<typename Other>
+    Turn(const Other &turn)
+        : from(static_cast<std::uint64_t>(turn.from_id)),
+          via(static_cast<std::uint64_t>(turn.via_id)), to(static_cast<std::uint64_t>(turn.to_id))
+    {
+    }
+    bool operator<(const Turn &rhs) const
+    {
+        return std::tie(from, via, to) < std::tie(rhs.from, rhs.via, rhs.to);
+    }
+    bool operator==(const Turn &rhs) const
+    {
+        return std::tie(from, via, to) == std::tie(rhs.from, rhs.via, rhs.to);
+    }
+};
+
+struct PenaltySource final
+{
+    PenaltySource() : duration(0.), weight(std::numeric_limits<double>::quiet_NaN()) {}
+    double duration;
+    double weight;
+    std::uint8_t source;
+};
+
+using SegmentLookupTable = LookupTable<Segment, SpeedSource>;
+using TurnLookupTable = LookupTable<Turn, PenaltySource>;
+
+}
+}
+
+#endif
--- a/src/updater/csv_source.cpp
+++ b/src/updater/csv_source.cpp
@ -0,0 +1,47 @@
+#include "updater/csv_source.hpp"
+
+#include "updater/csv_file_parser.hpp"
+
+#include <boost/fusion/adapted/std_pair.hpp>
+#include <boost/fusion/include/adapt_adt.hpp>
+
+BOOST_FUSION_ADAPT_STRUCT(osrm::updater::Segment,
+                          (decltype(osrm::updater::Segment::from),
+                           from)(decltype(osrm::updater::Segment::to), to))
+BOOST_FUSION_ADAPT_STRUCT(osrm::updater::SpeedSource,
+                          (decltype(osrm::updater::SpeedSource::speed),
+                           speed)(decltype(osrm::updater::SpeedSource::weight), weight))
+BOOST_FUSION_ADAPT_STRUCT(osrm::updater::Turn,
+                          (decltype(osrm::updater::Turn::from),
+                           from)(decltype(osrm::updater::Turn::via),
+                                 via)(decltype(osrm::updater::Turn::to), to))
+BOOST_FUSION_ADAPT_STRUCT(osrm::updater::PenaltySource,
+                          (decltype(osrm::updater::PenaltySource::duration),
+                           duration)(decltype(osrm::updater::PenaltySource::weight), weight))
+namespace
+{
+namespace qi = boost::spirit::qi;
+}
+
+namespace osrm
+{
+namespace updater
+{
+namespace csv
+{
+SegmentLookupTable readSegmentValues(const std::vector<std::string> &paths)
+{
+    return CSVFilesParser<Segment, SpeedSource>(
+        1, qi::ulong_long >> ',' >> qi::ulong_long, qi::uint_ >> -(',' >> qi::double_))(paths);
+}
+
+TurnLookupTable readTurnValues(const std::vector<std::string> &paths)
+{
+    return CSVFilesParser<Turn, PenaltySource>(1,
+                                               qi::ulong_long >> ',' >> qi::ulong_long >> ',' >>
+                                                   qi::ulong_long,
+                                               qi::double_ >> -(',' >> qi::double_))(paths);
+}
+}
+}
+}
--- a/src/updater/updater.cpp
+++ b/src/updater/updater.cpp
@ -1,5 +1,7 @@
 #include "updater/updater.hpp"

+#include "updater/csv_source.hpp"
+
 #include "extractor/compressed_edge_container.hpp"
 #include "extractor/edge_based_graph_factory.hpp"
 #include "extractor/node_based_edge.hpp"
@ -18,24 +20,16 @@
 #include "util/typedefs.hpp"

 #include <boost/assert.hpp>
+#include <boost/interprocess/mapped_region.hpp>
+#include <boost/interprocess/file_mapping.hpp>
 #include <boost/filesystem/fstream.hpp>
 #include <boost/functional/hash.hpp>
-#include <boost/fusion/adapted/std_pair.hpp>
-#include <boost/fusion/include/adapt_adt.hpp>
-#include <boost/interprocess/file_mapping.hpp>
-#include <boost/interprocess/mapped_region.hpp>
-#include <boost/spirit/include/phoenix.hpp>
-#include <boost/spirit/include/qi.hpp>
-#include <boost/spirit/include/support_line_pos_iterator.hpp>

 #include <tbb/blocked_range.h>
 #include <tbb/concurrent_unordered_map.h>
 #include <tbb/enumerable_thread_specific.h>
-#include <tbb/parallel_for.h>
 #include <tbb/parallel_for_each.h>
 #include <tbb/parallel_invoke.h>
-#include <tbb/parallel_sort.h>
-#include <tbb/spin_mutex.h>

 #include <algorithm>
 #include <bitset>
@ -49,64 +43,6 @@

 namespace
 {
-struct Segment final
-{
-    std::uint64_t from, to;
-    Segment() : from(0), to(0) {}
-    Segment(const std::uint64_t from, const std::uint64_t to) : from(from), to(to) {}
-    Segment(const OSMNodeID from, const OSMNodeID to)
-        : from(static_cast<std::uint64_t>(from)), to(static_cast<std::uint64_t>(to))
-    {
-    }
-
-    bool operator<(const Segment &rhs) const
-    {
-        return std::tie(from, to) < std::tie(rhs.from, rhs.to);
-    }
-    bool operator==(const Segment &rhs) const
-    {
-        return std::tie(from, to) == std::tie(rhs.from, rhs.to);
-    }
-};
-
-struct SpeedSource final
-{
-    SpeedSource() : speed(0), weight(std::numeric_limits<double>::quiet_NaN()) {}
-    unsigned speed;
-    double weight;
-    std::uint8_t source;
-};
-
-struct Turn final
-{
-    std::uint64_t from, via, to;
-    Turn() : from(0), via(0), to(0) {}
-    Turn(const std::uint64_t from, const std::uint64_t via, const std::uint64_t to)
-        : from(from), via(via), to(to)
-    {
-    }
-    Turn(const osrm::extractor::lookup::TurnIndexBlock &turn)
-        : from(static_cast<std::uint64_t>(turn.from_id)),
-          via(static_cast<std::uint64_t>(turn.via_id)), to(static_cast<std::uint64_t>(turn.to_id))
-    {
-    }
-    bool operator<(const Turn &rhs) const
-    {
-        return std::tie(from, via, to) < std::tie(rhs.from, rhs.via, rhs.to);
-    }
-    bool operator==(const Turn &rhs) const
-    {
-        return std::tie(from, via, to) == std::tie(rhs.from, rhs.via, rhs.to);
-    }
-};
-
-struct PenaltySource final
-{
-    PenaltySource() : duration(0.), weight(std::numeric_limits<double>::quiet_NaN()) {}
-    double duration;
-    double weight;
-    std::uint8_t source;
-};

 template <typename T> inline bool is_aligned(const void *pointer)
 {
@ -116,128 +52,12 @@ template <typename T> inline bool is_aligned(const void *pointer)

 } // anon ns

-BOOST_FUSION_ADAPT_STRUCT(Segment, (decltype(Segment::from), from)(decltype(Segment::to), to))
-BOOST_FUSION_ADAPT_STRUCT(SpeedSource,
-                          (decltype(SpeedSource::speed), speed)(decltype(SpeedSource::weight),
-                                                                weight))
-BOOST_FUSION_ADAPT_STRUCT(Turn,
-                          (decltype(Turn::from), from)(decltype(Turn::via), via)(decltype(Turn::to),
-                                                                                 to))
-BOOST_FUSION_ADAPT_STRUCT(PenaltySource,
-                          (decltype(PenaltySource::duration),
-                           duration)(decltype(PenaltySource::weight), weight))
-
-namespace
-{
-namespace qi = boost::spirit::qi;
-}

 namespace osrm
 {
 namespace updater
 {

-// Functor to parse a list of CSV files using "key,value,comment" grammar.
-// Key and Value structures must be a model of Random Access Sequence.
-// Also the Value structure must have source member that will be filled
-// with the corresponding file index in the CSV filenames vector.
-template <typename Key, typename Value> struct CSVFilesParser
-{
-    using Iterator = boost::spirit::line_pos_iterator<boost::spirit::istream_iterator>;
-    using KeyRule = qi::rule<Iterator, Key()>;
-    using ValueRule = qi::rule<Iterator, Value()>;
-
-    CSVFilesParser(std::size_t start_index, const KeyRule &key_rule, const ValueRule &value_rule)
-        : start_index(start_index), key_rule(key_rule), value_rule(value_rule)
-    {
-    }
-
-    // Operator returns a lambda function that maps input Key to boost::optional<Value>.
-    auto operator()(const std::vector<std::string> &csv_filenames) const
-    {
-        try
-        {
-            tbb::spin_mutex mutex;
-            std::vector<std::pair<Key, Value>> lookup;
-            tbb::parallel_for(std::size_t{0}, csv_filenames.size(), [&](const std::size_t idx) {
-                auto local = ParseCSVFile(csv_filenames[idx], start_index + idx);
-
-                { // Merge local CSV results into a flat global vector
-                    tbb::spin_mutex::scoped_lock _{mutex};
-                    lookup.insert(end(lookup),
-                                  std::make_move_iterator(begin(local)),
-                                  std::make_move_iterator(end(local)));
-                }
-            });
-
-            // With flattened map-ish view of all the files, make a stable sort on key and source
-            // and unique them on key to keep only the value with the largest file index
-            // and the largest line number in a file.
-            // The operands order is swapped to make descending ordering on (key, source)
-            std::stable_sort(begin(lookup), end(lookup), [](const auto &lhs, const auto &rhs) {
-                return rhs.first < lhs.first ||
-                       (rhs.first == lhs.first && rhs.second.source < lhs.second.source);
-            });
-
-            // Unique only on key to take the source precedence into account and remove duplicates.
-            const auto it =
-                std::unique(begin(lookup), end(lookup), [](const auto &lhs, const auto &rhs) {
-                    return lhs.first == rhs.first;
-                });
-            lookup.erase(it, end(lookup));
-
-            osrm::util::Log() << "In total loaded " << csv_filenames.size()
-                              << " file(s) with a total of " << lookup.size() << " unique values";
-
-            return [lookup](const Key &key) {
-                using Result = boost::optional<Value>;
-                const auto it = std::lower_bound(
-                    lookup.begin(), lookup.end(), key, [](const auto &lhs, const auto &rhs) {
-                        return rhs < lhs.first;
-                    });
-                return it != std::end(lookup) && !(it->first < key) ? Result(it->second) : Result();
-            };
-        }
-        catch (const tbb::captured_exception &e)
-        {
-            throw osrm::util::exception(e.what() + SOURCE_REF);
-        }
-    }
-
-  private:
-    // Parse a single CSV file and return result as a vector<Key, Value>
-    auto ParseCSVFile(const std::string &filename, std::size_t file_id) const
-    {
-        std::ifstream input_stream(filename, std::ios::binary);
-        input_stream.unsetf(std::ios::skipws);
-
-        boost::spirit::istream_iterator sfirst(input_stream), slast;
-        Iterator first(sfirst), last(slast);
-
-        BOOST_ASSERT(file_id <= std::numeric_limits<std::uint8_t>::max());
-        ValueRule value_source =
-            value_rule[qi::_val = qi::_1, boost::phoenix::bind(&Value::source, qi::_val) = file_id];
-        qi::rule<Iterator, std::pair<Key, Value>()> csv_line =
-            (key_rule >> ',' >> value_source) >> -(',' >> *(qi::char_ - qi::eol));
-        std::vector<std::pair<Key, Value>> result;
-        const auto ok = qi::parse(first, last, -(csv_line % qi::eol) >> *qi::eol, result);
-
-        if (!ok || first != last)
-        {
-            const auto message =
-                boost::format("CSV file %1% malformed on line %2%") % filename % first.position();
-            throw osrm::util::exception(message.str() + SOURCE_REF);
-        }
-
-        osrm::util::Log() << "Loaded " << filename << " with " << result.size() << "values";
-
-        return std::move(result);
-    }
-
-    const std::size_t start_index;
-    const KeyRule key_rule;
-    const ValueRule value_rule;
-};

 // Returns duration in deci-seconds
 inline EdgeWeight ConvertToDuration(double distance_in_meters, double speed_in_kmh)
@ -440,14 +260,8 @@ Updater::LoadAndUpdateEdgeExpandedGraph(std::vector<extractor::EdgeBasedEdge> &e
    edge_based_edge_list.reserve(graph_header.number_of_edges);
    util::Log() << "Reading " << graph_header.number_of_edges << " edges from the edge based graph";

-    auto segment_speed_lookup = CSVFilesParser<Segment, SpeedSource>(
-        1, qi::ulong_long >> ',' >> qi::ulong_long, qi::uint_ >> -(',' >> qi::double_))(
-        config.segment_speed_lookup_paths);
-
-    auto turn_penalty_lookup = CSVFilesParser<Turn, PenaltySource>(
-        1 + config.segment_speed_lookup_paths.size(),
-        qi::ulong_long >> ',' >> qi::ulong_long >> ',' >> qi::ulong_long,
-        qi::double_ >> -(',' >> qi::double_))(config.turn_penalty_lookup_paths);
+    auto segment_speed_lookup = csv::readSegmentValues(config.segment_speed_lookup_paths);
+    auto turn_penalty_lookup = csv::readTurnValues(config.turn_penalty_lookup_paths);

    // If we update the edge weights, this file will hold the datasource information for each
    // segment; the other files will also be conditionally filled concurrently if we make an update