set up for computing durations while unpacking them

copy dummy cache over

implement retrievePackedPathFromSearchSpace

calculate packed_path_from_source_to_middle

debugging the retrievePackedPathFromSearchSpace function implementation

adding in packed_path_from_source_to_middle

cache is partway working

unpack path and get duration that way

the computeDurationForEdge method

comment out cache

clean up the code

move vector creation and allocation to outside of loop

hack to not return vectors on facade.GetUncompressedForwardDurations and facade.GetUncompressedReverseDurations

clean up hack

add exclude_index to cache key

clearing cache with timestamp

rebase against vectors->range pr

swapped out unordered_map cache with a boost_lru implementation

calculation for cache size

cleaned up comment about cache size calculations

unit tests

cache uses unsigned char for exclude index

clean up cache and unit tests

pass in a hashed key to the threadlocal cache

500 mb threadlocal 2 t

fixes and a rebase

correct calculation
This commit is contained in:
Kajari Ghosh 2018-02-12 11:10:17 -05:00
parent 1a1293608d
commit fbba23e66d
13 changed files with 667 additions and 56 deletions

View File

@ -25,6 +25,80 @@ Feature: Basic Distance Matrix
Given the query options Given the query options
| exclude | toll | | exclude | toll |
Scenario: Testbot - Travel time matrix of minimal network with excludes
Given the query options
| exclude | toll |
Given the node map
"""
a b
c d
"""
And the ways
| nodes | highway | toll | # |
| ab | motorway | | not drivable for exclude=motorway |
| cd | primary | | always drivable |
| ac | highway | yes | not drivable for exclude=motorway exclude=toll and exclude=motorway,toll |
| bd | highway | yes | not drivable for exclude=motorway exclude=toll |
When I request a travel time matrix I should get
| | a | b | c | d |
| a | 0 | 15 | | |
| b | 15 | 0 | | |
| c | | | 0 | 10 |
| d | | | 10 | 0 |
Scenario: Testbot - Travel time matrix of minimal network with different exclude
Given the query options
| exclude | motorway |
Given the node map
"""
a b
c d
"""
And the ways
| nodes | highway | toll | # |
| ab | motorway | | not drivable for exclude=motorway |
| cd | primary | | always drivable |
| ac | highway | yes | not drivable for exclude=motorway exclude=toll and exclude=motorway,toll |
| bd | highway | yes | not drivable for exclude=motorway exclude=toll |
When I request a travel time matrix I should get
| | a | b | c | d |
| a | 0 | 40 | 15 | 25 |
| b | 40 | 0 | 25 | 15 |
| c | 15 | 25 | 0 | 10 |
| d | 25 | 15 | 10 | 0 |
Scenario: Testbot - Travel time matrix of minimal network with excludes combination
Given the query options
| exclude | motorway,toll |
Given the node map
"""
a b
c d
"""
And the ways
| nodes | highway | toll | # |
| ab | motorway | | not drivable for exclude=motorway |
| cd | primary | | always drivable |
| ac | highway | yes | not drivable for exclude=motorway exclude=toll and exclude=motorway,toll |
| bd | highway | yes | not drivable for exclude=motorway exclude=toll |
When I request a travel time matrix I should get
| | a | b | c | d |
| a | 0 | 10 | 0 | 10 |
| b | 10 | 0 | 10 | 0 |
| c | 0 | 10 | 0 | 10 |
| d | 10 | 0 | 10 | 0 |
Scenario: Testbot - Travel time matrix with different way speeds
Given the node map Given the node map
""" """
a b a b

View File

@ -60,7 +60,8 @@ class DataWatchdogImpl<AlgorithmT, datafacade::ContiguousInternalMemoryDataFacad
DataFacadeFactory<datafacade::ContiguousInternalMemoryDataFacade, AlgorithmT>( DataFacadeFactory<datafacade::ContiguousInternalMemoryDataFacade, AlgorithmT>(
std::make_shared<datafacade::SharedMemoryAllocator>( std::make_shared<datafacade::SharedMemoryAllocator>(
std::vector<storage::SharedRegionRegister::ShmKey>{ std::vector<storage::SharedRegionRegister::ShmKey>{
static_region.shm_key, updatable_region.shm_key})); static_region.shm_key, updatable_region.shm_key}),
static_region.timestamp);
} }
watcher = std::thread(&DataWatchdogImpl::Run, this); watcher = std::thread(&DataWatchdogImpl::Run, this);
@ -115,7 +116,8 @@ class DataWatchdogImpl<AlgorithmT, datafacade::ContiguousInternalMemoryDataFacad
DataFacadeFactory<datafacade::ContiguousInternalMemoryDataFacade, AlgorithmT>( DataFacadeFactory<datafacade::ContiguousInternalMemoryDataFacade, AlgorithmT>(
std::make_shared<datafacade::SharedMemoryAllocator>( std::make_shared<datafacade::SharedMemoryAllocator>(
std::vector<storage::SharedRegionRegister::ShmKey>{ std::vector<storage::SharedRegionRegister::ShmKey>{
static_region.shm_key, updatable_region.shm_key})); static_region.shm_key, updatable_region.shm_key}),
static_region.timestamp);
} }
util::Log() << "DataWatchdog thread stopped"; util::Log() << "DataWatchdog thread stopped";

View File

@ -168,6 +168,8 @@ class ContiguousInternalMemoryDataFacadeBase : public BaseDataFacade
// allocator that keeps the allocation data // allocator that keeps the allocation data
std::shared_ptr<ContiguousBlockAllocator> allocator; std::shared_ptr<ContiguousBlockAllocator> allocator;
std::size_t m_exclude_index;
unsigned m_timestamp;
void InitializeInternalPointers(const storage::SharedDataIndex &index, void InitializeInternalPointers(const storage::SharedDataIndex &index,
const std::string &metric_name, const std::string &metric_name,
@ -183,6 +185,8 @@ class ContiguousInternalMemoryDataFacadeBase : public BaseDataFacade
m_check_sum = *index.GetBlockPtr<std::uint32_t>("/common/connectivity_checksum"); m_check_sum = *index.GetBlockPtr<std::uint32_t>("/common/connectivity_checksum");
m_exclude_index = exclude_index;
std::tie(m_coordinate_list, m_osmnodeid_list) = std::tie(m_coordinate_list, m_osmnodeid_list) =
make_nbn_data_view(index, "/common/nbn_data"); make_nbn_data_view(index, "/common/nbn_data");
@ -217,12 +221,16 @@ class ContiguousInternalMemoryDataFacadeBase : public BaseDataFacade
} }
public: public:
std::size_t GetTimestamp() const { return m_timestamp; }
std::size_t GetExcludeIndex() const { return m_exclude_index; }
// allows switching between process_memory/shared_memory datafacade, based on the type of // allows switching between process_memory/shared_memory datafacade, based on the type of
// allocator // allocator
ContiguousInternalMemoryDataFacadeBase(std::shared_ptr<ContiguousBlockAllocator> allocator_, ContiguousInternalMemoryDataFacadeBase(std::shared_ptr<ContiguousBlockAllocator> allocator_,
const std::string &metric_name, const std::string &metric_name,
const std::size_t exclude_index) const std::size_t exclude_index,
: allocator(std::move(allocator_)) unsigned timestamp)
: allocator(std::move(allocator_)), m_timestamp(timestamp)
{ {
InitializeInternalPointers(allocator->GetIndex(), metric_name, exclude_index); InitializeInternalPointers(allocator->GetIndex(), metric_name, exclude_index);
} }
@ -618,8 +626,9 @@ class ContiguousInternalMemoryDataFacade<CH>
public: public:
ContiguousInternalMemoryDataFacade(std::shared_ptr<ContiguousBlockAllocator> allocator, ContiguousInternalMemoryDataFacade(std::shared_ptr<ContiguousBlockAllocator> allocator,
const std::string &metric_name, const std::string &metric_name,
const std::size_t exclude_index) const std::size_t exclude_index,
: ContiguousInternalMemoryDataFacadeBase(allocator, metric_name, exclude_index), unsigned timestamp)
: ContiguousInternalMemoryDataFacadeBase(allocator, metric_name, exclude_index, timestamp),
ContiguousInternalMemoryAlgorithmDataFacade<CH>(allocator, metric_name, exclude_index) ContiguousInternalMemoryAlgorithmDataFacade<CH>(allocator, metric_name, exclude_index)
{ {
} }
@ -735,8 +744,9 @@ class ContiguousInternalMemoryDataFacade<MLD> final
public: public:
ContiguousInternalMemoryDataFacade(std::shared_ptr<ContiguousBlockAllocator> allocator, ContiguousInternalMemoryDataFacade(std::shared_ptr<ContiguousBlockAllocator> allocator,
const std::string &metric_name, const std::string &metric_name,
const std::size_t exclude_index) const std::size_t exclude_index,
: ContiguousInternalMemoryDataFacadeBase(allocator, metric_name, exclude_index), unsigned timestamp)
: ContiguousInternalMemoryDataFacadeBase(allocator, metric_name, exclude_index, timestamp),
ContiguousInternalMemoryAlgorithmDataFacade<MLD>(allocator, metric_name, exclude_index) ContiguousInternalMemoryAlgorithmDataFacade<MLD>(allocator, metric_name, exclude_index)
{ {
} }

View File

@ -30,8 +30,8 @@ template <template <typename A> class FacadeT, typename AlgorithmT> class DataFa
DataFacadeFactory() = default; DataFacadeFactory() = default;
template <typename AllocatorT> template <typename AllocatorT>
DataFacadeFactory(std::shared_ptr<AllocatorT> allocator) DataFacadeFactory(std::shared_ptr<AllocatorT> allocator, unsigned timestamp)
: DataFacadeFactory(allocator, has_exclude_flags) : DataFacadeFactory(allocator, has_exclude_flags, timestamp)
{ {
BOOST_ASSERT_MSG(facades.size() >= 1, "At least one datafacade is needed"); BOOST_ASSERT_MSG(facades.size() >= 1, "At least one datafacade is needed");
} }
@ -44,7 +44,7 @@ template <template <typename A> class FacadeT, typename AlgorithmT> class DataFa
private: private:
// Algorithm with exclude flags // Algorithm with exclude flags
template <typename AllocatorT> template <typename AllocatorT>
DataFacadeFactory(std::shared_ptr<AllocatorT> allocator, std::true_type) DataFacadeFactory(std::shared_ptr<AllocatorT> allocator, std::true_type, unsigned timestamp)
{ {
const auto &index = allocator->GetIndex(); const auto &index = allocator->GetIndex();
properties = index.template GetBlockPtr<extractor::ProfileProperties>("/common/properties"); properties = index.template GetBlockPtr<extractor::ProfileProperties>("/common/properties");
@ -71,7 +71,8 @@ template <template <typename A> class FacadeT, typename AlgorithmT> class DataFa
std::size_t index = std::size_t index =
std::stoi(exclude_prefix.substr(index_begin + 1, exclude_prefix.size())); std::stoi(exclude_prefix.substr(index_begin + 1, exclude_prefix.size()));
BOOST_ASSERT(index >= 0 && index < facades.size()); BOOST_ASSERT(index >= 0 && index < facades.size());
facades[index] = std::make_shared<const Facade>(allocator, metric_name, index); facades[index] =
std::make_shared<const Facade>(allocator, metric_name, index, timestamp);
} }
for (const auto index : util::irange<std::size_t>(0, properties->class_names.size())) for (const auto index : util::irange<std::size_t>(0, properties->class_names.size()))
@ -86,12 +87,12 @@ template <template <typename A> class FacadeT, typename AlgorithmT> class DataFa
// Algorithm without exclude flags // Algorithm without exclude flags
template <typename AllocatorT> template <typename AllocatorT>
DataFacadeFactory(std::shared_ptr<AllocatorT> allocator, std::false_type) DataFacadeFactory(std::shared_ptr<AllocatorT> allocator, std::false_type, unsigned timestamp)
{ {
const auto &index = allocator->GetIndex(); const auto &index = allocator->GetIndex();
properties = index.template GetBlockPtr<extractor::ProfileProperties>("/common/properties"); properties = index.template GetBlockPtr<extractor::ProfileProperties>("/common/properties");
const auto &metric_name = properties->GetWeightName(); const auto &metric_name = properties->GetWeightName();
facades.push_back(std::make_shared<const Facade>(allocator, metric_name, 0)); facades.push_back(std::make_shared<const Facade>(allocator, metric_name, 0, timestamp));
} }
std::shared_ptr<const Facade> Get(const api::TileParameters &, std::false_type) const std::shared_ptr<const Facade> Get(const api::TileParameters &, std::false_type) const

View File

@ -34,7 +34,8 @@ class ExternalProvider final : public DataFacadeProvider<AlgorithmT, FacadeT>
ExternalProvider(const storage::StorageConfig &config, ExternalProvider(const storage::StorageConfig &config,
const boost::filesystem::path &memory_file) const boost::filesystem::path &memory_file)
: facade_factory(std::make_shared<datafacade::MMapMemoryAllocator>(config, memory_file)) : facade_factory(std::make_shared<datafacade::MMapMemoryAllocator>(config, memory_file),
0) // is it ok to add timestamp as zero here?
{ {
} }
@ -58,7 +59,7 @@ class ImmutableProvider final : public DataFacadeProvider<AlgorithmT, FacadeT>
using Facade = typename DataFacadeProvider<AlgorithmT, FacadeT>::Facade; using Facade = typename DataFacadeProvider<AlgorithmT, FacadeT>::Facade;
ImmutableProvider(const storage::StorageConfig &config) ImmutableProvider(const storage::StorageConfig &config)
: facade_factory(std::make_shared<datafacade::ProcessMemoryAllocator>(config)) : facade_factory(std::make_shared<datafacade::ProcessMemoryAllocator>(config), 0)
{ {
} }

View File

@ -177,6 +177,7 @@ void annotatePath(const FacadeT &facade,
const auto geometry_index = facade.GetGeometryIndex(node_id); const auto geometry_index = facade.GetGeometryIndex(node_id);
get_segment_geometry(geometry_index); get_segment_geometry(geometry_index);
BOOST_ASSERT(id_vector.size() > 0); BOOST_ASSERT(id_vector.size() > 0);
BOOST_ASSERT(datasource_vector.size() > 0); BOOST_ASSERT(datasource_vector.size() > 0);
BOOST_ASSERT(weight_vector.size() + 1 == id_vector.size()); BOOST_ASSERT(weight_vector.size() + 1 == id_vector.size());
@ -406,6 +407,7 @@ InternalRouteResult extractRoute(const DataFacade<AlgorithmT> &facade,
return raw_route_data; return raw_route_data;
} }
template <typename FacadeT> EdgeDistance computeEdgeDistance(const FacadeT &facade, NodeID node_id) template <typename FacadeT> EdgeDistance computeEdgeDistance(const FacadeT &facade, NodeID node_id)
{ {
const auto geometry_index = facade.GetGeometryIndex(node_id); const auto geometry_index = facade.GetGeometryIndex(node_id);
@ -422,6 +424,31 @@ template <typename FacadeT> EdgeDistance computeEdgeDistance(const FacadeT &faca
return total_distance; return total_distance;
} }
template <typename FacadeT>
EdgeDuration computeEdgeDuration(const FacadeT &facade, NodeID node_id, NodeID turn_id)
{
const auto geometry_index = facade.GetGeometryIndex(node_id);
// datastructures to hold extracted data from geometry
EdgeDuration total_duration;
if (geometry_index.forward)
{
auto duration_range = facade.GetUncompressedForwardDurations(geometry_index.id);
total_duration = std::accumulate(duration_range.begin(), duration_range.end(), 0);
}
else
{
auto duration_range = facade.GetUncompressedReverseDurations(geometry_index.id);
total_duration = std::accumulate(duration_range.begin(), duration_range.end(), 0);
}
const auto turn_duration = facade.GetDurationPenaltyForEdgeID(turn_id);
total_duration += turn_duration;
return total_duration;
}
} // namespace routing_algorithms } // namespace routing_algorithms
} // namespace engine } // namespace engine
} // namespace osrm } // namespace osrm

View File

@ -5,6 +5,7 @@
#include "engine/datafacade.hpp" #include "engine/datafacade.hpp"
#include "engine/routing_algorithms/routing_base.hpp" #include "engine/routing_algorithms/routing_base.hpp"
#include "engine/search_engine_data.hpp" #include "engine/search_engine_data.hpp"
#include "engine/unpacking_cache.hpp"
#include "util/typedefs.hpp" #include "util/typedefs.hpp"
@ -287,11 +288,128 @@ void unpackPath(const DataFacade<Algorithm> &facade,
} }
} }
} }
template <typename BidirectionalIterator>
EdgeDistance calculateEBGNodeDuration(const DataFacade<Algorithm> &facade,
BidirectionalIterator packed_path_begin,
BidirectionalIterator packed_path_end,
UnpackingCache &unpacking_cache)
{
// Make sure we have at least something to unpack
if (packed_path_begin == packed_path_end ||
std::distance(packed_path_begin, packed_path_end) <= 1)
return 0;
std::stack<std::tuple<NodeID, NodeID, bool>> recursion_stack;
std::stack<EdgeDuration> duration_stack;
// We have to push the path in reverse order onto the stack because it's LIFO.
for (auto current = std::prev(packed_path_end); current != packed_path_begin;
current = std::prev(current))
{
recursion_stack.emplace(*std::prev(current), *current, false);
}
std::tuple<NodeID, NodeID, bool> edge;
while (!recursion_stack.empty())
{
edge = recursion_stack.top();
recursion_stack.pop();
// Have we processed the edge before? tells us if we have values in the durations stack that
// we can add up
if (!std::get<2>(edge))
{ // haven't processed edge before, so process it in the body!
std::get<2>(edge) = true; // mark that this edge will now be processed
if (unpacking_cache.IsEdgeInCache(std::make_tuple(
std::get<0>(edge), std::get<1>(edge), facade.GetExcludeIndex())))
{
EdgeDuration duration = unpacking_cache.GetDuration(std::make_tuple(
std::get<0>(edge), std::get<1>(edge), facade.GetExcludeIndex()));
duration_stack.emplace(duration);
}
else
{
// Look for an edge on the forward CH graph (.forward)
EdgeID smaller_edge_id =
facade.FindSmallestEdge(std::get<0>(edge),
std::get<1>(edge),
[](const auto &data) { return data.forward; });
// If we didn't find one there, the we might be looking at a part of the path that
// was found using the backward search. Here, we flip the node order (.second,
// .first) and only consider edges with the `.backward` flag.
if (SPECIAL_EDGEID == smaller_edge_id)
{
smaller_edge_id =
facade.FindSmallestEdge(std::get<1>(edge),
std::get<0>(edge),
[](const auto &data) { return data.backward; });
}
// If we didn't find anything *still*, then something is broken and someone has
// called this function with bad values.
BOOST_ASSERT_MSG(smaller_edge_id != SPECIAL_EDGEID, "Invalid smaller edge ID");
const auto &data = facade.GetEdgeData(smaller_edge_id);
BOOST_ASSERT_MSG(data.weight != std::numeric_limits<EdgeWeight>::max(),
"edge weight invalid");
// If the edge is a shortcut, we need to add the two halfs to the stack.
if (data.shortcut)
{ // unpack
const NodeID middle_node_id = data.turn_id;
// Note the order here - we're adding these to a stack, so we
// want the first->middle to get visited before middle->second
recursion_stack.emplace(edge);
recursion_stack.emplace(middle_node_id, std::get<1>(edge), false);
recursion_stack.emplace(std::get<0>(edge), middle_node_id, false);
}
else
{
auto temp = std::make_tuple(
std::get<0>(edge), std::get<1>(edge), facade.GetExcludeIndex());
// compute the duration here and put it onto the duration stack using method
// similar to annotatePath but smaller
EdgeDuration duration =
computeEdgeDuration(facade, std::get<0>(edge), data.turn_id);
duration_stack.emplace(duration);
unpacking_cache.AddEdge(temp, duration);
}
}
}
else
{ // the edge has already been processed. this means that there are enough values in the
// durations stack
BOOST_ASSERT_MSG(duration_stack.size() >= 2,
"There are not enough (at least 2) values on the duration stack");
EdgeDuration edge1 = duration_stack.top();
duration_stack.pop();
EdgeDuration edge2 = duration_stack.top();
duration_stack.pop();
EdgeDuration duration = edge1 + edge2;
duration_stack.emplace(duration);
unpacking_cache.AddEdge(
std::make_tuple(std::get<0>(edge), std::get<1>(edge), facade.GetExcludeIndex()),
duration);
}
}
EdgeDuration total_duration = 0;
while (!duration_stack.empty())
{
total_duration += duration_stack.top();
duration_stack.pop();
}
return total_duration;
}
template <typename BidirectionalIterator> template <typename BidirectionalIterator>
EdgeDistance calculateEBGNodeAnnotations(const DataFacade<Algorithm> &facade, EdgeDistance calculateEBGNodeDistance(const DataFacade<Algorithm> &facade,
BidirectionalIterator packed_path_begin, BidirectionalIterator packed_path_begin,
BidirectionalIterator packed_path_end) BidirectionalIterator packed_path_end,
UnpackingCache &unpacking_cache)
{ {
// Make sure we have at least something to unpack // Make sure we have at least something to unpack
if (packed_path_begin == packed_path_end || if (packed_path_begin == packed_path_end ||
@ -302,6 +420,7 @@ EdgeDistance calculateEBGNodeAnnotations(const DataFacade<Algorithm> &facade,
std::stack<EdgeDistance> distance_stack; std::stack<EdgeDistance> distance_stack;
// We have to push the path in reverse order onto the stack because it's LIFO. // We have to push the path in reverse order onto the stack because it's LIFO.
for (auto current = std::prev(packed_path_end); current > packed_path_begin; for (auto current = std::prev(packed_path_end); current > packed_path_begin;
current = std::prev(current)) current = std::prev(current))
{ {
recursion_stack.emplace(*std::prev(current), *current, false); recursion_stack.emplace(*std::prev(current), *current, false);
@ -320,51 +439,65 @@ EdgeDistance calculateEBGNodeAnnotations(const DataFacade<Algorithm> &facade,
std::get<2>(edge) = true; // mark that this edge will now be processed std::get<2>(edge) = true; // mark that this edge will now be processed
// Look for an edge on the forward CH graph (.forward) if (unpacking_cache.IsEdgeInCache(std::make_tuple(
EdgeID smaller_edge_id = std::get<0>(edge), std::get<1>(edge), facade.GetExcludeIndex())))
facade.FindSmallestEdge(std::get<0>(edge), std::get<1>(edge), [](const auto &data) {
return data.forward;
});
// If we didn't find one there, the we might be looking at a part of the path that
// was found using the backward search. Here, we flip the node order (.second,
// .first) and only consider edges with the `.backward` flag.
if (SPECIAL_EDGEID == smaller_edge_id)
{ {
smaller_edge_id = EdgeDuration distance = unpacking_cache.GetDistance(std::make_tuple(
facade.FindSmallestEdge(std::get<1>(edge), std::get<0>(edge), std::get<1>(edge), facade.GetExcludeIndex()));
std::get<0>(edge), distance_stack.emplace(distance);
[](const auto &data) { return data.backward; });
}
// If we didn't find anything *still*, then something is broken and someone has
// called this function with bad values.
BOOST_ASSERT_MSG(smaller_edge_id != SPECIAL_EDGEID, "Invalid smaller edge ID");
const auto &data = facade.GetEdgeData(smaller_edge_id);
BOOST_ASSERT_MSG(data.weight != std::numeric_limits<EdgeWeight>::max(),
"edge weight invalid");
// If the edge is a shortcut, we need to add the two halfs to the stack.
if (data.shortcut)
{ // unpack
const NodeID middle_node_id = data.turn_id;
// Note the order here - we're adding these to a stack, so we
// want the first->middle to get visited before middle->second
recursion_stack.emplace(edge);
recursion_stack.emplace(middle_node_id, std::get<1>(edge), false);
recursion_stack.emplace(std::get<0>(edge), middle_node_id, false);
} }
else else
{ {
// compute the duration here and put it onto the duration stack using method // Look for an edge on the forward CH graph (.forward)
// similar to annotatePath but smaller EdgeID smaller_edge_id =
EdgeDistance distance = computeEdgeDistance(facade, std::get<0>(edge)); facade.FindSmallestEdge(std::get<0>(edge),
distance_stack.emplace(distance); std::get<1>(edge),
[](const auto &data) { return data.forward; });
// If we didn't find one there, the we might be looking at a part of the path that
// was found using the backward search. Here, we flip the node order (.second,
// .first) and only consider edges with the `.backward` flag.
if (SPECIAL_EDGEID == smaller_edge_id)
{
smaller_edge_id =
facade.FindSmallestEdge(std::get<1>(edge),
std::get<0>(edge),
[](const auto &data) { return data.backward; });
}
// If we didn't find anything *still*, then something is broken and someone has
// called this function with bad values.
BOOST_ASSERT_MSG(smaller_edge_id != SPECIAL_EDGEID, "Invalid smaller edge ID");
const auto &data = facade.GetEdgeData(smaller_edge_id);
BOOST_ASSERT_MSG(data.weight != std::numeric_limits<EdgeWeight>::max(),
"edge weight invalid");
// If the edge is a shortcut, we need to add the two halfs to the stack.
if (data.shortcut)
{ // unpack
const NodeID middle_node_id = data.turn_id;
// Note the order here - we're adding these to a stack, so we
// want the first->middle to get visited before middle->second
recursion_stack.emplace(edge);
recursion_stack.emplace(middle_node_id, std::get<1>(edge), false);
recursion_stack.emplace(std::get<0>(edge), middle_node_id, false);
}
else
{
auto temp = std::make_tuple(
std::get<0>(edge), std::get<1>(edge), facade.GetExcludeIndex());
// compute the distance here and put it onto the distance stack using method
// similar to annotatePath but smaller
EdgeDistance distance = computeEdgeDistance(facade, std::get<0>(edge));
distance_stack.emplace(distance);
unpacking_cache.AddEdge(temp, distance);
}
} }
} }
else else
{ // the edge has already been processed. this means that there are enough values in the {
// the edge has already been processed. this means that there are enough values in the
// distances stack // distances stack
BOOST_ASSERT_MSG(distance_stack.size() >= 2, BOOST_ASSERT_MSG(distance_stack.size() >= 2,
@ -375,6 +508,9 @@ EdgeDistance calculateEBGNodeAnnotations(const DataFacade<Algorithm> &facade,
distance_stack.pop(); distance_stack.pop();
EdgeDistance distance = distance1 + distance2; EdgeDistance distance = distance1 + distance2;
distance_stack.emplace(distance); distance_stack.emplace(distance);
unpacking_cache.AddEdge(
std::make_tuple(std::get<0>(edge), std::get<1>(edge), facade.GetExcludeIndex()),
distance);
} }
} }

View File

@ -2,6 +2,7 @@
#define SEARCH_ENGINE_DATA_HPP #define SEARCH_ENGINE_DATA_HPP
#include "engine/algorithm.hpp" #include "engine/algorithm.hpp"
#include "engine/unpacking_cache.hpp"
#include "util/query_heap.hpp" #include "util/query_heap.hpp"
#include "util/typedefs.hpp" #include "util/typedefs.hpp"
@ -46,6 +47,7 @@ template <> struct SearchEngineData<routing_algorithms::ch::Algorithm>
using SearchEngineHeapPtr = boost::thread_specific_ptr<QueryHeap>; using SearchEngineHeapPtr = boost::thread_specific_ptr<QueryHeap>;
using ManyToManyHeapPtr = boost::thread_specific_ptr<ManyToManyQueryHeap>; using ManyToManyHeapPtr = boost::thread_specific_ptr<ManyToManyQueryHeap>;
using UnpackingCachePtr = boost::thread_specific_ptr<UnpackingCache>;
static SearchEngineHeapPtr forward_heap_1; static SearchEngineHeapPtr forward_heap_1;
static SearchEngineHeapPtr reverse_heap_1; static SearchEngineHeapPtr reverse_heap_1;
@ -54,6 +56,7 @@ template <> struct SearchEngineData<routing_algorithms::ch::Algorithm>
static SearchEngineHeapPtr forward_heap_3; static SearchEngineHeapPtr forward_heap_3;
static SearchEngineHeapPtr reverse_heap_3; static SearchEngineHeapPtr reverse_heap_3;
static ManyToManyHeapPtr many_to_many_heap; static ManyToManyHeapPtr many_to_many_heap;
static UnpackingCachePtr unpacking_cache;
void InitializeOrClearFirstThreadLocalStorage(unsigned number_of_nodes); void InitializeOrClearFirstThreadLocalStorage(unsigned number_of_nodes);
@ -62,6 +65,8 @@ template <> struct SearchEngineData<routing_algorithms::ch::Algorithm>
void InitializeOrClearThirdThreadLocalStorage(unsigned number_of_nodes); void InitializeOrClearThirdThreadLocalStorage(unsigned number_of_nodes);
void InitializeOrClearManyToManyThreadLocalStorage(unsigned number_of_nodes); void InitializeOrClearManyToManyThreadLocalStorage(unsigned number_of_nodes);
void InitializeOrClearUnpackingCacheThreadLocalStorage(unsigned timestamp);
}; };
struct MultiLayerDijkstraHeapData struct MultiLayerDijkstraHeapData

View File

@ -0,0 +1,113 @@
#ifndef UNPACKING_CACHE_HPP
#define UNPACKING_CACHE_HPP
#include <boost/optional/optional_io.hpp>
#include <boost/thread.hpp>
#include "../../third_party/compute_detail/lru_cache.hpp"
#include "util/typedefs.hpp"
// sizeof size_t: 8
// sizeof unsigned: 4
// sizeof unchar: 1
// sizeof uint32: 4
namespace osrm
{
namespace engine
{
typedef unsigned char ExcludeIndex;
typedef unsigned Timestamp;
typedef std::tuple<NodeID, NodeID, unsigned char> Key;
class UnpackingCache
{
private:
boost::compute::detail::lru_cache<std::tuple<NodeID, NodeID, unsigned char>, EdgeDuration> m_cache;
unsigned m_current_data_timestamp = 0;
public:
// TO FIGURE OUT HOW MANY LINES TO INITIALIZE CACHE TO:
// Assume max cache size is 500mb (see bottom of OP here:
// https://github.com/Project-OSRM/osrm-backend/issues/4798#issue-288608332)
// LRU CACHE IMPLEMENTATION HAS THESE TWO STORAGE CONTAINERS
// Key is of size: std::uint32_t * 2 + (unsigned char) * 1
// = 4 * 2 + 1 * 1 = 9
// map: n * Key + n * EdgeDuration
// = n * 9 bytes + n * std::int32_t
// = n * 9 bytes + n * 4 bytes
// = n * 13 bytes
// list: n * Key
// = n * 9 bytes
// Total = n * (13 + 9) = n * 22 bytes
// Total cache size: 500 mb = 500 * 1024 *1024 bytes = 524288000 bytes
// THREAD LOCAL STORAGE (500 mb)
// Number of lines we need = 524288000 / 22 / number of threads = 23831272 / number of threads
// 16 threads: 23831272 / 16 = 1489454
// 8 threads: 23831272 / 8 = 2978909
// 4 threads: 23831272 / 4 = 5957818
// 2 threads: 23831272 / 2 = 11915636
// THREAD LOCAL STORAGE (1024 mb)
// Number of lines we need = 1073741824 / 22 / number of threads = 48806446 / number of threads
// 16 threads: 48806446 / 16 = 3050402
// 8 threads: 48806446 / 8 = 6100805
// 4 threads: 48806446 / 4 = 12201611
// 2 threads: 48806446 / 2 = 24403223
// LRU CACHE IMPLEMENTATION HAS THESE TWO STORAGE CONTAINERS
// Key is of size: std::uint32_t * 2 + (unsigned char) * 1 + unsigned * 1
// = 4 * 2 + 1 * 1 + 4 * 1 = 13
// map: n * Key + n * EdgeDuration
// = n * 13 bytes + n * std::int32_t
// = n * 13 bytes + n * 4 bytes
// = n * 17 bytes
// list: n * Key
// = n * 13 bytes
// Total = n * (17 + 13) = n * 30 bytes
// Total cache size: 500 mb = 500 * 1024 *1024 bytes = 524288000 bytes
// Total cache size: 1024 mb = 1024 * 1024 *1024 bytes = 1073741824 bytes
// Total cache size: 250 mb = 250 * 1024 *1024 bytes = 262144000 bytes
// SHARED STORAGE CACHE
// Number of lines for shared storage cache 1024 mb = 524288000 / 30 = 17476266
// Number of lines for shared storage cache 500 mb = 1073741824 / 30 = 35791394
// Number of lines for shared storage cache 250 mb = 262144000 / 30 = 8738133
UnpackingCache(unsigned timestamp) : m_cache(8738133), m_current_data_timestamp(timestamp){};
UnpackingCache(std::size_t cache_size, unsigned timestamp)
: m_cache(cache_size), m_current_data_timestamp(timestamp){};
void Clear(unsigned new_data_timestamp)
{
if (m_current_data_timestamp != new_data_timestamp)
{
m_cache.clear();
m_current_data_timestamp = new_data_timestamp;
}
}
bool IsEdgeInCache(std::tuple<NodeID, NodeID, unsigned char> edge)
{
return m_cache.contains(edge);
}
void AddEdge(std::tuple<NodeID, NodeID, unsigned char> edge, EdgeDuration duration)
{
m_cache.insert(edge, duration);
}
EdgeDuration GetDuration(std::tuple<NodeID, NodeID, unsigned char> edge)
{
boost::optional<EdgeDuration> duration = m_cache.get(edge);
return duration ? *duration : MAXIMAL_EDGE_DURATION;
}
};
} // engine
} // osrm
#endif // UNPACKING_CACHE_HPP

View File

@ -335,6 +335,10 @@ manyToManySearch(SearchEngineData<ch::Algorithm> &engine_working_data,
std::vector<NodeID> middle_nodes_table(number_of_entries, SPECIAL_NODEID); std::vector<NodeID> middle_nodes_table(number_of_entries, SPECIAL_NODEID);
std::vector<NodeBucket> search_space_with_buckets; std::vector<NodeBucket> search_space_with_buckets;
std::vector<NodeID> packed_leg;
engine_working_data.InitializeOrClearUnpackingCacheThreadLocalStorage(
facade.GetTimestamp()); // always pass in the timestamp and clear if it's different
// Populate buckets with paths from all accessible nodes to destinations via backward searches // Populate buckets with paths from all accessible nodes to destinations via backward searches
for (std::uint32_t column_index = 0; column_index < target_indices.size(); ++column_index) for (std::uint32_t column_index = 0; column_index < target_indices.size(); ++column_index)

View File

@ -14,6 +14,7 @@ SearchEngineData<CH>::SearchEngineHeapPtr SearchEngineData<CH>::reverse_heap_2;
SearchEngineData<CH>::SearchEngineHeapPtr SearchEngineData<CH>::forward_heap_3; SearchEngineData<CH>::SearchEngineHeapPtr SearchEngineData<CH>::forward_heap_3;
SearchEngineData<CH>::SearchEngineHeapPtr SearchEngineData<CH>::reverse_heap_3; SearchEngineData<CH>::SearchEngineHeapPtr SearchEngineData<CH>::reverse_heap_3;
SearchEngineData<CH>::ManyToManyHeapPtr SearchEngineData<CH>::many_to_many_heap; SearchEngineData<CH>::ManyToManyHeapPtr SearchEngineData<CH>::many_to_many_heap;
SearchEngineData<CH>::UnpackingCachePtr SearchEngineData<CH>::unpacking_cache;
void SearchEngineData<CH>::InitializeOrClearFirstThreadLocalStorage(unsigned number_of_nodes) void SearchEngineData<CH>::InitializeOrClearFirstThreadLocalStorage(unsigned number_of_nodes)
{ {
@ -90,6 +91,18 @@ void SearchEngineData<CH>::InitializeOrClearManyToManyThreadLocalStorage(unsigne
} }
} }
void SearchEngineData<CH>::InitializeOrClearUnpackingCacheThreadLocalStorage(unsigned timestamp)
{
    // Lazily create the thread-local unpacking cache on first use; on later
    // calls hand the timestamp to the cache so it can drop stale entries.
    if (unpacking_cache.get() == nullptr)
    {
        unpacking_cache.reset(new UnpackingCache(timestamp));
    }
    else
    {
        unpacking_cache->Clear(timestamp);
    }
}
// MLD // MLD
using MLD = routing_algorithms::mld::Algorithm; using MLD = routing_algorithms::mld::Algorithm;
SearchEngineData<MLD>::SearchEngineHeapPtr SearchEngineData<MLD>::forward_heap_1; SearchEngineData<MLD>::SearchEngineHeapPtr SearchEngineData<MLD>::forward_heap_1;

139
third_party/compute_detail/lru_cache.hpp vendored Normal file
View File

@ -0,0 +1,139 @@
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#ifndef BOOST_COMPUTE_DETAIL_LRU_CACHE_HPP
#define BOOST_COMPUTE_DETAIL_LRU_CACHE_HPP
#include <map>
#include <list>
#include <utility>
#include <boost/optional.hpp>
namespace boost {
namespace compute {
namespace detail {
// a cache which evicts the least recently used item when it is full
template<class Key, class Value>
class lru_cache
{
public:
    typedef Key key_type;
    typedef Value value_type;
    typedef std::list<key_type> list_type;
    typedef std::map<
                key_type,
                std::pair<value_type, typename list_type::iterator>
            > map_type;

    lru_cache(size_t capacity)
        : m_capacity(capacity)
    {
    }

    ~lru_cache()
    {
    }

    size_t size() const
    {
        return m_map.size();
    }

    size_t capacity() const
    {
        return m_capacity;
    }

    bool empty() const
    {
        return m_map.empty();
    }

    bool contains(const key_type &key)
    {
        return m_map.find(key) != m_map.end();
    }

    // Inserts key/value. If the key is already present the stored value is
    // deliberately left untouched (original Boost.Compute semantics).
    void insert(const key_type &key, const value_type &value)
    {
        typename map_type::iterator i = m_map.find(key);
        if(i == m_map.end()){
            // insert item into the cache, but first check if it is full
            if(size() >= m_capacity){
                // cache is full, evict the least recently used item
                evict();
            }

            // insert the new item
            m_list.push_front(key);
            m_map[key] = std::make_pair(value, m_list.begin());
        }
    }

    // Looks up key; on a hit the entry is promoted to most-recently-used.
    boost::optional<value_type> get(const key_type &key)
    {
        // lookup value in the cache
        typename map_type::iterator i = m_map.find(key);
        if(i == m_map.end()){
            // value not in cache
            return boost::none;
        }

        // return the value, but first update its place in the most
        // recently used list
        typename list_type::iterator j = i->second.second;
        if(j != m_list.begin()){
            // move item to the front of the most recently used list
            m_list.erase(j);
            m_list.push_front(key);

            // update the stored iterator in place; std::map iterators stay
            // valid, so this avoids the second O(log n) lookup and the value
            // copy the original did via m_map[key] = std::make_pair(value, j)
            i->second.second = m_list.begin();
        }

        // the entry is now the most recently used one
        return i->second.first;
    }

    void clear()
    {
        m_map.clear();
        m_list.clear();
    }

private:
    void evict()
    {
        // nothing to evict from an empty cache (guards against capacity == 0,
        // where the original would decrement end() of an empty list: UB)
        if(m_list.empty()){
            return;
        }

        // evict item from the end of most recently used list
        typename list_type::iterator i = --m_list.end();
        m_map.erase(*i);
        m_list.erase(i);
    }

private:
    map_type m_map;
    list_type m_list;
    size_t m_capacity;
};
} // end detail namespace
} // end compute namespace
} // end boost namespace
#endif // BOOST_COMPUTE_DETAIL_LRU_CACHE_HPP

View File

@ -0,0 +1,86 @@
#include "engine/unpacking_cache.hpp"
#include <boost/test/test_case_template.hpp>
#include <boost/test/unit_test.hpp>
#include <algorithm>
#include <iterator>
#include <vector>
BOOST_AUTO_TEST_SUITE(unpacking_cache)
using namespace osrm;
using namespace osrm::engine;
BOOST_AUTO_TEST_CASE(add_edge_and_check_existence)
{
    // Arrange: a single-entry cache stamped with a fixed data timestamp
    unsigned data_timestamp = 1522782542;
    UnpackingCache cache(1, data_timestamp);

    auto edge = std::make_tuple(1, 1, 1);
    auto duration = 1;

    // Act: store one edge
    cache.AddEdge(edge, duration);

    // Assert: the stored edge is found, an unknown edge is not,
    // and the stored duration round-trips
    BOOST_CHECK(cache.IsEdgeInCache(edge) == true);
    BOOST_CHECK(cache.IsEdgeInCache(std::make_tuple(2, 2, 2)) == false);

    auto cached_duration = cache.GetDuration(edge);
    BOOST_CHECK_EQUAL(cached_duration, duration);
}
BOOST_AUTO_TEST_CASE(cache_invalidation)
{
    // Arrange: capacity of one entry, so the second insert evicts the first
    unsigned data_timestamp = 1522782542;
    UnpackingCache cache(1, data_timestamp);

    auto first_edge = std::make_tuple(1, 1, 1);
    auto first_duration = 1;
    auto second_edge = std::make_tuple(2, 2, 2);
    auto second_duration = 2;

    // Act: the second AddEdge pushes the first entry out (LRU eviction)
    cache.AddEdge(first_edge, first_duration);
    cache.AddEdge(second_edge, second_duration);

    // Assert: the evicted edge reports the sentinel duration, while the
    // most recently inserted edge is still retrievable
    auto looked_up = cache.GetDuration(first_edge);
    BOOST_CHECK_EQUAL(looked_up, MAXIMAL_EDGE_DURATION);
    looked_up = cache.GetDuration(second_edge);
    BOOST_CHECK_EQUAL(looked_up, second_duration);
}
BOOST_AUTO_TEST_CASE(new_data)
{
    // Arrange: two timestamps to simulate a data update between requests
    unsigned old_timestamp = 1522782542;
    unsigned new_timestamp = 1522782543;
    UnpackingCache cache(1, old_timestamp);

    auto stale_edge = std::make_tuple(1, 2, 3);
    auto stale_duration = 1;
    auto fresh_edge = std::make_tuple(2, 3, 4);
    auto fresh_duration = 2;

    // Act: insert, invalidate via the newer timestamp, insert again
    cache.AddEdge(stale_edge, stale_duration);
    cache.Clear(new_timestamp);
    cache.AddEdge(fresh_edge, fresh_duration);

    // Assert: only the edge added after the clear survives
    BOOST_CHECK(cache.IsEdgeInCache(stale_edge) == false);
    BOOST_CHECK(cache.IsEdgeInCache(fresh_edge) == true);
    BOOST_CHECK(cache.IsEdgeInCache(std::make_tuple(2, 2, 2)) == false);

    auto looked_up = cache.GetDuration(fresh_edge);
    BOOST_CHECK_EQUAL(looked_up, fresh_duration);
}
BOOST_AUTO_TEST_SUITE_END()